## Task : Predict the churn score for a website based on the features provided in the dataset.

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
train = pd.read_csv(r'F:\Projects\Hackathons\HE\churn_risk_rate\dataset\train.csv')
test = pd.read_csv(r'F:\Projects\Hackathons\HE\churn_risk_rate\dataset\test.csv')

# train = pd.read_csv(r'/content/drive/MyDrive/Projects/churn_risk_rate/dataset/train.csv')
# test = pd.read_csv(r'/content/drive/MyDrive/Projects/churn_risk_rate/dataset/test.csv')


# Dropping unimportant columns
output = pd.DataFrame(columns = ['customer_id','churn_risk_score'])
output['customer_id'] = test['customer_id']
cols_to_drop = ['customer_id', 'Name', 'security_no']
train = train.drop(cols_to_drop, axis = 1)
test = test.drop(cols_to_drop, axis = 1)


### Imputation for Missing Values

In [3]:
from sklearn.impute import SimpleImputer
imputer_gender = SimpleImputer(missing_values = 'Unknown', strategy='most_frequent')
train.loc[:,'gender'] = imputer_gender.fit_transform(train.loc[:,'gender'].to_numpy().reshape(-1,1))

test.loc[:,'gender'] = imputer_gender.transform(test.loc[:,'gender'].to_numpy().reshape(-1,1))

In [4]:
from sklearn.impute import SimpleImputer
imputer_medium_of_operation = SimpleImputer(missing_values = '?', strategy='most_frequent')
train.loc[:,'medium_of_operation'] = imputer_medium_of_operation.fit_transform(train.loc[:,'medium_of_operation'].to_numpy().reshape(-1,1))
test.loc[:,'medium_of_operation'] = imputer_medium_of_operation.transform(test.loc[:,'medium_of_operation'].to_numpy().reshape(-1,1))

In [5]:
train. avg_frequency_login_days = train.avg_frequency_login_days.replace({'Error':np.nan})
train.avg_frequency_login_days = pd.to_numeric(train.avg_frequency_login_days)

test.avg_frequency_login_days = test.avg_frequency_login_days.replace({'Error':np.nan})
test.avg_frequency_login_days = pd.to_numeric(test.avg_frequency_login_days)

In [6]:
from sklearn.impute import SimpleImputer
imputer_avg_f_login_days = SimpleImputer(missing_values = np.nan, strategy='mean')
train.loc[:,'avg_frequency_login_days'] = imputer_avg_f_login_days.fit_transform(train.loc[:,'avg_frequency_login_days'].to_numpy().reshape(-1,1))

test.loc[:,'avg_frequency_login_days'] = imputer_avg_f_login_days.transform(test.loc[:,'avg_frequency_login_days'].to_numpy().reshape(-1,1))

In [7]:
for i in range(train.shape[0]): 
    if train.loc[i, 'joined_through_referral'] == '?' : 
        if train.loc[i, 'referral_id'] == 'xxxxxxxx' : 
            train.loc[i, 'joined_through_referral'] = 'No'
        else : 
            train.loc[i, 'joined_through_referral'] = 'Yes'

for i in range(test.shape[0]):           
    if test.loc[i, 'joined_through_referral'] == '?' : 
        if test.loc[i, 'referral_id'] == 'xxxxxxxx' : 
            test.loc[i, 'joined_through_referral'] = 'No'
        else : 
            test.loc[i, 'joined_through_referral'] = 'Yes'

In [8]:
cols  = ['region_category', 'preferred_offer_types']

imputer_cat = SimpleImputer(missing_values = np.nan, strategy = 'most_frequent')
train.loc[:,cols] = imputer_cat.fit_transform(train.loc[:,cols])
test.loc[:,cols] = imputer_cat.fit_transform(test.loc[:,cols])

In [9]:
imputer_points_in_wallet = SimpleImputer(missing_values = np.nan, strategy = 'mean')

train.loc[:,'points_in_wallet'] = imputer_points_in_wallet.fit_transform(train.loc[:,'points_in_wallet'].to_numpy().reshape(-1,1))
test.loc[:,'points_in_wallet'] = imputer_points_in_wallet.fit_transform(test.loc[:,'points_in_wallet'].to_numpy().reshape(-1,1))

In [10]:
train = train.drop(['referral_id', 'joining_date', 'last_visit_time'], axis = 1)
test = test.drop(['referral_id', 'joining_date', 'last_visit_time'], axis = 1)

In [11]:
train.avg_time_spent = abs(train.avg_time_spent)
test.avg_time_spent = abs(test.avg_time_spent)

train.avg_frequency_login_days = abs(train.avg_frequency_login_days)
test.avg_frequency_login_days = abs(test.avg_frequency_login_days)

for i in range(train.shape[0]) :
    if train.loc[i,'days_since_last_login'] < 0 : 
        train.loc[i,'days_since_last_login'] = 0

for i in range(test.shape[0]) : 
    if test.loc[i,'days_since_last_login'] < 0 : 
        test.loc[i,'days_since_last_login'] = 0

# Encoding

In [12]:
X_train, y_train = train.iloc[:,:-1], train.iloc[:,-1]
X_test = test

In [13]:
cols_2_onehotencode = ['gender', 'region_category', 'membership_category',
       'joined_through_referral', 'preferred_offer_types',
       'medium_of_operation', 'internet_option', 'used_special_discount',
       'offer_application_preference', 'past_complaint', 'complaint_status',
       'feedback']


from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),cols_2_onehotencode)], remainder='passthrough')
X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)


In [14]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [15]:
# y_train = y_train.replace(-1,0)

In [16]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from datetime import datetime
import pickle

start = datetime.now()

param = {
    'C' : np.linspace(0.001,100,20), 
    'penalty' : ['l1','l2']
        }

grids = GridSearchCV(LogisticRegression(multi_class='multinomial', solver = 'saga', max_iter = 4000) ,param_grid= param, cv = 3 ,scoring = 'f1_macro', verbose = 10).fit(X_train, y_train)

end = datetime.now()

with open(r"F:\Projects\Hackathons\HE\churn_risk_rate\pickle\{}.pkl".format('multinomial_log_reg'), "wb") as f:   
    pickle.dump(grids, f)

Fitting 3 folds for each of 40 candidates, totalling 120 fits
[CV] C=0.001, penalty=l1 .............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.6s remaining:    0.0s


[CV] ................. C=0.001, penalty=l1, score=0.384, total=   2.7s
[CV] C=0.001, penalty=l1 .............................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    6.0s remaining:    0.0s


[CV] ................. C=0.001, penalty=l1, score=0.380, total=   3.4s
[CV] C=0.001, penalty=l1 .............................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    9.2s remaining:    0.0s


[CV] ................. C=0.001, penalty=l1, score=0.386, total=   3.1s
[CV] C=0.001, penalty=l2 .............................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   11.6s remaining:    0.0s


[CV] ................. C=0.001, penalty=l2, score=0.545, total=   2.4s
[CV] C=0.001, penalty=l2 .............................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   13.9s remaining:    0.0s


[CV] ................. C=0.001, penalty=l2, score=0.542, total=   2.3s
[CV] C=0.001, penalty=l2 .............................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   16.2s remaining:    0.0s


[CV] ................. C=0.001, penalty=l2, score=0.535, total=   2.3s
[CV] C=5.264105263157894, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:  7.1min remaining:    0.0s


[CV] ..... C=5.264105263157894, penalty=l1, score=0.552, total= 6.8min
[CV] C=5.264105263157894, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed: 14.1min remaining:    0.0s


[CV] ..... C=5.264105263157894, penalty=l1, score=0.555, total= 7.0min
[CV] C=5.264105263157894, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 21.3min remaining:    0.0s


[CV] ..... C=5.264105263157894, penalty=l1, score=0.548, total= 7.2min
[CV] C=5.264105263157894, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=5.264105263157894, penalty=l2, score=0.552, total=  26.7s
[CV] C=5.264105263157894, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=5.264105263157894, penalty=l2, score=0.555, total=  37.4s
[CV] C=5.264105263157894, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=5.264105263157894, penalty=l2, score=0.548, total=  31.6s
[CV] C=10.527210526315788, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=10.527210526315788, penalty=l1, score=0.552, total=12.8min
[CV] C=10.527210526315788, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=10.527210526315788, penalty=l1, score=0.555, total=12.4min
[CV] C=10.527210526315788, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=10.527210526315788, penalty=l1, score=0.548, total=12.5min
[CV] C=10.527210526315788, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=10.527210526315788, penalty=l2, score=0.552, total=  27.9s
[CV] C=10.527210526315788, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=10.527210526315788, penalty=l2, score=0.555, total=  28.7s
[CV] C=10.527210526315788, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=10.527210526315788, penalty=l2, score=0.548, total=  28.7s
[CV] C=15.790315789473683, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=15.790315789473683, penalty=l1, score=0.552, total= 6.7min
[CV] C=15.790315789473683, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=15.790315789473683, penalty=l1, score=0.555, total= 7.6min
[CV] C=15.790315789473683, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=15.790315789473683, penalty=l1, score=0.548, total= 6.3min
[CV] C=15.790315789473683, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=15.790315789473683, penalty=l2, score=0.552, total=  33.4s
[CV] C=15.790315789473683, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=15.790315789473683, penalty=l2, score=0.555, total=  33.9s
[CV] C=15.790315789473683, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=15.790315789473683, penalty=l2, score=0.548, total=  33.4s
[CV] C=21.053421052631577, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=21.053421052631577, penalty=l1, score=0.552, total= 7.1min
[CV] C=21.053421052631577, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=21.053421052631577, penalty=l1, score=0.555, total= 7.2min
[CV] C=21.053421052631577, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=21.053421052631577, penalty=l1, score=0.548, total= 6.9min
[CV] C=21.053421052631577, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=21.053421052631577, penalty=l2, score=0.552, total=  38.7s
[CV] C=21.053421052631577, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=21.053421052631577, penalty=l2, score=0.555, total=  37.7s
[CV] C=21.053421052631577, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=21.053421052631577, penalty=l2, score=0.548, total=  38.7s
[CV] C=26.31652631578947, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=26.31652631578947, penalty=l1, score=0.552, total= 4.4min
[CV] C=26.31652631578947, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=26.31652631578947, penalty=l1, score=0.555, total= 7.3min
[CV] C=26.31652631578947, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=26.31652631578947, penalty=l1, score=0.548, total= 4.7min
[CV] C=26.31652631578947, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=26.31652631578947, penalty=l2, score=0.552, total=  42.4s
[CV] C=26.31652631578947, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=26.31652631578947, penalty=l2, score=0.555, total=  41.8s
[CV] C=26.31652631578947, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=26.31652631578947, penalty=l2, score=0.548, total=  44.3s
[CV] C=31.579631578947367, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=31.579631578947367, penalty=l1, score=0.552, total= 4.0min
[CV] C=31.579631578947367, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=31.579631578947367, penalty=l1, score=0.555, total= 4.3min
[CV] C=31.579631578947367, penalty=l1 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=31.579631578947367, penalty=l1, score=0.548, total= 3.9min
[CV] C=31.579631578947367, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=31.579631578947367, penalty=l2, score=0.552, total=  46.0s
[CV] C=31.579631578947367, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=31.579631578947367, penalty=l2, score=0.555, total=  44.2s
[CV] C=31.579631578947367, penalty=l2 ................................


  'precision', 'predicted', average, warn_for)


[CV] .... C=31.579631578947367, penalty=l2, score=0.548, total=  45.2s
[CV] C=36.84273684210525, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=36.84273684210525, penalty=l1, score=0.552, total= 3.2min
[CV] C=36.84273684210525, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=36.84273684210525, penalty=l1, score=0.555, total= 3.2min
[CV] C=36.84273684210525, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=36.84273684210525, penalty=l1, score=0.548, total= 3.2min
[CV] C=36.84273684210525, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=36.84273684210525, penalty=l2, score=0.552, total=  48.1s
[CV] C=36.84273684210525, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=36.84273684210525, penalty=l2, score=0.555, total=  49.5s
[CV] C=36.84273684210525, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=36.84273684210525, penalty=l2, score=0.548, total=  48.0s
[CV] C=42.10584210526315, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=42.10584210526315, penalty=l1, score=0.552, total= 3.1min
[CV] C=42.10584210526315, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=42.10584210526315, penalty=l1, score=0.555, total= 3.0min
[CV] C=42.10584210526315, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=42.10584210526315, penalty=l1, score=0.548, total= 3.0min
[CV] C=42.10584210526315, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=42.10584210526315, penalty=l2, score=0.552, total=  49.5s
[CV] C=42.10584210526315, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=42.10584210526315, penalty=l2, score=0.555, total=  49.8s
[CV] C=42.10584210526315, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=42.10584210526315, penalty=l2, score=0.548, total=  51.1s
[CV] C=47.36894736842105, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=47.36894736842105, penalty=l1, score=0.552, total= 2.9min
[CV] C=47.36894736842105, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=47.36894736842105, penalty=l1, score=0.555, total= 3.0min
[CV] C=47.36894736842105, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=47.36894736842105, penalty=l1, score=0.548, total= 2.9min
[CV] C=47.36894736842105, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=47.36894736842105, penalty=l2, score=0.552, total=  52.0s
[CV] C=47.36894736842105, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=47.36894736842105, penalty=l2, score=0.555, total=  53.1s
[CV] C=47.36894736842105, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=47.36894736842105, penalty=l2, score=0.548, total=  53.1s
[CV] C=52.63205263157894, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=52.63205263157894, penalty=l1, score=0.552, total= 2.8min
[CV] C=52.63205263157894, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=52.63205263157894, penalty=l1, score=0.555, total= 2.9min
[CV] C=52.63205263157894, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=52.63205263157894, penalty=l1, score=0.548, total= 2.9min
[CV] C=52.63205263157894, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=52.63205263157894, penalty=l2, score=0.552, total=  54.4s
[CV] C=52.63205263157894, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=52.63205263157894, penalty=l2, score=0.555, total=  54.8s
[CV] C=52.63205263157894, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=52.63205263157894, penalty=l2, score=0.548, total=  55.6s
[CV] C=57.89515789473683, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=57.89515789473683, penalty=l1, score=0.552, total= 2.9min
[CV] C=57.89515789473683, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=57.89515789473683, penalty=l1, score=0.555, total= 2.0min
[CV] C=57.89515789473683, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=57.89515789473683, penalty=l1, score=0.548, total= 1.4min
[CV] C=57.89515789473683, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=57.89515789473683, penalty=l2, score=0.552, total=  30.5s
[CV] C=57.89515789473683, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=57.89515789473683, penalty=l2, score=0.555, total=  30.6s
[CV] C=57.89515789473683, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=57.89515789473683, penalty=l2, score=0.548, total=  30.9s
[CV] C=63.15826315789473, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=63.15826315789473, penalty=l1, score=0.552, total= 1.4min
[CV] C=63.15826315789473, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=63.15826315789473, penalty=l1, score=0.555, total= 1.4min
[CV] C=63.15826315789473, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=63.15826315789473, penalty=l1, score=0.548, total= 1.4min
[CV] C=63.15826315789473, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=63.15826315789473, penalty=l2, score=0.552, total=  31.1s
[CV] C=63.15826315789473, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=63.15826315789473, penalty=l2, score=0.555, total=  31.3s
[CV] C=63.15826315789473, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=63.15826315789473, penalty=l2, score=0.548, total=  31.5s
[CV] C=68.42136842105263, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=68.42136842105263, penalty=l1, score=0.552, total= 1.4min
[CV] C=68.42136842105263, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=68.42136842105263, penalty=l1, score=0.555, total= 1.4min
[CV] C=68.42136842105263, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=68.42136842105263, penalty=l1, score=0.548, total= 1.4min
[CV] C=68.42136842105263, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=68.42136842105263, penalty=l2, score=0.552, total=  31.9s
[CV] C=68.42136842105263, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=68.42136842105263, penalty=l2, score=0.555, total=  32.2s
[CV] C=68.42136842105263, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=68.42136842105263, penalty=l2, score=0.548, total=  32.3s
[CV] C=73.68447368421052, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=73.68447368421052, penalty=l1, score=0.552, total= 1.4min
[CV] C=73.68447368421052, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=73.68447368421052, penalty=l1, score=0.555, total= 1.4min
[CV] C=73.68447368421052, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=73.68447368421052, penalty=l1, score=0.548, total= 1.4min
[CV] C=73.68447368421052, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=73.68447368421052, penalty=l2, score=0.552, total=  32.3s
[CV] C=73.68447368421052, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=73.68447368421052, penalty=l2, score=0.555, total=  33.7s
[CV] C=73.68447368421052, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=73.68447368421052, penalty=l2, score=0.548, total=  33.0s
[CV] C=78.94757894736841, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=78.94757894736841, penalty=l1, score=0.552, total= 1.4min
[CV] C=78.94757894736841, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=78.94757894736841, penalty=l1, score=0.555, total= 1.4min
[CV] C=78.94757894736841, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=78.94757894736841, penalty=l1, score=0.548, total= 1.3min
[CV] C=78.94757894736841, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=78.94757894736841, penalty=l2, score=0.552, total=  33.3s
[CV] C=78.94757894736841, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=78.94757894736841, penalty=l2, score=0.555, total=  33.2s
[CV] C=78.94757894736841, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=78.94757894736841, penalty=l2, score=0.548, total=  33.5s
[CV] C=84.21068421052631, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=84.21068421052631, penalty=l1, score=0.552, total= 1.3min
[CV] C=84.21068421052631, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=84.21068421052631, penalty=l1, score=0.555, total= 1.4min
[CV] C=84.21068421052631, penalty=l1 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=84.21068421052631, penalty=l1, score=0.548, total= 1.3min
[CV] C=84.21068421052631, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=84.21068421052631, penalty=l2, score=0.552, total=  33.7s
[CV] C=84.21068421052631, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=84.21068421052631, penalty=l2, score=0.555, total=  34.2s
[CV] C=84.21068421052631, penalty=l2 .................................


  'precision', 'predicted', average, warn_for)


[CV] ..... C=84.21068421052631, penalty=l2, score=0.548, total=  34.5s
[CV] C=89.4737894736842, penalty=l1 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=89.4737894736842, penalty=l1, score=0.552, total= 1.3min
[CV] C=89.4737894736842, penalty=l1 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=89.4737894736842, penalty=l1, score=0.555, total= 1.4min
[CV] C=89.4737894736842, penalty=l1 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=89.4737894736842, penalty=l1, score=0.548, total= 1.4min
[CV] C=89.4737894736842, penalty=l2 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=89.4737894736842, penalty=l2, score=0.552, total=  34.1s
[CV] C=89.4737894736842, penalty=l2 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=89.4737894736842, penalty=l2, score=0.555, total=  34.8s
[CV] C=89.4737894736842, penalty=l2 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=89.4737894736842, penalty=l2, score=0.548, total=  34.6s
[CV] C=94.7368947368421, penalty=l1 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=94.7368947368421, penalty=l1, score=0.552, total= 1.3min
[CV] C=94.7368947368421, penalty=l1 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=94.7368947368421, penalty=l1, score=0.555, total= 1.4min
[CV] C=94.7368947368421, penalty=l1 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=94.7368947368421, penalty=l1, score=0.548, total= 1.3min
[CV] C=94.7368947368421, penalty=l2 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=94.7368947368421, penalty=l2, score=0.552, total=  34.8s
[CV] C=94.7368947368421, penalty=l2 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=94.7368947368421, penalty=l2, score=0.555, total=  35.6s
[CV] C=94.7368947368421, penalty=l2 ..................................


  'precision', 'predicted', average, warn_for)


[CV] ...... C=94.7368947368421, penalty=l2, score=0.548, total=  35.0s
[CV] C=100.0, penalty=l1 .............................................


  'precision', 'predicted', average, warn_for)


[CV] ................. C=100.0, penalty=l1, score=0.552, total= 1.3min
[CV] C=100.0, penalty=l1 .............................................


  'precision', 'predicted', average, warn_for)


[CV] ................. C=100.0, penalty=l1, score=0.555, total= 1.4min
[CV] C=100.0, penalty=l1 .............................................


  'precision', 'predicted', average, warn_for)


[CV] ................. C=100.0, penalty=l1, score=0.548, total= 1.4min
[CV] C=100.0, penalty=l2 .............................................


  'precision', 'predicted', average, warn_for)


[CV] ................. C=100.0, penalty=l2, score=0.552, total=  34.9s
[CV] C=100.0, penalty=l2 .............................................


  'precision', 'predicted', average, warn_for)


[CV] ................. C=100.0, penalty=l2, score=0.555, total=  35.8s
[CV] C=100.0, penalty=l2 .............................................


  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed: 241.1min finished


[CV] ................. C=100.0, penalty=l2, score=0.548, total=  35.3s


In [17]:
print('------| Time to fit the model : {}|------'.format(end - start))

print('------| Best paramters : {} |------'.format(grids.best_params_))

print('------| Best Score : {} |------'.format(grids.best_score_))

------| Time to fit the model : 4:06:28.862207|------
------| Best paramters : {'C': 5.264105263157894, 'penalty': 'l1'} |------
------| Best Score : 0.5514638552132007 |------


In [18]:
y_pred = grids.predict(X_test)

In [19]:
output['churn_risk_score'] = y_pred

In [20]:
output.to_csv('F:\Projects\Hackathons\HE\churn_risk_rate\outputs\out_logreg_final.csv', header = True, index = False)

In [21]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_train, grids.predict(X_train))

array([[   0,  116,   41,  359,  181,  466],
       [   0, 2362,  290,    0,    0,    0],
       [   0, 1682, 1059,    0,    0,    0],
       [   0,    0,    0, 9150, 1274,    0],
       [   0,    0,    0, 1280, 3999, 4906],
       [   0,    0,    0,    0,  238, 9589]], dtype=int64)

In [22]:
pd.Series(y_pred).value_counts()

5    7991
3    5835
4    3083
1    2329
2     681
dtype: int64