In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score
from sklearn.linear_model import SGDClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from tensorflow import keras
from tensorflow.keras import layers
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the customer table from the Kaggle dataset mount into a DataFrame.
bank_churners_csv = '/kaggle/input/credit-card-customers/BankChurners.csv'
bank_churners = pd.read_csv(bank_churners_csv)

In [None]:
# Keep every column except the final two.
# NOTE(review): this reassigns `bank_churners`, so running the cell a second
# time removes two more columns; reload the CSV before re-running.
trailing_columns = bank_churners.columns[-2:]
bank_churners = bank_churners.drop(columns = trailing_columns)

In [None]:
# Encode the attrition label as a binary target: 0 = existing, 1 = attrited.
churn_dict = {'Existing Customer':0, 'Attrited Customer':1}
# Plain dict indexing is deliberate: a label missing from the mapping raises
# KeyError instead of silently producing a missing value.
bank_churners['churn'] = [
    churn_dict[str(flag)] for flag in bank_churners['Attrition_Flag']
]
bank_churners.head()

In [None]:
# Pull out the target vector, then remove both label columns from the features.
y = bank_churners['churn']
label_columns = ['churn', 'Attrition_Flag']
bank_churners = bank_churners.drop(columns = label_columns)

In [None]:
# Hold out 33% of the rows as the test set, stratified on the churn label
# and seeded so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    bank_churners,
    y,
    test_size = 0.33,
    stratify = y,
    random_state = 42,
)

In [None]:
# Names of the categorical feature columns.
categorical = [
    'Gender',
    'Education_Level',
    'Marital_Status',
    'Income_Category',
    'Card_Category',
]
# Names of the numeric feature columns.
numeric = [
    'Customer_Age',
    'Dependent_count',
    'Months_on_book',
    'Total_Relationship_Count',
    'Months_Inactive_12_mon',
    'Contacts_Count_12_mon',
    'Credit_Limit',
    'Total_Revolving_Bal',
    'Avg_Open_To_Buy',
    'Total_Amt_Chng_Q4_Q1',
    'Total_Trans_Amt',
    'Total_Trans_Ct',
    'Total_Ct_Chng_Q4_Q1',
    'Avg_Utilization_Ratio',
]
# Slice each split into its categorical and numeric parts; any column not
# listed above is left out of both.
X_train_categorical, X_train_numeric = X_train[categorical], X_train[numeric]
X_test_categorical, X_test_numeric = X_test[categorical], X_test[numeric]

In [None]:
# One-hot encode the categorical columns, fitting on the training split only.
# The encoder keeps its default sparse output and the result is densified with
# .toarray(): the `sparse` keyword was renamed to `sparse_output` and later
# removed, so passing either name ties the cell to specific scikit-learn versions.
encoder = OneHotEncoder()
encoder.fit(X_train_categorical)
# get_feature_names() was replaced by get_feature_names_out(); use whichever
# this scikit-learn version provides. Passing the source column names makes
# both variants label the outputs with the column-name prefix (Gender_...,
# Card_Category_...) rather than positional x0_..., x1_... names.
feature_namer = getattr(encoder, 'get_feature_names_out', None)
if feature_namer is None:
    feature_namer = encoder.get_feature_names
encoded_columns = list(feature_namer(categorical))
# The new frames carry a fresh 0..n-1 index, exactly as before.
X_train_categorical = pd.DataFrame(encoder.transform(X_train_categorical).toarray(),
                                   columns = encoded_columns)
X_test_categorical = pd.DataFrame(encoder.transform(X_test_categorical).toarray(),
                                  columns = encoded_columns)

In [None]:
# Standardise the numeric columns; the scaler is fitted on the training split
# only and then applied to both splits.
scaler = StandardScaler().fit(X_train_numeric)
X_train_numeric, X_test_numeric = (
    pd.DataFrame(scaler.transform(split), columns = numeric)
    for split in (X_train_numeric, X_test_numeric)
)

In [None]:
# Rebuild the design matrices: encoded categoricals first, scaled numerics after.
X_train = pd.concat([X_train_categorical, X_train_numeric], axis = 'columns')
X_test = pd.concat([X_test_categorical, X_test_numeric], axis = 'columns')

In [None]:
# Five-fold stratified splitter used for cross-validation.
folds = StratifiedKFold(
    n_splits = 5,
)

In [None]:
# Support-vector classifier with regularisation parameter C = 10.
svc = SVC(C = 10.0)

In [None]:
# XGBoost classifier: 260 estimators at learning rate 0.1.
xgb = XGBClassifier(
    n_estimators = 260,
    learning_rate = 0.1,
)

In [None]:
# LightGBM classifier: 100 estimators at learning rate 0.1, 21 leaves per tree,
# and 80% of the columns sampled for each tree.
# The keyword was previously misspelled `n_estmators`, so it was not applied
# as the number of estimators.
lgbm = LGBMClassifier(n_estimators = 100, learning_rate = 0.1, num_leaves = 21,
                      colsample_bytree = 0.8)

In [None]:
# Random forest: 201 trees, depth capped at 27, 70% of the features considered
# per split. random_state is fixed (same seed as the data splits) so the fitted
# forest and its recall are reproducible across runs.
rfc = RandomForestClassifier(n_estimators = 201, max_depth = 27, max_features = 0.7,
                             random_state = 42)

In [None]:
# Extra-trees ensemble: 201 trees, depth capped at 25, 90% of the features
# considered per split. random_state is fixed (same seed as the data splits) so
# the fitted ensemble and its recall are reproducible across runs.
etc = ExtraTreesClassifier(n_estimators = 201, max_depth = 25, max_features = 0.9,
                           random_state = 42)

In [None]:
# Carve a stratified 33% validation set out of the training split; it is used
# to monitor the neural network while it trains.
X_train_val, X_test_val, y_train_val, y_test_val = train_test_split(
    X_train,
    y_train,
    test_size = 0.33,
    stratify = y_train,
    random_state = 42,
)

In [None]:
# Build and compile a small fully connected binary classifier:
# two 64-unit ReLU layers, each followed by 30% dropout, and a sigmoid output.
keras.backend.clear_session()


def _dense_regularizer():
    """Return a fresh L1+L2 kernel regularizer (l1=1e-5, l2=1e-3) for one Dense layer."""
    return keras.regularizers.l1_l2(l1 = 1e-5, l2 = 1e-3)


# Take the input width from the design matrix instead of hard-coding 37, so the
# network still builds if the number of encoded columns changes.
n_features = X_train.shape[1]

model = keras.Sequential()
model.add(layers.Dense(64, input_shape = (n_features,), activation = 'relu',
                       kernel_regularizer = _dense_regularizer()))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(64, activation = 'relu',
                       kernel_regularizer = _dense_regularizer()))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(1, activation = 'sigmoid',
                       kernel_regularizer = _dense_regularizer()))
opt = keras.optimizers.Adam(learning_rate = 0.001)
# The metric is named explicitly because the training cell reads the history
# keys 'recall' and 'val_recall'.
model.compile(loss = 'binary_crossentropy',
              metrics = [keras.metrics.Recall(name = 'recall')],
              optimizer = opt)

In [None]:
# Train for 40 epochs on the inner training split, evaluating on the validation
# split after each epoch, and keep only the per-epoch metrics dictionary.
history = model.fit(
    X_train_val,
    y_train_val,
    batch_size = 64,
    epochs = 40,
    validation_data = (X_test_val, y_test_val),
).history
# Per-epoch curves used by the plotting cells.
loss, val_loss = history['loss'], history['val_loss']
recall, val_recall = history['recall'], history['val_recall']

In [None]:
# Plot training loss (dots) against validation loss (line), one point per epoch.
epochs = range(1, len(loss) + 1)
loss_curves = (
    (loss, 'bo', 'Training loss'),
    (val_loss, 'b', 'Validation loss'),
)
for curve, fmt, curve_label in loss_curves:
    plt.plot(epochs, curve, fmt, label=curve_label)
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

In [None]:
# Plot training recall (dots) against validation recall (line), one point per
# epoch. `epochs` is the x-axis range created in the loss-plot cell.
plt.plot(epochs, recall, 'bo', label='Training recall')
plt.plot(epochs, val_recall, 'b', label='Validation recall')
# Title previously read "recoll".
plt.title('Training and validation recall')
plt.xlabel('Epochs')
plt.ylabel('Recall')
plt.legend()

In [None]:
# Mean F1 of the LightGBM model across the stratified folds of the training split.
# NOTE(review): this uses F1 while the final comparison cells report recall —
# confirm that is intended.
fold_scores = cross_val_score(lgbm, X_train, y_train, scoring = 'f1', cv = folds)
score = fold_scores.mean()
score

In [None]:
# Fit the SVC on the full training split and predict labels for the test split.
svc_predictions = svc.fit(X_train, y_train).predict(X_test)

In [None]:
# Fit XGBoost on the full training split, predict the test split, and keep the
# fitted per-feature importances for plotting.
xgb_predictions = xgb.fit(X_train, y_train).predict(X_test)
xgb_importances = xgb.feature_importances_

In [None]:
# Label the XGBoost importances with the feature names and draw them as bars.
xgb_importances = pd.Series(data = xgb_importances, index = X_train.columns)
xgb_importances.plot.bar()

In [None]:
# Fit LightGBM on the full training split, predict the test split, and keep the
# fitted per-feature importances for plotting.
lgbm_predictions = lgbm.fit(X_train, y_train).predict(X_test)
lgbm_importances = lgbm.feature_importances_

In [None]:
# Label the LightGBM importances with the feature names and draw them as bars.
lgbm_importances = pd.Series(data = lgbm_importances, index = X_train.columns)
lgbm_importances.plot.bar()

In [None]:
# Fit the random forest on the full training split, predict the test split, and
# keep the fitted per-feature importances for plotting.
rfc_predictions = rfc.fit(X_train, y_train).predict(X_test)
rfc_importances = rfc.feature_importances_

In [None]:
# Label the random-forest importances with the feature names and draw them as bars.
rfc_importances = pd.Series(data = rfc_importances, index = X_train.columns)
rfc_importances.plot.bar()

In [None]:
# Fit the extra-trees ensemble on the full training split, predict the test
# split, and keep the fitted per-feature importances for plotting.
etc_predictions = etc.fit(X_train, y_train).predict(X_test)
etc_importances = etc.feature_importances_

In [None]:
# Label the extra-trees importances with the feature names and draw them as bars.
etc_importances = pd.Series(data = etc_importances, index = X_train.columns)
etc_importances.plot.bar()

In [None]:
# Train the network on the full training split and score the test split.
# NOTE(review): `model` is the same object already trained for 40 epochs on the
# inner split, so this continues from those weights rather than starting
# fresh — confirm the warm start is intended.
model.fit(
    X_train,
    y_train,
    batch_size = 64,
    epochs = 40,
)
# Raw sigmoid outputs; they are thresholded in the recall cell.
predictions = model.predict(X_test)

In [None]:
# Test-set recall of the SVC.
recall_svc = recall_score(y_true = y_test, y_pred = svc_predictions)
recall_svc

In [None]:
# Test-set recall of the XGBoost model.
recall_xgb = recall_score(y_true = y_test, y_pred = xgb_predictions)
recall_xgb

In [None]:
# Test-set recall of the LightGBM model.
recall_lgbm = recall_score(y_true = y_test, y_pred = lgbm_predictions)
recall_lgbm

In [None]:
# Test-set recall of the random forest.
recall_rfc = recall_score(y_true = y_test, y_pred = rfc_predictions)
recall_rfc

In [None]:
# Test-set recall of the extra-trees ensemble.
# Scores etc_predictions; the cell previously passed rfc_predictions, so it
# reported the random forest's recall a second time.
recall_etc = recall_score(y_test, etc_predictions)
recall_etc

In [None]:
# Test-set recall of the neural network: threshold the sigmoid outputs at 0.5
# and flatten them to one dimension. ravel() replaces the hard-coded
# reshape((3342,)), which only worked for a test set of exactly 3342 rows.
neyral_net_recall = recall_score(y_test, (predictions > 0.5).ravel())
neyral_net_recall