# **1-Import important libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as ex

# **2-Data exploration**

In [None]:
# Read the BankChurners CSV (relative path under ../input) into the working DataFrame `data`.
data = pd.read_csv('../input/credit-card-customers/BankChurners.csv')


In [None]:
# Summary statistics of the dataset (describe() reports on numeric columns by default).
data.describe()

In [None]:
# List the dtype pandas assigned to every column.
data.dtypes

In [None]:
# Print the (rows, columns) shape, then preview the first 10 rows.
print('shape: {} '.format(data.shape))
data.head(10)

In [None]:
# Column-by-column overview: non-null counts, dtypes and memory usage.
data.info()

In [None]:
# Columns removed before any analysis: the client identifier and the two
# 'Naive_Bayes_Classifier_...' columns shipped with the CSV.
columns_to_drop = [
    'CLIENTNUM',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1',
]
# The drop is done in place, so re-running this cell on the same frame raises KeyError.
data.drop(columns=columns_to_drop, inplace=True)
print('shape: {} '.format(data.shape))

In [None]:
# Count missing values per column, sorted ascending so the columns with the most nulls come last.
data.isnull().sum().sort_values() #Check for null

In [None]:
# Class balance of the target column: print the raw counts, then draw the
# same counts as a pie chart with percentage labels.
attrition_counts = data.Attrition_Flag.value_counts()
print(attrition_counts)

colors = ('#00743f', '#FB7B8E')
attrition_counts.plot(
    kind='pie',
    autopct='%.2f',
    colors=colors,
    shadow=True,
    figsize=(8, 6),
)
plt.title('Attrition_Flag')
plt.show()

# **3-Data visualization**

In [None]:
# Customer counts per attrition class, split by gender.
ax = sns.countplot(data=data, x='Attrition_Flag', hue='Gender')
ax.set_title("Distribution of Gender Among Attrited and Existing Customers")
plt.show()

In [None]:
# Customer counts per attrition class, split by card category.
ax = sns.countplot(data=data, x='Attrition_Flag', hue='Card_Category')
ax.set_title("Distribution of Card_Category Among Attrited and Existing Customers")
plt.show()

In [None]:
# Customer counts per attrition class, split by marital status.
ax = sns.countplot(data=data, x='Attrition_Flag', hue='Marital_Status')
ax.set_title("Distribution of Marital Status Among Attrited and Existing Customers")
plt.show()

In [None]:
# Customer counts per attrition class, split by income category.
ax = sns.countplot(data=data, x='Attrition_Flag', hue='Income_Category')
ax.set_title("Distribution of Income Category Among Attrited and Existing Customers")
plt.show()

In [None]:
# Customer counts per attrition class, split by card category.
# NOTE(review): this cell draws the same chart as the earlier Card_Category cell.
ax = sns.countplot(data=data, x='Attrition_Flag', hue='Card_Category')
ax.set_title("Distribution of Card_Category Among Attrited and Existing Customers")
plt.show()

In [None]:
# Plotly pie chart: one slice per education level, sized by the Credit_Limit
# values of the rows in that level.
fig = ex.pie(
    data,
    names='Education_Level',
    values='Credit_Limit',
    title='Education Level and Credit Limit ',
)
fig.show()

In [None]:
# Customer counts per attrition class, split by education level.
sns.countplot(x='Attrition_Flag', data=data, hue='Education_Level')
# The title names the column actually plotted (it previously said Card_Category).
plt.title("Distribution of Education Level Among Attrited and Existing Customers")
plt.show()

In [None]:
# Scatter-matrix over a hand-picked subset of columns.
sns.set(style="ticks", color_codes=True)
# NOTE(review): Income_Category has not been label-encoded yet at this point in
# the notebook - confirm pairplot renders it the way you intend.
col = [
    'Customer_Age',
    'Income_Category',
    'Credit_Limit',
    'Total_Amt_Chng_Q4_Q1',
    'Total_Trans_Amt',
    'Avg_Utilization_Ratio',
]
g = sns.pairplot(data, vars=col)

# **4-Data Preprocessing**

In [None]:
# Separate the frame into object-dtype columns (to be label-encoded) and
# everything else (kept as is).
df_number = data.select_dtypes(exclude=["O"])
df_object = data.select_dtypes(include=["O"])

In [None]:
from sklearn.preprocessing import LabelEncoder

# One encoder instance is refitted on each column in turn, so after this
# cell `label` only remembers the classes of the last column it saw.
label = LabelEncoder()
df_object = pd.DataFrame(
    {
        column_name: label.fit_transform(column_values)
        for column_name, column_values in df_object.astype(str).items()
    },
    index=df_object.index,
)

# Reassemble the frame: encoded columns first, then the untouched non-object ones.
data = pd.concat([df_object, df_number], axis='columns')

In [None]:
# Target is the Attrition_Flag column; every remaining column is a feature.
y = data['Attrition_Flag']
X = data.drop(columns=['Attrition_Flag'])

In [None]:
# Annotated heatmap of the pairwise correlations between the feature columns.
plt.figure(figsize=(15,10))
# `linewidths` (plural) is the keyword seaborn.heatmap documents for the cell
# borders; `linewidth` was only forwarded to matplotlib next to seaborn's own
# `linewidths=0` default.
sns.heatmap(X.corr(), linecolor='white', linewidths=1, annot=True)
plt.show()

In [None]:
from sklearn.feature_selection import SelectPercentile, chi2, f_classif

print('Original X Shape is ' , X.shape)

# Keep the top 50% of features ranked by the chi2 score against the target
# (f_classif is imported as the alternative score function).
# NOTE(review): the selector is fitted on all rows, before the train/test split
# performed later in the file, so held-out rows influence which features survive.
FeatureSelection = SelectPercentile(score_func=chi2, percentile=50)
FeatureSelection.fit(X, y)
X = FeatureSelection.transform(X)

# Shape after selection and the boolean mask of retained columns
print('X Shape is ' , X.shape)
print('Selected Features are : ' , FeatureSelection.get_support())



In [None]:
#Standard Scaler for Data
from sklearn.preprocessing import StandardScaler

# Standardise every feature; copy=True, with_mean=True and with_std=True are
# StandardScaler's defaults, so the bare constructor is equivalent.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed later in the file, so held-out rows contribute to the statistics.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

# Preview the first five scaled rows
print('X \n' , X[:5])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed seed keeps the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    shuffle=True,
    random_state=44,
)

# Report the size of each partition
for split_name, split_values in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(split_name + ' shape is ' , split_values.shape)

# **5-Model**

**LogisticRegression Model**

In [None]:
from sklearn.linear_model import LogisticRegression

# L2-penalised logistic regression fitted with the 'sag' solver
LogisticRegressionModel = LogisticRegression(
    C=1.0,
    penalty='l2',
    solver='sag',
    random_state=33,
)
LogisticRegressionModel.fit(X_train, y_train)

# Mean accuracy on both partitions, plus fitted attributes
train_score = LogisticRegressionModel.score(X_train, y_train)
test_score = LogisticRegressionModel.score(X_test, y_test)
print('LogisticRegressionModel Train Score is : ' , train_score)
print('LogisticRegressionModel Test Score is : ' , test_score)
print('LogisticRegressionModel Classes are : ' , LogisticRegressionModel.classes_)
print('LogisticRegressionModel No. of iteratios is : ' , LogisticRegressionModel.n_iter_)
print('----------------------------------------------------')

# Hard labels and class probabilities for the held-out rows (first 10 shown)
y_pred_prob = LogisticRegressionModel.predict_proba(X_test)
y_pred = LogisticRegressionModel.predict(X_test)
print('Predicted Value for LogisticRegressionModel is : ' , y_pred[:10])
print('Prediction Probabilities Value for LogisticRegressionModel is : ' , y_pred_prob[:10])

**SGDClassifier Model**

In [None]:
import sklearn
from sklearn.linear_model import SGDClassifier

# scikit-learn 1.0 renamed the 'squared_loss' option to 'squared_error' and
# 1.2 removed the old spelling, so use the name the installed release accepts.
squared_loss_name = 'squared_error' if int(sklearn.__version__.split('.')[0]) >= 1 else 'squared_loss'

# Linear classifier trained by SGD with a squared loss and an L2 penalty
SGDClassifierModel = SGDClassifier(penalty='l2',loss=squared_loss_name,learning_rate='optimal',random_state=33)
SGDClassifierModel.fit(X_train, y_train)

#Calculating Details
print('SGDClassifierModel Train Score is : ' , SGDClassifierModel.score(X_train, y_train))
print('SGDClassifierModel Test Score is : ' , SGDClassifierModel.score(X_test, y_test))
# `loss_function_` was deprecated in scikit-learn 1.4 and removed in 1.6;
# fall back to the configured loss name when the attribute is gone.
print('SGDClassifierModel loss function is : ' , getattr(SGDClassifierModel, 'loss_function_', SGDClassifierModel.loss))
print('SGDClassifierModel No. of iteratios is : ' , SGDClassifierModel.n_iter_)

#Calculating Prediction
y_pred = SGDClassifierModel.predict(X_test)
print('Predicted Value for SGDClassifierModel is : ' , y_pred[:10])

**SVC Model**

In [None]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier; other kernel options are
# linear, poly, sigmoid and precomputed. The solver is capped at 10000 iterations.
SVCModel = SVC(
    C=1.0,
    kernel='rbf',
    gamma='auto',
    max_iter=10000,
)
SVCModel.fit(X_train, y_train)

# Mean accuracy on both partitions
train_score = SVCModel.score(X_train, y_train)
test_score = SVCModel.score(X_test, y_test)
print('SVCModel Train Score is : ' , train_score)
print('SVCModel Test Score is : ' , test_score)

# Predicted labels for the held-out rows (first 10 shown)
y_pred = SVCModel.predict(X_test)
print('Predicted Value for SVCModel is : ' , y_pred[:10])

**DecisionTreeClassifier Model**

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Depth-3 decision tree split on Gini impurity ('entropy' is the alternative criterion)
DecisionTreeClassifierModel = DecisionTreeClassifier(
    max_depth=3,
    criterion='gini',
    random_state=33,
)
DecisionTreeClassifierModel.fit(X_train, y_train)

# Mean accuracy on both partitions, plus fitted attributes
train_score = DecisionTreeClassifierModel.score(X_train, y_train)
test_score = DecisionTreeClassifierModel.score(X_test, y_test)
print('DecisionTreeClassifierModel Train Score is : ' , train_score)
print('DecisionTreeClassifierModel Test Score is : ' , test_score)
print('DecisionTreeClassifierModel Classes are : ' , DecisionTreeClassifierModel.classes_)
print('DecisionTreeClassifierModel feature importances are : ' , DecisionTreeClassifierModel.feature_importances_)
print('----------------------------------------------------')

# Hard labels and class probabilities for the held-out rows (first 10 shown)
y_pred_prob = DecisionTreeClassifierModel.predict_proba(X_test)
y_pred = DecisionTreeClassifierModel.predict(X_test)
print('Predicted Value for DecisionTreeClassifierModel is : ' , y_pred[:10])
print('Prediction Probabilities Value for DecisionTreeClassifierModel is : ' , y_pred_prob[:10])

**GaussianNB Model**

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings; fit() returns the estimator itself
GaussianNBModel = GaussianNB().fit(X_train, y_train)

# Mean accuracy on both partitions
train_score = GaussianNBModel.score(X_train, y_train)
test_score = GaussianNBModel.score(X_test, y_test)
print('GaussianNBModel Train Score is : ' , train_score)
print('GaussianNBModel Test Score is : ' , test_score)
print('----------------------------------------------------')

# Hard labels and class probabilities for the held-out rows (first 10 shown)
y_pred_prob = GaussianNBModel.predict_proba(X_test)
y_pred = GaussianNBModel.predict(X_test)
print('Predicted Value for GaussianNBModel is : ' , y_pred[:10])
print('Prediction Probabilities Value for GaussianNBModel is : ' , y_pred_prob[:10])

**RandomForestClassifier Model**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Forest of 100 depth-2 trees split on Gini impurity ('entropy' is the alternative criterion)
RandomForestClassifierModel = RandomForestClassifier(
    n_estimators=100,
    max_depth=2,
    criterion='gini',
    random_state=33,
)
RandomForestClassifierModel.fit(X_train, y_train)

# Mean accuracy on both partitions, plus the fitted feature importances
train_score = RandomForestClassifierModel.score(X_train, y_train)
test_score = RandomForestClassifierModel.score(X_test, y_test)
print('RandomForestClassifierModel Train Score is : ' , train_score)
print('RandomForestClassifierModel Test Score is : ' , test_score)
print('RandomForestClassifierModel features importances are : ' , RandomForestClassifierModel.feature_importances_)
print('----------------------------------------------------')

# Hard labels and class probabilities for the held-out rows (first 10 shown)
y_pred_prob = RandomForestClassifierModel.predict_proba(X_test)
y_pred = RandomForestClassifierModel.predict(X_test)
print('Predicted Value for RandomForestClassifierModel is : ' , y_pred[:10])
print('Prediction Probabilities Value for RandomForestClassifierModel is : ' , y_pred_prob[:10])