## Importing Libraries

In [2]:
import pandas as pd 
import numpy as np 

In [3]:
df = pd.read_csv('Churn_Modelling-Copy1.csv')

In [5]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
RowNumber          10000 non-null int64
CustomerId         10000 non-null int64
Surname            10000 non-null object
CreditScore        10000 non-null int64
Geography          10000 non-null object
Gender             10000 non-null object
Age                10000 non-null int64
Tenure             10000 non-null int64
Balance            10000 non-null float64
NumOfProducts      10000 non-null int64
HasCrCard          10000 non-null int64
IsActiveMember     10000 non-null int64
EstimatedSalary    10000 non-null float64
Exited             10000 non-null int64
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


## Missing Data 

In [7]:
df.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [38]:
df.Exited.value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [39]:
df.Exited.value_counts()/len(df)

0    0.7963
1    0.2037
Name: Exited, dtype: float64

Therefore Data is unbalanced.

## Independent and Dependent variables 

In [9]:
x = df.loc[:,['CreditScore','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','EstimatedSalary']]

In [10]:
x.head()

Unnamed: 0,CreditScore,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,2,0.0,1,1,1,101348.88
1,608,1,83807.86,1,0,1,112542.58
2,502,8,159660.8,3,1,0,113931.57
3,699,1,0.0,2,0,0,93826.63
4,850,2,125510.82,1,1,1,79084.1


## Feature Scaling

In [20]:
from sklearn.preprocessing import StandardScaler

In [21]:
sc = StandardScaler()

In [22]:
x_sc = sc.fit_transform(x)

In [24]:
x_sc

array([[-0.32622142, -1.04175968, -1.22584767, ...,  0.64609167,
         0.97024255,  0.02188649],
       [-0.44003595, -1.38753759,  0.11735002, ..., -1.54776799,
         0.97024255,  0.21653375],
       [-1.53679418,  1.03290776,  1.33305335, ...,  0.64609167,
        -1.03067011,  0.2406869 ],
       ...,
       [ 0.60498839,  0.68712986, -1.22584767, ..., -1.54776799,
         0.97024255, -1.00864308],
       [ 1.25683526, -0.69598177, -0.02260751, ...,  0.64609167,
        -1.03067011, -0.12523071],
       [ 1.46377078, -0.35020386,  0.85996499, ...,  0.64609167,
        -1.03067011, -1.07636976]])

In [11]:
y = df.iloc[:,-1]

In [13]:
y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

## Spliting  Data into Training,Test

In [16]:
from sklearn.model_selection import train_test_split

In [25]:
x_train,x_test,y_train,y_test = train_test_split(x_sc,y,test_size=0.3)

In [28]:
print(x_train.shape,x_test.shape,y_train.shape,y_test.shape)

(7000, 7) (3000, 7) (7000,) (3000,)


## Random Forest

In [29]:
from sklearn.ensemble import RandomForestClassifier

In [31]:
clf1 = RandomForestClassifier()

In [32]:
clf1.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [33]:
predict = clf1.predict(x_test)

### Evaluating Model 

In [40]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [36]:
accuracy_score(y_test,predict)

0.7963333333333333

In [37]:
confusion_matrix(y_test,predict)

array([[2245,  128],
       [ 483,  144]], dtype=int64)

In [41]:
print(classification_report(y_test,predict))

             precision    recall  f1-score   support

          0       0.82      0.95      0.88      2373
          1       0.53      0.23      0.32       627

avg / total       0.76      0.80      0.76      3000



## Ada Boosting using default Decision Tree 

In [42]:
from sklearn.ensemble import AdaBoostClassifier

In [44]:
ada_clf = AdaBoostClassifier(n_estimators=50)

In [45]:
ada_clf.fit(x_train,y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)

In [46]:
ada_predict = ada_clf.predict(x_test)

In [47]:
accuracy_score(y_test,ada_predict)

0.8136666666666666

In [48]:
print(classification_report(y_test,ada_predict))

             precision    recall  f1-score   support

          0       0.81      0.99      0.89      2373
          1       0.79      0.15      0.25       627

avg / total       0.81      0.81      0.76      3000



## Ada Boosting using base model as SVM 

In [49]:
from sklearn.svm import SVC

In [57]:
svm = SVC(kernel='rbf')

In [67]:
ada_2 = AdaBoostClassifier(base_estimator=svm,n_estimators=60,algorithm='SAMME')

In [68]:
ada_2.fit(x_train,y_train)

AdaBoostClassifier(algorithm='SAMME',
          base_estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          learning_rate=1.0, n_estimators=60, random_state=None)

In [69]:
ada2_predict= ada_2.predict(x_test) 

In [70]:
accuracy_score(y_test,ada2_predict)

0.791