In [None]:
# Download the dataset repository from GitHub; git creates a
# 'Churn-Modelling-Dataset' directory in the current working directory.
!git clone https://github.com/sharmaroshan/Churn-Modelling-Dataset.git

Cloning into 'Churn-Modelling-Dataset'...
remote: Enumerating objects: 29, done.[K
remote: Total 29 (delta 0), reused 0 (delta 0), pack-reused 29[K
Unpacking objects: 100% (29/29), done.


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Read the dataset from the directory created by the `git clone` cell.
# The path is relative to the working directory (instead of the Colab-only
# '/content/...' absolute path) so it always points at the cloned repository,
# whether the notebook runs on Colab or elsewhere.
path = 'Churn-Modelling-Dataset/Churn_Modelling.csv'
df = pd.read_csv(path)

In [None]:
# examine the shape: (number of rows, number of columns)
df.shape

(10000, 14)

In [None]:
# examine the first 10 rows to see the column names and some sample values
df.head(10)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0
5,6,15574012,Chu,645,Spain,Male,44,8,113755.78,2,1,0,149756.71,1
6,7,15592531,Bartlett,822,France,Male,50,7,0.0,2,1,1,10062.8,0
7,8,15656148,Obinna,376,Germany,Female,29,4,115046.74,4,1,0,119346.88,1
8,9,15792365,He,501,France,Male,44,4,142051.07,2,0,1,74940.5,0
9,10,15592389,H?,684,France,Male,27,2,134603.88,1,1,1,71725.73,0


In [None]:
# count how many rows fall into each class of the target column `Exited`
df['Exited'].value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

The **get_dummies()** function converts categorical variables into dummy/indicator (one-hot encoded) variables.

In [None]:
# Convert categorical variables to numeric: replace the Geography and Gender
# columns with one indicator (dummy) column per category.
df = pd.get_dummies(df, columns=['Geography', 'Gender'])

In [None]:
# Delete the identifier columns (RowNumber, CustomerId, Surname), which are
# not used as features. They are dropped by name rather than by position:
# re-running this cell then raises a KeyError instead of silently removing
# the next three feature columns (CreditScore, Age, Tenure).
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

In [None]:
# display the frame after encoding the categoricals and dropping the identifier columns
df

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.00,1,1,1,101348.88,1,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,0,1,1,0
2,502,42,8,159660.80,3,1,0,113931.57,1,1,0,0,1,0
3,699,39,1,0.00,2,0,0,93826.63,0,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.10,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,0,1,0,0,0,1
9996,516,35,10,57369.61,1,1,1,101699.77,0,1,0,0,0,1
9997,709,36,7,0.00,1,0,1,42085.58,1,1,0,0,1,0
9998,772,42,3,75075.31,2,1,0,92888.52,1,0,1,0,0,1


**Separating Training and Testing Datasets**

In [None]:
# Target: the `Exited` column. Features: every remaining column, as a NumPy array.
y = df['Exited']
X = df.drop(columns=['Exited']).to_numpy()
print(X.shape, y.shape, sep='\n')

(10000, 13)
(10000,)


In [None]:
# Hold out 20% of the rows as a test set. The split is stratified on y and
# uses a fixed random_state so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=1,
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')

(8000, 13)
(2000, 13)
(8000,)
(2000,)


**Transforming the Data**

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# then the same fitted scaler transforms both the training and the test split.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

**Building the Model**

- SVC

In [None]:
# Fit a support vector classifier with an RBF kernel on the scaled training data.
# Other accepted kernels: 'linear', 'poly', 'sigmoid', 'precomputed'.
# NOTE(review): max_iter=1000 caps the solver's iterations -- confirm the fit
# converges (no ConvergenceWarning) before trusting the scores below.
SVCModel = SVC(C=1.0, kernel='rbf', gamma='auto', max_iter=1000)
SVCModel.fit(X_train, y_train)



SVC(gamma='auto', max_iter=1000)

In [None]:
# Mean accuracy of the fitted model on the training split and on the held-out test split
train_score = SVCModel.score(X_train, y_train)
test_score = SVCModel.score(X_test, y_test)
print('SVCModel Train Score is : ' , train_score)
print('SVCModel Test Score is : ' , test_score)


SVCModel Train Score is :  0.8255
SVCModel Test Score is :  0.817


In [None]:
# Predict a class for every row of the test split and show the first ten predictions
y_pred = SVCModel.predict(X_test)
first_predictions = y_pred[:10]
print('Predicted Value for SVCModel is : ' , first_predictions)

Predicted Value for SVCModel is :  [0 1 0 0 0 1 0 0 0 0]


**Making a Single Prediction**

In [None]:
# raw (unscaled) feature values of the row at index 1; NumPy prints them in scientific notation
X[1]

array([6.0800000e+02, 4.1000000e+01, 1.0000000e+00, 8.3807860e+04,
       1.0000000e+00, 0.0000000e+00, 1.0000000e+00, 1.1254258e+05,
       0.0000000e+00, 0.0000000e+00, 1.0000000e+00, 1.0000000e+00,
       0.0000000e+00])

In [None]:
# New customer to score, given on the raw (unscaled) feature scale that the
# scaler was fitted on. Feature order:
#   CreditScore, Age, Tenure, Balance, NumOfProducts, HasCrCard,
#   IsActiveMember, EstimatedSalary, Geography_France, Geography_Germany,
#   Geography_Spain, Gender_Female, Gender_Male
# The values mirror X[1]. In the array repr, 6.08e+02 means 608 (not 6.08)
# and 8.380786e+04 means 83807.86 (not 8.38), so the full values are used.
col = np.array([[608, 41, 1, 83807.86, 1, 0, 1, 112542.58, 0, 0, 1, 1, 0]])


In [None]:
# Scale the new sample with the already-fitted scaler, then classify it
col_scaled = sc.transform(col)
new_pred = SVCModel.predict(col_scaled)

In [None]:
# display the predicted class for the new sample
new_pred

array([0])

The output `array([0])` shows that the model predicts class 0 for this customer, i.e. the customer is not expected to leave the bank.