In [1]:
import pandas as pd
import numpy as np
import seaborn as sb
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Plot appearance (independent of the data pipeline below): 'ticks' style,
# 'paper' context with enlarged fonts.
sb.set_style('ticks')
sb.set_context('paper', font_scale=1.6)

# Read the Telco customer-churn table from disk.
churn_data = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn_R2.csv')

# Capture every column name and the complete value matrix before any column
# is removed further down.
input_names = list(churn_data)
all_inputs = churn_data[input_names].values

# Target: selected by position (last column of the file).
# NOTE(review): presumably this is the "Churn" column dropped below — confirm
# against the CSV's column order.
churn_target = churn_data.iloc[:, -1]

# Features: everything except the customer identifier and the churn label.
# From here on `churn_data` refers to the feature-only frame.
churn_data = churn_data.drop(columns=["customerID", "Churn"])

# Array forms of the features and target used for modelling.
X = np.array(churn_data)
y = np.array(churn_target)

# Feature column names, and the target values as a plain list.
features = list(churn_data.columns)
targets = list(churn_target)

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=100
)

# Standardise the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [2]:
import time
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Support vector classifier with a linear kernel; all other settings are left
# at their library defaults.
clf = svm.SVC(kernel='linear')

# Time only the fit. time.perf_counter() is the standard-library clock meant
# for measuring intervals: it is monotonic and high-resolution, whereas
# time.time() follows the wall clock and can jump if the system time is
# adjusted while the model trains.
start = time.perf_counter()
clf.fit(X_train, y_train)
stop = time.perf_counter()

# Predict labels for the held-out test rows.
y_pred = clf.predict(X_test)

# Each section of the report is followed by one blank line.
print(f"Training: {stop - start} seconds", end="\n\n")

print("Predicted values:", y_pred, end="\n\n")

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred), end="\n\n")

print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100}%", end="\n\n")

print("Report:")
print(classification_report(y_test, y_pred), end="\n\n")

Training: 0.6817870140075684 seconds

Predicted values: [0 0 0 ... 0 0 0]

Confusion Matrix:
[[1364  169]
 [ 273  298]]

Accuracy: 78.99239543726236%

Report:
              precision    recall  f1-score   support

           0       0.83      0.89      0.86      1533
           1       0.64      0.52      0.57       571

    accuracy                           0.79      2104
   macro avg       0.74      0.71      0.72      2104
weighted avg       0.78      0.79      0.78      2104




In [3]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

# Candidate values of the regularisation parameter C; the kernel is held
# fixed at 'linear'.
parameter_grid = dict(C=[0.1, 0.5, 1, 10, 100], kernel=['linear'])

# Five-fold stratified cross-validation.
cross_validation = StratifiedKFold(n_splits=5)

# Exhaustive search over the grid. verbose=3 prints one line per fit, and
# refit=True leaves a fitted best model available as `best_estimator_`.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=parameter_grid,
    cv=cross_validation,
    refit=True,
    verbose=3,
)
grid_search.fit(X_train, y_train)

# Summary of the winning candidate.
print(f'Best score: {grid_search.best_score_}')
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best estimator: {grid_search.best_estimator_}')

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.801 total time=   0.3s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.802 total time=   0.3s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.786 total time=   0.3s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.789 total time=   0.4s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.802 total time=   0.3s
[CV 1/5] END ..............C=0.5, kernel=linear;, score=0.798 total time=   0.4s
[CV 2/5] END ..............C=0.5, kernel=linear;, score=0.803 total time=   0.4s
[CV 3/5] END ..............C=0.5, kernel=linear;, score=0.784 total time=   0.4s
[CV 4/5] END ..............C=0.5, kernel=linear;, score=0.788 total time=   0.4s
[CV 5/5] END ..............C=0.5, kernel=linear;, score=0.798 total time=   0.4s
[CV 1/5] END ................C=1, kernel=linear;, score=0.798 total time=   0.5s
[CV 2/5] END ................C=1, kernel=linear;,