In [1]:
import pandas as pd
import numpy as np
from numpy.random import RandomState
import matplotlib.pyplot as plt
from tqdm import tqdm

# One fixed seed, reused for the RandomState here and for the split further down.
seed = 42
rng = RandomState(seed)

# Read the modelling table from the working directory.
df = pd.read_csv('Model.csv')

# Features are every column except the first and the last; the last is the target.
X = df.iloc[:, 1:-1]
y = df.iloc[:, -1]
print(X.shape, y.shape)

(12529, 59) (12529,)


In [2]:
from sklearn.model_selection import train_test_split, cross_val_score

# Hold out 20% of the rows as a test set; the seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=seed)
X_train, X_test, y_train, y_test = split_parts

# Report the shapes of both partitions.
for set_label, features, target in (
    ('Train set: ', X_train, y_train),
    ('Test set: ', X_test, y_test),
):
    print(set_label, features.shape, target.shape)

Train set:  (10023, 59) (10023,)
Test set:  (2506, 59) (2506,)


In [3]:
def cross_validation_score(estimator, X=None, y=None, cv=10):
    """Return the mean and standard deviation of cross-validated ROC AUC.

    Parameters
    ----------
    estimator : scikit-learn estimator
        Classifier handed to ``cross_val_score``.
    X, y : array-like, optional
        Features and labels to cross-validate on. When both are omitted the
        notebook's training split (``X_train`` / ``y_train``) is used, so the
        held-out test rows never take part in model selection.
    cv : int or cross-validation splitter, default 10
        Passed through to ``cross_val_score``.

    Returns
    -------
    tuple of (float, float)
        Mean and standard deviation of the per-fold ``roc_auc`` scores.

    Raises
    ------
    ValueError
        If only one of ``X`` / ``y`` is supplied.
    """
    if (X is None) != (y is None):
        raise ValueError('X and y must be passed together or both omitted')

    # Default to the training split rather than the notebook-level full
    # dataset: scoring on the latter leaks the test rows into the choice of
    # hyperparameters and makes the final test metric optimistic.
    if X is None:
        X, y = X_train, y_train

    scores = cross_val_score(
        estimator, X, y, cv=cv, n_jobs=-1,
        scoring='roc_auc'
    )

    return scores.mean(), scores.std()


def plot_cross_validation(title, X, Y, error, xlabel, ylabel='ROC AUC'):
    """Draw an error-bar plot of ``Y`` against ``X`` on the current axes.

    ``error`` gives the error-bar size for each point. Points are drawn as
    unconnected circular markers over a grid. The figure is neither shown nor
    saved here; that is left to the caller.
    """
    axes = plt.gca()

    # Decorations first: title, grid and axis labels.
    axes.set_title(title)
    axes.grid()
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)

    # Markers only, no connecting line between the settings.
    axes.errorbar(X, Y, yerr=error, linestyle='None', marker='o')

In [None]:
from sklearn.svm import SVC

# Compare the SVC kernels at a fixed C=1.0 using cross_validation_score.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
title, xlabel = 'Change kernel, C=1.0', 'Kernel'

# One (mean, std) pair per kernel, in the same order as `kernels`.
kernel_scores = [
    cross_validation_score(SVC(kernel=kernel, C=1.0))
    for kernel in tqdm(kernels)
]
means = [fold_mean for fold_mean, _fold_std in kernel_scores]
stddevs = [fold_std for _fold_mean, fold_std in kernel_scores]

plot_cross_validation(title, kernels, means, stddevs, xlabel)
plt.savefig('SVM_change_kernel.png', bbox_inches='tight')
plt.show()

  0%|                                                                                            | 0/4 [00:00<?, ?it/s]

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score
# Final model: RBF kernel with C=15.0, fitted on the training split only.
clf = SVC(kernel='rbf', C=15.0)

clf.fit(X_train, y_train)

# Hard class labels are what accuracy needs.
y_pred = clf.predict(X_test)
# ROC AUC is a ranking metric and needs continuous scores. Feeding it hard
# labels collapses the curve to a single operating point, so the number is
# just balanced accuracy. The signed distance to the separating surface is
# also what the 'roc_auc' scorer ranks on during cross-validation, which
# keeps the two figures comparable.
y_score = clf.decision_function(X_test)

print('Accuracy on test set: ', accuracy_score(y_test, y_pred))
print('ROC AUC on test set: ', roc_auc_score(y_test, y_score))

In [None]:
import sklearn.metrics as sk_metrics

# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2,
# so importing it fails on current releases. Prefer its replacement,
# ConfusionMatrixDisplay.from_estimator, and fall back to the old helper only
# on releases that predate the new API.
display_cls = getattr(sk_metrics, 'ConfusionMatrixDisplay', None)
if hasattr(display_cls, 'from_estimator'):
    display_cls.from_estimator(clf, X_test, y_test)
else:
    sk_metrics.plot_confusion_matrix(clf, X_test, y_test)
plt.savefig('SVM_confmat.png')

In [None]:
# Show the fitted classifier's n_support_ attribute (support-vector counts).
support_counts = clf.n_support_
print(support_counts)