In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

import seaborn as sns
import matplotlib.pyplot as plt

from time import time
from Loader import train_data_vgg16

In [2]:
# Load the training table through the project's Loader helper.
# Presumably these are VGG16-extracted image features (going by the loader's
# name) — confirm against Loader.py. The preview below shows numeric columns
# labelled 0..511 plus a 'class' label column.
df = train_data_vgg16()
# Preview the first three rows.
df.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,class
0,27.489744,0.0,0.0,0.0,0.0,4.406033,0.0,0.0,0.0,9.508491,...,0.0,0.0,0.0,0.0,3.239264,5.528229,0.0,0.0,0.0,Real
1,0.0,0.0,0.0,27.298634,0.0,21.718521,17.45442,0.0,0.0,14.870244,...,0.0,0.0,0.0,0.0,0.0,22.73053,0.0,0.0,0.0,Real
2,0.0,0.0,0.0,0.0,0.0,7.493243,0.0,1.344108,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Real


In [3]:
# Shuffle the rows: frac=1.0 draws every row once, and the fixed random_state
# makes the resulting order reproducible across runs. The original index
# labels are kept, which is why the preview shows non-consecutive row labels.
df_sample = df.sample(frac=1.0, random_state=0)
# Preview the first three shuffled rows.
df_sample.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,class
3582,0.0,0.0,0.0,0.0,4.260871,18.161793,0.0,0.0,0.0,20.67531,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Real
10498,32.219925,0.0,0.0,24.435274,0.0,0.0,0.0,2.313794,0.0,10.794803,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fake
3227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,65.122879,Fake


In [4]:
# Separate the target from the predictors: 'class' is the label to predict,
# and every remaining column is kept as a feature.
y = df_sample['class']
X = df_sample.drop(columns='class')

# Preview the first three feature rows.
X.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,502,503,504,505,506,507,508,509,510,511
3582,0.0,0.0,0.0,0.0,4.260871,18.161793,0.0,0.0,0.0,20.67531,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10498,32.219925,0.0,0.0,24.435274,0.0,0.0,0.0,2.313794,0.0,10.794803,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,65.122879


In [5]:
# Standardize every feature column (zero mean, unit variance).
#
# The scaled values are wrapped back into a DataFrame that keeps BOTH the
# original column labels and the original (shuffled) row index. Without
# `index=X.index` the new frame would get a fresh 0..n-1 index while `y` keeps
# the shuffled labels, so any label-aligned operation between X and y
# (concat, boolean masks, joins) would pair features with the wrong target.
#
# NOTE(review): the scaler is fitted on all rows before the cross-validated
# grid search, so each validation fold contributes to the scaling statistics.
# Consider moving the scaler into a Pipeline passed to GridSearchCV.
stnd = StandardScaler()
X = pd.DataFrame(stnd.fit_transform(X), columns=X.columns, index=X.index)

In [None]:
# Candidate hyperparameter values; the search evaluates every (C, gamma) pair.
grid = dict(
    C=[.1, 1, 10, 25, 50],
    gamma=[.001, .005, .01, .05, .1],
)

# Wall-clock timestamp (seconds) taken just before the search starts.
time_start = time()

# Cross-validated grid search over an SVC with otherwise default settings.
# return_train_score=True keeps the training-fold scores in cv_results_ so
# they can be compared with the validation scores later; n_jobs=-1 lets
# scikit-learn run the fits in parallel on all available processors.
svm = SVC()
svmCV = GridSearchCV(
    estimator=svm,
    param_grid=grid,
    return_train_score=True,
    n_jobs=-1,
)
svmCV.fit(X, y)

time_stop = time()

# time() measures seconds, so dividing by 60 reports the duration in minutes.
elapsed_minutes = (time_stop - time_start) / 60
print('Elapsed Time for SVM:', elapsed_minutes)

In [None]:
# Mean cross-validated score of the best parameter combination found by the search.
svmCV.best_score_

In [None]:
# Parameter combination that achieved the best mean cross-validated score.
svmCV.best_params_

In [None]:
# Collect the cross-validation results and the distinct hyperparameter values
# (in the order they first appear in the results).
results = pd.DataFrame(svmCV.cv_results_)
gamma = results['param_gamma'].unique()
C = results['param_C'].unique()

# Build the score matrices by pivoting on the parameter columns instead of
# reshaping the flat score vector. GridSearchCV enumerates the grid with the
# parameter names sorted and the last name varying fastest, i.e. C-major here,
# so a plain reshape(len(gamma), len(C)) silently yields a transposed matrix
# whenever the grid is square. Pivoting ties every score to its own
# (gamma, C) pair regardless of enumeration order.
# Resulting layout: one row per gamma, one column per C.
train = (
    results
    .pivot_table(index='param_gamma', columns='param_C', values='mean_train_score')
    .reindex(index=gamma, columns=C)
    .to_numpy()
)
valid = (
    results
    .pivot_table(index='param_gamma', columns='param_C', values='mean_test_score')
    .reindex(index=gamma, columns=C)
    .to_numpy()
)

# Draw one annotated heatmap per score type, with identical formatting so the
# two figures are directly comparable.
sns.set(style='whitegrid', font_scale=.8)
for scores, title in ((train, 'Train Scores'), (valid, 'Validation Scores')):
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(scores, annot=True, fmt='.3f', cmap='YlOrRd',
                xticklabels=C, yticklabels=gamma, ax=ax)
    # Axis labels are applied after the heatmap call: seaborn sets its own
    # axis labels while drawing, which would replace labels set beforehand.
    ax.set(xlabel='C', ylabel='Gamma', title=title)
    plt.show()

In [None]:
# Hyperparameter values in the order they were supplied to the grid search.
C = grid['C']
gamma = grid['gamma']


def _scores_by_gamma(score_key):
    """Return the scores stored under `score_key` as a (len(gamma), len(C)) array.

    Row i holds the scores for gamma[i] across every value of C, in the order
    of `C`. Each score is looked up by its own (gamma, C) pair taken from
    cv_results_['params'] rather than by reshaping the flat score vector:
    GridSearchCV enumerates the grid C-major (sorted parameter names, last
    name varying fastest), so reshape(len(gamma), len(C)) would silently give
    a transposed matrix on a square grid and mislabel every curve.
    """
    by_setting = {
        (setting['gamma'], setting['C']): score
        for setting, score in zip(svmCV.cv_results_['params'],
                                  svmCV.cv_results_[score_key])
    }
    return np.array([[by_setting[(g, c)] for c in C] for g in gamma])


train = _scores_by_gamma('mean_train_score')
valid = _scores_by_gamma('mean_test_score')

# Plot score versus C, one curve per gamma, first for the training folds and
# then for the validation folds.
for scores, ylabel, title in (
    (train, 'Mean Train Score', 'Train Score'),
    (valid, 'Mean Validation Score', 'Validation Score'),
):
    fig, ax = plt.subplots()
    ax.set(xlabel='C', ylabel=ylabel, title=title)
    for g, scores_for_gamma in zip(gamma, scores):
        ax.plot(C, scores_for_gamma, label=g)
    ax.legend(title='Gamma')
    plt.show()