In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
# loading the diabetes dataset to a pandas DataFrame
df = pd.read_csv('./diabetes.csv')
X, Y = df.drop(columns='Outcome', axis=1), df['Outcome']

In [3]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=42)

In [5]:
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001]
}

grid = GridSearchCV(SVC(probability=True, cache_size=10000, class_weight={1: 1.1, 0: 1}), param_grid, refit=True, verbose=3)
grid.fit(X_train, Y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ....................C=0.1, gamma=1;, score=0.650 total time=   0.5s
[CV 2/5] END ....................C=0.1, gamma=1;, score=0.650 total time=   0.4s
[CV 3/5] END ....................C=0.1, gamma=1;, score=0.650 total time=   0.4s
[CV 4/5] END ....................C=0.1, gamma=1;, score=0.650 total time=   0.3s
[CV 5/5] END ....................C=0.1, gamma=1;, score=0.656 total time=   0.5s
[CV 1/5] END ..................C=0.1, gamma=0.1;, score=0.650 total time=   0.3s
[CV 2/5] END ..................C=0.1, gamma=0.1;, score=0.650 total time=   0.3s
[CV 3/5] END ..................C=0.1, gamma=0.1;, score=0.650 total time=   0.3s
[CV 4/5] END ..................C=0.1, gamma=0.1;, score=0.650 total time=   0.3s
[CV 5/5] END ..................C=0.1, gamma=0.1;, score=0.656 total time=   0.3s
[CV 1/5] END .................C=0.1, gamma=0.01;, score=0.650 total time=   0.2s
[CV 2/5] END .................C=0.1, gamma=0.01

[CV 2/5] END ...................C=1000, gamma=1;, score=0.650 total time=   0.8s
[CV 3/5] END ...................C=1000, gamma=1;, score=0.650 total time=   0.8s
[CV 4/5] END ...................C=1000, gamma=1;, score=0.650 total time=   0.8s
[CV 5/5] END ...................C=1000, gamma=1;, score=0.656 total time=   0.7s
[CV 1/5] END .................C=1000, gamma=0.1;, score=0.650 total time=   0.6s
[CV 2/5] END .................C=1000, gamma=0.1;, score=0.650 total time=   0.7s
[CV 3/5] END .................C=1000, gamma=0.1;, score=0.650 total time=   0.9s
[CV 4/5] END .................C=1000, gamma=0.1;, score=0.650 total time=   0.8s
[CV 5/5] END .................C=1000, gamma=0.1;, score=0.656 total time=   0.7s
[CV 1/5] END ................C=1000, gamma=0.01;, score=0.659 total time=   0.4s
[CV 2/5] END ................C=1000, gamma=0.01;, score=0.659 total time=   0.4s
[CV 3/5] END ................C=1000, gamma=0.01;, score=0.659 total time=   0.3s
[CV 4/5] END ...............

GridSearchCV(estimator=SVC(cache_size=10000, class_weight={0: 1, 1: 1.1},
                           probability=True),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
             verbose=3)

In [6]:
print(grid.best_params_)
print(grid.best_estimator_)

{'C': 1, 'gamma': 0.0001}
SVC(C=1, cache_size=10000, class_weight={0: 1, 1: 1.1}, gamma=0.0001,
    probability=True)


In [7]:
# accuracy score on the training data
X_train_prediction = grid.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)
print('Accuracy score of the training data : ', training_data_accuracy)

Accuracy score of the training data :  0.7833876221498371


In [8]:
# accuracy score on the test data
X_test_prediction = grid.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
print('Accuracy score of the test data : ', test_data_accuracy)

Accuracy score of the test data :  0.7402597402597403


In [9]:
pickle.dump(grid, open('grid_model.sav', 'wb'))