## SVM

In [34]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [36]:
# Hyper-parameter grid handed to GridSearchCV: one sub-grid per kernel.
# Only the RBF sub-grid varies gamma; both sub-grids pin max_iter to 1000.
tuned_parameters = [
    {
        "kernel": ["rbf"],
        "gamma": [1e-3, 1e-4],
        "C": [1, 10, 100, 1000],
        "max_iter": [1000],
    },
    {
        "kernel": ["linear"],
        "C": [1, 10, 100, 1000],
        "max_iter": [1000],
    },
]

# Metrics to optimise, one grid search per entry; each name is turned into
# its "<name>_macro" scorer when the search object is built.
scores = ["precision", "recall"]


# Run one grid search per target metric and repeat it on each of the five
# train/validation splits (indices 0-4) of X_train_train / X_validate.
for score in scores:
    print("# Tuning hyper-parameters for %s" % score, end="\n\n")

    # A single search object per metric is reused for every split; the
    # attributes read below always reflect the most recent fit() call.
    clf = GridSearchCV(SVC(), tuned_parameters, scoring="%s_macro" % score)
    for i in range(5):
        clf.fit(X_train_train[i], y_train_train[i])

        print("Best parameters set found on development set:", end="\n\n")
        print(clf.best_params_, end="\n\n")

        print("Grid scores on development set:", end="\n\n")
        cv_results = clf.cv_results_
        means = cv_results["mean_test_score"]
        stds = cv_results["std_test_score"]
        # One line per candidate: mean CV score with a +/- 2*std band.
        for mean, std, params in zip(means, stds, cv_results["params"]):
            print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
        print()

        print("Detailed classification report:", end="\n\n")
        # NOTE: the two messages below are kept verbatim; in this loop the
        # data actually used is split i of X_train_train / X_validate.
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.", end="\n\n")
        y_true = y_validate[i]
        y_pred = clf.predict(X_validate[i])
        print(classification_report(y_true, y_pred), end="\n\n")

# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 10, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}

Grid scores on development set:

0.502 (+/-0.025) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}
0.508 (+/-0.050) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}
0.546 (+/-0.149) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}
0.516 (+/-0.066) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}
0.546 (+/-0.149) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}
0.514 (+/-0.066) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}
0.546 (+/-0.149) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}
0.514 (+/-0.066) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}
0.515 (+/-0.014) for {'C': 1, 'kernel': 'linear', 'max_iter': 1000}
0.515 (+/-0.014) for {'C': 10, 'kernel': 'linear', 'max_iter': 1000}
0.515 (+/-0.014)

Best parameters set found on development set:

{'C': 1, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}

Grid scores on development set:

0.502 (+/-0.008) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}
0.507 (+/-0.016) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}
0.501 (+/-0.002) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}
0.501 (+/-0.005) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}
0.501 (+/-0.002) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}
0.501 (+/-0.005) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}
0.501 (+/-0.002) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': 1000}
0.501 (+/-0.005) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf', 'max_iter': 1000}
0.488 (+/-0.022) for {'C': 1, 'kernel': 'linear', 'max_iter': 1000}
0.488 (+/-0.022) for {'C': 10, 'kernel': 'linear', 'max_iter': 1000}
0.488 (+/-0.022) for {'C': 100, 'kernel': 'linear', 'max_

In [37]:
# Three candidate configurations to compare on the validation splits.
# Candidate 1: RBF kernel, C=10, gamma=1e-3 (matches the first "best
# parameters" dict printed by the grid search above).
svclassifier1 = SVC(
    C=10,
    kernel='rbf',
    gamma=0.001,
    max_iter=1000,
)
# Candidate 2: linear kernel, C=1.
# NOTE(review): presumably picked from a grid-search result that is cut off
# in the saved output -- confirm.
svclassifier2 = SVC(
    C=1,
    kernel='linear',
    max_iter=1000,
)
# Candidate 3: RBF kernel, C=1, gamma=1e-4 (matches the second "best
# parameters" dict printed by the grid search above).
svclassifier3 = SVC(
    C=1,
    kernel='rbf',
    gamma=0.0001,
    max_iter=1000,
)

In [38]:
# Validate svclassifier1 on splits 0-4: refit on each training split, print
# the per-split classification report, and average the accuracy over the five.
total_score = 0
for i in range(5):
    # fit() returns the estimator itself, so fit and predict can be chained.
    y_pred = svclassifier1.fit(X_train_train[i], y_train_train[i]).predict(X_validate[i])
    print(classification_report(y_validate[i], y_pred))
    total_score = total_score + accuracy_score(y_validate[i], y_pred)

print('Average accuracy score:', total_score/5)

              precision    recall  f1-score   support

           0       0.86      1.00      0.93      4896
           1       0.67      0.00      0.01       792

    accuracy                           0.86      5688
   macro avg       0.76      0.50      0.47      5688
weighted avg       0.83      0.86      0.80      5688

              precision    recall  f1-score   support

           0       0.86      1.00      0.92      4896
           1       0.33      0.00      0.01       792

    accuracy                           0.86      5688
   macro avg       0.60      0.50      0.46      5688
weighted avg       0.79      0.86      0.80      5688

              precision    recall  f1-score   support

           0       0.86      1.00      0.92      4896
           1       0.09      0.00      0.00       792

    accuracy                           0.86      5688
   macro avg       0.48      0.50      0.46      5688
weighted avg       0.75      0.86      0.80      5688

              preci

In [40]:
# Validate svclassifier2 on splits 0-4: refit on each training split, print
# the per-split classification report, and average the accuracy over the five.
# After this cell the estimator is left fitted on the last split (i = 4).
total_score = 0
for i in range(5):
    # fit() returns the estimator itself, so fit and predict can be chained.
    y_pred = svclassifier2.fit(X_train_train[i], y_train_train[i]).predict(X_validate[i])
    print(classification_report(y_validate[i], y_pred))
    total_score = total_score + accuracy_score(y_validate[i], y_pred)

print('Average accuracy score:', total_score/5)

              precision    recall  f1-score   support

           0       0.87      0.80      0.83      4896
           1       0.18      0.27      0.21       792

    accuracy                           0.73      5688
   macro avg       0.52      0.53      0.52      5688
weighted avg       0.77      0.73      0.75      5688

              precision    recall  f1-score   support

           0       0.86      0.88      0.87      4896
           1       0.13      0.11      0.12       792

    accuracy                           0.77      5688
   macro avg       0.49      0.49      0.49      5688
weighted avg       0.76      0.77      0.76      5688

              precision    recall  f1-score   support

           0       0.82      0.12      0.21      4896
           1       0.13      0.84      0.23       792

    accuracy                           0.22      5688
   macro avg       0.48      0.48      0.22      5688
weighted avg       0.72      0.22      0.21      5688

              preci

In [39]:
# Validate svclassifier3 on splits 0-4: refit on each training split, print
# the per-split classification report, and average the accuracy over the five.
total_score = 0
for i in range(5):
    # fit() returns the estimator itself, so fit and predict can be chained.
    y_pred = svclassifier3.fit(X_train_train[i], y_train_train[i]).predict(X_validate[i])
    print(classification_report(y_validate[i], y_pred))
    total_score = total_score + accuracy_score(y_validate[i], y_pred)

print('Average accuracy score:', total_score/5)

              precision    recall  f1-score   support

           0       0.86      1.00      0.92      4896
           1       0.13      0.00      0.01       792

    accuracy                           0.86      5688
   macro avg       0.50      0.50      0.47      5688
weighted avg       0.76      0.86      0.80      5688

              precision    recall  f1-score   support

           0       0.86      0.98      0.92      4896
           1       0.16      0.02      0.04       792

    accuracy                           0.85      5688
   macro avg       0.51      0.50      0.48      5688
weighted avg       0.76      0.85      0.79      5688

              precision    recall  f1-score   support

           0       0.87      0.38      0.53      4896
           1       0.14      0.63      0.23       792

    accuracy                           0.42      5688
   macro avg       0.50      0.51      0.38      5688
weighted avg       0.77      0.42      0.49      5688

              preci

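The 2nd classifier (`svclassifier2`: linear kernel, C=1) seems to have the most consistent results across the validation splits, so it is the one evaluated on the test set below.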

In [42]:
# Hold-out evaluation of the selected classifier on X_test / y_test.
# NOTE(review): nothing is fitted in this cell, so svclassifier2 is used in
# whatever state the earlier validation cell left it (the last fit visible in
# this notebook is on split i = 4 only) -- confirm that is intended, or refit
# on the full training data before predicting.
y_pred = svclassifier2.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))
print('Accuracy score:', test_accuracy)

              precision    recall  f1-score   support

           0       0.87      0.82      0.84      6120
           1       0.18      0.25      0.21       990

    accuracy                           0.74      7110
   macro avg       0.53      0.54      0.53      7110
weighted avg       0.78      0.74      0.76      7110

Accuracy score: 0.7390998593530239
