In [2]:
import numpy as np
import pandas as pd

from sklearn import linear_model
from sklearn import preprocessing
from sklearn import model_selection
from sklearn import metrics
from sklearn import svm

In [3]:
train_data = np.array(pd.read_csv('./data/train_data.csv'))
X_train_val = train_data[:, :-1]
y_train_val = train_data[:, -1]

In [4]:
test_data = np.array(pd.read_csv('./data/test_data.csv'))
X_test = test_data[:, :-1]
y_test = test_data[:, -1]

In [5]:
number_of_games_in_16_17 = 1074

In [6]:
x_train = X_train_val[:-number_of_games_in_16_17, :]
x_validation = X_train_val[-number_of_games_in_16_17:, :]

In [7]:
y_train = y_train_val[:-number_of_games_in_16_17]
y_validation = y_train_val[-number_of_games_in_16_17:]

In [8]:
scaler_tv = preprocessing.StandardScaler()
scaler_tv.fit(x_train)
x_train = scaler_tv.transform(x_train)
x_validation = scaler_tv.transform(x_validation)

In [10]:
scaler_tt = preprocessing.StandardScaler()
scaler_tt.fit(X_train_val)
X_train_val = scaler_tt.transform(X_train_val)
X_test = scaler_tt.transform(X_test)

### Trying Linear SVC

In [15]:
Cs = np.array([10**i for i in range(-5, 5)])
best_score = 0
best_C = 0
for C in Cs:
    model = svm.LinearSVC(loss='hinge', C=C, max_iter=5000)
    model.fit(x_train, y_train)        
    y_predicted = model.predict(x_validation)
    score = metrics.accuracy_score(y_validation, y_predicted)
    if score>best_score: 
        best_score = score
        best_C = C



In [16]:
best_C

0.001

In [17]:
best_score

0.6415270018621974

In [18]:
linear_svc = svm.LinearSVC(loss='hinge', C=best_C)

In [19]:
linear_svc.fit(X_train_val, y_train_val)

LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
          penalty='l2', random_state=None, tol=0.0001, verbose=0)

In [20]:
y_predicted = linear_svc.predict(X_test)

In [22]:
print(metrics.classification_report(y_test, y_predicted))

              precision    recall  f1-score   support

         0.0       0.59      0.59      0.59       449
         1.0       0.71      0.71      0.71       626

    accuracy                           0.66      1075
   macro avg       0.65      0.65      0.65      1075
weighted avg       0.66      0.66      0.66      1075



In [23]:
metrics.accuracy_score(y_test, y_predicted)

0.6586046511627907

### Trying Kernelized SVC

In [24]:
Cs = np.array([10**i for i in range(-5, 5)])
gammas = np.array([10**i for i in range(-3, 3)])
kernels = ['poly', 'rbf', 'sigmoid']
best_score = 0 
best_params = {'C':0, 'gamma': 0, 'kernel': 'poly'}

for C in Cs: 
    for gamma in gammas:
        for kernel in kernels:
            model = svm.SVC(C = C, kernel=kernel, gamma = gamma)
            model.fit(x_train, y_train)
            y_predicted = model.predict(x_validation)
            score = metrics.accuracy_score(y_validation, y_predicted)
            if score>best_score: 
                best_score = score
                best_params['C'] = C
                best_params['gamma'] = gamma
                best_params['kernel'] = kernel

In [25]:
best_score

0.638733705772812

In [30]:
best_params

{'C': 1.0, 'gamma': 0.001, 'kernel': 'rbf'}

In [26]:
kernel_svc = svm.SVC(C=best_params['C'], kernel=best_params['kernel'], gamma=best_params['gamma'])

In [27]:
kernel_svc.fit(X_train_val, y_train_val)
y_pred = kernel_svc.predict(X_test)

In [29]:
metrics.accuracy_score(y_test, y_pred)

0.6558139534883721