In [6]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [7]:
file_path = 'connect4_dataset.csv'
dataset = pd.read_csv(file_path)

dataset.head()

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,...,0.32,0.33,0.34,-1,-1.1,0.35,0.36,1.1,1.2,0.37
0,0,0,0,0,0,0,0,1,0,0,...,1,0,1,1,-1,-1,0,-1,1,0
1,0,0,0,0,0,0,0,0,0,0,...,-1,1,1,1,1,1,1,-1,-1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,-1,-1,-1,1,1,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,1,0,-1,0,0
4,0,0,0,0,0,0,0,0,0,0,...,-1,-1,-1,1,1,-1,-1,-1,1,0


In [8]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

valid_indices = (y >= 0) & (y <= 6)
X = X[valid_indices]
y = y[valid_indices]

X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25, random_state=42)  # 0.25 * 0.8 = 0.2

param_grid = {
    'C': [0.01, 0.1, 1, 10], # margin to error
    'kernel': ['linear','rbf','poly'], # kernel type
}


grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=5)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

y_train_pred = best_model.predict(X_train)
y_val_pred = best_model.predict(X_val)
y_test_pred = best_model.predict(X_test)


train_accuracy = accuracy_score(y_train, y_train_pred)
val_accuracy = accuracy_score(y_val, y_val_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f'Best Parameters: {best_params}')
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] END ..............................C=0.01, kernel=linear; total time=  31.6s
[CV] END ..............................C=0.01, kernel=linear; total time=  31.4s
[CV] END ..............................C=0.01, kernel=linear; total time=  31.7s
[CV] END ..............................C=0.01, kernel=linear; total time=  32.4s
[CV] END ..............................C=0.01, kernel=linear; total time=  32.5s
[CV] END .................................C=0.01, kernel=rbf; total time= 1.4min
[CV] END .................................C=0.01, kernel=rbf; total time= 1.4min
[CV] END .................................C=0.01, kernel=rbf; total time= 1.4min
[CV] END .................................C=0.01, kernel=rbf; total time= 1.4min
[CV] END .................................C=0.01, kernel=rbf; total time= 1.4min
[CV] END ................................C=0.01, kernel=poly; total time= 2.0min
[CV] END ................................C=0.01,

In [9]:
import joblib

joblib.dump(best_model, 'svm_model.pkl')

['svm_model.pkl']