In [None]:
# Third-party stack: data handling, plotting and the scikit-learn pieces
# (scaler, grid search, support-vector classifier) used by the cells below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Append the directory two levels above the current working directory to the
# import path so that the project-local `src` package can be imported.
# os.path.abspath('') resolves to the working directory, so this only works
# when the kernel is started from the expected folder
# (presumably the notebook's own directory -- TODO confirm).
import os
import sys
sys.path.append(os.path.join(
  os.path.abspath(''), '..', '..')
)

# Must come after the sys.path tweak above, otherwise `src` is not importable.
from src.preprocess import Preprocess

# Blanket filter: ignores every warning category from here on, including
# convergence and deprecation warnings raised by the libraries above.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# --- Load the raw splits; the first CSV column is used as the index ---
train_csv = '../../data/train.csv'
test_csv = '../../data/test.csv'
x_train: pd.DataFrame = pd.read_csv(train_csv, index_col=0)
x_test: pd.DataFrame = pd.read_csv(test_csv, index_col=0)

# Pull out the target column before the frames go through preprocessing.
y_train: pd.Series = x_train['Survived']

# --- Preprocess both splits using a StandardScaler ---
# NOTE(review): what Preprocess.apply does to the 'Survived' column and to the
# row order is not visible from this notebook -- confirm it keeps x_train
# aligned with y_train.
preprocessor = Preprocess(scaler=StandardScaler())
x_train, x_test = preprocessor.apply(x_train, x_test)

# --- Grid search over the SVC kernel only ---
params = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}
base_svc = SVC(random_state=3)
svc = GridSearchCV(
  estimator=base_svc,
  param_grid=params,
  scoring='balanced_accuracy',
  n_jobs=-1,                # use every available core
  cv=5,
  return_train_score=True,  # required for the 'mean_train_score' column below
)
svc.fit(x_train, y_train)

# --- Keep a compact per-kernel summary and write it to disk ---
summary_columns = [
  'param_kernel',
  'mean_test_score',
  'rank_test_score',
  'mean_train_score',
]
results = pd.DataFrame(svc.cv_results_)[summary_columns]
results.to_csv('svc_results.csv', index=False)

In [None]:
# Plot mean cross-validation and mean training balanced accuracy per kernel
# on a single set of axes, then save the figure as an SVG file.
fig, ax = plt.subplots()

kernels = results['param_kernel']
score_series = (
  ('mean_test_score', 'test'),
  ('mean_train_score', 'train'),
)
for score_column, legend_label in score_series:
  ax.plot(kernels, results[score_column], label=legend_label)

ax.set_xlabel('Kernel')
ax.set_ylabel('Balanced Accuracy')
ax.set_title('Kernel Analysis')
ax.legend()
fig.savefig('kernel.svg', format='svg')

In [None]:
# Predict labels for the preprocessed test split with the fitted grid search
# (which delegates to the best estimator it refit) and export them as CSV.
# NOTE(review): the frame is built from the bare prediction array and written
# with index=False, so the file has a single column headed "0" and no row
# identifiers -- confirm that is what downstream consumers expect.
test_labels = svc.predict(x_test)
predictions = pd.DataFrame(test_labels)
predictions.to_csv('svc_predictions.csv', index=False)