### Support Vector Machine

In [None]:
# This code appears in every demonstration notebook.
# By default, running a cell displays only the value of its last expression.
# Setting ast_node_interactivity to "all" makes every expression statement
# in a cell display its output.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

1. Import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import sklearn.model_selection as skm # This is for cross-validation grid search
from sklearn.svm import SVC # SVM module
from sklearn.metrics import accuracy_score, confusion_matrix

2. Import Carseats dataset

In [None]:
# Load the Carseats data; the relative path means the CSV file is looked up
# in the notebook's current working directory.
Carseats = pd.read_csv('Carseats.csv')

In [None]:
# Column names, dtypes and non-null counts, followed by the first five rows.
Carseats.info()
Carseats.head()

3. We intend to classify the sales of carseats into high or low categories.<br>
First, we need to transform the numeric 'Sales' into high/low categories, using the mean of 'Sales' as the cut-off.

In [None]:
# Build the response variable y: sales at or below the mean are labelled
# 'low', sales above the mean are labelled 'high'.
sales_cutoffs = [-1, Carseats['Sales'].mean(), float('inf')]
Carseats['Sales_c'] = pd.cut(
    Carseats['Sales'],
    bins=sales_cutoffs,
    labels=['low', 'high'],
)

In [None]:
# Number of observations that fall in each sales category.
Carseats['Sales_c'].value_counts()

4. Prepare X and y

In [None]:
# Drop the dependent variable from Carseats to get X: both its numeric form
# ('Sales') and the derived category ('Sales_c') must go, otherwise the
# response would leak into the predictors. Every other column is kept as a
# predictor.
X = Carseats.drop(columns=['Sales', 'Sales_c'])
# The high/low category is the classification target.
y = Carseats['Sales_c']

In [None]:
# Preview the first rows of the predictor table.
X.head()

In [None]:
# One-hot encode the categorical predictors. drop_first=True drops the first
# level of each variable, so the remaining dummy columns are not redundant.
# Note: this cell overwrites X. Re-running it without first re-running the
# cell that creates X fails, because the original columns no longer exist.
X = pd.get_dummies(X, columns = ['ShelveLoc', 'Urban', 'US'], drop_first=True)

5. Split training and test datasets

In [None]:
# Hold out 30% of the observations as the test set. random_state fixes the
# shuffling so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=30)

In [None]:
# Class counts in the training set.
y_train.value_counts()

6. Fit a support vector classifier.

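#### Attention: the `C` argument of `SVC` is not the same as the tuning parameter C in the textbook and slides; the two work in opposite directions. Here, a larger `C` makes margin violations more costly and therefore gives a narrower margin, whereas the textbook's C moves the margin the other way.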

In [None]:
# Initiating the classifier
# The C argument allows us to specify the cost of a violation
# to the margin. When the cost argument is small, then the margins will be
# wide and many support vectors will be on the margin or will violate the
# margin. When the C argument is large, then the margins will be narrow and
# there will be few support vectors on the margin or violating the margin.
svm_linear = SVC(C = 10, kernel = 'linear')
# Fit on the training split only. Fitting on the full X, y would let the model
# see the test observations and make the test accuracy below over-optimistic.
svm_linear.fit(X_train, y_train)

In [None]:
# Score the linear classifier on the held-out test set.
svm_linear_pred = svm_linear.predict(X_test)
# Fraction of test observations classified correctly
accuracy_score(y_true=y_test, y_pred=svm_linear_pred)
# Rows are the true classes, columns are the predicted classes
confusion_matrix(y_true=y_test, y_pred=svm_linear_pred)

7. Cross validation to select the best C parameter using grid search.<br>
Note: It is computationally expensive.

In [None]:
# First define the grid as a dictionary. Here we only have one parameter C.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 5, 10, 100]}
# Grid search: cv gives the K for K fold cross-validation, scoring gives what metric to evaluate.
# refit=True retrains the estimator with the best C once the search is done.
# This cell only builds the search object; fitting it is the expensive step.
svm_grid = skm.GridSearchCV(
    svm_linear,
    param_grid,
    refit=True,
    cv=5,
    scoring='accuracy',
)

In [None]:
# Fit the grid on the training split only. The test set must stay unseen during
# model selection, otherwise the test accuracy of the selected model is biased.
svm_grid.fit(X_train, y_train)

# Access grid search results by calling the attributes
# Best C identified
svm_grid.best_params_
# Best resulting accuracy (mean cross-validated accuracy for the best C)
svm_grid.best_score_
# All CV results. We pay most attention to mean score results
svm_grid.cv_results_

In [None]:
# Then we can use the best selected model to make predictions and examine performance.
# The fitted search object is named svm_grid; with refit=True its best_estimator_
# is the classifier retrained with the best C.
best_svm = svm_grid.best_estimator_
best_svm_pred = best_svm.predict(X_test)
accuracy_score(y_test, best_svm_pred)
confusion_matrix(y_test, best_svm_pred)

8. Support Vector Machine: using a non-linear kernel.

In [None]:
# Radial (RBF) kernel.
# gamma sets how far the influence of a single training example reaches:
#   - small gamma: influence extends farther, giving a smoother decision
#     boundary; this can underfit, especially if the data is complex.
#   - large gamma: influence is more localized, giving a more complex, tightly
#     fit boundary; this can overfit, especially if the data is noisy.
svm_rbf = SVC(C=1, gamma=1, kernel="rbf")

In [None]:
# Train the RBF-kernel classifier on the training split.
svm_rbf.fit(X_train, y_train)

In [None]:
# Score the RBF-kernel classifier on the held-out test set.
svm_rbf_pred = svm_rbf.predict(X_test)
# Fraction of test observations classified correctly
accuracy_score(y_true=y_test, y_pred=svm_rbf_pred)
# Rows are the true classes, columns are the predicted classes
confusion_matrix(y_true=y_test, y_pred=svm_rbf_pred)

9. Change to polynomial kernel

In [None]:
# Polynomial kernel: selected with kernel='poly'.
#   degree - degree of the polynomial kernel (default is 3).
#   C      - regularization parameter (default is 1.0); the strength of the
#            regularization is inversely proportional to C.
#   gamma  - kernel coefficient (default is 'scale').
svc_poly = SVC(C=1.0, kernel='poly', degree=3, gamma='scale')

In [None]:
# Train the polynomial-kernel classifier on the training split ...
svc_poly.fit(X_train, y_train)
# ... and score it on the held-out test set.
svc_poly_pred = svc_poly.predict(X_test)
# Fraction of test observations classified correctly
accuracy_score(y_true=y_test, y_pred=svc_poly_pred)
# Rows are the true classes, columns are the predicted classes
confusion_matrix(y_true=y_test, y_pred=svc_poly_pred)