In [None]:
# Import necessary packages
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.utils import shuffle

np.set_printoptions(suppress = True)

warnings.simplefilter(action='ignore')


# Importing data sets
# The loaders live in the project-local `data_import` module. Each call is
# unpacked into a pair that is later passed to `run` as (X, y).
# NOTE(review): presumably `*_X` holds features and `*_Y` holds class labels;
# the concrete types/shapes are not visible here - confirm in data_import.
from data_import import load_fruits, load_chess, load_music, load_lepiota

fruit_X, fruit_Y = load_fruits()
chess_X, chess_Y = load_chess()
music_X, music_Y = load_music()
lep_X, lep_Y = load_lepiota()

### SVM Training Loop

In [None]:
def svm_train(X, Y, size):
    """Print the mean *training* accuracy of a linear SVM (C = 1) over three random splits.

    For each trial the data is split with ``train_test_split``, the training
    part is standardized, an ``SVC(kernel='linear', C=1)`` is fitted on it and
    then scored on that same training part.

    Parameters
    ----------
    X : features, passed straight to ``train_test_split``.
    Y : labels, passed straight to ``train_test_split``.
    size : forwarded as ``test_size``; the held-out part is not used here.

    Returns
    -------
    None. The averaged accuracy is printed, not returned.
    """
    # One accuracy per trial. The accumulator must be created once, before the
    # loop: re-creating it per iteration would leave only the final trial in
    # the reported "mean".
    accs = []

    for _ in range(3):

        print('Splitting data')
        # Only the training portion is needed for a training-accuracy estimate.
        X_train, _X_test, y_train, _y_test = train_test_split(X, Y, test_size = size, shuffle = True)

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)

        print('Initializing SVM')
        svm_classifier = SVC(kernel = 'linear', C = 1)

        print('Training SVM')
        # Train the SVM on the training set
        svm_classifier.fit(X_train, y_train)

        print('Making predictions with trained SVM')
        # Predict on the data the model was fitted on (this is training accuracy)
        y_pred = svm_classifier.predict(X_train)

        print('Calculating accuracy of SVM')
        # Evaluate the performance on the training set
        accs.append(accuracy_score(y_train, y_pred))
    print(f"Training Accuracy: {np.mean(accs):.3f}")

### SVM CV Loop

In [None]:
def svm_cv(X, y, size):
    """Pick the SVM regularization strength ``C`` by repeated 5-fold cross-validation.

    The data is split once with ``train_test_split``; only the training part
    is used. For every candidate ``C`` a scaler + linear-SVC pipeline is
    cross-validated three times (each with a freshly shuffled ``KFold``), and
    the candidate with the highest average accuracy wins. Ties keep the
    earlier (smaller) candidate.

    Parameters
    ----------
    X : features, passed to ``train_test_split``.
    y : labels, passed to ``train_test_split``.
    size : forwarded as ``test_size``.

    Returns
    -------
    The best candidate ``C``. It is ``None`` only if no candidate produced a
    comparable (non-NaN) score. The best mean validation accuracy is printed.
    """
    candidate_cs = [0.001, 0.01, 0.1, 1, 10]
    n_trials = 3
    n_folds = 5

    X_train, _X_test, y_train, _y_test = train_test_split(X, y, test_size = size, shuffle = True)

    # Start below any attainable accuracy so a valid candidate is always
    # selected; a starting point of C = 0 would be rejected by SVC later on.
    opt_c = None
    best_acc = -np.inf

    for c in candidate_cs:
        trial_means = []
        for _ in range(n_trials):
            # Scaling is part of the cross-validated model, so the scaler is
            # fitted on each fold's training part only and the validation fold
            # never influences the scaling statistics.
            model = make_pipeline(StandardScaler(), SVC(kernel = 'linear', C = c))
            cv = KFold(n_splits = n_folds, shuffle = True)
            cv_scores = cross_val_score(model, X_train, y_train, cv = cv)
            trial_means.append(np.mean(cv_scores))
        mean_acc = np.mean(trial_means)
        if mean_acc > best_acc:
            opt_c = c
            best_acc = mean_acc
    print('Validation Accuracy: {}'.format(best_acc))
    return opt_c

### SVM Test Loop

In [None]:
def svm_test(X, y, size, C):
    """Print the mean held-out accuracy of a linear SVM over three random splits.

    For each trial the data is split with ``train_test_split``, a
    ``StandardScaler`` is fitted on the training part and applied to both
    parts, an ``SVC(kernel='linear', C=C)`` is fitted on the training part and
    scored on the test part.

    Parameters
    ----------
    X : features, passed to ``train_test_split``.
    y : labels, passed to ``train_test_split``.
    size : forwarded as ``test_size``.
    C : regularization strength handed to ``SVC``.

    Returns
    -------
    None. The averaged accuracy is printed, not returned.
    """
    # Created once so that all trials contribute to the reported mean.
    accs = []

    for _ in range(3):

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = size, shuffle = True)

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        # Re-use the training statistics; refitting on the test data would put
        # train and test features on different scales and leak test information.
        X_test = scaler.transform(X_test)

        svm_classifier = SVC(kernel = 'linear', C = C)

        # Train the SVM on the training set
        svm_classifier.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = svm_classifier.predict(X_test)

        # Evaluate the performance on the test set
        accs.append(accuracy_score(y_test, y_pred))
    print(f"Testing Accuracy: {np.mean(accs):.3f}")

### Pipeline

In [None]:
def run(X, y, size):
    """Run the three SVM stages for one data set and one ``test_size``.

    Calls, in order: ``svm_train`` (prints training accuracy), ``svm_cv``
    (prints validation accuracy and returns the selected C) and ``svm_test``
    (prints testing accuracy using that C). Finally prints the selected C.

    Note: each stage is handed the full ``X``/``y`` and ``size`` and performs
    its own split, so the stages do not share a single train/test partition.
    """
    svm_train(X, y, size)

    # The cross-validation stage is the only one that yields a value.
    best_c = svm_cv(X, y, size)

    svm_test(X, y, size, best_c)
    print('Best C: {}'.format(best_c))

<hr style="color:Maroon;background-color:Maroon;border:0 none; height: 3px;">

### Calling Pipeline on each Dataset

In [None]:
# Evaluate every data set at each test-set fraction (largest hold-out first),
# printing a separator line once a data set is finished.
for features, labels in (
    (fruit_X, fruit_Y),
    (music_X, music_Y),
    (lep_X, lep_Y),
    (chess_X, chess_Y),
):
    for test_fraction in (0.8, 0.5, 0.2):
        run(features, labels, test_fraction)
    print('-'*30)