In [13]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.preprocessing import StandardScaler



In [14]:
# Download dataset number 94 from the UCI ML repository through ucimlrepo.
SPAMBASE_UCI_ID = 94
spambase = fetch_ucirepo(id=SPAMBASE_UCI_ID)

In [15]:
# data (as pandas dataframes)
# loading as dataframe
X = spambase.data.features.to_numpy()
Y = spambase.data.targets.to_numpy()
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [16]:
Y = Y.ravel()

In [17]:
x_train, x_test, y_train, y_test = train_test_split(X, Y,test_size=0.2, random_state=42)

In [18]:
# Baseline linear-kernel SVM; C is not set, and the solver is capped at
# 100000 iterations.
svm_model = SVC(
    kernel='linear',
    max_iter=100000,
)


In [19]:
# Train the baseline model on the training split.
svm_model.fit(X=x_train, y=y_train)



In [20]:
# Earlier grid of C values, without C=0.01 (kept for reference only):
# linear_models = [SVC(kernel='linear', C=0.001),SVC(kernel='linear', C=0.1),SVC(kernel='linear', C=1),SVC(kernel='linear', C=10),SVC(kernel='linear', C=100)]

In [21]:
# One linear-kernel SVM per value of C, ordered from smallest to largest C.
linear_models = [
    SVC(kernel='linear', C=c_value)
    for c_value in (0.001, 0.01, 0.1, 1, 10, 100)
]

# Test-set predictions, one entry per model; populated by a later cell.
y_pred = []

In [22]:
def getPerformanceMetrics(y_test, y_pred):
    """Score one set of predictions against the true labels.

    Parameters:
        y_test: ground-truth labels.
        y_pred: predicted labels to compare with ``y_test``.

    Returns:
        A four-element list ``[accuracy, precision, recall, f1]``, each value
        produced by the corresponding scikit-learn scorer called with its
        default settings.
    """
    # The tuple order fixes the order of the returned list.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return [scorer(y_test, y_pred) for scorer in scorers]

In [23]:
# Fit every linear SVM on the training split and collect its test-set
# predictions, in the same order as `linear_models`.
#
# The list is rebuilt from scratch here rather than appended to a list created
# in another cell, so re-running this cell cannot accumulate duplicate
# predictions. Using a comprehension also avoids rebinding the module-level
# `svm_model` (the baseline model fitted earlier) as a loop variable.
# `fit` returns the estimator itself, which allows chaining `predict`.
y_pred = [
    linear_model.fit(x_train, y_train).predict(x_test)
    for linear_model in linear_models
]

In [24]:
# Print one [accuracy, precision, recall, f1] row per entry of y_pred.
for predictions in y_pred:
    scores = getPerformanceMetrics(y_test, predictions)
    print(scores)

[0.8903365906623235, 0.9418960244648318, 0.7897435897435897, 0.8591352859135286]
[0.9131378935939196, 0.9305555555555556, 0.8589743589743589, 0.8933333333333334]
[0.9218241042345277, 0.9368131868131868, 0.8743589743589744, 0.9045092838196286]
[0.9250814332247557, 0.9349593495934959, 0.8846153846153846, 0.909090909090909]
[0.9229098805646037, 0.9299191374663073, 0.8846153846153846, 0.9067017082785809]
[0.9207383279044516, 0.9295392953929539, 0.8794871794871795, 0.9038208168642952]


## (B) Kernel Tricks

In [25]:
# Kernels to compare: polynomial of degree 2 and 3, sigmoid, and RBF.
# No C is passed, so each model uses the SVC default.
svm_models = [
    SVC(kernel='poly', degree=2),
    SVC(kernel='poly', degree=3),
    SVC(kernel='sigmoid'),
    SVC(kernel='rbf'),
]
# Reset the module-level prediction list.
y_pred = []

In [26]:
def getModelMetrics(models):
    """Fit each model and score it on the held-out split.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and ``y_test``
    and scores predictions with ``getPerformanceMetrics``.

    Parameters:
        models: iterable of estimators exposing ``fit(X, y)`` and
            ``predict(X)``. Each one is fitted in place.

    Returns:
        A list with one ``getPerformanceMetrics`` result per model, in the
        same order as ``models``. An empty iterable yields an empty list.
    """
    metrics = []
    for model in models:
        model.fit(x_train, y_train)
        predictions = model.predict(x_test)
        metrics.append(getPerformanceMetrics(y_test, predictions))
    # The original built this list but never returned it, so callers got None.
    return metrics


## (C) Overfitting & Underfitting Analysis

In [27]:
# Polynomial SVMs for every (degree, C) pair, ordered as
# (1, 0.01), (1, 100), (3, 0.01), (3, 100).
svm_models = [
    SVC(kernel='poly', degree=poly_degree, C=c_value)
    for poly_degree in (1, 3)
    for c_value in (0.01, 100)
]

In [28]:
# Degree-1 polynomial SVM with C=0.01, with the solver capped at 1000 iterations.
check_model = SVC(
    kernel='poly',
    degree=1,
    C=0.01,
    max_iter=1000,
)

In [37]:
# Write a single BEL character (no newline); presumably an audible
# "run finished" signal - confirm it is still wanted.
print(end='\a')

