In [1]:
import numpy
import pandas

from sklearn.svm import SVC

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import confusion_matrix

In [2]:
dataFrame = pandas.read_csv("/content/drive/MyDrive/MLAA/Assignment 2/Datasets/training.csv")

In [3]:
genderDictionary = {"Male" : 1,
                    "Female" : 2,
                    numpy.nan : 3}

ageBandDictionary = {"1. <25" : 1,
                     "2. 25 to 34" : 2,
                     "3. 35 to 44" : 3,
                     "4. 45 to 54" : 4,
                     "5. 55 to 64" : 5,
                     "6. 65 to 74" : 6,
                     "7. 75+": 7,
                     numpy.nan : 0}

carSegmentDictionary = {"Small/Medium" : 1,
                        "Large/SUV" : 2,
                        "LCV" : 3,
                        "Other" : 4}

carModelDictionary = {"model_1" : 1,
                      "model_2" : 2,
                      "model_3" : 3,
                      "model_4" : 4,
                      "model_5" : 5,
                      "model_6" : 6,
                      "model_7" : 7,
                      "model_8" : 8,
                      "model_9" : 9,
                      "model_10" : 10,
                      "model_11" : 11,
                      "model_12" : 12,
                      "model_13" : 13,
                      "model_14" : 14,
                      "model_15" : 15,
                      "model_16" : 16,
                      "model_17" : 17,
                      "model_18" : 18,
                      "model_19" : 19}

In [4]:
dataFrame["gender"].replace(genderDictionary, inplace=True)
dataFrame["age_band"].replace(ageBandDictionary, inplace=True)
dataFrame["car_model"].replace(carModelDictionary, inplace=True)
dataFrame["car_segment"].replace(carSegmentDictionary, inplace=True)

In [5]:
dataFrame.dropna(how='any', inplace=True)
dataFrame.isna().sum()

ID                           0
Target                       0
age_band                     0
gender                       0
car_model                    0
car_segment                  0
age_of_vehicle_years         0
sched_serv_warr              0
non_sched_serv_warr          0
sched_serv_paid              0
non_sched_serv_paid          0
total_paid_services          0
total_services               0
mth_since_last_serv          0
annualised_mileage           0
num_dealers_visited          0
num_serv_dealer_purchased    0
dtype: int64

In [6]:
y = dataFrame.pop("Target")
dataFrame = dataFrame[["age_of_vehicle_years", "sched_serv_warr", "non_sched_serv_warr", "sched_serv_paid", "total_paid_services", "total_services", "mth_since_last_serv", "annualised_mileage"]]

X_train, X_test, y_train, y_test = train_test_split(dataFrame, y, test_size=0.3)

In [7]:
y_mode = y.mode()
y_base = numpy.full(y_train.shape, y_mode)

print("Accuracy Score : ", accuracy_score(y_train, y_base))
print("F1 Score : ", f1_score(y_train, y_base, average='weighted'))

Accuracy Score :  0.9733181051830098
F1 Score :  0.9601575451913116


# Kernel Tuning.

In [8]:
baseSVC = SVC(kernel="rbf")
baseSVC.fit(X_train, y_train)

y_train_preds = baseSVC.predict(X_train)
y_test_preds = baseSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=baseSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=baseSVC.classes_))


 Training Data :
Accuracy Score :  0.9845216729210855
F1 Score :  0.9821348666496854

 Testing Data :
Accuracy Score :  0.984417034668291
F1 Score :  0.9820122941081721

Confusion Matrix for Training :
 [[89358   124]
 [ 1299  1154]]

Confusion Matrix for Testing :
 [[38283    51]
 [  563   505]]


In [None]:
baseSVC = SVC(kernel="poly")
baseSVC.fit(X_train, y_train)

y_train_preds = baseSVC.predict(X_train)
y_test_preds = baseSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=baseSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=baseSVC.classes_))

In [None]:
baseSVC = SVC(kernel="linear")
baseSVC.fit(X_train, y_train)

y_train_preds = baseSVC.predict(X_train)
y_test_preds = baseSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=baseSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=baseSVC.classes_))

# Class Weight Tuning.

In [None]:
balancedSVC = SVC(kernel="rbf", class_weight="balanced")
balancedSVC.fit(X_train, y_train)

y_train_preds = balancedSVC.predict(X_train)
y_test_preds = balancedSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=balancedSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=balancedSVC.classes_))


 Training Data :
Accuracy Score :  0.9233741636448429
F1 Score :  0.9439853820940148

 Testing Data :
Accuracy Score :  0.9224151058321912
F1 Score :  0.9440444378448559

Confusion Matrix for Training :
 [[94267  7941]
 [  110  2751]]

Confusion Matrix for Testing :
 [[23610  1998]
 [   40   620]]


# C Tuning.

In [None]:
CSVC = SVC(class_weight="balanced", C=0.5)
CSVC.fit(X_train, y_train)

y_train_preds = CSVC.predict(X_train)
y_test_preds = CSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=CSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=CSVC.classes_))

In [None]:
CSVC = SVC(class_weight="balanced", C=0.1)
CSVC.fit(X_train, y_train)

y_train_preds = CSVC.predict(X_train)
y_test_preds = CSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=CSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=CSVC.classes_))

In [None]:
CSVC = SVC(class_weight="balanced", C=2)
CSVC.fit(X_train, y_train)

y_train_preds = CSVC.predict(X_train)
y_test_preds = CSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=CSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=CSVC.classes_))


 Training Data :
Accuracy Score :  0.925439473108148
F1 Score :  0.9452918058997826

 Testing Data :
Accuracy Score :  0.9243185625095173
F1 Score :  0.945229101142963

Confusion Matrix for Training :
 [[94479  7729]
 [  105  2756]]

Confusion Matrix for Testing :
 [[23659  1949]
 [   39   621]]


# Gamma Tuning.

In [None]:
CGSVC = SVC(class_weight="balanced", C=2, gamma=0.1)
CGSVC.fit(X_train, y_train)

y_train_preds = CGSVC.predict(X_train)
y_test_preds = CGSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=CGSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=CGSVC.classes_))


 Training Data :
Accuracy Score :  0.9623104816834651
F1 Score :  0.9695757161471467

 Testing Data :
Accuracy Score :  0.9568296025582458
F1 Score :  0.9653626553065908

Confusion Matrix for Training :
 [[98281  3927]
 [   33  2828]]

Confusion Matrix for Testing :
 [[24575  1033]
 [  101   559]]


In [None]:
CGSVC = SVC(class_weight="balanced", C=2, gamma=0.5)
CGSVC.fit(X_train, y_train)

y_train_preds = CGSVC.predict(X_train)
y_test_preds = CGSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=CGSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=CGSVC.classes_))

In [None]:
CGSVC = SVC(class_weight="balanced", C=2, gamma=2)
CGSVC.fit(X_train, y_train)

y_train_preds = CGSVC.predict(X_train)
y_test_preds = CGSVC.predict(X_test)

print("\n Training Data :")
print("Accuracy Score : ", accuracy_score(y_train, y_train_preds))
print("F1 Score : ", f1_score(y_train, y_train_preds, average='weighted'))

print("\n Testing Data :")
print("Accuracy Score : ", accuracy_score(y_test, y_test_preds))
print("F1 Score : ", f1_score(y_test, y_test_preds, average='weighted'))

print("\nConfusion Matrix for Training :\n", confusion_matrix(y_train, y_train_preds, labels=CGSVC.classes_))
print("\nConfusion Matrix for Testing :\n", confusion_matrix(y_test, y_test_preds, labels=CGSVC.classes_))