# Support Vector Machine (SVM) model

This notebook loads the best-performing trained SVM model and evaluates it on the test set.

In [1]:
# Importing libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

sns.set_style('whitegrid')
plt.style.use('ggplot')

from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn import feature_extraction, linear_model, model_selection, preprocessing
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, confusion_matrix, roc_curve, auc

from sklearn.svm import SVC

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

from skorch import NeuralNetClassifier
from skorch.callbacks import EarlyStopping

import pickle
import time

## Preparing test dataset

In [2]:
# Read the already-preprocessed test split from disk
test_dataset = pd.read_csv('test_dataset.csv')

# Positional split: the final column is taken as the target (y_test) and
# every column before it is kept as a feature (X_test)
target_position = test_dataset.shape[1] - 1
X_test = test_dataset.iloc[:, :target_position]
y_test = test_dataset.iloc[:, target_position]

## Loading pre-trained model

In [3]:
# Deserialize the pre-trained model that was saved with 'pickle'.
# NOTE: unpickling can execute arbitrary code, so 'best_svm.pkl' should only
# be loaded when it comes from a trusted source.
with open('best_svm.pkl', 'rb') as model_file:
    best_model_svm = pickle.load(model_file)

## Testing on test set

In [None]:
# Time inference on the test set.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed durations; unlike time.time() it is not affected by system clock
# adjustments that happen while the prediction is running.
start_time = time.perf_counter()

# Make predictions on test data
y_pred_svm = best_model_svm.predict(X_test)

# Record end time
end_time = time.perf_counter()

# Elapsed prediction time, in seconds
testing_time = end_time - start_time
print("Testing Time:", testing_time, "seconds")

## Evaluating the model

In [None]:
# Evaluate the model

# Label-based metrics are computed from the hard class predictions
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm)

# ROC AUC is a ranking metric: it needs a continuous score per sample, not the
# thresholded 0/1 predictions (which collapse the ROC curve to a single
# operating point). decision_function provides such a score and does not
# require the SVM to have been fitted with probability estimates enabled.
y_score_svm = best_model_svm.decision_function(X_test)
roc_auc_svm = roc_auc_score(y_test, y_score_svm)

print("Accuracy:", accuracy_svm)
print("Precision:", precision_svm)
print("Recall:", recall_svm)
print("F1 Score:", f1_svm)
print("ROC AUC Score:", roc_auc_svm)

In [None]:
# Build the confusion matrix from the true labels and the hard predictions
conf_matrix_svm = confusion_matrix(y_test, y_pred_svm)

# Tick text and positions shared by both axes; heatmap cells are centred on
# half-integer coordinates, hence 0.5 and 1.5
class_labels = ['Neutral or dissatisfied', 'Satisfied']
cell_centres = [0.5, 1.5]

# Draw the matrix as an annotated heatmap using the explicit Axes interface
fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(conf_matrix_svm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_xticks(cell_centres)
ax.set_xticklabels(class_labels)
ax.set_yticks(cell_centres)
ax.set_yticklabels(class_labels)

plt.show()


In [None]:
# Calculate ROC curve and AUC
# roc_curve sweeps a decision threshold over continuous scores. Feeding it the
# hard 0/1 predictions yields only one interior point, so the plotted "curve"
# and its area would describe a single threshold rather than the classifier's
# ranking quality. Use the SVM's decision_function scores instead.
y_score_svm = best_model_svm.decision_function(X_test)
fpr_svm, tpr_svm, _ = roc_curve(y_test, y_score_svm)
roc_auc_svm = auc(fpr_svm, tpr_svm)

# Plot ROC curve
plt.figure(figsize=(8, 6))
plt.plot(fpr_svm, tpr_svm, color='blue', lw=2, label='ROC curve (area = %0.2f)' % roc_auc_svm)
# Diagonal reference line: performance of a random classifier
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.xlim([0.0, 1.0])
# Slight headroom above 1.0 so the curve is not clipped at the top edge
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
plt.legend(loc='lower right')
plt.show()