# OneClassSVM Model

In [83]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import OneClassSVM


In [84]:
df = pd.read_csv('../../Final Model/data/preprocessed_data.csv', encoding='windows-1252')

In [85]:
df.head()

Unnamed: 0,TaskName,VMakeModel,VMake,VYear,VMid,AdjustedPrice,Label
0,Front Driver Side Window Regulator Replacement,HYUNDAI I20,HYUNDAI,2012.0,HYU32872,347.0,1
1,Capped Price Service,FORD RANGER,FORD,2018.0,FOR43448,365.0,1
2,Logbook,TOYOTA RAV4,TOYOTA,2016.0,TOY30086,370.0,1
3,Cabin Filter Replacement,NISSAN NAVARA,NISSAN,2018.0,NIS41224,135.0,1
4,Electrical Problem Inspection,MAZDA 3,MAZDA,2015.0,MAZ30646,140.0,1


In [86]:
# Filter the DataFrame to include only approved tasks
df_approved = df[df['Label'] == 1]
# Split the approved data: 50% for One-Class SVM training
df_approved_train, df_approved_holdout = train_test_split(df_approved, test_size=0.5, random_state=42)
features = ['TaskName', 'VMakeModel', 'VYear', 'AdjustedPrice', 'Label']
df_approved_train = df_approved_train[features].dropna()

In [87]:
df_approved.head()

Unnamed: 0,TaskName,VMakeModel,VMake,VYear,VMid,AdjustedPrice,Label
0,Front Driver Side Window Regulator Replacement,HYUNDAI I20,HYUNDAI,2012.0,HYU32872,347.0,1
1,Capped Price Service,FORD RANGER,FORD,2018.0,FOR43448,365.0,1
2,Logbook,TOYOTA RAV4,TOYOTA,2016.0,TOY30086,370.0,1
3,Cabin Filter Replacement,NISSAN NAVARA,NISSAN,2018.0,NIS41224,135.0,1
4,Electrical Problem Inspection,MAZDA 3,MAZDA,2015.0,MAZ30646,140.0,1


In [88]:
# Get the rejected jobs
df_rejected = df[df['Label'] == 0]

# Combine holdout approvals + rejections for evaluation
df_eval = pd.concat([df_approved_holdout, df_rejected], ignore_index=True)
df_eval = df_eval[features].dropna()


In [89]:
# Define the features to be used for the One-Class SVM model
# These features are selected based on their relevance to the task and their ability to represent the data
train_features = ['TaskName', 'VMakeModel', 'VYear', 'AdjustedPrice']
eval_features = ['TaskName', 'VMakeModel', 'VYear', 'AdjustedPrice', 'Label']
X_train = df_approved_train[features]
X_eval = df_eval[eval_features]
# Preprocessing Columns
categorical_cols = ['TaskName', 'VMakeModel']
numerical_cols = ['VYear', 'AdjustedPrice']

In [90]:
# Preprocessor for One-Class SVM
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ('num', StandardScaler(), numerical_cols)
])

In [91]:
# Pipeline for One-Class SVM
pipeline = Pipeline([
    ('preprocess', preprocessor),
    ('ocsvm', OneClassSVM(kernel='rbf', gamma='auto', nu=0.05))
])

In [92]:
# Train model on  half the approved data
pipeline.fit(X_train)

In [93]:
# Predict: -1 = anomaly, 1 = normal
preds = pipeline.predict(X_eval)

# Map to approval/rejection for comparison
df_eval['SVM_Prediction'] = preds
df_eval['PredictedLabel'] = df_eval['SVM_Prediction'].map({1: 1, -1: 0})

# Confusion matrix
from sklearn.metrics import confusion_matrix, classification_report

y_true = df_eval['Label']
y_pred = df_eval['PredictedLabel']

print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred))

[[  6781 100686]
 [ 13250 251801]]
              precision    recall  f1-score   support

           0       0.34      0.06      0.11    107467
           1       0.71      0.95      0.82    265051

    accuracy                           0.69    372518
   macro avg       0.53      0.51      0.46    372518
weighted avg       0.61      0.69      0.61    372518



In [94]:
df_eval.head()

Unnamed: 0,TaskName,VMakeModel,VYear,AdjustedPrice,Label,SVM_Prediction,PredictedLabel
0,Replace Remote/key Battery,VOLKSWAGEN GOLF,2022.0,15.0,1,-1,0
1,Capped Price Service,RENAULT TRAFIC III,2019.0,836.0,1,1,1
2,Front Brake Pad Replacement,TOYOTA COROLLA,2010.0,558.0,1,1,1
3,Custom Service,CROWN CG15S-5,2008.0,1398.0,1,1,1
4,Tyre Replacement,TOYOTA LANDCRUISER,2021.0,40.0,1,-1,0


In [95]:
# View flagged proposals
print(df[df['Decision'] == 'Reject'])

KeyError: 'Decision'