In [6]:
import pandas as pd

# Location of the fraud data set; the first CSV column becomes the row index.
DATA_URL = 'https://raw.githubusercontent.com/saintsaintsan/Supervised-Machine-Learning/refs/heads/main/data/fraud.csv'

df = pd.read_csv(DATA_URL, index_col=0)

# Target vector: the 'Class' label column as a NumPy array.
y = df['Class'].values

# Discard the first remaining column by position.
# NOTE(review): presumably a non-feature column (e.g. an id or timestamp) -
# confirm against the CSV header.
df = df.iloc[:, 1:]

# Feature matrix: every remaining column except the label.
X = df.drop(columns='Class').values

# Show how many rows fall in each class (the target is heavily imbalanced).
df[['Class']].value_counts()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0,21337
1,356


In [7]:
# Hold out 40% of the rows for testing and keep 60% for training.
# A fixed random_state makes the shuffle, and therefore the split, repeatable.
# NOTE(review): no stratify= is passed, so the share of the rare positive class
# in each split is left to chance - consider stratify=y for this target.

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.40, random_state=1
)

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

#--------------------------------------------------
## ------------ SVM Classifier ------------------##
#--------------------------------------------------


def _build_svc_pipeline(**svc_params):
    """Return an unfitted 'scaler' -> 'svc' pipeline.

    The features are standardised before reaching the SVC, and every SVC is
    created with class_weight='balanced'; the remaining keyword arguments
    (kernel, degree, gamma, ...) are forwarded to SVC unchanged.
    """
    return Pipeline([
        ('scaler', StandardScaler()),
        ('svc', SVC(class_weight='balanced', **svc_params)),
    ])


## Linear kernel
svcL_pipeline = _build_svc_pipeline(kernel='linear')
svcL_pipeline.fit(X_train, y_train)

## Polynomial kernel (degree 3)
svcPoly_pipeline = _build_svc_pipeline(kernel='poly', degree=3)
svcPoly_pipeline.fit(X_train, y_train)

## RBF kernel (gamma chosen by the 'scale' heuristic)
svcRBF_pipeline = _build_svc_pipeline(kernel='rbf', gamma='scale')
svcRBF_pipeline.fit(X_train, y_train)

In [9]:
#--------------------------------------------------
## Model evaluation: linear kernel, test set     ##
#--------------------------------------------------
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Hard 0/1 predictions for the held-out rows.
ypred_test = svcL_pipeline.predict(X_test)

# Per-class precision/recall/F1 and the confusion matrix
# (rows = true class, columns = predicted class).
report_clf = classification_report(y_test, ypred_test)
mat_clf = confusion_matrix(y_test, ypred_test)

print(mat_clf)
print(report_clf)

# NOTE(review): roc_auc_score receives hard labels here, not continuous scores,
# so the ROC curve has a single operating point and the printed value is the
# mean of the two per-class recalls. For a threshold-free AUC, consider passing
# svcL_pipeline.decision_function(X_test) instead.
auc = roc_auc_score(y_test, ypred_test)
print(auc)

[[8299  242]
 [  16  121]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98      8541
           1       0.33      0.88      0.48       137

    accuracy                           0.97      8678
   macro avg       0.67      0.93      0.73      8678
weighted avg       0.99      0.97      0.98      8678

0.9274388800436196


In [10]:
# Linear kernel on the training rows, to compare against the test-set figures.
ypred_train = svcL_pipeline.predict(X_train)

report_clf = classification_report(y_train, ypred_train)
mat_clf = confusion_matrix(y_train, ypred_train)

print(mat_clf)
print(report_clf)

[[12446   350]
 [   19   200]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.99     12796
           1       0.36      0.91      0.52       219

    accuracy                           0.97     13015
   macro avg       0.68      0.94      0.75     13015
weighted avg       0.99      0.97      0.98     13015



In [11]:
# Polynomial kernel: predict labels on the test set.
ypred_test = svcPoly_pipeline.predict(X_test)

report_clf = classification_report(y_test, ypred_test)
mat_clf = confusion_matrix(y_test, ypred_test)

print(mat_clf)
print(report_clf)

# NOTE(review): computed from hard labels, so this equals the mean of the two
# per-class recalls rather than a threshold-free AUC; consider passing
# svcPoly_pipeline.decision_function(X_test) instead.
auc = roc_auc_score(y_test, ypred_test)
print(auc)

[[8485   56]
 [  28  109]]
              precision    recall  f1-score   support

           0       1.00      0.99      1.00      8541
           1       0.66      0.80      0.72       137

    accuracy                           0.99      8678
   macro avg       0.83      0.89      0.86      8678
weighted avg       0.99      0.99      0.99      8678

0.8945319143299346


In [12]:
# Polynomial kernel: predict labels on the train set.
# The predictions must be recomputed in this cell; without this line the
# metrics below would be built from whatever an earlier cell last stored in
# ypred_train (a different model's predictions) on a fresh Run All.
ypred_train = svcPoly_pipeline.predict(X_train)

# Confusion matrix (rows = true class, columns = predicted class) and the
# per-class precision/recall/F1 summary for the training rows.
mat_clf = confusion_matrix(y_train, ypred_train)
report_clf = classification_report(y_train, ypred_train)

print(mat_clf)
print(report_clf)

[[12747    49]
 [    6   213]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12796
           1       0.81      0.97      0.89       219

    accuracy                           1.00     13015
   macro avg       0.91      0.98      0.94     13015
weighted avg       1.00      1.00      1.00     13015



In [18]:
# RBF kernel: predict labels on the test set.
ypred_test = svcRBF_pipeline.predict(X_test)

report_clf = classification_report(y_test, ypred_test)
mat_clf = confusion_matrix(y_test, ypred_test)

print(mat_clf)
print(report_clf)

# NOTE(review): computed from hard labels, so this equals the mean of the two
# per-class recalls rather than a threshold-free AUC; consider passing
# svcRBF_pipeline.decision_function(X_test) instead.
auc = roc_auc_score(y_test, ypred_test)
print(auc)

[[8450   91]
 [  31  106]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      8541
           1       0.54      0.77      0.63       137

    accuracy                           0.99      8678
   macro avg       0.77      0.88      0.81      8678
weighted avg       0.99      0.99      0.99      8678

0.8815340688153409


In [19]:
# RBF kernel: predict labels on the train set, to compare with the test-set
# figures for the same model.
ypred_train = svcRBF_pipeline.predict(X_train)

report_clf = classification_report(y_train, ypred_train)
mat_clf = confusion_matrix(y_train, ypred_train)

print(mat_clf)
print(report_clf)


[[12677   119]
 [    2   217]]
              precision    recall  f1-score   support

           0       1.00      0.99      1.00     12796
           1       0.65      0.99      0.78       219

    accuracy                           0.99     13015
   macro avg       0.82      0.99      0.89     13015
weighted avg       0.99      0.99      0.99     13015

