# Baseline SVM Model

**Before running this notebook, run `Pre-Processed.ipynb` (located in `cmse492_project\Data\Pre-Processing`), which creates the processed dataframe file `processed.csv`. Place a copy of that file in this directory (`cmse492_project\Models\SVMModels`).**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from tqdm import tqdm

In [2]:
# Load the pre-processed dataset; 'processed.csv' must sit next to this notebook.
df = pd.read_csv(filepath_or_buffer='processed.csv')

**I. Train-Test Split**

In [3]:
# Target is the 'Class' column; the features are every remaining column except
# the two time columns, which are dropped alongside the target.
y = df['Class']
X = df.drop(columns=['Class', 'Time', 'TransactionTime'])

# Hold out 10% of the rows for testing, stratified on the target so both
# partitions keep the same class proportions. The seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    stratify=y,
    random_state=42,
)

In [6]:
# Baseline model: standardize the features, then fit a linear-kernel SVC with
# balanced class weights. The step names match what make_pipeline would generate.
pipeline_linear = Pipeline(steps=[
    ('standardscaler', StandardScaler()),
    ('svc', SVC(kernel='linear', class_weight='balanced', random_state=42)),
])

In [7]:
# Show the pipeline's steps and their non-default parameters as plain text.
print(pipeline_linear)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc',
                 SVC(class_weight='balanced', kernel='linear',
                     random_state=42))])


**II. Linear and Non-Linear Pipeline**

In [None]:
# Two candidate models that differ only in the SVC kernel ('linear' vs 'rbf').
# Both standardize the features first and use balanced class weights with a
# fixed seed, so the kernel is the only thing being compared.
pipeline_linear, pipeline_rbf = (
    make_pipeline(
        StandardScaler(),
        SVC(kernel=kernel_name, class_weight='balanced', random_state=42),
    )
    for kernel_name in ('linear', 'rbf')
)


In [None]:
# 5-fold cross-validation of the linear-kernel pipeline on the training split.
#
# cross_val_score already fits and scores all five folds in one call and
# returns one score per fold. The previous version called it inside a
# 5-iteration loop and kept a single element each time, which re-ran the whole
# cross-validation five times (25 SVM fits instead of 5) for the same numbers.
#
# The result is kept as the returned NumPy array (not a list) so that both
# np.mean(cv_scores_linear) and cv_scores_linear.mean() work downstream.
# With no `scoring` argument the estimator's own score method is used, which
# for a classifier is accuracy.
# NOTE(review): class_weight='balanced' suggests imbalanced classes; accuracy
# may be a weak model-selection metric here -- confirm the intended metric.
cv_scores_linear = cross_val_score(pipeline_linear, X_train, y_train, cv=5)

In [None]:
# 5-fold cross-validation of the RBF-kernel pipeline on the training split.
#
# A single cross_val_score call fits and scores all five folds and returns one
# score per fold. The previous loop re-ran the full cross-validation on every
# iteration and kept only one element of each result (25 fits instead of 5).
# The scores stay in the returned NumPy array so .mean()/.std() are available.
cv_scores_rbf = cross_val_score(pipeline_rbf, X_train, y_train, cv=5)

# Print the results of cross-validation for both kernels
print(f"Linear Kernel - Mean Accuracy: {np.mean(cv_scores_linear):.4f}, Std: {np.std(cv_scores_linear):.4f}")
print(f"RBF Kernel - Mean Accuracy: {np.mean(cv_scores_rbf):.4f}, Std: {np.std(cv_scores_rbf):.4f}")


In [None]:
# Pick the pipeline with the higher mean cross-validation score; ties go to RBF.
# np.mean is used instead of the .mean() method because the score containers
# may be plain Python lists, which have no .mean() and would raise
# AttributeError. np.mean accepts lists and NumPy arrays alike.
mean_score_linear = np.mean(cv_scores_linear)
mean_score_rbf = np.mean(cv_scores_rbf)
best_pipeline = pipeline_linear if mean_score_linear > mean_score_rbf else pipeline_rbf

In [None]:
# Refit the selected pipeline on the full training split, then predict labels
# for the held-out test split. fit() returns the fitted pipeline itself, so the
# two steps can be chained.
y_pred_best = best_pipeline.fit(X_train, y_train).predict(X_test)