In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Install dependencies as needed:
# pip install kagglehub[pandas-datasets]
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Set the path to the file you'd like to load
file_path = "creditcard.csv"

# Load the latest version
df = kagglehub.load_dataset(
  KaggleDatasetAdapter.PANDAS,
  "mlg-ulb/creditcardfraud",
  file_path,
)
df =df.head(50000)
print("First 5 records:", df.head())

  df = kagglehub.load_dataset(


First 5 records:    Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26    

In [4]:
# Keep only non-fraud (normal) data for training
normal_data = df[df['Class'] == 0].drop(['Class', 'Time'], axis=1)

# Standardize
scaler = StandardScaler()
X_train = scaler.fit_transform(normal_data)

# Train OneClassSVM
clf_svm = OneClassSVM(kernel="rbf", gamma=0.1, nu=0.01)
clf_svm.fit(X_train)

# Test on all data
X_all = scaler.transform(df.drop(['Class', 'Time'], axis=1))
y_pred = clf_svm.predict(X_all)

# Convert prediction to 0 (normal) and 1 (anomaly)
y_pred = np.where(y_pred == -1, 1, 0)

# Evaluate
from sklearn.metrics import classification_report
print(classification_report(df['Class'], y_pred))


              precision    recall  f1-score   support

           0       1.00      0.96      0.98     49852
           1       0.07      0.97      0.14       148

    accuracy                           0.96     50000
   macro avg       0.54      0.96      0.56     50000
weighted avg       1.00      0.96      0.98     50000



In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler


# 1. Feature Scaling (Important for SVM)
scaler = StandardScaler()
X = df.drop('Class', axis=1)  # Features
y = df['Class']              # Target variable
X_scaled = scaler.fit_transform(X)


# 2. Split Data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


# 3. Train One-Class SVM
# Use OneClassSVM instead of SVC
ocsvm = OneClassSVM(kernel='rbf', nu=0.01) # OneClassSVM doesn't use class_weight or a separate 'y' for training on a single class
# Train only on the majority class (non-fraudulent)
ocsvm.fit(X_train[y_train == 0])

# 4. Predict on the test set.
y_pred_ocsvm = ocsvm.predict(X_test)

# The OneClassSVM output needs to be translated to match the binary classification problem
# -1 for outliers (anomalies), 1 for inliers (normal)
# We map -1 to 1 (anomaly) and 1 to 0 (normal) to match the original 'Class' column
y_pred_ocsvm = np.where(y_pred_ocsvm == -1, 1, 0)


# 5. Evaluate the model
accuracy_ocsvm = accuracy_score(y_test, y_pred_ocsvm)
print(f"Accuracy of the One-Class SVM classifier: {accuracy_ocsvm}")
print(classification_report(y_test, y_pred_ocsvm))

Accuracy of the One-Class SVM classifier: 0.98
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      9966
           1       0.15      1.00      0.25        34

    accuracy                           0.98     10000
   macro avg       0.57      0.99      0.62     10000
weighted avg       1.00      0.98      0.99     10000



In [14]:
# Normal SVM

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the SVM classifier
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the SVM classifier: {accuracy}")
print(classification_report(y_test, y_pred))

Accuracy of the SVM classifier: 0.9981
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9966
           1       0.83      0.56      0.67        34

    accuracy                           1.00     10000
   macro avg       0.91      0.78      0.83     10000
weighted avg       1.00      1.00      1.00     10000



Even if the normal svm has a high accuracy but it did not detect all the anomaly