In [4]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    confusion_matrix,
    f1_score
)
from imblearn.over_sampling import SMOTE

# Load dataset
data_path = "creditcard.csv"
df = pd.read_csv(data_path)

# Check original class distribution
print("Original class distribution:\n", df['Class'].value_counts())

# ❌ Removed sampling — now using 100% of the dataset

# Split features and target
X = df.drop(columns=['Class'])
y = df['Class']

# Apply SMOTE to balance the dataset (minority class up to 30% of majority)
smote = SMOTE(sampling_strategy=0.3, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Show class distribution after SMOTE
print("Resampled class distribution:\n", pd.Series(y_resampled).value_counts())

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define hyperparameter space for SVM
param_dist = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto', 0.01],
    'kernel': ['linear', 'rbf']
}

# RandomizedSearchCV for hyperparameter tuning
random_search = RandomizedSearchCV(
    estimator=SVC(class_weight='balanced', random_state=42),
    param_distributions=param_dist,
    n_iter=5,
    cv=3,
    scoring='f1_macro',
    n_jobs=-1,
    random_state=42
)

# Train the model
random_search.fit(X_train_scaled, y_train)
best_svm = random_search.best_estimator_

# Make predictions
svm_pred = best_svm.predict(X_test_scaled)

# Show best parameters
print("Best Parameters Found:", random_search.best_params_)

# Evaluate the model
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
print("SVM F1 Score (Macro):", f1_score(y_test, svm_pred, average='macro'))
print("Classification Report:\n", classification_report(y_test, svm_pred))

# Confusion Matrix
svm_cm = confusion_matrix(y_test, svm_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(svm_cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - SVM")
plt.tight_layout()
plt.show()


Original class distribution:
 Class
0    284315
1       492
Name: count, dtype: int64
Resampled class distribution:
 Class
0    284315
1     85294
Name: count, dtype: int64


KeyboardInterrupt: 