In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
from imblearn.combine import SMOTETomek

In [3]:
df = pd.read_csv("Creditcard_data.csv")

X = df.drop("Class", axis=1)
y = df["Class"]

In [4]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [5]:
sampling_methods = {
    "Sampling1_RandomUnder": RandomUnderSampler(random_state=42),
    "Sampling2_RandomOver": RandomOverSampler(random_state=42),
    "Sampling3_SMOTE": SMOTE(random_state=42),
    "Sampling4_NearMiss": NearMiss(),
    "Sampling5_SMOTETomek": SMOTETomek(random_state=42)
}

models = {
    "M1_LogisticRegression": LogisticRegression(max_iter=1000),
    "M2_DecisionTree": DecisionTreeClassifier(),
    "M3_RandomForest": RandomForestClassifier(),
    "M4_KNN": KNeighborsClassifier(),
    "M5_SVM": SVC()
}

In [6]:
results = pd.DataFrame(index=models.keys(), columns=sampling_methods.keys())

for samp_name, sampler in sampling_methods.items():

    X_resampled, y_resampled = sampler.fit_resample(X_scaled, y)

    X_train, X_test, y_train, y_test = train_test_split(
        X_resampled, y_resampled, test_size=0.3, random_state=42
    )

    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred) * 100
        results.loc[model_name, samp_name] = round(acc, 2)

In [7]:
print("\nFinal Accuracy Comparison Table:\n")
print(results)



Final Accuracy Comparison Table:

                      Sampling1_RandomUnder Sampling2_RandomOver  \
M1_LogisticRegression                 33.33                 91.7   
M2_DecisionTree                       66.67                99.13   
M3_RandomForest                       33.33                99.78   
M4_KNN                                33.33                96.51   
M5_SVM                                16.67                96.51   

                      Sampling3_SMOTE Sampling4_NearMiss Sampling5_SMOTETomek  
M1_LogisticRegression            91.7              100.0                 91.7  
M2_DecisionTree                 97.38              83.33                97.82  
M3_RandomForest                 99.34              66.67                99.34  
M4_KNN                          94.54              83.33                94.54  
M5_SVM                          96.94              16.67                96.94  


In [8]:
results.to_csv("sampling_model_accuracy_results.csv")

