In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from imblearn.under_sampling import RandomUnderSampler, NearMiss
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.combine import SMOTEENN

df = pd.read_csv("Creditcard_data.csv")

X = df.drop('Class', axis=1)
y = df['Class']

samplers = {
    "Sampling1": RandomUnderSampler(),
    "Sampling2": RandomOverSampler(),
    "Sampling3": SMOTE(),
    "Sampling4": NearMiss(),
    "Sampling5": SMOTEENN()
}

models = {
    "M1": LogisticRegression(max_iter=200, solver='liblinear'),
    "M2": DecisionTreeClassifier(),
    "M3": RandomForestClassifier(n_estimators=50),
    "M4": LinearSVC(max_iter=2000),
    "M5": KNeighborsClassifier()
}

results = {}

for s_name, sampler in samplers.items():
    X_res, y_res = sampler.fit_resample(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.3, random_state=42)

    results[s_name] = {}

    for m_name, model in models.items():
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        acc = accuracy_score(y_test, preds)*100
        results[s_name][m_name] = round(acc,2)

print(results)




{'Sampling1': {'M1': 66.67, 'M2': 66.67, 'M3': 83.33, 'M4': 50.0, 'M5': 16.67}, 'Sampling2': {'M1': 91.92, 'M2': 99.13, 'M3': 99.56, 'M4': 91.48, 'M5': 98.47}, 'Sampling3': {'M1': 91.7, 'M2': 97.6, 'M3': 99.13, 'M4': 92.79, 'M5': 82.97}, 'Sampling4': {'M1': 16.67, 'M2': 16.67, 'M3': 16.67, 'M4': 100.0, 'M5': 83.33}, 'Sampling5': {'M1': 95.11, 'M2': 98.85, 'M3': 99.14, 'M4': 95.4, 'M5': 94.83}}
