In [1]:
import pandas as pd
from imblearn.over_sampling import SMOTE

# Load dataset
df = pd.read_csv("/content/Creditcard_data.csv")

X = df.drop("Class", axis=1)
y = df["Class"]

# Balance dataset
smote = SMOTE(random_state=42)
X_bal, y_bal = smote.fit_resample(X, y)

balanced_df = pd.concat([X_bal, y_bal], axis=1)

print(balanced_df["Class"].value_counts())


Class
0    763
1    763
Name: count, dtype: int64


In [2]:
samples = []
for i in range(5):
    samples.append(
        balanced_df.sample(frac=0.7, random_state=42+i)
    )


In [3]:
from imblearn.under_sampling import RandomUnderSampler, NearMiss
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.combine import SMOTEENN

sampling_methods = {
    "Sampling1": RandomUnderSampler(),
    "Sampling2": RandomOverSampler(),
    "Sampling3": SMOTE(),
    "Sampling4": NearMiss(),
    "Sampling5": SMOTEENN()
}


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

models = {
    "M1": LogisticRegression(max_iter=1000),
    "M2": DecisionTreeClassifier(),
    "M3": RandomForestClassifier(),
    "M4": KNeighborsClassifier(),
    "M5": SVC()
}


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

results = {}

for s_name, sampler in sampling_methods.items():
    results[s_name] = {}
    for m_name, model in models.items():
        X_s, y_s = sampler.fit_resample(
            balanced_df.drop("Class", axis=1),
            balanced_df["Class"]
        )

        X_train, X_test, y_train, y_test = train_test_split(
            X_s, y_s, test_size=0.3, random_state=42
        )

        model.fit(X_train, y_train)
        preds = model.predict(X_test)

        acc = accuracy_score(y_test, preds) * 100
        results[s_name][m_name] = round(acc, 2)

results


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

{'Sampling1': {'M1': 90.17,
  'M2': 96.29,
  'M3': 99.56,
  'M4': 84.72,
  'M5': 69.43},
 'Sampling2': {'M1': 90.39,
  'M2': 97.16,
  'M3': 99.34,
  'M4': 84.72,
  'M5': 67.69},
 'Sampling3': {'M1': 90.39,
  'M2': 98.03,
  'M3': 99.34,
  'M4': 84.72,
  'M5': 67.69},
 'Sampling4': {'M1': 92.14, 'M2': 97.6, 'M3': 99.34, 'M4': 85.59, 'M5': 68.56},
 'Sampling5': {'M1': 94.51,
  'M2': 99.13,
  'M3': 99.71,
  'M4': 96.82,
  'M5': 75.72}}