In [5]:
# ==========================================
# Install all required dependencies safely
# (Runs in the CURRENT Jupyter kernel)
# ==========================================

import sys

!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install numpy pandas matplotlib seaborn
!{sys.executable} -m pip install scikit-learn imbalanced-learn


Defaulting to user installation because normal site-packages is not writeable
Collecting pip
  Downloading pip-26.0-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-26.0-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ----------------- ---------------------- 0.8/1.8 MB 5.1 MB/s eta 0:00:01
   ---------------------------------------- 1.8/1.8 MB 4.6 MB/s  0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.3
    Uninstalling pip-25.3:
      Successfully uninstalled pip-25.3
Successfully installed pip-26.0
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting imbalanced-learn
  Downloading imbalanced_learn-0.14.1-py3-none-any.whl.metadata (8.9 kB)
Collecting sklearn-compat<0.2,>=0.1.5 (from imbalanced-learn)
  Downloading sklearn_compat-0.1.5-py3-none-any.whl.metadata 

In [6]:
# 1. Import Libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from imblearn.under_sampling import RandomUnderSampler, NearMiss
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.combine import SMOTETomek

# 2. Load Dataset
df = pd.read_csv("Creditcard_data.csv")

# 3. Separate Features & Target
X = df.drop("Class", axis=1)
y = df["Class"]

# 4. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# 5. Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 6. Sampling Techniques
samplers = {
    "Sampling1_RUS": RandomUnderSampler(random_state=42),
    "Sampling2_ROS": RandomOverSampler(random_state=42),
    "Sampling3_SMOTE": SMOTE(random_state=42),
    "Sampling4_NearMiss": NearMiss(),
    "Sampling5_SMOTETomek": SMOTETomek(random_state=42)
}

# 7. Models
models = {
    "M1_LogisticRegression": LogisticRegression(max_iter=1000),
    "M2_DecisionTree": DecisionTreeClassifier(),
    "M3_RandomForest": RandomForestClassifier(),
    "M4_KNN": KNeighborsClassifier(),
    "M5_NaiveBayes": GaussianNB()
}

# 8. Result Storage
results = pd.DataFrame(index=models.keys(), columns=samplers.keys())

# 9. Training Loop
for s_name, sampler in samplers.items():
    X_res, y_res = sampler.fit_resample(X_train, y_train)

    for m_name, model in models.items():
        model.fit(X_res, y_res)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        results.loc[m_name, s_name] = round(acc * 100, 2)

# 10. Display Results
print("\nAccuracy Comparison Table:\n")
print(results)



Accuracy Comparison Table:

                      Sampling1_RUS Sampling2_ROS Sampling3_SMOTE  \
M1_LogisticRegression          69.4         92.24           93.97   
M2_DecisionTree               58.19         98.71           98.28   
M3_RandomForest               78.88         99.14           98.71   
M4_KNN                        89.66         96.98           94.83   
M5_NaiveBayes                 62.07         95.26           95.26   

                      Sampling4_NearMiss Sampling5_SMOTETomek  
M1_LogisticRegression              42.67                93.97  
M2_DecisionTree                    28.88                96.98  
M3_RandomForest                    30.17                98.71  
M4_KNN                             98.28                94.83  
M5_NaiveBayes                       2.59                95.26  
