In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from imblearn.over_sampling import RandomOverSampler


# Load the raw dataset; the "Class" column is used as the label below.
df = pd.read_csv("/content/Creditcard_data.csv")

# Separate the label vector from the feature columns.
y = df["Class"]
X = df.drop(columns="Class")

# Balance the classes with seeded random oversampling.
# NOTE(review): the resampling runs on the full dataset, before the
# train/test split performed later in this file. Rows duplicated by the
# oversampler can therefore end up on both sides of that split, which would
# make the reported accuracies optimistic -- confirm, and consider
# resampling the training portion only.
ros = RandomOverSampler(random_state=42)
X_bal, y_bal = ros.fit_resample(X, y)

# Put features and labels back into one frame for the sampling helpers.
balanced_df = pd.concat([X_bal, y_bal], axis="columns")


def simple_random_sampling(df, frac=0.7):
    """Draw a simple random sample of rows without replacement.

    Args:
        df: DataFrame to sample from.
        frac: Fraction of the rows to keep.

    Returns:
        A new DataFrame with the sampled rows. The seed is fixed at 42, so
        repeated calls on the same input return the same rows.
    """
    sample_options = {"frac": frac, "random_state": 42}
    sampled_rows = df.sample(**sample_options)
    return sampled_rows

def systematic_sampling(df, step=2):
    """Keep every ``step``-th row, starting with the first one.

    Args:
        df: DataFrame to sample from.
        step: Positional stride between the rows that are kept.

    Returns:
        The selected rows in their original order.
    """
    every_step_th = slice(None, None, step)
    return df.iloc[every_step_th]

def stratified_sampling(df, target="Class", frac=0.7):
    """Sample the same fraction of rows from every class of ``target``.

    Each group is sampled on its own with a fixed seed of 42, so the class
    proportions of the input are preserved (up to rounding) and the result
    is reproducible.

    Args:
        df: DataFrame to sample from; must contain the ``target`` column.
        frac: Fraction of rows to keep within each class.
        target: Name of the column that defines the strata.

    Returns:
        A new DataFrame holding the sampled rows of every class, ordered by
        class, with the ``target`` column still present.
    """
    # Sample the groups explicitly rather than through groupby().apply():
    # apply() operating on the grouping column is deprecated in pandas and
    # emits a FutureWarning, and versions that exclude the grouping column
    # would silently drop ``target`` from the result.
    sampled_groups = [
        group.sample(frac=frac, random_state=42)
        for _, group in df.groupby(target)
    ]
    if not sampled_groups:
        # No groups (e.g. empty input): pd.concat rejects an empty list, so
        # hand back an empty frame with the original columns instead.
        return df.iloc[0:0]
    return pd.concat(sampled_groups)

def cluster_sampling(df, cluster_col="Time", n_clusters=5, random_state=42):
    """Return all rows of one randomly chosen quantile cluster.

    The rows are binned into quantile-based clusters of ``cluster_col``; one
    cluster is then picked at random and every row belonging to it is
    returned. The input frame is not modified.

    Args:
        df: DataFrame to sample from; must contain ``cluster_col``.
        cluster_col: Numeric column whose quantiles define the clusters.
        n_clusters: Number of quantile bins to request. Fewer clusters may
            result when bin edges coincide.
        random_state: Seed for picking the cluster. Pass ``None`` for a
            different, non-reproducible choice on every call.

    Returns:
        A new DataFrame with the rows of the chosen cluster and exactly the
        columns of ``df``.
    """
    # Keep the labels in a separate Series instead of writing a helper
    # column into ``df``: mutating the caller's frame would leak a "cluster"
    # column into every later use of that frame.
    labels = pd.qcut(
        df[cluster_col], q=n_clusters, labels=False, duplicates="drop"
    )

    # Rows with a missing cluster_col value get a NaN label; NaN never
    # compares equal to itself, so it must not be a candidate.
    candidates = labels.dropna().unique()

    # Seeded generator so the choice is reproducible, like the other samplers.
    rng = np.random.default_rng(random_state)
    chosen_cluster = rng.choice(candidates)

    return df[labels == chosen_cluster]

def bootstrap_sampling(df):
    """Draw a bootstrap sample: as many rows as ``df``, with replacement.

    Args:
        df: DataFrame to resample.

    Returns:
        A new DataFrame of the same length as ``df`` in which rows may
        repeat. The seed is fixed at 42, so the draw is reproducible.
    """
    bootstrap_options = {"frac": 1, "replace": True, "random_state": 42}
    resampled = df.sample(**bootstrap_options)
    return resampled

# One sample of the balanced data per sampling strategy. The keys become the
# row labels of the accuracy table printed at the end.
#
# cluster_sampling is handed its own copy of the frame: any helper column it
# writes while clustering must not leak into balanced_df, otherwise every
# sample drawn after it (here: Bootstrap) would carry that column as an
# extra feature.
sampling_methods = {
    "SimpleRandom": simple_random_sampling(balanced_df),
    "Systematic": systematic_sampling(balanced_df),
    "Stratified": stratified_sampling(balanced_df),
    "Cluster": cluster_sampling(balanced_df.copy()),
    "Bootstrap": bootstrap_sampling(balanced_df),
}


# Classifiers to compare, keyed by the column label used in the accuracy
# table. Tree-based models are seeded so their results are repeatable.
_model_entries = [
    ("M1_LogisticRegression", LogisticRegression(max_iter=1000)),
    ("M2_KNN", KNeighborsClassifier()),
    ("M3_SVM", SVC()),
    ("M4_DecisionTree", DecisionTreeClassifier(random_state=42)),
    ("M5_RandomForest", RandomForestClassifier(random_state=42)),
]
models = dict(_model_entries)


# Accuracy in percent, rounded to two decimals:
# results[sampling name][model name] -> accuracy.
results = {}

for samp_name, samp_df in sampling_methods.items():
    # Separate the label column from the features of this sample.
    y_s = samp_df["Class"]
    X_s = samp_df.drop(columns="Class")

    # Seeded 70/30 hold-out split, stratified on the label.
    # NOTE(review): if a sample contains duplicated rows (e.g. from
    # oversampling or sampling with replacement upstream), identical rows can
    # land in both the training and the test part and inflate the accuracies
    # reported below -- confirm.
    X_train, X_test, y_train, y_test = train_test_split(
        X_s,
        y_s,
        test_size=0.3,
        random_state=42,
        stratify=y_s,
    )

    scores = {}
    results[samp_name] = scores

    for model_name, model in models.items():
        # The scaler is part of the pipeline, so it is fitted together with
        # the model on the training split only.
        pipe = Pipeline(steps=[("scaler", StandardScaler()), ("model", model)])

        y_pred = pipe.fit(X_train, y_train).predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        scores[model_name] = round(acc * 100, 2)


# One row per sampling strategy, one column per model.
results_df = pd.DataFrame(results).transpose()
print("\nAccuracy Table (%):\n")
print(results_df)


  return df.groupby(target, group_keys=False).apply(



Accuracy Table (%):

              M1_LogisticRegression  M2_KNN  M3_SVM  M4_DecisionTree  \
SimpleRandom                  94.08   97.51   97.82            99.38   
Systematic                    88.65   96.51   96.94            98.25   
Stratified                    90.97   95.64   97.51            97.82   
Cluster                      100.00  100.00  100.00           100.00   
Bootstrap                     95.41   98.69   98.69            99.56   

              M5_RandomForest  
SimpleRandom            100.0  
Systematic              100.0  
Stratified              100.0  
Cluster                 100.0  
Bootstrap               100.0  
