In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.utils import resample
# Load the raw dataset from the working directory; a later cell reads its
# 'Class' column as the prediction target.
data = pd.read_csv(filepath_or_buffer='Creditcard_data.csv')



In [None]:
# Split the frame into predictors and target. `DataFrame.drop()` with no
# arguments raises ValueError, so the target column must be named explicitly;
# this also keeps the label out of the feature matrix.
features = data.drop(columns=['Class'])
labels = data['Class']

# Oversample with SMOTE (fixed seed) to produce the balanced feature/label
# pair that all later sampling draws from.
smote_resampler = SMOTE(random_state=14)
balanced_features, balanced_labels = smote_resampler.fit_resample(features, labels)

In [None]:
def random_sample(features, labels, sample_size):
    """Draw `sample_size` rows from `features` and `labels` in lockstep.

    Delegates to `sklearn.utils.resample` with a fixed `random_state` of 42,
    so repeated calls with the same arguments return the same draw.

    NOTE(review): `replace` is left at resample's default, which the
    scikit-learn docs give as True (a bootstrap draw) - confirm that
    duplicates in the sample are intended.

    Returns:
        The resampled (features, labels) collections, as produced by `resample`.
    """
    drawn = resample(
        features,
        labels,
        n_samples=sample_size,
        random_state=42,
    )
    return drawn

def stratified_sample(features, labels, sample_size):
    """Return a class-stratified sample of `sample_size` rows.

    Args:
        features: pandas object supporting `.iloc` row selection.
        labels: pandas object supporting `.iloc`, aligned with `features`;
            its values define the strata.
        sample_size: number of rows to draw. Must be smaller than
            `len(labels)`; StratifiedShuffleSplit rejects a test set that
            covers the whole input.

    Returns:
        Tuple `(sampled_features, sampled_labels)` holding `sample_size` rows.
    """
    from sklearn.model_selection import StratifiedShuffleSplit

    # Pass the size as an absolute count: a float fraction is rounded up with
    # ceil inside scikit-learn and can overshoot by one row.
    strat_split = StratifiedShuffleSplit(
        n_splits=1, test_size=int(sample_size), random_state=42
    )
    # The split yields (train_idx, test_idx). `test_size` controls the SECOND
    # array, so that is the one holding the requested sample; the first holds
    # the complement (len(labels) - sample_size rows).
    _, sample_idx = next(strat_split.split(features, labels))
    return features.iloc[sample_idx], labels.iloc[sample_idx]


In [None]:
# Five sample sizes: 10%, 20%, ..., 50% of the balanced data set, truncated to int.
sample_sizes = [int(len(balanced_features) * 0.1 * step) for step in (1, 2, 3, 4, 5)]

# Materialise one (features, labels) sample per entry. Each name is paired
# with its sampler here and matched positionally with the sizes above;
# samples 3 and 5 are stratified, the others use plain random resampling.
sampling_methods = {
    name: sampler(balanced_features, balanced_labels, size)
    for (name, sampler), size in zip(
        (
            ("sample1", random_sample),
            ("sample2", random_sample),
            ("sample3", stratified_sample),
            ("sample4", random_sample),
            ("sample5", stratified_sample),
        ),
        sample_sizes,
    )
}
# Models to compare, keyed by the short label written to the results table.
# The tree-based estimators are randomized, so they are seeded like the
# sampling and splitting steps; otherwise the accuracies written to
# results.csv change from run to run.
classifiers = {
    "LogReg": LogisticRegression(),
    "RandForest": RandomForestClassifier(random_state=42),
    "DecTree": DecisionTreeClassifier(random_state=42),
    "NaiveBayes": GaussianNB(),
    "SVM": SVC()
}


In [None]:


# Evaluate every classifier on every sample.
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: growing a frame with pd.concat inside the loop copies all earlier rows
# on each iteration and concatenates onto an empty frame.
evaluation_records = []

for sample_label, (X_sample, y_sample) in sampling_methods.items():
    # One fixed 80/20 hold-out split per sample, shared by all classifiers so
    # their accuracies are comparable within that sample.
    X_train, X_test, y_train, y_test = train_test_split(
        X_sample, y_sample, test_size=0.2, random_state=42
    )

    for model_label, model in classifiers.items():
        # fit() is called again on the same estimator object for each sample.
        model.fit(X_train, y_train)

        predictions = model.predict(X_test)
        acc = accuracy_score(y_test, predictions)

        evaluation_records.append(
            {"Sample": sample_label, "Classifier": model_label, "Accuracy": acc}
        )

# Passing `columns` keeps the expected header even if no rows were produced.
evaluation_results = pd.DataFrame(
    evaluation_records, columns=["Sample", "Classifier", "Accuracy"]
)


In [None]:
# Write the accuracy table to disk without the index column, then confirm on stdout.
evaluation_results.to_csv(path_or_buf='results.csv', index=False)
print("Results saved to 'results.csv'")