In [None]:
import pandas as pd
import numpy as np
from scipy.stats import ks_2samp, chi2_contingency


# Load the real dataset and the CTGAN-generated dataset it is compared against.
real_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/processed_fraud_data.csv")  # Real dataset
ctgan_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/ctgan_t2_balanced.csv")  # Synthetic dataset (CTGAN)

# Columns compared with the KS test (numerical) and the chi-square test (categorical).
numerical_cols = ["Transaction_Amount", "Account_Balance", "Transaction_Distance", "Avg_Transaction_Amount_7d"]
categorical_cols = ["Transaction_Type", "Device_Type", "Previous_Fraudulent_Activity",
                    "Card_Type", "Authentication_Method", "Is_Weekend", "Fraud_Label", "IP_Address_Flag"]


# Two-sample Kolmogorov-Smirnov test per numerical column (real vs. synthetic).
ks_results = {}
for col in numerical_cols:
    ks_stat, ks_p = ks_2samp(real_df[col], ctgan_df[col])
    ks_results[col] = {"KS Statistic": ks_stat, "P-value": ks_p}

# Chi-square test of homogeneity per categorical column.
# The contingency table has one row per dataset and one column per category,
# holding how often each category occurs in that dataset.
# Do NOT use pd.crosstab(real_df[col], ctgan_df[col]) here: that pairs row i of
# the real file with row i of the synthetic file and tests whether the paired
# rows are associated, which says nothing about whether the two distributions match.
chi_results = {}
for col in categorical_cols:
    real_counts = real_df[col].value_counts().sort_index()
    ctgan_counts = ctgan_df[col].value_counts().sort_index()

    # A category missing from one dataset gets an explicit zero count so both
    # rows of the table cover the same categories in the same order.
    real_counts, ctgan_counts = real_counts.align(ctgan_counts, fill_value=0)

    contingency_table = [real_counts, ctgan_counts]
    chi_stat, chi_p, _, _ = chi2_contingency(contingency_table)
    chi_results[col] = {"Chi-Square Statistic": chi_stat, "P-value": chi_p}


# One row per feature, one column per reported quantity.
ks_results_df = pd.DataFrame(ks_results).T
chi_results_df = pd.DataFrame(chi_results).T


print("Kolmogorov-Smirnov Test Results (Numerical Features):")
print(ks_results_df)

print("\nChi-Square Test Results (Categorical Features):")
print(chi_results_df)


Kolmogorov-Smirnov Test Results (Numerical Features):
                           KS Statistic       P-value
Transaction_Amount             0.028799  4.603090e-21
Account_Balance                0.036396  2.170258e-33
Transaction_Distance           0.016279  5.071147e-07
Avg_Transaction_Amount_7d      0.022539  4.562682e-13

Chi-Square Test Results (Categorical Features):
                              Chi-Square Statistic   P-value
Transaction_Type                         23.303916  0.005549
Device_Type                               7.644014  0.105524
Previous_Fraudulent_Activity              0.354730  0.551448
Card_Type                                13.304525  0.149304
Authentication_Method                     6.652266  0.673272
Is_Weekend                                0.117864  0.731363
Fraud_Label                               0.133040  0.715300
IP_Address_Flag                           3.282657  0.070015


In [None]:
import pandas as pd
import numpy as np
from scipy.stats import ks_2samp, chi2_contingency


# Load the real dataset and the WGAN-generated dataset it is compared against.
real_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/processed_fraud_data.csv")  # Real dataset
wgan_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/balanced_WGAN.csv")  # Synthetic dataset (WGAN)


# Columns compared with the KS test (numerical) and the chi-square test (categorical).
numerical_cols = ["Transaction_Amount", "Account_Balance", "Transaction_Distance", "Avg_Transaction_Amount_7d"]
categorical_cols = ["Transaction_Type", "Device_Type", "Previous_Fraudulent_Activity",
                    "Card_Type", "Authentication_Method", "Is_Weekend", "Fraud_Label", "IP_Address_Flag"]

# Two-sample Kolmogorov-Smirnov test per numerical column (real vs. synthetic).
ks_results = {}
for col in numerical_cols:
    ks_stat, ks_p = ks_2samp(real_df[col], wgan_df[col])
    ks_results[col] = {"KS Statistic": ks_stat, "P-value": ks_p}


# Chi-square test of homogeneity per categorical column.
# The contingency table has one row per dataset and one column per category,
# holding how often each category occurs in that dataset.
# Do NOT use pd.crosstab(real_df[col], wgan_df[col]) here: that pairs row i of
# the real file with row i of the synthetic file and tests whether the paired
# rows are associated. If the synthetic file repeats the real rows in the same
# order, that yields a huge statistic (N * (k - 1)) regardless of how similar
# the two category distributions actually are.
chi_results = {}
for col in categorical_cols:
    real_counts = real_df[col].value_counts().sort_index()
    wgan_counts = wgan_df[col].value_counts().sort_index()

    # A category missing from one dataset gets an explicit zero count so both
    # rows of the table cover the same categories in the same order.
    real_counts, wgan_counts = real_counts.align(wgan_counts, fill_value=0)

    contingency_table = [real_counts, wgan_counts]
    chi_stat, chi_p, _, _ = chi2_contingency(contingency_table)
    chi_results[col] = {"Chi-Square Statistic": chi_stat, "P-value": chi_p}


# One row per feature, one column per reported quantity.
ks_results_df = pd.DataFrame(ks_results).T
chi_results_df = pd.DataFrame(chi_results).T


print("Kolmogorov-Smirnov Test Results (Numerical Features):")
print(ks_results_df)

print("\nChi-Square Test Results (Categorical Features):")
print(chi_results_df)


Kolmogorov-Smirnov Test Results (Numerical Features):
                           KS Statistic       P-value
Transaction_Amount             0.051474  2.257072e-66
Account_Balance                0.049030  2.948182e-60
Transaction_Distance           0.046246  1.185865e-53
Avg_Transaction_Amount_7d      0.043167  8.494633e-47

Chi-Square Test Results (Categorical Features):
                              Chi-Square Statistic  P-value
Transaction_Type                     150000.000000      0.0
Device_Type                          100000.000000      0.0
Previous_Fraudulent_Activity          49988.728894      0.0
Card_Type                            150000.000000      0.0
Authentication_Method                150000.000000      0.0
Is_Weekend                            49995.234938      0.0
Fraud_Label                           49995.414645      0.0
IP_Address_Flag                       49979.029027      0.0


In [None]:
import pandas as pd
import scipy.stats as stats
from scipy.stats import ks_2samp, chi2_contingency


# Inputs: the real dataset and the synthetic dataset stored in final_data.csv.
real_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/processed_fraud_data.csv')
sdggan_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/final_data.csv')

# Features to compare, grouped by the test applied to them.
numerical_cols = [
    'Transaction_Amount',
    'Account_Balance',
    'Transaction_Distance',
    'Avg_Transaction_Amount_7d',
]
categorical_cols = [
    'Transaction_Type',
    'Device_Type',
    'Previous_Fraudulent_Activity',
    'Card_Type',
    'Authentication_Method',
    'Is_Weekend',
    'Fraud_Label',
    'IP_Address_Flag',
]

# --- Numerical features: two-sample Kolmogorov-Smirnov test -----------------
ks_results = {}
for feature in numerical_cols:
    ks_outcome = ks_2samp(real_data[feature], sdggan_data[feature])
    ks_results[feature] = {'KS Statistic': ks_outcome[0], 'P-value': ks_outcome[1]}

# Transpose so each feature is a row and each reported quantity a column.
ks_df = pd.DataFrame(ks_results).T
print("\nKolmogorov-Smirnov Test Results (Numerical Features):")
print(ks_df)

# --- Categorical features: chi-square test on per-dataset category counts ---
chi_results = {}
for feature in categorical_cols:
    real_freq = real_data[feature].value_counts().sort_index()
    synth_freq = sdggan_data[feature].value_counts().sort_index()

    # Give both count vectors the same category index; a category absent
    # from one dataset is counted as 0 there.
    real_freq, synth_freq = real_freq.align(synth_freq, fill_value=0)

    # 2 x k table: row 0 = real counts, row 1 = synthetic counts.
    chi_outcome = chi2_contingency([real_freq, synth_freq])
    chi_results[feature] = {
        'Chi-Square Statistic': chi_outcome[0],
        'P-value': chi_outcome[1],
    }

chi_df = pd.DataFrame(chi_results).T
print("\nChi-Square Test Results (Categorical Features):")
print(chi_df)




Kolmogorov-Smirnov Test Results (Numerical Features):
                           KS Statistic  P-value
Transaction_Amount             0.145272      0.0
Account_Balance                0.145288      0.0
Transaction_Distance           0.122358      0.0
Avg_Transaction_Amount_7d      0.145290      0.0

Chi-Square Test Results (Categorical Features):
                              Chi-Square Statistic  P-value
Transaction_Type                      14100.186649      0.0
Device_Type                            4132.847995      0.0
Previous_Fraudulent_Activity           2984.646091      0.0
Card_Type                             14854.273488      0.0
Authentication_Method                 13076.595921      0.0
Is_Weekend                             3945.355682      0.0
Fraud_Label                           28392.966378      0.0
IP_Address_Flag                        6039.240962      0.0
