# Handling Class Imbalance

In [None]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from collections import Counter

# Read the preprocessed fraud dataset into a DataFrame.
# NOTE(review): the path begins with ".data/" (a hidden directory) — confirm
# this is intentional and not a typo for "./data/".
df = pd.read_csv(".data/final_fraud_data_Processed.csv")

# Report how many rows belong to each value of the 'class' target column
# before any resampling is applied.
print("Class distribution before resampling:")
class_counts = df['class'].value_counts()
print(class_counts)


## Train-Test Split

In [None]:
# Build the feature matrix and target vector.
# The dropped columns look like per-record identifiers rather than predictive
# features — presumably why they are excluded; verify against the data schema.
X = df.drop(['class', 'user_id', 'device_id', 'ip_address'], axis=1)
y = df['class']

# Encode categoricals (one-hot; drop_first avoids a redundant dummy column
# per categorical feature).
X = pd.get_dummies(X, drop_first=True)

# Split data BEFORE scaling. Fitting the scaler on the full dataset would let
# the test rows influence the mean/std used to transform the training rows
# (data leakage), which makes held-out evaluation optimistically biased.
# stratify=y keeps the class ratio the same in both splits.
X_train_unscaled, X_test_unscaled, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Scale features: learn mean/std from the training split only, then apply
# that same transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_unscaled)
X_test = scaler.transform(X_test_unscaled)

# Kept so that any later cell referencing X_scaled still works; it is now
# produced by the train-fitted scaler rather than one fit on all rows.
X_scaled = scaler.transform(X)

print("Training target distribution:", Counter(y_train))


## Apply SMOTE to Handle Imbalance

In [None]:
# Oversample with SMOTE using a fixed seed so the result is reproducible.
# Only the training split is passed in; the test split is not resampled here.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(
    X_train,
    y_train,
)

# Show the per-class counts after resampling for comparison with the
# pre-resampling training distribution.
print(
    "Resampled training target distribution:",
    Counter(y_train_resampled),
)
