In [None]:
# fraud_detection_smote_xgboost.py

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler

def load_data(source=None):
    """Load the credit card fraud dataset into a DataFrame.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, local path, or
            file-like object). When omitted, the hosted copy of
            ``creditcard.csv`` that this script has always used is read.

    Returns:
        pandas.DataFrame: The parsed CSV contents.
    """
    if source is None:
        # Default location of the dataset (a Dropbox-hosted CSV).
        source = "https://www.dropbox.com/s/0vlqot9lct16bjj/creditcard.csv?dl=1"
    return pd.read_csv(source)

def preprocess_data(df):
    """Split ``df`` into stratified train/test sets and standardize features.

    The ``Class`` column is used as the target; every other column is
    treated as a feature. The split is done *before* scaling, and the
    scaler is fit on the training fold only, so that test-set statistics
    never influence the training features (avoids data leakage).

    Args:
        df: DataFrame containing the feature columns and a ``Class`` column.

    Returns:
        list: ``[X_train, X_test, y_train, y_test]`` where the feature
        splits are the scaler's transformed output and the target splits
        are taken from ``df['Class']``. 30% of the rows go to the test
        split, stratified on the target, with ``random_state=42``.
    """
    features = df.drop(['Class'], axis=1)
    target = df['Class']

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42, stratify=target
    )

    # Learn mean/variance from the training fold only, then apply the same
    # transformation to the held-out fold.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return [X_train, X_test, y_train, y_test]

def handle_imbalance(X_train, y_train):
    """Resample the training data with SMOTE.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        tuple: ``(features, labels)`` as produced by
        ``SMOTE(random_state=42).fit_resample``.
    """
    oversampler = SMOTE(random_state=42)
    resampled = oversampler.fit_resample(X_train, y_train)
    features, labels = resampled
    return features, labels

def train_model(X, y):
    """Fit an XGBoost classifier on the given data and return it.

    Args:
        X: Training features.
        y: Training labels.

    Returns:
        XGBClassifier: The fitted classifier.
    """
    # Constructor settings kept together so they are easy to spot and tweak.
    params = {"use_label_encoder": False, "eval_metric": "logloss"}
    classifier = XGBClassifier(**params)
    classifier.fit(X, y)
    return classifier

def main():
    """Run the full pipeline: load, split/scale, oversample, train, evaluate.

    Progress messages, the confusion matrix and the classification report
    for the held-out test split are printed to stdout.
    """
    print("Loading data...")
    dataset = load_data()

    print("Preprocessing data...")
    splits = preprocess_data(dataset)
    X_train, X_test, y_train, y_test = splits

    print("Balancing data with SMOTE...")
    # Only the training split is resampled; the test split stays untouched.
    X_balanced, y_balanced = handle_imbalance(X_train, y_train)

    print("Training XGBoost model...")
    classifier = train_model(X_balanced, y_balanced)

    print("Evaluating model...")
    predictions = classifier.predict(X_test)

    matrix = confusion_matrix(y_test, predictions)
    print("Confusion Matrix:\n", matrix)

    report = classification_report(y_test, predictions)
    print("\nClassification Report:\n", report)

# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
