
# Fraudulent E‑Commerce Transaction Detection (End‑to‑End)

This notebook:
1. Loads **both CSV files**
2. Concatenates them
3. Performs train/test split
4. Builds a leakage‑safe pipeline with **RandomOverSampler**
5. Tunes XGBoost using **Optuna**
6. Trains final model
7. Evaluates using F1, PR‑AUC, and confusion matrix

Files expected in the same directory:
- `Fraudulent_E-Commerce_Transaction_Data.csv`
- `Fraudulent_E-Commerce_Transaction_Data_2.csv`


In [1]:

# =====================
# Imports
# =====================
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import f1_score, confusion_matrix, average_precision_score

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler

import category_encoders as ce
from xgboost import XGBClassifier
import optuna



## Load & concatenate data


In [2]:

# =====================
# Load CSV files
# =====================
df1 = pd.read_csv("Fraudulent_E-Commerce_Transaction_Data.csv")
df2 = pd.read_csv("Fraudulent_E-Commerce_Transaction_Data_2.csv")

df = pd.concat([df1, df2], axis=0, ignore_index=True)

print("Shape after concat:", df.shape)
df.head()


Shape after concat: (1496586, 16)


Unnamed: 0,Transaction ID,Customer ID,Transaction Amount,Transaction Date,Payment Method,Product Category,Quantity,Customer Age,Customer Location,Device Used,IP Address,Shipping Address,Billing Address,Is Fraudulent,Account Age Days,Transaction Hour
0,15d2e414-8735-46fc-9e02-80b472b2580f,d1b87f62-51b2-493b-ad6a-77e0fe13e785,58.09,2024-02-20 05:58:41,bank transfer,electronics,1,17,Amandaborough,tablet,212.195.49.198,Unit 8934 Box 0058\nDPO AA 05437,Unit 8934 Box 0058\nDPO AA 05437,0,30,5
1,0bfee1a0-6d5e-40da-a446-d04e73b1b177,37de64d5-e901-4a56-9ea0-af0c24c069cf,389.96,2024-02-25 08:09:45,debit card,electronics,2,40,East Timothy,desktop,208.106.249.121,"634 May Keys\nPort Cherylview, NV 75063","634 May Keys\nPort Cherylview, NV 75063",0,72,8
2,e588eef4-b754-468e-9d90-d0e0abfc1af0,1bac88d6-4b22-409a-a06b-425119c57225,134.19,2024-03-18 03:42:55,PayPal,home & garden,2,22,Davismouth,tablet,76.63.88.212,"16282 Dana Falls Suite 790\nRothhaven, IL 15564","16282 Dana Falls Suite 790\nRothhaven, IL 15564",0,63,3
3,4de46e52-60c3-49d9-be39-636681009789,2357c76e-9253-4ceb-b44e-ef4b71cb7d4d,226.17,2024-03-16 20:41:31,bank transfer,clothing,5,31,Lynnberg,desktop,207.208.171.73,"828 Strong Loaf Apt. 646\nNew Joshua, UT 84798","828 Strong Loaf Apt. 646\nNew Joshua, UT 84798",0,124,20
4,074a76de-fe2d-443e-a00c-f044cdb68e21,45071bc5-9588-43ea-8093-023caec8ea1c,121.53,2024-01-15 05:08:17,bank transfer,clothing,2,51,South Nicole,tablet,190.172.14.169,"29799 Jason Hills Apt. 439\nWest Richardtown, ...","29799 Jason Hills Apt. 439\nWest Richardtown, ...",0,158,5



## Basic assumptions

- Target column name: **`Fraud`**
- Positive class = 1 (fraudulent)


In [4]:

# =====================
# Target & features
# =====================
df=df.drop(columns=['Transaction ID', 'Customer ID','Transaction Date','IP Address','Shipping Address','Billing Address'])
TARGET_COL = "Is Fraudulent"

X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL]

print("Positive rate:", y.mean())


Positive rate: 0.05015415084732852



## Train / Test split


In [5]:

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)


Train shape: (1197268, 9)
Test shape: (299318, 9)



## Categorical columns

Update this list **only if your column names differ**.


In [6]:

cat_cols = [
    "Payment Method",
    "Product Category",
    "Customer Location",
    "Device Used"
]



## Pipeline with RandomOverSampler
Minority class is duplicated to match majority.


In [7]:

pipe = Pipeline([
    ("sampler", RandomOverSampler(
        sampling_strategy=1.0,
        random_state=42
    )),
    ("encoder", ce.TargetEncoder(cols=cat_cols)),
    ("model", XGBClassifier(
        objective="binary:logistic",
        eval_metric="aucpr",
        random_state=42,
        n_jobs=-1
    ))
])



## Optuna objective (CV F1)


In [8]:

def objective(trial):
    params = {
        'model__n_estimators': trial.suggest_int('n_estimators', 200, 800),
        'model__max_depth': trial.suggest_int('max_depth', 3, 8),
        'model__learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'model__min_child_weight': trial.suggest_int('min_child_weight', 3, 10),
        'model__gamma': trial.suggest_float('gamma', 0, 0.5),
        'model__subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'model__colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 0.9),
    }

    pipe.set_params(**params)

    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

    scores = cross_val_score(
        pipe,
        X_train,
        y_train,
        cv=cv,
        scoring="f1",
        n_jobs=-1
    )

    return scores.mean()


In [9]:

# =====================
# Run Optuna
# =====================
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)

print("Best CV F1:", study.best_value)
print("Best params:", study.best_params)


[I 2025-12-17 00:13:22,955] A new study created in memory with name: no-name-fda8236e-70c3-4946-b7c3-19c9ad760c62
[I 2025-12-17 00:14:47,379] Trial 0 finished with value: 0.20652715000007002 and parameters: {'n_estimators': 426, 'max_depth': 6, 'learning_rate': 0.08193358841425667, 'min_child_weight': 3, 'gamma': 0.17275409991828056, 'subsample': 0.960059771238187, 'colsample_bytree': 0.8223770303262764}. Best is trial 0 with value: 0.20652715000007002.
[I 2025-12-17 00:16:01,928] Trial 1 finished with value: 0.20842799820648414 and parameters: {'n_estimators': 351, 'max_depth': 4, 'learning_rate': 0.042554075238400706, 'min_child_weight': 9, 'gamma': 0.1814390595475885, 'subsample': 0.8335417331636082, 'colsample_bytree': 0.759340293837955}. Best is trial 1 with value: 0.20842799820648414.
[I 2025-12-17 00:17:34,235] Trial 2 finished with value: 0.20534533152316667 and parameters: {'n_estimators': 466, 'max_depth': 5, 'learning_rate': 0.0718425400045305, 'min_child_weight': 3, 'gamma'

Best CV F1: 0.2125025720355904
Best params: {'n_estimators': 230, 'max_depth': 6, 'learning_rate': 0.013400971296775052, 'min_child_weight': 8, 'gamma': 0.15476898811284145, 'subsample': 0.7070080867629875, 'colsample_bytree': 0.8342017919378504}



## Train final model


In [10]:

final_pipe = Pipeline([
    ("sampler", RandomOverSampler(
        sampling_strategy=1.0,
        random_state=42
    )),
    ("encoder", ce.TargetEncoder(cols=cat_cols)),
    ("model", XGBClassifier(
        **study.best_params,
        objective="binary:logistic",
        eval_metric="aucpr",
        random_state=42,
        n_jobs=-1
    ))
])

final_pipe.fit(X_train, y_train)


0,1,2
,steps,"[('sampler', ...), ('encoder', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,sampling_strategy,1.0
,random_state,42.0
,shrinkage,

0,1,2
,verbose,0
,cols,"['Payment Method', 'Product Category', ...]"
,drop_invariant,False
,return_df,True
,handle_missing,'value'
,handle_unknown,'value'
,min_samples_leaf,20
,smoothing,10
,hierarchy,

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.8342017919378504
,device,
,early_stopping_rounds,
,enable_categorical,False



## Evaluation on test set


In [11]:
import numpy as np
y_prob=final_pipe.predict_proba(X_train)[:,1]
threshold=np.arange(0.0,1.0,0.05)
f1=0
best_t=0
for t in threshold:
    y_pred=(y_prob>=t).astype(int)
    if(f1_score(y_train,y_pred))>f1:
        f1=f1_score(y_train,y_pred)
        best_t=t
print(best_t, f1)

0.8 0.46371740398233774


In [12]:
y_pred=(final_pipe.predict_proba(X_test)[:,1]>=best_t).astype(int)
from sklearn.metrics import confusion_matrix, average_precision_score,f1_score, accuracy_score, classification_report
print('Average Precision Score:', average_precision_score(y_test, y_pred))
print('F1 Score:', f1_score(y_test, y_pred))
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test, y_pred))

Average Precision Score: 0.1398921209810808
F1 Score: 0.3236949486421838
[[275491   8815]
 [ 10411   4601]]
0.9357673110203863
              precision    recall  f1-score   support

           0       0.96      0.97      0.97    284306
           1       0.34      0.31      0.32     15012

    accuracy                           0.94    299318
   macro avg       0.65      0.64      0.64    299318
weighted avg       0.93      0.94      0.93    299318

