# Customer Transaction Prediction (PRCP-1003)

Domain: Banking

Objective: Predict whether a customer will make a future transaction.

## Import Required Libraries

In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from xgboost import XGBClassifier


ModuleNotFoundError: No module named 'xgboost'

## Load Dataset

In [None]:

# Folder that holds train.csv and test.csv. "." is the notebook's working
# directory; point this elsewhere if the files live in another folder
# (forward slashes are accepted on Windows too).
# The previous literal "C:\Users\..." could not even be parsed: in a normal
# string "\U" begins a Unicode escape, which is a SyntaxError.
DATA_DIR = "."

# Training data. Every later cell reads `df`, so it must stay bound to train.csv.
df = pd.read_csv(f"{DATA_DIR}/train.csv")

# The second file gets its own name so it no longer replaces the training
# frame (later cells read the 'target' column from `df`).
test_df = pd.read_csv(f"{DATA_DIR}/test.csv")

df.head()


## Data Understanding

In [None]:

# Only the last expression of a cell is displayed automatically, so a bare
# `df.shape` placed before `df.info()` produced no output. Print it explicitly.
print(f"(rows, columns) = {df.shape}")

# Column dtypes, non-null counts and memory footprint (printed by pandas).
df.info()


## Check Missing Values

In [None]:

# Count missing cells per column, then add the per-column counts together to
# get a single total for the whole frame (0 means nothing is missing).
missing_per_column = df.isna().sum()
missing_per_column.sum()


## Target Variable Distribution

In [None]:

# Share of rows in each class of the label column (fractions, not raw counts).
target_values = df['target']
target_values.value_counts(normalize=True)


## Data Preprocessing

In [None]:

# Label vector and feature matrix. 'ID_code' is dropped along with the label
# (presumably a row identifier rather than a predictor; confirm against the data).
y = df['target']
X = df.drop(['ID_code', 'target'], axis=1)

# 80/20 hold-out split; stratifying on y keeps the class ratio the same in
# both parts, and the fixed seed makes the split reproducible.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Learn the scaling statistics from the training part only, then apply the
# same transformation to both parts so no test information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


## Train Models

In [None]:

# Logistic regression is the only model trained on the standardized features;
# the tree-based models below are trained on the unscaled features.
lr = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# Single decision tree (seeded for reproducibility).
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Random forest with 200 trees (seeded for reproducibility).
rf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)

# Gradient-boosted trees; hyperparameters are collected in one place so they
# are easy to read and tune.
xgb_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 6,
    "eval_metric": 'logloss',
    "random_state": 42,
}
xgb = XGBClassifier(**xgb_params)
xgb.fit(X_train, y_train)


## Model Evaluation

In [None]:

# Each entry pairs a display name with its fitted model and the hold-out
# feature matrix in the representation that model was trained on
# (standardized for logistic regression, unscaled for the tree models).
evaluated_models = [
    ("Logistic Regression", lr, X_test_scaled),
    ("Decision Tree", dt, X_test),
    ("Random Forest", rf, X_test),
    ("XGBoost", xgb, X_test),
]

# Predict once per model and reuse the result for every metric. Previously
# `predict` was run twice per model on the same data and the per-model code
# was repeated for each metric.
metric_rows = []
for model_name, model, features in evaluated_models:
    predicted_labels = model.predict(features)
    # Column 1 of predict_proba is the probability of the positive class.
    positive_proba = model.predict_proba(features)[:, 1]
    metric_rows.append({
        "Model": model_name,
        "Accuracy": accuracy_score(y_test, predicted_labels),
        "F1 Score": f1_score(y_test, predicted_labels),
        "ROC AUC": roc_auc_score(y_test, positive_proba),
    })

# One row per model, columns in the order: Model, Accuracy, F1 Score, ROC AUC.
results = pd.DataFrame(metric_rows)
results


## Conclusion
Based on the Accuracy, F1 Score, and ROC AUC values in the results table above, XGBoost shows the best performance and is recommended for production.