# 🧠 E-commerce Purchase Intention Prediction

This notebook predicts whether an online shopper will make a purchase based on browsing behavior.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## 📂 Load Dataset

In [None]:
# Read the shopper-session CSV (relative path) into a DataFrame.
csv_path = '../data/online_shoppers_intention.csv'
df = pd.read_csv(csv_path)
# Trailing expression so the notebook renders the first rows as the cell output.
df.head()

## 🔍 Data Overview

In [None]:
# Print the column dtypes and non-null counts (info() writes its summary to stdout).
df.info()
# Count missing values per column; isna() is the same method as isnull().
missing_per_column = df.isna().sum()
missing_per_column

## 🔄 Encode Categorical Features

In [None]:
# Integer-encode the listed columns in place.
#
# Each column gets its own LabelEncoder, stored in `encoders`, so every fitted
# mapping (`classes_`) stays available for inspection or `inverse_transform`.
# A single shared encoder would retain only the mapping of the last column it
# was fitted on.
#
# NOTE: LabelEncoder assigns codes in sorted order of the observed values, so
# the codes for 'Month' follow alphabetical order, not calendar order.
categorical_cols = ['Month', 'VisitorType', 'Weekend', 'Revenue']

encoders = {}
for col in categorical_cols:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])

# Backward compatibility: `le` refers to the encoder fitted on the last column
# in the list, which is what a single encoder reused across the loop ends up as.
le = encoders[categorical_cols[-1]]

## 🎯 Feature & Target Split

In [None]:
# Separate the target column from the predictor columns.
target_col = 'Revenue'
y = df[target_col]
X = df.drop(columns=[target_col])

## 🔀 Train-Test Split

In [None]:
# Hold out 20% of the rows for testing.
# `stratify=y` keeps the target's class proportions the same in the train and
# test partitions, so the evaluation is not distorted if the classes are
# imbalanced. `random_state` makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## ⚖️ Feature Scaling

In [None]:
# Learn the per-feature mean and standard deviation from the training rows
# only, then apply that same transformation to both partitions so that no
# test-set statistics are used during preprocessing.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## 🤖 Model Training

In [None]:
# Build the three classifiers up front; the tree-based models are seeded so
# their results are reproducible.
lr = LogisticRegression()
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit each model on the training partition and predict the test partition, in
# the order lr -> dt -> rf. fit() returns the estimator itself, so the two
# calls can be chained.
lr_pred, dt_pred, rf_pred = (
    clf.fit(X_train, y_train).predict(X_test) for clf in (lr, dt, rf)
)

## 📊 Model Evaluation

In [None]:
# Map each model's display name to its test-set predictions.
models = dict(
    zip(
        ('Logistic Regression', 'Decision Tree', 'Random Forest'),
        (lr_pred, dt_pred, rf_pred),
    )
)

# For every model print its name, accuracy and per-class report, followed by a
# 40-character rule to separate the sections.
separator = '-' * 40
for name in models:
    pred = models[name]
    print(name)
    accuracy = accuracy_score(y_test, pred)
    print('Accuracy:', accuracy)
    report = classification_report(y_test, pred)
    print(report)
    print(separator)

## 🔍 Confusion Matrix (Random Forest)

In [None]:
# Plot the Random Forest confusion matrix as an annotated heatmap.
# confusion_matrix() puts the true labels on the rows and the predicted labels
# on the columns; the axes are labelled accordingly so the plot is unambiguous.
rf_cm = confusion_matrix(y_test, rf_pred)
ax = sns.heatmap(rf_cm, annot=True, fmt='d')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.title('Random Forest Confusion Matrix')
plt.show()