# E-Commerce Purchase Prediction - Exploratory Data Analysis

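Bu notebook, işlenmiş train, validation ve test veri setleri üzerinde veri keşfini ve temel istatistikleri içerir: target dağılımı, seçili sayısal feature'ların dağılımları ve kısa bir özet.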

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append('..')

from src.utils.config import TRAIN_PROCESSED, VAL_PROCESSED, TEST_PROCESSED

# Plot defaults for the notebook: seaborn's white-grid style and a 12x6 inch
# default figure size (cells that pass their own figsize override it).
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Veri Yükleme

In [None]:
# Read the three processed splits from the parquet paths imported from
# src.utils.config.
train_df, val_df, test_df = [
    pd.read_parquet(split_path)
    for split_path in (TRAIN_PROCESSED, VAL_PROCESSED, TEST_PROCESSED)
]

# Report the (rows, columns) shape of every split.
for split_label, split_frame in (('Train', train_df), ('Val', val_df), ('Test', test_df)):
    print(f"{split_label}: {split_frame.shape}")

# Last expression of the cell: rich preview of the first training rows.
train_df.head()

## 2. Target Dağılımı

In [None]:
# Class balance of each split, one panel per split.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

for ax, (df, name) in zip(axes, [(train_df, 'Train'), (val_df, 'Val'), (test_df, 'Test')]):
    # value_counts() orders by frequency; sorting by class label keeps the bar
    # order identical in every panel even if the majority class differs
    # between splits. rot=0 keeps the class tick labels horizontal.
    class_counts = df['target'].value_counts().sort_index()
    class_counts.plot(kind='bar', ax=ax, rot=0)
    ax.set_title(f'{name} - Target Distribution')
    ax.set_xlabel('Target')
    ax.set_ylabel('Count')
    # The mean equals the positive share only if target is coded 0/1
    # (assumed here -- TODO confirm against the preprocessing step).
    pos_rate = df['target'].mean()
    # Axes-relative coordinates: annotation stays near the top centre
    # regardless of the count scale.
    ax.text(0.5, 0.95, f'Positive: {pos_rate:.2%}', transform=ax.transAxes, ha='center')

plt.tight_layout()
plt.show()

## 3. Feature Dağılımları

In [None]:
# Numeric session features whose distributions are compared between the two
# target classes on the training split.
numeric_features = ['n_events', 'n_unique_products', 'price_mean', 'session_duration_seconds']

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
axes = axes.ravel()  # flatten the 2x2 grid so panels can be indexed 0..3

# Class masks are loop-invariant, so build them once.
no_purchase_mask = train_df['target'] == 0
purchase_mask = train_df['target'] == 1

for idx, feature in enumerate(numeric_features):
    ax = axes[idx]
    feature_values = train_df[feature]

    # Both classes must share the same bin edges: letting each hist() call
    # pick its own 50 bins over its own range produces bars of different
    # widths and positions, which makes the overlaid densities incomparable.
    # NaNs are dropped because they cannot define a finite bin range
    # (Series.hist drops them as well).
    bin_edges = np.histogram_bin_edges(feature_values.dropna().to_numpy(dtype=float), bins=50)

    feature_values[no_purchase_mask].hist(ax=ax, bins=bin_edges, alpha=0.5, label='No Purchase', density=True)
    feature_values[purchase_mask].hist(ax=ax, bins=bin_edges, alpha=0.5, label='Purchase', density=True)
    ax.set_title(f'{feature} by purchase outcome')
    ax.set_xlabel(feature)
    ax.set_ylabel('Density')
    ax.legend()

plt.tight_layout()
plt.show()

## 4. Özet

In [None]:
# Headline statistics; every figure below is computed on the training split.
n_sessions = len(train_df)
positive_rate = train_df['target'].mean()
avg_events = train_df['n_events'].mean()
avg_price = train_df['price_mean'].mean()

summary_lines = [
    "Dataset Summary",
    f"Total sessions: {n_sessions:,}",
    f"Positive rate: {positive_rate:.2%}",
    f"Avg events: {avg_events:.2f}",
    f"Avg price: ${avg_price:.2f}",
]
for summary_line in summary_lines:
    print(summary_line)