# 🛒 E-Commerce Consumer Behavior Analysis
## 📊 Full Visualization Output Generator

**Generates all 10 visualizations → `outputs/visualizations/`**

| # | Output File | Description |
|---|-------------|-------------|
| 1 | `viz_age_distribution.png` | Histogram + KDE overlay |
| 2 | `viz_income_vs_purchase.png` | Box plot + avg spend |
| 3 | `viz_top10_categories.png` | Category frequency & spend |
| 4 | `viz_channel_breakdown.png` | Donut + channel spend |
| 5 | `viz_satisfaction_heatmap.png` | Satisfaction × Income heatmap |
| 6 | `viz_loyalty_spending.png` | Loyalty vs spending |
| 7 | `viz_social_media_influence.png` | Social influence analysis |
| 8 | `viz_discount_sensitivity.png` | Discount behaviour |
| 9 | `viz_confusion_matrix.png` | Model metrics |
| 10 | `viz_feature_importance.png` | CatBoost feature importance |

---

## 📦 Section 1 — Install Dependencies

In [None]:
# Install the third-party packages used later in this notebook (quiet mode).
# NOTE: the confirmation below is printed unconditionally; pip's exit status
# is not checked, so the message does not prove the install succeeded.
!pip install -q imbalanced-learn catboost scipy
print('‚úÖ Dependencies installed')

## 📚 Section 2 — Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import os
from scipy.stats import gaussian_kde

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import (
    accuracy_score, f1_score, confusion_matrix,
    precision_score, recall_score
)

from imblearn.over_sampling import SMOTE
from imblearn.ensemble import BalancedRandomForestClassifier
from catboost import CatBoostClassifier

import warnings
# Suppress all warnings for the remainder of the session.
warnings.filterwarnings('ignore')

# -- Colour palette and savefig options --------------------------------------
ACCENT, TEAL, GOLD, CORAL, GREEN = (
    '#7c6af7', '#26c6da', '#ffd54f', '#ff7043', '#66bb6a',
)
# Keyword arguments passed to plt.savefig by the save() helper.
SAVE = {'dpi': 150, 'bbox_inches': 'tight', 'facecolor': '#0f1117'}

# -- Global dark theme -------------------------------------------------------
plt.rcParams.update({
    # backgrounds
    'figure.facecolor': '#0f1117',
    'axes.facecolor': '#1a1d27',
    # text and tick colours
    'text.color': '#e0e0e0',
    'axes.labelcolor': '#e0e0e0',
    'xtick.color': '#b0b0b0',
    'ytick.color': '#b0b0b0',
    # hide the top and right spines
    'axes.spines.top': False,
    'axes.spines.right': False,
    # grid and font
    'grid.color': '#2a2d3a',
    'grid.linewidth': 0.6,
    'font.family': 'DejaVu Sans',
})

print('‚úÖ All libraries imported successfully!')

## 📂 Section 3 — Upload & Load Dataset

> Run the cell below to upload your CSV file; the dataset is then loaded automatically.

In [None]:
from google.colab import files

# Open the Colab upload widget. The result is used as a mapping keyed by the
# uploaded file names.
print('üìÅ Please upload your CSV file...')
uploaded = files.upload()

# An empty result (e.g. the upload was cancelled) would otherwise surface as
# an opaque IndexError when picking the first file name.
if not uploaded:
    raise RuntimeError(
        'No file was uploaded. Re-run this cell and choose a CSV file.')

# Only the first uploaded file is loaded; any additional files are ignored.
filename = next(iter(uploaded))
df = pd.read_csv(filename)

print(f'\n‚úÖ Dataset loaded: {df.shape[0]} rows √ó {df.shape[1]} columns')
print(f'   File: {filename}')
df.head()

## 🧹 Section 4 — Data Cleaning

In [None]:
# --- Purchase_Amount: strip "$" and "," then parse as float ------------------
amount_text = df['Purchase_Amount'].astype(str)
amount_text = amount_text.str.replace(r'[$,]', '', regex=True)
df['Purchase_Amount'] = amount_text.astype(float)

# --- Missing values: label absent entries with the string 'None' -------------
for na_col in ('Social_Media_Influence', 'Engagement_with_Ads'):
    df[na_col] = df[na_col].fillna('None')

# --- Ordered categoricals (levels listed from lowest to highest) -------------
level_orders = {
    'Income_Level': ['Low', 'Middle', 'High'],
    'Discount_Sensitivity': ['Not Sensitive', 'Somewhat Sensitive', 'Very Sensitive'],
    'Social_Media_Influence': ['None', 'Low', 'Medium', 'High'],
}
for cat_name, levels in level_orders.items():
    df[cat_name] = pd.Categorical(df[cat_name], categories=levels, ordered=True)

# --- Boolean flags -----------------------------------------------------------
for flag_col in ('Discount_Used', 'Customer_Loyalty_Program_Member'):
    df[flag_col] = df[flag_col].astype(bool)

# --- Derived feature: amount per purchase times purchase frequency -----------
df['Total_Spend'] = df['Purchase_Amount'] * df['Frequency_of_Purchase']

print('‚úÖ Data cleaning complete')
print(f'   Remaining nulls: {df.isnull().sum().sum()}')
df.dtypes

## 📁 Section 5 — Create Output Directory

In [None]:
# Directory that receives every generated PNG; created on demand.
OUT = 'outputs/visualizations'
os.makedirs(OUT, exist_ok=True)


def save(name):
    """Save the current Matplotlib figure into ``OUT``, display it, then close it.

    Args:
        name: File name (including extension) to create inside ``OUT``.
    """
    # Grab the figure before showing it so it can be closed explicitly below.
    fig = plt.gcf()
    path = f'{OUT}/{name}'
    plt.savefig(path, **SAVE)
    plt.show()
    # Release the figure so figures do not pile up in memory when this runs
    # outside an inline notebook backend.
    plt.close(fig)
    print(f'  ‚úÖ  Saved ‚Üí {path}\n')


print(f'‚úÖ Output directory ready: {OUT}')

---
## 🎨 Section 6 — Visualization 1: Age Distribution

In [None]:
fig, ax = plt.subplots(figsize=(11, 5))
fig.patch.set_facecolor('#0f1117')
ax.set_facecolor('#1a1d27')

# Missing ages are dropped first: they carry no information for either the
# histogram or the KDE fit.
ages = df['Age'].dropna()

n, bins, patches = ax.hist(
    ages, bins=30, color=ACCENT,
    alpha=0.85, edgecolor='#0f1117', linewidth=0.5, zorder=3
)

# Colour each bar by the age group that contains its left bin edge; bars that
# fall outside every group keep the default ACCENT colour.
group_colors = {(18,30): TEAL, (30,45): ACCENT, (45,60): GOLD, (60,100): CORAL}
for patch, left in zip(patches, bins[:-1]):
    for (lo, hi), c in group_colors.items():
        if lo <= left < hi:
            patch.set_facecolor(c)
            break  # the groups are disjoint, so the first match is the only one

# KDE overlay, drawn on a secondary y-axis because density and count use
# different scales.
kde_x = np.linspace(ages.min(), ages.max(), 300)
kde   = gaussian_kde(ages, bw_method=0.3)
ax2   = ax.twinx()
ax2.plot(kde_x, kde(kde_x), color='white', lw=2, alpha=0.7)
ax2.set_ylabel('Density', color='#b0b0b0')
ax2.tick_params(colors='#b0b0b0')
ax2.set_facecolor('#1a1d27')
ax2.spines['top'].set_visible(False)

median_age  = ages.median()
median_line = ax.axvline(median_age, color=GOLD, ls='--', lw=1.5,
                         label=f"Median {median_age:.0f}")
ax.set_title('Customer Age Distribution', fontsize=15,
             fontweight='bold', color='white', pad=14)
ax.set_xlabel('Age', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.xaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)

legend_handles = [mpatches.Patch(color=c, label=f'{lo}‚Äì{hi}')
                  for (lo, hi), c in group_colors.items()]
# Passing handles= replaces the automatically collected legend entries, so the
# median line has to be added explicitly or its label is never displayed.
legend_handles.append(median_line)
ax.legend(handles=legend_handles, title='Age Group', fontsize=9,
          facecolor='#1a1d27', edgecolor='none', labelcolor='white',
          title_fontsize=9)

plt.tight_layout()
save('viz_age_distribution.png')

## 🎨 Section 7 — Visualization 2: Income vs Purchase

In [None]:
# Income levels in display order, each with its own colour.
income_order = ['Low', 'Middle', 'High']
pal          = dict(zip(income_order, (CORAL, GOLD, GREEN)))

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0f1117')
fig.suptitle('Income Level vs Purchase Behaviour', fontsize=14,
             fontweight='bold', color='white')

# ---- Left panel: purchase-amount box plot per income level ----
ax = axes[0]
ax.set_facecolor('#1a1d27')
data_by_income = []
for level in income_order:
    in_level = df['Income_Level'] == level
    data_by_income.append(df.loc[in_level, 'Purchase_Amount'].values)
bp = ax.boxplot(
    data_by_income,
    patch_artist=True,
    medianprops={'color': 'white', 'linewidth': 2},
    whiskerprops={'color': '#b0b0b0'},
    capprops={'color': '#b0b0b0'},
    flierprops={'marker': 'o', 'markerfacecolor': '#555', 'markersize': 3},
)
for box, level in zip(bp['boxes'], income_order):
    box.set_facecolor(pal[level])
    box.set_alpha(0.8)
ax.set_xticklabels(income_order)
ax.set_title('Purchase Amount by Income', fontsize=12, color='white')
ax.set_ylabel('Purchase Amount ($)', fontsize=11)
ax.yaxis.grid(True, linestyle='--', alpha=0.4)

# ---- Right panel: mean Total_Spend per income level ----
ax = axes[1]
ax.set_facecolor('#1a1d27')
avg = df.groupby('Income_Level', observed=True)['Total_Spend'].mean()
bar_heights = [avg[level] for level in income_order]
bar_colors  = [pal[level] for level in income_order]
bars = ax.bar(income_order, bar_heights, color=bar_colors, alpha=0.85, zorder=3)
for rect in bars:
    top   = rect.get_height()
    x_mid = rect.get_x() + rect.get_width()/2
    # Dollar label placed slightly above the bar top.
    ax.text(x_mid, top + 5, f'${top:,.0f}', ha='center', fontsize=11,
            color='white', fontweight='bold')
ax.set_title('Average Total Spend by Income', fontsize=12, color='white')
ax.set_ylabel('Avg Total Spend ($)', fontsize=11)
ax.yaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)

plt.tight_layout()
save('viz_income_vs_purchase.png')

## 🎨 Section 8 — Visualization 3: Top 10 Categories

In [None]:
def _hbar_panel(ax, series, colors, label_pad, label_fmt, title, xlabel):
    """Draw ``series`` (in reversed order) as horizontal bars with a value label per bar."""
    ax.set_facecolor('#1a1d27')
    rects = ax.barh(series.index[::-1], series.values[::-1],
                    color=colors, alpha=0.88, zorder=3)
    for rect in rects:
        width = rect.get_width()
        ax.text(width + label_pad, rect.get_y() + rect.get_height()/2,
                label_fmt(width), va='center', fontsize=9, color='#e0e0e0')
    ax.set_title(title, fontsize=12, color='white')
    ax.set_xlabel(xlabel, fontsize=11)
    ax.xaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)


fig, axes = plt.subplots(1, 2, figsize=(16, 6))
fig.patch.set_facecolor('#0f1117')
fig.suptitle('Top 10 Product Categories', fontsize=14,
             fontweight='bold', color='white')

# First column whose name mentions "category" or "product"; None if absent.
cat_col = None
for col_name in df.columns:
    lowered = col_name.lower()
    if 'category' in lowered or 'product' in lowered:
        cat_col = col_name
        break

if cat_col is None:
    # Nothing to plot: show a placeholder message on both panels.
    for ax in axes:
        ax.set_facecolor('#1a1d27')
        ax.text(0.5, 0.5, 'No category column detected in dataset',
                ha='center', va='center', color='white', transform=ax.transAxes)
else:
    top10       = df[cat_col].value_counts().head(10)
    grad_colors = plt.cm.plasma(np.linspace(0.2, 0.85, 10))

    # Left panel: how often each of the top categories was purchased.
    _hbar_panel(axes[0], top10, grad_colors, 2, lambda w: f'{w:,}',
                'Purchase Frequency', 'Number of Purchases')

    # Right panel: mean purchase amount for the same categories, same order.
    in_top10 = df[cat_col].isin(top10.index)
    avg_cat  = df[in_top10].groupby(cat_col)['Purchase_Amount'].mean()
    avg_cat  = avg_cat.reindex(top10.index)
    _hbar_panel(axes[1], avg_cat, grad_colors, 0.5, lambda w: f'${w:.0f}',
                'Avg Purchase Amount', 'Average Purchase ($)')

plt.tight_layout()
save('viz_top10_categories.png')

## 🎨 Section 9 — Visualization 4: Channel Breakdown

In [None]:
# First column whose name mentions "channel" or "platform"; None if absent.
# (A separate 'purchase_channel' test is unnecessary: 'channel' already
# matches it.)
ch_col = next((c for c in df.columns
               if 'channel' in c.lower() or 'platform' in c.lower()), None)

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
fig.patch.set_facecolor('#0f1117')
fig.suptitle('Purchase Channel Breakdown', fontsize=14,
             fontweight='bold', color='white')

if ch_col:
    ch_counts = df[ch_col].value_counts()
    palette   = [ACCENT, TEAL, GOLD, CORAL, GREEN,
                 '#ab47bc', '#ef5350', '#26a69a']
    # Fix one colour per channel so both panels agree. The donut is ordered
    # by count and the bar chart by mean spend, so assigning colours by
    # position would give the same channel two different colours.
    color_of  = {ch: palette[i % len(palette)]
                 for i, ch in enumerate(ch_counts.index)}
    ch_colors = [color_of[ch] for ch in ch_counts.index]

    # Donut chart
    ax = axes[0]
    ax.set_facecolor('#0f1117')
    wedges, texts, autotexts = ax.pie(
        ch_counts, labels=ch_counts.index, colors=ch_colors,
        autopct='%1.1f%%', startangle=140,
        wedgeprops=dict(width=0.55, edgecolor='#0f1117', linewidth=2),
        textprops=dict(color='white', fontsize=10))
    for at in autotexts:
        at.set_fontsize(9)
        at.set_color('#0f1117')
        at.set_fontweight('bold')
    ax.set_title('Channel Share', fontsize=12, color='white')

    # Avg spend per channel, highest first
    ax = axes[1]
    ax.set_facecolor('#1a1d27')
    avg_ch = df.groupby(ch_col)['Purchase_Amount'].mean().sort_values(ascending=False)
    bars   = ax.bar(avg_ch.index, avg_ch.values,
                    color=[color_of.get(ch, palette[0]) for ch in avg_ch.index],
                    alpha=0.88, zorder=3)
    for bar in bars:
        h = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2, h + 0.5,
                f'${h:.0f}', ha='center', fontsize=10,
                color='white', fontweight='bold')
    ax.set_title('Avg Purchase by Channel', fontsize=12, color='white')
    ax.set_ylabel('Average Purchase ($)', fontsize=11)
    ax.yaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)
    plt.setp(ax.get_xticklabels(), rotation=20, ha='right')
else:
    # Nothing to plot: show a placeholder message on both panels.
    for ax in axes:
        ax.set_facecolor('#1a1d27')
        ax.text(0.5, 0.5, 'No channel column detected in dataset',
                ha='center', va='center', color='white', transform=ax.transAxes)

plt.tight_layout()
save('viz_channel_breakdown.png')

## 🎨 Section 10 — Visualization 5: Satisfaction Heatmap

In [None]:
# First column whose name contains "satisf"; None when the dataset has none.
sat_col = None
for col_name in df.columns:
    if 'satisf' in col_name.lower():
        sat_col = col_name
        break

fig, ax = plt.subplots(figsize=(12, 6))
fig.patch.set_facecolor('#0f1117')
ax.set_facecolor('#1a1d27')

if sat_col is None:
    # Fallback: lower-triangle correlation heatmap of the first 8 numeric columns.
    num_df = df.select_dtypes(include='number').iloc[:, :8]
    corr   = num_df.corr()
    mask   = np.triu(np.ones_like(corr, dtype=bool))
    sns.heatmap(
        corr, ax=ax, mask=mask, cmap='coolwarm', center=0,
        annot=True, fmt='.2f', linewidths=0.4, linecolor='#0f1117',
        cbar_kws={'label': 'Correlation'},
    )
    ax.set_title('Feature Correlation Heatmap', fontsize=14,
                 fontweight='bold', color='white', pad=14)
else:
    # Mean purchase amount per (satisfaction, income) pair; income levels
    # become the columns and missing combinations are shown as 0.
    mean_purchase = df.groupby([sat_col, 'Income_Level'],
                               observed=True)['Purchase_Amount'].mean()
    pivot = mean_purchase.unstack('Income_Level', fill_value=0)
    sns.heatmap(
        pivot, ax=ax, cmap='plasma', annot=True, fmt='.0f',
        linewidths=0.4, linecolor='#0f1117',
        cbar_kws={'label': 'Avg Purchase ($)'},
    )
    ax.set_title('Avg Purchase Amount: Satisfaction √ó Income Level',
                 fontsize=14, fontweight='bold', color='white', pad=14)
    ax.set_xlabel('Income Level', fontsize=12)
    ax.set_ylabel('Satisfaction Score', fontsize=12)
    plt.setp(ax.get_xticklabels(), rotation=0)

plt.tight_layout()
save('viz_satisfaction_heatmap.png')

## 🎨 Section 11 — Visualization 6: Loyalty & Spending

In [None]:
# Display label and colour for each membership flag value.
loy_labels = {True: 'Member', False: 'Non-Member'}
loy_colors = {True: GREEN, False: CORAL}

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0f1117')
fig.suptitle('Loyalty Program Members vs Spending', fontsize=14,
             fontweight='bold', color='white')

# ---- Left panel: purchase-amount violins, non-members first ----
ax = axes[0]
ax.set_facecolor('#1a1d27')
is_member = df['Customer_Loyalty_Program_Member']
data_loy = [df.loc[is_member == flag, 'Purchase_Amount'].values
            for flag in (False, True)]
parts = ax.violinplot(data_loy, positions=[0, 1], widths=0.6, showmedians=True)
for body, flag in zip(parts['bodies'], (False, True)):
    body.set_facecolor(loy_colors[flag])
    body.set_alpha(0.7)
ax.set_xticks([0, 1])
ax.set_xticklabels(['Non-Member', 'Member'])
ax.set_title('Purchase Amount Distribution', fontsize=12, color='white')
ax.set_ylabel('Purchase Amount ($)', fontsize=11)
ax.yaxis.grid(True, linestyle='--', alpha=0.4)

# ---- Right panel: purchase frequency vs total spend, one series per flag ----
ax = axes[1]
ax.set_facecolor('#1a1d27')
for loy, grp in df.groupby('Customer_Loyalty_Program_Member'):
    ax.scatter(
        grp['Frequency_of_Purchase'], grp['Total_Spend'],
        color=loy_colors[loy], label=loy_labels[loy],
        alpha=0.4, s=18, zorder=3,
    )
ax.set_title('Frequency vs Total Spend', fontsize=12, color='white')
ax.set_xlabel('Frequency of Purchase', fontsize=11)
ax.set_ylabel('Total Spend ($)', fontsize=11)
ax.legend(fontsize=10, facecolor='#1a1d27', edgecolor='none', labelcolor='white')
for axis in (ax.xaxis, ax.yaxis):
    axis.grid(True, linestyle='--', alpha=0.3)

plt.tight_layout()
save('viz_loyalty_spending.png')

## 🎨 Section 12 — Visualization 7: Social Media Influence

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0f1117')
fig.suptitle('Social Media Influence on Purchasing', fontsize=14,
             fontweight='bold', color='white')

# Influence levels in display order, with one colour per level.
sm_order  = ['None', 'Low', 'Medium', 'High']
sm_colors = [CORAL, GOLD, TEAL, GREEN]

# Customers per level, and mean purchase per level (absent levels become 0).
sm_counts = df['Social_Media_Influence'].value_counts().reindex(sm_order, fill_value=0)
avg_sm = df.groupby('Social_Media_Influence', observed=True)['Purchase_Amount'].mean()
avg_sm = avg_sm.reindex(sm_order, fill_value=0)

# One entry per panel: axes, bar heights, label offset, label formatter,
# title and y-axis label.
panels = [
    (axes[0], sm_counts.values, 3, lambda v: f'{v:,}',
     'Customers by Influence Level', 'Number of Customers'),
    (axes[1], avg_sm.values, 0.5, lambda v: f'${v:.0f}',
     'Avg Purchase by Influence Level', 'Avg Purchase Amount ($)'),
]
for ax, heights, pad, fmt, title, ylabel in panels:
    ax.set_facecolor('#1a1d27')
    bars = ax.bar(sm_order, heights, color=sm_colors, alpha=0.88, zorder=3)
    for bar in bars:
        h = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2, h + pad,
                fmt(h), ha='center', fontsize=11, color='white', fontweight='bold')
    ax.set_title(title, fontsize=12, color='white')
    ax.set_ylabel(ylabel, fontsize=11)
    ax.yaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)

plt.tight_layout()
save('viz_social_media_influence.png')

## 🎨 Section 13 — Visualization 8: Discount Sensitivity

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0f1117')
fig.suptitle('Discount Sensitivity Analysis', fontsize=14,
             fontweight='bold', color='white')

# Sensitivity levels in display order, with one colour per level.
ds_order  = ['Not Sensitive', 'Somewhat Sensitive', 'Very Sensitive']
ds_colors = [CORAL, GOLD, GREEN]

# ---- Left panel: share of customers (in %) who used a discount ----
ax = axes[0]
ax.set_facecolor('#1a1d27')
used_share = df.groupby('Discount_Sensitivity', observed=True)['Discount_Used'].mean()
usage = used_share.reindex(ds_order, fill_value=0) * 100
bars = ax.bar(ds_order, usage.values, color=ds_colors, alpha=0.88, zorder=3)
for rect in bars:
    h = rect.get_height()
    x_mid = rect.get_x() + rect.get_width()/2
    ax.text(x_mid, h + 0.5, f'{h:.1f}%', ha='center', fontsize=11,
            color='white', fontweight='bold')
ax.set_title('Discount Usage Rate', fontsize=12, color='white')
ax.set_ylabel('% Who Used a Discount', fontsize=11)
ax.yaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)
plt.setp(ax.get_xticklabels(), rotation=12, ha='right')

# ---- Right panel: mean purchase per level, split by discount usage ----
ax = axes[1]
ax.set_facecolor('#1a1d27')
x = np.arange(len(ds_order))
w = 0.35
series = [(False, TEAL, 'No Discount'), (True, ACCENT, 'Used Discount')]
for i, (disc, color, lbl) in enumerate(series):
    used_mask = df['Discount_Used'] == disc
    vals = []
    for s in ds_order:
        selected = (df['Discount_Sensitivity'] == s) & used_mask
        vals.append(df.loc[selected, 'Purchase_Amount'].mean())
    # Shift the second series right by one bar width so the pair sits side by side.
    ax.bar(x + i * w, vals, w, color=color, alpha=0.85, label=lbl, zorder=3)
ax.set_xticks(x + w / 2)
ax.set_xticklabels(ds_order, rotation=12, ha='right')
ax.set_title('Avg Purchase: Discount Used vs Not', fontsize=12, color='white')
ax.set_ylabel('Avg Purchase Amount ($)', fontsize=11)
ax.legend(fontsize=10, facecolor='#1a1d27', edgecolor='none', labelcolor='white')
ax.yaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)

plt.tight_layout()
save('viz_discount_sensitivity.png')

---
## ⚙️ Section 14 — ML Pipeline (for Confusion Matrix & Feature Importance)

> This trains the CatBoost model needed for the final two visualizations.

In [None]:
# Train a CatBoost classifier that predicts Discount_Used from the other
# columns. The later cells read `features`, `catboost_model`, `y_test`,
# `y_pred` and `acc` from this cell.
print('‚öôÔ∏è  Building ML pipeline ‚Ä¶')

# Work on a copy so the dataframe used for plotting is left untouched.
df_ml = df.copy()
# Columns excluded from modelling; errors='ignore' skips any that are absent.
df_ml = df_ml.drop(
    columns=['Customer_ID', 'Location', 'Time_of_Purchase'], errors='ignore'
)
# Cast the two boolean flags to 0/1 integers.
df_ml['Discount_Used']                   = df_ml['Discount_Used'].astype(int)
df_ml['Customer_Loyalty_Program_Member'] = df_ml['Customer_Loyalty_Program_Member'].astype(int)

# Integer-encode every remaining object/category column. Values are converted
# to str first, so missing entries are encoded as their own string category.
# NOTE(review): LabelEncoder presumably numbers classes in sorted string
# order, so the ordered categoricals (e.g. Income_Level) would not keep their
# declared Low < Middle < High order here; confirm whether that matters.
for col in df_ml.select_dtypes(include=['object', 'category']).columns:
    le = LabelEncoder()
    df_ml[col] = le.fit_transform(df_ml[col].astype(str))

# Target is Discount_Used; Total_Spend is also left out of the feature set.
TARGET   = 'Discount_Used'
features = [c for c in df_ml.columns if c not in (TARGET, 'Total_Spend')]
X, y     = df_ml[features], df_ml[TARGET]

# 80/20 split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Standardise using statistics fitted on the training split only, then wrap
# the arrays back into DataFrames so the feature names are kept.
scaler   = StandardScaler()
X_tr_sc  = scaler.fit_transform(X_train)
X_te_sc  = scaler.transform(X_test)
X_tr_df  = pd.DataFrame(X_tr_sc, columns=features)
X_te_df  = pd.DataFrame(X_te_sc, columns=features)

# Train CatBoost
# Class 0 gets weight 1; class 1 gets count(class 0) / count(class 1),
# computed on the training labels.
cw = y_train.value_counts()
catboost_model = CatBoostClassifier(
    iterations=500, learning_rate=0.05, depth=6, l2_leaf_reg=3,
    class_weights=[1, cw[0] / cw[1]], random_state=42,
    verbose=False, task_type='CPU'
)

print('üöÄ Training CatBoost (30‚Äì60 seconds) ‚Ä¶')
catboost_model.fit(X_tr_df, y_train)
# Predictions and accuracy on the held-out test split.
y_pred = catboost_model.predict(X_te_df)
acc    = accuracy_score(y_test, y_pred)

print(f'\n‚úÖ CatBoost trained!')
print(f'   Accuracy : {acc*100:.2f}%')
print(f'   F1-Score : {f1_score(y_test, y_pred):.4f}')

## 🎨 Section 15 — Visualization 9: Confusion Matrix & Metrics

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0f1117')
fig.suptitle('CatBoost ‚Äî Confusion Matrix & Performance Metrics',
             fontsize=14, fontweight='bold', color='white')

# ---- Left panel: confusion matrix (rows = actual, columns = predicted) ----
ax = axes[0]
ax.set_facecolor('#1a1d27')
class_names = ['No Discount', 'Used Discount']
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(
    cm, ax=ax, annot=True, fmt='d', cmap='plasma',
    xticklabels=class_names, yticklabels=class_names,
    linewidths=1, linecolor='#0f1117', cbar=False,
)
ax.set_title('Confusion Matrix', fontsize=12, color='white')
ax.set_xlabel('Predicted', fontsize=11)
ax.set_ylabel('Actual', fontsize=11)

# ---- Right panel: headline classification metrics ----
ax = axes[1]
ax.set_facecolor('#1a1d27')
# Accuracy is reused from the training cell; the other scores are computed
# here with zero_division=0.
scorers = {'Precision': precision_score, 'Recall': recall_score, 'F1-Score': f1_score}
metrics = {'Accuracy': acc}
for metric_name, scorer in scorers.items():
    metrics[metric_name] = scorer(y_test, y_pred, zero_division=0)

bars = ax.bar(list(metrics), list(metrics.values()),
              color=[ACCENT, TEAL, GOLD, GREEN], alpha=0.88, zorder=3)
for rect in bars:
    score = rect.get_height()
    x_mid = rect.get_x() + rect.get_width()/2
    ax.text(x_mid, score + 0.008, f'{score:.3f}', ha='center', fontsize=12,
            color='white', fontweight='bold')
ax.set_ylim(0, 1.12)  # headroom above 1.0 for the value labels
ax.set_title('Classification Metrics', fontsize=12, color='white')
ax.set_ylabel('Score', fontsize=11)
ax.yaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)
ax.axhline(0.5, color='#ff4444', ls='--', lw=1.2, label='Random Baseline (50%)')
ax.legend(fontsize=10, facecolor='#1a1d27', edgecolor='none', labelcolor='white')

plt.tight_layout()
save('viz_confusion_matrix.png')

## 🎨 Section 16 — Visualization 10: Feature Importance

In [None]:
# Rank the model's features by importance and keep at most the top 15.
importances = catboost_model.feature_importances_
fi_df = (pd.DataFrame({'Feature': features, 'Importance': importances})
           .sort_values('Importance', ascending=False)
           .head(15))

fig, ax = plt.subplots(figsize=(11, 8))
fig.patch.set_facecolor('#0f1117')
ax.set_facecolor('#1a1d27')

# One colour per rank tier (1-5, 6-10, 11-15), cut to the number of bars
# actually drawn. Without the cut, reversing the full 15-entry list would
# give the top features the lowest-tier colours whenever fewer than 15
# features exist.
bar_colors = ([ACCENT]*5 + [TEAL]*5 + ['#546e7a']*5)[:len(fi_df)]
# Features and colours are reversed together, so ranks keep their tier colour.
bars = ax.barh(fi_df['Feature'][::-1], fi_df['Importance'][::-1],
               color=bar_colors[::-1], alpha=0.88, zorder=3)
for bar in bars:
    w = bar.get_width()
    ax.text(w + 0.001, bar.get_y() + bar.get_height()/2,
            f'{w:.4f}', va='center', fontsize=9, color='#e0e0e0')

ax.set_title('Top 15 Feature Importances ‚Äî CatBoost Model',
             fontsize=14, fontweight='bold', color='white', pad=15)
ax.set_xlabel('Importance Score', fontsize=11)
ax.xaxis.grid(True, linestyle='--', alpha=0.4, zorder=0)

legend_handles = [
    mpatches.Patch(color=ACCENT,    label='Top 1‚Äì5'),
    mpatches.Patch(color=TEAL,      label='Top 6‚Äì10'),
    mpatches.Patch(color='#546e7a', label='Top 11‚Äì15'),
]
ax.legend(handles=legend_handles, fontsize=10, facecolor='#1a1d27',
          edgecolor='none', labelcolor='white', loc='lower right')

plt.tight_layout()
save('viz_feature_importance.png')

---
## 📥 Section 17 — Download All Visualizations as ZIP

In [None]:
import os
import shutil
from google.colab import files

# Folder holding the generated PNGs, and the archive base name (no extension).
viz_dir  = 'outputs/visualizations'
zip_path = 'visualizations_output'

viz_files = [
    'viz_age_distribution.png',
    'viz_income_vs_purchase.png',
    'viz_top10_categories.png',
    'viz_channel_breakdown.png',
    'viz_satisfaction_heatmap.png',
    'viz_loyalty_spending.png',
    'viz_social_media_influence.png',
    'viz_discount_sensitivity.png',
    'viz_confusion_matrix.png',
    'viz_feature_importance.png',
]
# Check the disk instead of assuming every earlier cell ran successfully.
missing = {f for f in viz_files
           if not os.path.isfile(os.path.join(viz_dir, f))}

# make_archive returns the path of the archive it wrote; reuse that path for
# the download rather than rebuilding the file name by hand.
archive_path = shutil.make_archive(zip_path, 'zip', viz_dir)

print('\n' + '='*60)
if missing:
    print(f'WARNING: {len(missing)} of {len(viz_files)} visualizations are missing')
else:
    print('üéâ  ALL 10 VISUALIZATIONS COMPLETE!')
print('='*60)

for i, f in enumerate(viz_files, 1):
    mark = 'MISSING' if f in missing else '‚úÖ'
    print(f'  {i:02d}. {mark}  {f}')

print('\nüì¶ Downloading ZIP archive ‚Ä¶')
files.download(archive_path)