# Retail Vision Analytics - Data Exploration

This notebook demonstrates how to analyze customer behavior data from the retail vision analytics system.

## Contents
1. Loading Sample Data
2. Customer Journey Analysis
3. Queue Metrics Visualization
4. Heatmap Visualization
5. Conversion Funnel Analysis

In [None]:
# Import libraries
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Set style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'.
# Try the current name first and fall back to the pre-3.6 name so this cell
# does not raise on older installs; if neither exists the default style stays.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
sns.set_palette('husl')

print('Libraries loaded successfully!')

## 1. Loading Sample Data

First, let's load the sample data generated by our analytics system.

In [None]:
# Load sample data
# No trailing slash here: every path below is built as f'{DATA_DIR}/<file>',
# so a trailing slash would produce '//' in the resulting path.
DATA_DIR = '../data/sample'


def load_sample_json(filename):
    """Read and decode one JSON file located directly under DATA_DIR.

    Args:
        filename: File name relative to DATA_DIR, e.g. 'journeys.json'.

    Returns:
        The decoded JSON document (whatever json.load produces for the file).

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Be explicit about UTF-8: open() otherwise uses the platform's locale
    # encoding, which is not UTF-8 on every system.
    with open(f'{DATA_DIR}/{filename}', 'r', encoding='utf-8') as f:
        return json.load(f)


# Load journeys
journeys = load_sample_json('journeys.json')
journeys_df = pd.DataFrame(journeys)

# Load queue metrics
queue_metrics = load_sample_json('queue_metrics.json')
queue_df = pd.DataFrame(queue_metrics)

# Load alerts
alerts = load_sample_json('alerts.json')
alerts_df = pd.DataFrame(alerts)

print(f'Loaded {len(journeys_df)} journeys')
print(f'Loaded {len(queue_df)} queue metrics')
print(f'Loaded {len(alerts_df)} alerts')

## 2. Customer Journey Analysis

In [None]:
# Two-panel overview: visit-duration histogram (left), conversion pie (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
duration_ax, conversion_ax = axes

# Left panel: visit length, converted from seconds to minutes
duration_minutes = journeys_df['duration_seconds'] / 60
duration_ax.hist(duration_minutes, bins=30, edgecolor='white')
duration_ax.set(
    xlabel='Duration (minutes)',
    ylabel='Count',
    title='Customer Visit Duration Distribution',
)

# Right panel: mean of the 'converted' column expressed as a percentage
conversion_rate = journeys_df['converted'].mean() * 100
pie_shares = [conversion_rate, 100 - conversion_rate]
conversion_ax.pie(
    pie_shares,
    labels=['Converted', 'Not Converted'],
    autopct='%1.1f%%',
    colors=['#2ecc71', '#e74c3c'],
)
conversion_ax.set_title(f'Conversion Rate: {conversion_rate:.1f}%')

plt.tight_layout()
plt.show()

In [None]:
# Zone popularity analysis
# explode() flattens the per-journey zone lists in one vectorized step.
# Empty lists and missing values turn into NaN, which value_counts() drops by
# default, so a journey without zone data no longer breaks the tally.
zone_counts = journeys_df['zones_visited'].explode().value_counts()

# Draw on an explicit Axes instead of relying on pyplot's "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
zone_counts.plot(kind='bar', ax=ax, color='steelblue', edgecolor='white')
ax.set_xlabel('Zone')
ax.set_ylabel('Visit Count')
ax.set_title('Zone Popularity')
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

## 3. Queue Metrics Visualization

In [None]:
# Parse the timestamp column and derive the hour-of-day used for grouping
queue_df['timestamp'] = pd.to_datetime(queue_df['timestamp'])
queue_df['hour'] = queue_df['timestamp'].dt.hour

# Stacked panels: hourly queue length on top, wait-time histogram below
fig, axes = plt.subplots(2, 1, figsize=(14, 8))
hourly_ax, wait_ax = axes

# Top panel: mean queue length for each hour present in the data
hourly_queue = queue_df.groupby('hour')['queue_length'].mean()
hours, mean_lengths = hourly_queue.index, hourly_queue.values
hourly_ax.plot(hours, mean_lengths, marker='o', linewidth=2)
hourly_ax.fill_between(hours, mean_lengths, alpha=0.3)
hourly_ax.set(
    xlabel='Hour of Day',
    ylabel='Avg Queue Length',
    title='Average Queue Length by Hour',
)
hourly_ax.set_xticks(range(0, 24))

# Bottom panel: wait times in minutes, with reference lines at 2 and 5 minutes
wait_minutes = queue_df['avg_wait_time_seconds'] / 60
wait_ax.hist(wait_minutes, bins=30, edgecolor='white')
reference_lines = (
    (2, 'orange', '2 min target'),
    (5, 'red', '5 min critical'),
)
for minutes, line_color, line_label in reference_lines:
    wait_ax.axvline(x=minutes, color=line_color, linestyle='--', label=line_label)
wait_ax.set(
    xlabel='Wait Time (minutes)',
    ylabel='Count',
    title='Wait Time Distribution',
)
wait_ax.legend()

plt.tight_layout()
plt.show()

## 4. Heatmap Visualization

In [None]:
# Load heatmap data
# Open as UTF-8 explicitly; open() otherwise falls back to the platform's
# locale encoding, which is not UTF-8 on every system.
with open(f'{DATA_DIR}/heatmap.json', 'r', encoding='utf-8') as f:
    heatmap_data = json.load(f)

heatmap_array = np.array(heatmap_data['data'])

fig, ax = plt.subplots(figsize=(12, 8))
image = ax.imshow(heatmap_array, cmap='hot', interpolation='gaussian')
fig.colorbar(image, ax=ax, label='Traffic Intensity')
ax.set_title('Store Traffic Heatmap')
ax.set_xlabel('X Position')
ax.set_ylabel('Y Position')

# Mark hotspots
# Hotspot x/y are scaled by the grid width/height, so they are presumably
# stored as fractions of the store extent -- TODO confirm against the exporter.
grid_height, grid_width = heatmap_array.shape[:2]
hotspot_xs = [spot['x'] * grid_width for spot in heatmap_data['hotspots']]
hotspot_ys = [spot['y'] * grid_height for spot in heatmap_data['hotspots']]
if hotspot_xs:
    # One scatter call draws every marker instead of one collection per hotspot.
    ax.scatter(hotspot_xs, hotspot_ys, c='cyan', s=100, marker='*', edgecolors='white')

fig.tight_layout()
plt.show()

## 5. Conversion Funnel Analysis

In [None]:
# Calculate funnel stages
# NOTE: every stage is counted independently over all journeys, so the stages
# are not guaranteed to be nested -- a later bar can be longer than an earlier one.
ENGAGED_MIN_SECONDS = 300  # visits longer than 5 minutes count as "engaged"

total_visitors = len(journeys_df)
browsed = int((journeys_df['zones_visited'].map(len) > 1).sum())
engaged = int((journeys_df['duration_seconds'] > ENGAGED_MIN_SECONDS).sum())
# .eq(True) rather than truthiness so non-boolean values are not counted.
with_cart = int(journeys_df['cart_detected'].eq(True).sum())
converted = int(journeys_df['converted'].eq(True).sum())

funnel_stages = ['Entered', 'Browsed', 'Engaged (>5min)', 'Used Cart', 'Converted']
funnel_values = [total_visitors, browsed, engaged, with_cart, converted]

# Create funnel chart
fig, ax = plt.subplots(figsize=(10, 8))

# Reverse the ramp so the first stage gets the darkest blue.
colors = plt.cm.Blues(np.linspace(0.3, 0.9, len(funnel_stages)))[::-1]

for i, value in enumerate(funnel_values):
    # With no journeys there is nothing to divide by; draw empty bars instead
    # of failing with ZeroDivisionError.
    share = value / total_visitors if total_visitors else 0.0
    ax.barh(i, share, color=colors[i], edgecolor='white', height=0.7)
    ax.text(share + 0.02, i, f'{value} ({share*100:.1f}%)', va='center')

ax.set_yticks(range(len(funnel_stages)))
ax.set_yticklabels(funnel_stages)
ax.set_xlabel('Proportion of Visitors')
ax.set_title('Customer Conversion Funnel')
ax.invert_yaxis()  # first stage at the top
ax.set_xlim(0, 1.3)  # headroom right of the full-width bar for the value labels

plt.tight_layout()
plt.show()

## Summary Statistics

In [None]:
# Print summary
def pct_of_visitors(count):
    """Return count as a percentage of total_visitors (0.0 when there are none)."""
    return count / total_visitors * 100 if total_visitors else 0.0


# An empty alerts file yields a DataFrame without a 'severity' column; treat
# that as zero alerts of every severity instead of raising KeyError.
if 'severity' in alerts_df.columns:
    severity_counts = alerts_df['severity'].value_counts()
else:
    severity_counts = {}

print('=' * 50)
print('RETAIL ANALYTICS SUMMARY')
print('=' * 50)
print(f'\nTotal Visitors: {total_visitors}')
print(f'Conversion Rate: {pct_of_visitors(converted):.1f}%')
print(f'Cart Usage Rate: {pct_of_visitors(with_cart):.1f}%')
print(f'Avg Visit Duration: {journeys_df["duration_seconds"].mean()/60:.1f} minutes')
print('\nQueue Performance:')
print(f'  Avg Queue Length: {queue_df["queue_length"].mean():.1f}')
print(f'  Avg Wait Time: {queue_df["avg_wait_time_seconds"].mean()/60:.1f} minutes')
print(f'  Total Abandonments: {queue_df["abandonment_count"].sum()}')
print(f'\nAlerts Generated: {len(alerts_df)}')
for severity in ('critical', 'warning', 'info'):
    # Both Series.get and dict.get return the default for an absent severity.
    print(f'  {severity.capitalize()}: {severity_counts.get(severity, 0)}')