# Retail Layout Optimization System
## Using UWB Tracking + POS Data for Store Layout Decisions

This notebook implements a complete layout optimization system that:
- Predicts purchase probability from exposure (PyTorch models)
- Estimates counterfactual impacts of layout changes
- Optimizes SKU placement to maximize profit
- Generates actionable suggestions with ROI estimates

**Sections:**
1. Data Structures & Generation
2. KPI Computation
3. Predictive Models (PyTorch)
4. Counterfactual Estimation
5. Optimization
6. Suggestion Generation
7. Validation & Visualization

In [None]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from datetime import datetime, timedelta
import json
from scipy.optimize import linear_sum_assignment
from collections import defaultdict

# Fix the NumPy and PyTorch global seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Use CUDA when it is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

## 1. Data Structures & Generation

We'll create synthetic but realistic data matching the spec:
- Store with zones (aisles, bays, endcaps)
- SKUs with different margins and categories
- Shopping sessions with trajectories
- Purchase transactions

In [None]:
@dataclass
class Zone:
    """Store zone (an aisle bay, an endcap, or the checkout area).

    Plain record type; the only derived value is ``versioned_id``.
    """
    zone_id: str  # identifier without the version suffix
    version: int  # combined with zone_id by versioned_id
    zone_type: str  # 'aisle', 'endcap', 'checkout'
    capacity_facings: int  # presumably how many product facings fit in the zone - confirm
    has_refrigeration: bool
    distance_to_checkout_m: float  # presumably metres, per the _m suffix
    neighbors: List[str]  # zone_ids of adjacent zones
    area_m2: float  # presumably square metres, per the _m2 suffix
    
    @property
    def versioned_id(self) -> str:
        """Return ``zone_id`` and ``version`` joined as ``<zone_id>@v<version>``."""
        return f"{self.zone_id}@v{self.version}"

@dataclass
class SKU:
    """Product SKU: catalogue attributes of a single sellable item."""
    sku_id: str
    category: str
    subcategory: str
    price: float  # unit selling price
    margin: float  # profit per unit
    requires_refrigeration: bool
    facings_required: int  # presumably the shelf facings this SKU occupies - confirm
    velocity: float  # baseline sales rate
    
@dataclass
class ExposureEvent:
    """Shopper exposure to a zone/SKU during one shopping session."""
    session_id: str
    zone_id: str
    sku_id: Optional[str]  # None for a zone-level (not SKU-level) exposure
    start_ts: datetime
    end_ts: datetime
    dwell_s: float  # time spent in the zone, in seconds
    visibility_score: float  # 0-1
    
@dataclass
class Purchase:
    """Purchase transaction: one SKU line of a basket, tied to a session."""
    basket_id: str
    session_id: str  # links the purchase to the session's exposure events
    sku_id: str
    qty: int
    unit_price: float
    margin: float  # presumably profit per unit, as on SKU.margin - confirm
    checkout_ts: datetime

In [None]:
# Generate synthetic store layout
def generate_store_layout(n_zones=20) -> Dict[str, Zone]:
    """Build a synthetic store as a mapping of zone id to ``Zone``.

    The layout consists of ``n_zones - 5`` aisle bays (three bays per
    aisle), four endcaps and one checkout zone, inserted in that order, so
    the checkout is always the last entry. Capacities, distances and areas
    are drawn from NumPy's global RNG.

    Args:
        n_zones: Target number of zones. Because the four endcaps and the
            checkout are always created, the result has
            ``max(n_zones - 5, 0) + 5`` zones.

    Returns:
        Dict keyed by ``zone_id``; every zone is linked to the zone inserted
        before and after it through ``neighbors``.
    """
    layout = {}

    # Aisle bays: three per aisle, the first five bays are refrigerated
    for bay_idx in range(n_zones - 5):
        aisle_no, bay_no = divmod(bay_idx, 3)
        bay_id = f"Z-aisle{aisle_no}-bay{bay_no}"
        # Draw order (capacity, distance, area) matters for seeded runs
        capacity = np.random.randint(20, 50)
        checkout_distance = np.random.uniform(10, 50)
        floor_area = np.random.uniform(15, 30)
        layout[bay_id] = Zone(
            zone_id=bay_id,
            version=1,
            zone_type='aisle',
            capacity_facings=capacity,
            has_refrigeration=bay_idx < 5,
            distance_to_checkout_m=checkout_distance,
            neighbors=[],  # filled in once all zones exist
            area_m2=floor_area,
        )

    # Endcaps: fixed capacity, never refrigerated
    for endcap_idx in range(4):
        endcap_id = f"Z-endcap{endcap_idx}"
        checkout_distance = np.random.uniform(5, 30)
        floor_area = np.random.uniform(5, 10)
        layout[endcap_id] = Zone(
            zone_id=endcap_id,
            version=1,
            zone_type='endcap',
            capacity_facings=10,
            has_refrigeration=False,
            distance_to_checkout_m=checkout_distance,
            neighbors=[],
            area_m2=floor_area,
        )

    # Single checkout zone, added last
    layout['Z-checkout'] = Zone(
        zone_id='Z-checkout',
        version=1,
        zone_type='checkout',
        capacity_facings=0,
        has_refrigeration=False,
        distance_to_checkout_m=0,
        neighbors=[],
        area_m2=20,
    )

    # Chain the zones: each one is adjacent to its predecessor and successor
    ordered_ids = list(layout)
    for left_id, right_id in zip(ordered_ids, ordered_ids[1:]):
        layout[left_id].neighbors.append(right_id)
        layout[right_id].neighbors.append(left_id)

    return layout

# Generate SKU catalog
def generate_sku_catalog(n_skus=100) -> Dict[str, SKU]:
    """Build a synthetic catalogue of ``n_skus`` products keyed by SKU id.

    Each SKU gets a random category, a price in [2, 20), a margin of 15-35%
    of that price, 1-3 required facings and an exponentially distributed
    baseline velocity. Dairy and frozen products require refrigeration.
    All values come from NumPy's global RNG.
    """
    categories = ['dairy', 'bakery', 'frozen', 'snacks', 'beverages', 'produce']
    chilled_categories = ('dairy', 'frozen')
    catalog = {}

    for idx in range(n_skus):
        # Draw order is kept fixed so seeded runs reproduce the same catalogue
        category = np.random.choice(categories)
        price = np.random.uniform(2, 20)
        margin_rate = np.random.uniform(0.15, 0.35)  # 15-35% margin
        facings = np.random.randint(1, 4)
        weekly_sales = np.random.exponential(10)  # sales per week

        sku_id = f"SKU{idx:03d}"
        catalog[sku_id] = SKU(
            sku_id=sku_id,
            category=category,
            subcategory=f"{category}_sub{idx%3}",
            price=price,
            margin=price * margin_rate,
            requires_refrigeration=category in chilled_categories,
            facings_required=facings,
            velocity=weekly_sales,
        )

    return catalog

# Generate store data
zones = generate_store_layout(n_zones=20)
skus = generate_sku_catalog(n_skus=100)

# Summarise what was generated and show the first entry of each collection
print(f"Generated {len(zones)} zones and {len(skus)} SKUs")
print(f"\nExample zone: {next(iter(zones.values()))}")
print(f"\nExample SKU: {next(iter(skus.values()))}")

In [None]:
# Generate shopping sessions with trajectories
def generate_sessions(zones: Dict[str, Zone], skus: Dict[str, SKU],
                      n_sessions=1000,
                      start_time: Optional[datetime] = None
                      ) -> Tuple[List[ExposureEvent], List[Purchase]]:
    """Generate synthetic shopping sessions with exposures and purchases.

    Every session visits 3-9 zones chosen uniformly at random (with
    repetition) from the non-checkout zones. Each visit produces one
    zone-level ``ExposureEvent`` with an exponentially distributed dwell
    (mean 12 s) and a Beta(5, 2) visibility score. The basket then receives
    a Poisson(3) number of single-unit purchases whose SKUs are sampled
    uniformly from the catalogue, i.e. independently of the exposures.

    Args:
        zones: Store layout keyed by zone id. Zones whose ``zone_type`` is
            ``'checkout'`` are never visited.
        skus: SKU catalogue keyed by SKU id.
        n_sessions: Number of sessions to simulate.
        start_time: Timestamp at which every session starts. When ``None``
            (the default) each session starts at ``datetime.now()``, which
            makes timestamps differ between runs even with a fixed seed.

    Returns:
        ``(exposures, purchases)`` as flat lists across all sessions.

    Raises:
        ValueError: If sessions are requested but ``zones`` has no
            non-checkout zone to visit.
    """
    exposures = []
    purchases = []
    sku_ids = list(skus.keys())

    # Select visitable zones by type rather than by position, so the result
    # does not depend on the checkout zone being the last dict entry.
    walkable_zone_ids = [
        zone_id for zone_id, zone in zones.items()
        if zone.zone_type != 'checkout'
    ]
    if n_sessions > 0 and not walkable_zone_ids:
        raise ValueError("zones must contain at least one non-checkout zone")

    for sess_idx in range(n_sessions):
        session_id = f"sess_{sess_idx}"
        basket_id = f"basket_{sess_idx}"

        # Simulate shopping path (independent uniform draws over zones)
        n_zones_visited = np.random.randint(3, 10)
        current_time = start_time if start_time is not None else datetime.now()

        for _ in range(n_zones_visited):
            zone_id = np.random.choice(walkable_zone_ids)

            # Dwell and visibility; no stop threshold is applied here
            dwell_s = np.random.exponential(12)
            visibility = np.random.beta(5, 2)  # Skewed towards higher visibility

            # Visits are back to back: the next one starts when this one ends
            end_time = current_time + timedelta(seconds=dwell_s)
            exposures.append(ExposureEvent(
                session_id=session_id,
                zone_id=zone_id,
                sku_id=None,  # Zone-level exposure
                start_ts=current_time,
                end_ts=end_time,
                dwell_s=dwell_s,
                visibility_score=visibility
            ))

            current_time = end_time

        # Simulate purchases: SKUs are drawn uniformly from the catalogue,
        # so they carry no signal from the session's exposures.
        n_purchases = np.random.poisson(3)  # Average 3 items per basket
        for _ in range(n_purchases):
            sku_id = np.random.choice(sku_ids)
            sku = skus[sku_id]

            purchases.append(Purchase(
                basket_id=basket_id,
                session_id=session_id,
                sku_id=sku_id,
                qty=1,
                unit_price=sku.price,
                margin=sku.margin,
                checkout_ts=current_time  # checkout happens when the last visit ends
            ))

    return exposures, purchases

# Generate data
exposures, purchases = generate_sessions(zones, skus, n_sessions=1000)

# Report the volume of generated data and show one record of each kind
print(
    f"Generated {len(exposures)} exposure events "
    f"and {len(purchases)} purchases"
)
print(f"\nExample exposure: {exposures[0]}")
print(f"\nExample purchase: {purchases[0]}")

## 2. KPI Computation

Calculate key performance indicators per zone:
- **Reach**: share of sessions that visit the zone at least once
- **StopRate**: share of zone visits that are "stops" (dwell >= threshold, 8 s by default)
- **EngageDwell**: median dwell time across stops
- **Conv_{p|z}**: Purchase probability given exposure

In [None]:
def compute_zone_kpis(exposures: List[ExposureEvent], purchases: List[Purchase],
                      zones: Dict[str, Zone], stop_threshold_s=8.0) -> pd.DataFrame:
    """Compute reach, stop rate and engaged dwell for each zone.

    Args:
        exposures: Exposure events; only ``session_id``, ``zone_id``,
            ``dwell_s`` and ``visibility_score`` are read.
        purchases: Accepted for interface compatibility but not used; no
            conversion KPI is computed here.
        zones: Zones to report on. Only the keys are used, and the output
            has one row per key in dict order.
        stop_threshold_s: Minimum dwell in seconds for a visit to count as
            a stop.

    Returns:
        DataFrame with columns ``zone_id``, ``reach`` (sessions visiting the
        zone / sessions with any exposure), ``stop_rate`` (stops / visits),
        ``engage_dwell_s`` (median dwell over stops, 0.0 when there are
        none) and ``total_visits``. Zones with no visits get zeros. The
        columns are present even when ``zones`` or ``exposures`` is empty.
    """
    exposure_columns = ['session_id', 'zone_id', 'dwell_s', 'visibility', 'is_stop']
    kpi_columns = ['zone_id', 'reach', 'stop_rate', 'engage_dwell_s', 'total_visits']

    # Passing explicit columns keeps the frame usable when there are no exposures
    exp_df = pd.DataFrame(
        [
            {
                'session_id': e.session_id,
                'zone_id': e.zone_id,
                'dwell_s': e.dwell_s,
                'visibility': e.visibility_score,
                'is_stop': e.dwell_s >= stop_threshold_s
            } for e in exposures
        ],
        columns=exposure_columns,
    )

    # Denominator for reach: sessions that have at least one exposure
    total_sessions = exp_df['session_id'].nunique()

    # Split once by zone instead of rescanning the whole frame for every zone
    visits_by_zone = {
        zone_id: visits
        for zone_id, visits in exp_df.groupby('zone_id', sort=False)
    }

    zone_kpis = []
    for zone_id in zones:
        zone_exp = visits_by_zone.get(zone_id)

        if zone_exp is None:
            # No visits to this zone
            zone_kpis.append({
                'zone_id': zone_id,
                'reach': 0.0,
                'stop_rate': 0.0,
                'engage_dwell_s': 0.0,
                'total_visits': 0
            })
            continue

        stops = zone_exp[zone_exp['is_stop']]
        zone_kpis.append({
            'zone_id': zone_id,
            # Reach: unique sessions / total sessions
            'reach': zone_exp['session_id'].nunique() / total_sessions,
            # Stop rate: stops / all visits
            'stop_rate': zone_exp['is_stop'].mean(),
            # Engage dwell: median dwell on stops
            'engage_dwell_s': stops['dwell_s'].median() if len(stops) > 0 else 0.0,
            'total_visits': len(zone_exp)
        })

    return pd.DataFrame(zone_kpis, columns=kpi_columns)

# Compute KPIs
zone_kpis = compute_zone_kpis(exposures, purchases, zones)

# Show the first ten zones, then the store-wide averages
print("\nZone KPIs:")
print(zone_kpis.iloc[:10])
print(f"\nAverage reach: {zone_kpis['reach'].mean():.2%}")
print(f"Average stop rate: {zone_kpis['stop_rate'].mean():.2%}")

In [None]:
# Visualize KPIs
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_reach, ax_stop), (ax_dwell, ax_scatter) = axes
bar_positions = range(len(zone_kpis))

# Top-left: reach per zone with the store average as a reference line
ax_reach.bar(bar_positions, zone_kpis['reach'].values)
ax_reach.set_title('Zone Reach (% Sessions Visiting)')
ax_reach.set_xlabel('Zone')
ax_reach.set_ylabel('Reach')
ax_reach.axhline(zone_kpis['reach'].mean(), color='r', linestyle='--', label='Average')
ax_reach.legend()

# Top-right: stop rate per zone with the store average as a reference line
ax_stop.bar(bar_positions, zone_kpis['stop_rate'].values, color='orange')
ax_stop.set_title('Zone Stop Rate')
ax_stop.set_xlabel('Zone')
ax_stop.set_ylabel('Stop Rate')
ax_stop.axhline(zone_kpis['stop_rate'].mean(), color='r', linestyle='--', label='Average')
ax_stop.legend()

# Bottom-left: median dwell on stops per zone
ax_dwell.bar(bar_positions, zone_kpis['engage_dwell_s'].values, color='green')
ax_dwell.set_title('Median Engage Dwell (seconds)')
ax_dwell.set_xlabel('Zone')
ax_dwell.set_ylabel('Dwell (s)')

# Bottom-right: one point per zone, reach against stop rate
ax_scatter.scatter(zone_kpis['reach'], zone_kpis['stop_rate'], s=100, alpha=0.6)
ax_scatter.set_title('Reach vs Stop Rate')
ax_scatter.set_xlabel('Reach')
ax_scatter.set_ylabel('Stop Rate')
ax_scatter.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Text summary: top three zones per KPI, then zones in the bottom quartile of both
print("\nKey Insights:")
print(f"- High reach zones: {zone_kpis.nlargest(3, 'reach')['zone_id'].tolist()}")
print(f"- High stop rate zones: {zone_kpis.nlargest(3, 'stop_rate')['zone_id'].tolist()}")
print("- Low performing zones (low reach & low stop): ", end='')
reach_q1 = zone_kpis['reach'].quantile(0.25)
stop_rate_q1 = zone_kpis['stop_rate'].quantile(0.25)
low_perf = zone_kpis[(zone_kpis['reach'] < reach_q1) &
                     (zone_kpis['stop_rate'] < stop_rate_q1)]
print(low_perf['zone_id'].tolist()[:3])

## 3. Predictive Models (PyTorch)

We'll implement the core ML models:
1. **Exposure→Purchase Model**: Predicts `P(purchase | exposure, context)`
2. **Next-Zone Forecaster**: Predicts next zone visit (sequence model)
3. **Complementarity Matrix**: Co-occurrence analysis

### 3.1 Exposure→Purchase Prediction Model

In [None]:
# Prepare training data for conversion model
def prepare_conversion_data(exposures: List[ExposureEvent], 
                           purchases: List[Purchase],
                           zones: Dict[str, Zone],
                           skus: Dict[str, SKU]) -> pd.DataFrame:
    """Create the training dataset: exposure features -> purchase label.

    Exposures are zone-level, so SKU-level exposures are synthesised: for
    every exposure, 3-9 distinct SKUs are sampled uniformly from the
    catalogue as the SKUs "visible" in that zone (fewer when the catalogue
    is smaller than the drawn sample size). One row is emitted per
    (exposure, sampled SKU) pair, labelled 1 if that SKU was bought in the
    same session and 0 otherwise.

    Args:
        exposures: Exposure events to expand into samples.
        purchases: Purchases used to build the per-session label sets.
        zones: Layout keyed by zone id; must contain every exposed zone.
        skus: SKU catalogue keyed by SKU id.

    Returns:
        DataFrame with identifier columns (``session_id``, ``sku_id``,
        ``zone_id``), exposure/zone/SKU feature columns and the binary
        ``purchased`` target. The columns are present even when no samples
        are produced.

    Raises:
        KeyError: If an exposure references a zone id missing from ``zones``.
    """
    feature_columns = [
        'session_id', 'sku_id', 'zone_id',
        'dwell_s', 'visibility',
        'distance_to_checkout', 'zone_area', 'is_endcap',
        'price', 'margin', 'velocity',
        'purchased',
    ]

    # Create session-level purchase sets
    session_purchases = defaultdict(set)
    for p in purchases:
        session_purchases[p.session_id].add(p.sku_id)

    sku_ids = list(skus.keys())  # loop-invariant, built once
    no_purchases = frozenset()
    samples = []

    if sku_ids:
        for exp in exposures:
            # Cap the sample at the catalogue size: sampling without
            # replacement raises when asked for more items than exist.
            n_skus_in_zone = min(np.random.randint(3, 10), len(sku_ids))
            zone_skus = np.random.choice(sku_ids, size=n_skus_in_zone, replace=False)

            zone = zones[exp.zone_id]
            # .get avoids inserting empty sets for sessions without purchases
            bought = session_purchases.get(exp.session_id, no_purchases)

            for sku_id in zone_skus:
                sku = skus[sku_id]
                samples.append({
                    'session_id': exp.session_id,
                    'sku_id': sku_id,
                    'zone_id': exp.zone_id,
                    # Exposure features
                    'dwell_s': exp.dwell_s,
                    'visibility': exp.visibility_score,
                    # Zone features
                    'distance_to_checkout': zone.distance_to_checkout_m,
                    'zone_area': zone.area_m2,
                    'is_endcap': int(zone.zone_type == 'endcap'),
                    # SKU features
                    'price': sku.price,
                    'margin': sku.margin,
                    'velocity': sku.velocity,
                    # Target
                    'purchased': int(sku_id in bought)
                })

    return pd.DataFrame(samples, columns=feature_columns)

# Generate training data
conversion_data = prepare_conversion_data(exposures, purchases, zones, skus)

# Dataset size, class balance, and a summary of the main numeric features
print(f"\nConversion dataset: {len(conversion_data)} samples")
print(f"Purchase rate: {conversion_data['purchased'].mean():.2%}")
print("\nFeature statistics:")
print(conversion_data.loc[:, ['dwell_s', 'visibility', 'price', 'margin']].describe())