# Experiment Sampling: Error Analysis

Compare actual measurements against AI-estimated values, sorting each item's dimensions by size before comparing them.

**Key Insight:**
- Volume alone can hide errors (1x20x1 vs 2x5x2 = same volume 20, different shape)
- Solution: Sort dimensions (L >= W >= H) and compare individually

**Metrics:**
- Weight error
- Volume error
- L/W/H individual errors (sorted dimensions)
- Average dimension error

**Sample Sets:**
- Error ≥ 0.5 and < 1.0 (50–100%)
- Error ≥ 1.0 (100% or more)

## 1. Setup

In [None]:
# Install the BigQuery client plus the dataframe/plotting libraries imported below (-q keeps pip quiet)
!pip install -q google-cloud-bigquery pandas matplotlib seaborn

In [None]:
# Authenticate the Colab user first; the BigQuery client is created after this call
from google.colab import auth
auth.authenticate_user()

from google.cloud import bigquery
import pandas as pd
import matplotlib.pyplot as plt
# NOTE(review): `sns` is not referenced in the cells visible here — confirm it is still needed
import seaborn as sns

# Project the client is bound to; the query text below also spells out this project in its table names
PROJECT_ID = "sazoshop"
# Shared client used by run_query()
client = bigquery.Client(project=PROJECT_ID)

def run_query(sql):
    """Run ``sql`` on the module-level BigQuery ``client`` and return the result as a DataFrame."""
    job = client.query(sql)
    return job.to_dataframe()

# Reached only if authentication and client construction above did not raise
print("✅ Ready")

## 2. Load All Data with Error Metrics

In [None]:
# Main query: calculate all error metrics on size-sorted dimensions.
#
# CTE pipeline:
#   single_item_orders -> order ids that have exactly one distinct order item
#                         (presumably so the per-order KSE measurement maps to
#                         a single product — confirm)
#   base_data          -> joins order items to KSE measurements and parses the
#                         'AxBxC' dimension string and the AI JSON fields
#   sorted_dimensions  -> re-labels both dimension triples as L >= W >= H and
#                         derives volumes
#   with_errors        -> relative errors |ai - actual| / actual per metric
#
# Two safeguards compared with a naive version:
#   * Dimension parsing uses SAFE_CAST / SAFE_OFFSET. The regex filter accepts
#     strings such as '1..2x3x4' ([0-9.]+ allows repeated dots); a plain CAST
#     on such a value raises and aborts the whole query.
#   * Rows are kept only when all three AI dimensions are present. Filtering
#     on ai_d1 alone lets a missing depth/height be zero-filled, which shows
#     up as exactly 100% H/volume error and pollutes the ">100%" sample sets.
QUERY = """
WITH single_item_orders AS (
  SELECT order_item_order_id
  FROM `sazoshop.firestore_snapshot.v2_order_items`
  GROUP BY order_item_order_id
  HAVING COUNT(DISTINCT order_item_id) = 1
),

base_data AS (
  SELECT
    oi.order_item_order_id,
    oi.order_item_title_origin AS title,
    oi.order_item_product_version_info_category AS category,
    ARRAY_TO_STRING(oi.order_item_product_version_thumbnail_urls, '|') AS thumbnail_urls,

    -- Actual measurements (from KSE); NULL when a part is not a valid number
    kse.actual_weight,
    SAFE_CAST(SPLIT(kse.dimensions, 'x')[SAFE_OFFSET(0)] AS FLOAT64) AS actual_d1,
    SAFE_CAST(SPLIT(kse.dimensions, 'x')[SAFE_OFFSET(1)] AS FLOAT64) AS actual_d2,
    SAFE_CAST(SPLIT(kse.dimensions, 'x')[SAFE_OFFSET(2)] AS FLOAT64) AS actual_d3,
    kse.dimensions AS actual_dimensions_raw,

    -- AI estimated measurements
    SAFE_CAST(JSON_EXTRACT_SCALAR(oi.order_item_product_version_extra, '$.weight') AS FLOAT64) AS ai_weight,
    SAFE_CAST(JSON_EXTRACT_SCALAR(oi.order_item_product_version_extra, '$.width') AS FLOAT64) AS ai_d1,
    SAFE_CAST(JSON_EXTRACT_SCALAR(oi.order_item_product_version_extra, '$.depth') AS FLOAT64) AS ai_d2,
    SAFE_CAST(JSON_EXTRACT_SCALAR(oi.order_item_product_version_extra, '$.height') AS FLOAT64) AS ai_d3,

    kse.shipping_date

  FROM `sazoshop.firestore_snapshot.v2_order_items` oi
  INNER JOIN `sazoshop.firestore_collection.v2_kse_cost` kse
    ON oi.order_item_order_id = kse.order_id
  WHERE
    oi.order_item_order_id IN (SELECT order_item_order_id FROM single_item_orders)
    AND kse.dimensions IS NOT NULL
    AND kse.actual_weight IS NOT NULL
    AND kse.actual_weight > 0
    AND kse.actual_weight < 100
    AND REGEXP_CONTAINS(kse.dimensions, r'^[0-9.]+x[0-9.]+x[0-9.]+$')
    AND oi.order_item_title_origin IS NOT NULL
    AND JSON_EXTRACT_SCALAR(oi.order_item_product_version_extra, '$.weight') IS NOT NULL
),

sorted_dimensions AS (
  SELECT
    *,
    -- Sort actual dimensions (descending: L >= W >= H)
    -- The middle value is obtained as sum - max - min
    GREATEST(actual_d1, actual_d2, actual_d3) AS actual_L,
    (actual_d1 + actual_d2 + actual_d3)
      - GREATEST(actual_d1, actual_d2, actual_d3)
      - LEAST(actual_d1, actual_d2, actual_d3) AS actual_W,
    LEAST(actual_d1, actual_d2, actual_d3) AS actual_H,

    -- Sort AI dimensions (descending: L >= W >= H)
    GREATEST(IFNULL(ai_d1,0), IFNULL(ai_d2,0), IFNULL(ai_d3,0)) AS ai_L,
    (IFNULL(ai_d1,0) + IFNULL(ai_d2,0) + IFNULL(ai_d3,0))
      - GREATEST(IFNULL(ai_d1,0), IFNULL(ai_d2,0), IFNULL(ai_d3,0))
      - LEAST(IFNULL(ai_d1,0), IFNULL(ai_d2,0), IFNULL(ai_d3,0)) AS ai_W,
    LEAST(IFNULL(ai_d1,0), IFNULL(ai_d2,0), IFNULL(ai_d3,0)) AS ai_H,

    -- Calculate volumes
    (actual_d1 * actual_d2 * actual_d3) / 1000000 AS actual_volume_m3,
    (IFNULL(ai_d1,0) * IFNULL(ai_d2,0) * IFNULL(ai_d3,0)) / 1000000 AS ai_volume_m3

  FROM base_data
),

with_errors AS (
  SELECT
    *,
    -- Weight error
    CASE WHEN actual_weight > 0
      THEN ABS(ai_weight - actual_weight) / actual_weight
      ELSE NULL END AS weight_error,

    -- Volume error
    CASE WHEN actual_volume_m3 > 0
      THEN ABS(ai_volume_m3 - actual_volume_m3) / actual_volume_m3
      ELSE NULL END AS volume_error,

    -- Individual dimension errors (sorted)
    CASE WHEN actual_L > 0
      THEN ABS(ai_L - actual_L) / actual_L
      ELSE NULL END AS L_error,
    CASE WHEN actual_W > 0
      THEN ABS(ai_W - actual_W) / actual_W
      ELSE NULL END AS W_error,
    CASE WHEN actual_H > 0
      THEN ABS(ai_H - actual_H) / actual_H
      ELSE NULL END AS H_error,

    -- Combined dimension error (average of L/W/H errors)
    (
      CASE WHEN actual_L > 0 THEN ABS(ai_L - actual_L) / actual_L ELSE 0 END +
      CASE WHEN actual_W > 0 THEN ABS(ai_W - actual_W) / actual_W ELSE 0 END +
      CASE WHEN actual_H > 0 THEN ABS(ai_H - actual_H) / actual_H ELSE 0 END
    ) / 3 AS avg_dim_error

  FROM sorted_dimensions
  -- Keep only rows where every dimension on both sides was parsed
  WHERE actual_d1 IS NOT NULL
    AND actual_d2 IS NOT NULL
    AND actual_d3 IS NOT NULL
    AND ai_d1 IS NOT NULL
    AND ai_d2 IS NOT NULL
    AND ai_d3 IS NOT NULL
)

SELECT
  order_item_order_id,
  title,
  category,
  thumbnail_urls,

  -- Actual (sorted)
  actual_weight,
  actual_L, actual_W, actual_H,
  ROUND(actual_volume_m3, 6) AS actual_volume_m3,

  -- AI estimated (sorted)
  ai_weight,
  ai_L, ai_W, ai_H,
  ROUND(ai_volume_m3, 6) AS ai_volume_m3,

  -- Errors
  ROUND(weight_error, 3) AS weight_error,
  ROUND(volume_error, 3) AS volume_error,
  ROUND(L_error, 3) AS L_error,
  ROUND(W_error, 3) AS W_error,
  ROUND(H_error, 3) AS H_error,
  ROUND(avg_dim_error, 3) AS avg_dim_error,

  shipping_date

FROM with_errors
ORDER BY shipping_date DESC
"""

# Execute the query and keep the full result as `df`, which the cells below read
df = run_query(QUERY)
print(f"✅ Loaded {len(df):,} records with error metrics")
# Last expression of the cell: preview the first rows
df.head()

## 3. Error Distribution Overview

In [None]:
# Summary statistics for every error column
error_cols = ['weight_error', 'volume_error', 'L_error', 'W_error', 'H_error', 'avg_dim_error']
print("=== Error Statistics ===")
error_summary = df[error_cols].describe()
# Last expression of the cell: rendered as the cell output
error_summary.round(3)

In [None]:
# Count by error range
def categorize_error(x):
    """Map a relative error to its display bucket.

    Missing values give 'N/A'. Otherwise values below 0.5 give '< 50%',
    values from 0.5 up to (not including) 1.0 give '50-100%', and values
    of 1.0 or more give '> 100%'.
    """
    if pd.isna(x):
        return 'N/A'
    # Checked from the top band down, so each test only needs a lower bound
    if x >= 1.0:
        return '> 100%'
    if x >= 0.5:
        return '50-100%'
    return '< 50%'

print("=== Error Category Counts ===")
for col in ('weight_error', 'volume_error', 'avg_dim_error'):
    # Bucket every value of the column, then tally rows per bucket
    bucket_counts = df[col].apply(categorize_error).value_counts()
    print(f"\n{col}:")
    print(bucket_counts)

In [None]:
# Visualize error distributions: one histogram per headline metric
panels = [
    ('weight_error', 'Weight Error'),
    ('volume_error', 'Volume Error'),
    ('avg_dim_error', 'Avg Dimension Error'),
]
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

for ax, (col, title) in zip(axes, panels):
    # Drop missing values, then cap at 300% for visualization
    capped = df[col].dropna().clip(upper=3)

    ax.hist(capped, bins=30, edgecolor='black', alpha=0.7)
    # Reference lines at the two sample-set thresholds
    for threshold, colour, label in ((0.5, 'orange', '50%'), (1.0, 'red', '100%')):
        ax.axvline(x=threshold, color=colour, linestyle='--', label=label)
    ax.set_title(title)
    ax.set_xlabel('Error Rate')
    ax.set_ylabel('Count')
    ax.legend()

plt.tight_layout()
plt.show()

## 4. Sample Sets by Error Type

In [None]:
# Define sample sets: for each metric, one 50-100% band and one >=100% band
metric_columns = {
    'weight': 'weight_error',    # By Weight Error
    'volume': 'volume_error',    # By Volume Error
    'dim': 'avg_dim_error',      # By Dimension Error (L/W/H average)
}

sample_sets = {}
for prefix, error_col in metric_columns.items():
    err = df[error_col]
    # Rows with a missing error fail both comparisons and land in neither set
    sample_sets[f'{prefix}_50_100'] = df[(err >= 0.5) & (err < 1.0)]
    sample_sets[f'{prefix}_over_100'] = df[err >= 1.0]

print("=== Sample Set Sizes ===")
for name, sample_df in sample_sets.items():
    print(f"{name}: {len(sample_df):,} items")

## 5. Explore High-Error Samples

In [None]:
# Helper to display samples with images
from IPython.display import display, HTML

def show_samples(df_subset, title, n=10):
    """Print a heading and render the first ``n`` rows of ``df_subset`` as an HTML table.

    Each table row shows the first thumbnail, the title (cut to 40 characters),
    the actual and AI-estimated weight/dimensions, and the weight, volume and
    average-dimension errors as percentages.

    Args:
        df_subset: DataFrame providing the columns read below:
            ``thumbnail_urls`` ('|'-separated string), ``title``,
            ``actual_weight``, ``actual_L``/``actual_W``/``actual_H``,
            ``ai_weight``, ``ai_L``/``ai_W``/``ai_H``, ``weight_error``,
            ``volume_error`` and ``avg_dim_error``.
        title: Heading printed above the table.
        n: Maximum number of rows to render, taken from the top of ``df_subset``.

    Returns:
        None. The table is shown through ``display(HTML(...))``.
    """
    # Function-scope import: titles and URLs are data, so they must be escaped
    # before being placed into markup.
    from html import escape

    title_limit = 40
    total = len(df_subset)
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"{title} (showing {min(n, total)} of {total})")
    print(banner)

    # Collect fragments and join once instead of growing a string in the loop
    parts = [
        "<table style='font-size:12px;'>",
        "<tr><th>Image</th><th>Title</th><th>Actual</th><th>AI Est</th><th>Errors</th></tr>",
    ]

    for _, row in df_subset.head(n).iterrows():
        # A missing thumbnail may be None or NaN; NaN is truthy, so test the type
        urls = row['thumbnail_urls']
        img_url = urls.split('|')[0] if isinstance(urls, str) else ''
        img_html = f'<img src="{escape(img_url)}" style="max-width:80px;">' if img_url else 'N/A'

        # Only add the ellipsis when the title was actually cut
        item_title = str(row['title'])
        short_title = item_title[:title_limit]
        if len(item_title) > title_limit:
            short_title += '...'

        actual = f"W:{row['actual_weight']:.1f}kg<br>L:{row['actual_L']:.0f} W:{row['actual_W']:.0f} H:{row['actual_H']:.0f}"
        ai_est = f"W:{row['ai_weight']:.1f}kg<br>L:{row['ai_L']:.0f} W:{row['ai_W']:.0f} H:{row['ai_H']:.0f}"
        errors = f"Wt:{row['weight_error']*100:.0f}%<br>Vol:{row['volume_error']*100:.0f}%<br>Dim:{row['avg_dim_error']*100:.0f}%"

        parts.append(
            "<tr>"
            f"<td>{img_html}</td>"
            f"<td>{escape(short_title)}</td>"
            f"<td>{actual}</td>"
            f"<td>{ai_est}</td>"
            f"<td>{errors}</td>"
            "</tr>"
        )

    parts.append("</table>")
    display(HTML("".join(parts)))

In [None]:
# Show samples: Weight error 50-100%, largest error first
weight_mid = sample_sets['weight_50_100']
show_samples(
    df_subset=weight_mid.sort_values('weight_error', ascending=False),
    title='Weight Error 50-100%',
)

In [None]:
# Show samples: Weight error > 100%, largest error first
weight_high = sample_sets['weight_over_100']
show_samples(
    df_subset=weight_high.sort_values('weight_error', ascending=False),
    title='Weight Error > 100%',
)

In [None]:
# Show samples: Volume error > 100%, largest error first
volume_high = sample_sets['volume_over_100']
show_samples(
    df_subset=volume_high.sort_values('volume_error', ascending=False),
    title='Volume Error > 100%',
)

In [None]:
# Show samples: Dimension error > 100%, largest error first
dim_high = sample_sets['dim_over_100']
show_samples(
    df_subset=dim_high.sort_values('avg_dim_error', ascending=False),
    title='Dimension Error > 100%',
)

## 6. Select Experiment Samples

In [None]:
# Select balanced samples for experiment
SAMPLE_SIZE = 25  # Per category
# Fixed seed: re-running this cell on the same data picks the same rows, so the
# exported experiment set can be regenerated and compared across runs.
RANDOM_SEED = 42

# Groups that feed the experiment, in labelling priority order (see the
# de-duplication step below).
# NOTE(review): 'volume_50_100' and 'dim_50_100' are defined in sample_sets but
# not sampled here — confirm that is intended.
EXPERIMENT_GROUPS = ['weight_50_100', 'weight_over_100', 'volume_over_100', 'dim_over_100']

experiment_samples = pd.concat([
    sample_sets[group]
    # Never request more rows than the group holds; sample() would raise otherwise
    .sample(min(SAMPLE_SIZE, len(sample_sets[group])), random_state=RANDOM_SEED)
    .assign(sample_group=group)
    for group in EXPERIMENT_GROUPS
])

# Remove duplicates (same order might appear in multiple categories).
# drop_duplicates keeps the first occurrence, so such an order keeps the label
# of the earliest group in EXPERIMENT_GROUPS.
experiment_samples = experiment_samples.drop_duplicates(subset=['order_item_order_id'])

print(f"=== Experiment Sample Set ===")
print(f"Total unique samples: {len(experiment_samples)}")
print(f"\nBy group:")
print(experiment_samples['sample_group'].value_counts())

## 7. Export Samples

In [None]:
# Export to CSV
from datetime import datetime

# Second-resolution timestamp in the name distinguishes successive exports
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
filename = f'experiment_samples_{timestamp}.csv'

# index=False: write only the data columns, not the DataFrame index
experiment_samples.to_csv(filename, index=False)
print(f"✅ Saved to {filename}")

# Download (imported here, after the file has already been written)
from google.colab import files
files.download(filename)

## Next Steps

1. Use `weight_volume_experiment.ipynb` with these samples
2. Re-estimate with improved prompt
3. Compare new vs old error rates