# Error Analysis - Weight & Volume Predictions

Analyze prediction errors from the experiment datasource:
- Top 200 errors (volume, weight, both)
- Dimension order issues (L x W x H twisted)
- Error distribution by category
- Positive vs negative errors

In [None]:
# Upload the TSV file
from google.colab import files
import pandas as pd
import numpy as np

# Tell the user which file is expected, then open the Colab upload dialog.
# The result is kept in `uploaded` for the loading cell that follows.
upload_prompt = "Upload 20260128_experiment_datasource.tsv"
print(upload_prompt)
uploaded = files.upload()

In [None]:
# Load data
# `uploaded` is keyed by file name; the first key is read as a tab-separated
# table. Fail with an explicit message when nothing was uploaded (e.g. the
# dialog was cancelled) instead of an opaque IndexError.
if not uploaded:
    raise ValueError(
        "No file was uploaded; re-run the upload cell and select the TSV file."
    )
filename = next(iter(uploaded))
df = pd.read_csv(filename, sep='\t')
print(f"Loaded {len(df)} rows")
print(f"\nColumns: {list(df.columns)}")

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for every error column.
error_cols = [
    'weight_error',
    'volume_error',
    'max_error',
    'mid_error',
    'min_error',
    'avg_dim_error',
]
df.loc[:, error_cols].describe()

## 1. Top 200 Volume Errors

In [None]:
# The 200 rows with the largest signed volume_error (reported below as AI
# overestimates), keeping the dimension columns needed to review each item.
volume_review_cols = [
    'product_version_id', 'title_origin', 'category',
    'volume_error', 'weight_error',
    'actual_volume_cm3', 'ai_volume_cm3',
    'actual_max', 'actual_mid', 'actual_min',
    'ai_max', 'ai_mid', 'ai_min',
    'thumbnail_urls',
]
top_volume_positive = df.nlargest(200, 'volume_error').loc[:, volume_review_cols]

range_low = top_volume_positive['volume_error'].min()
range_high = top_volume_positive['volume_error'].max()
print("Top 200 POSITIVE Volume Errors (AI overestimated)")
print(f"Error range: {range_low:.2%} ~ {range_high:.2%}")
top_volume_positive.head(20)

In [None]:
# The 200 rows with the smallest signed volume_error (reported below as AI
# underestimates), keeping the dimension columns needed to review each item.
volume_review_cols = [
    'product_version_id', 'title_origin', 'category',
    'volume_error', 'weight_error',
    'actual_volume_cm3', 'ai_volume_cm3',
    'actual_max', 'actual_mid', 'actual_min',
    'ai_max', 'ai_mid', 'ai_min',
    'thumbnail_urls',
]
top_volume_negative = df.nsmallest(200, 'volume_error').loc[:, volume_review_cols]

range_low = top_volume_negative['volume_error'].min()
range_high = top_volume_negative['volume_error'].max()
print("Top 200 NEGATIVE Volume Errors (AI underestimated)")
print(f"Error range: {range_low:.2%} ~ {range_high:.2%}")
top_volume_negative.head(20)

## 2. Top 200 Weight Errors

In [None]:
# The 200 rows with the largest signed weight_error (reported below as AI
# overestimates), with the actual and AI weights side by side.
weight_review_cols = [
    'product_version_id', 'title_origin', 'category',
    'weight_error', 'volume_error',
    'actual_weight', 'ai_weight_kg',
    'thumbnail_urls',
]
top_weight_positive = df.nlargest(200, 'weight_error').loc[:, weight_review_cols]

range_low = top_weight_positive['weight_error'].min()
range_high = top_weight_positive['weight_error'].max()
print("Top 200 POSITIVE Weight Errors (AI overestimated)")
print(f"Error range: {range_low:.2%} ~ {range_high:.2%}")
top_weight_positive.head(20)

In [None]:
# The 200 rows with the smallest signed weight_error (reported below as AI
# underestimates), with the actual and AI weights side by side.
weight_review_cols = [
    'product_version_id', 'title_origin', 'category',
    'weight_error', 'volume_error',
    'actual_weight', 'ai_weight_kg',
    'thumbnail_urls',
]
top_weight_negative = df.nsmallest(200, 'weight_error').loc[:, weight_review_cols]

range_low = top_weight_negative['weight_error'].min()
range_high = top_weight_negative['weight_error'].max()
print("Top 200 NEGATIVE Weight Errors (AI underestimated)")
print(f"Error range: {range_low:.2%} ~ {range_high:.2%}")
top_weight_negative.head(20)

## 3. Top 200 Combined Errors (Both Volume AND Weight)

In [None]:
# Combined score per row: the mean of |weight_error| and |volume_error|.
# A missing value in either column leaves the combined score missing.
abs_weight = df['weight_error'].abs()
abs_volume = df['volume_error'].abs()
df['combined_error'] = (abs_weight + abs_volume) / 2

# Keep the 200 rows with the highest combined score for review.
combined_review_cols = [
    'product_version_id', 'title_origin', 'category',
    'weight_error', 'volume_error', 'combined_error',
    'actual_weight', 'ai_weight_kg',
    'actual_volume_cm3', 'ai_volume_cm3',
    'thumbnail_urls',
]
top_combined = df.nlargest(200, 'combined_error').loc[:, combined_review_cols]

range_low = top_combined['combined_error'].min()
range_high = top_combined['combined_error'].max()
print("Top 200 Combined Errors (high error in BOTH weight and volume)")
print(f"Combined error range: {range_low:.2%} ~ {range_high:.2%}")
top_combined.head(20)

## 4. Dimension Order Twisted (L x W x H)

Check if AI dimensions are in a different order than actual dimensions.

In [None]:
# Check dimension order issues
# Compare which actual dimension matches which AI dimension

def check_dimension_twist(row, ratio_std_threshold=0.5):
    """Classify a row as 'twisted', 'normal' or 'unknown' from its dimension ratios.

    For each of the max/mid/min slots the AI value is divided by the actual
    value. When the three ratios disagree strongly (population standard
    deviation above ``ratio_std_threshold``) the row is flagged as 'twisted'.

    NOTE(review): this is a heuristic. It flags uneven per-dimension scaling,
    which may indicate swapped dimensions but can also be a single badly
    estimated side. Confirm flagged items manually.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing
            'actual_max', 'actual_mid', 'actual_min', 'ai_max', 'ai_mid'
            and 'ai_min'.
        ratio_std_threshold: Standard deviation of the three AI/actual ratios
            above which the row is reported as 'twisted'.

    Returns:
        'unknown' if any dimension is missing or any actual dimension is not
        strictly positive (no meaningful ratio exists), 'twisted' if the
        ratio spread exceeds the threshold, otherwise 'normal'.
    """
    actual = [row['actual_max'], row['actual_mid'], row['actual_min']]
    ai = [row['ai_max'], row['ai_mid'], row['ai_min']]

    # Missing values make the comparison meaningless.
    if any(pd.isna(actual)) or any(pd.isna(ai)):
        return 'unknown'

    # A zero or negative actual dimension has no valid ratio. Substituting 0
    # would distort the spread and produce arbitrary labels, so report the
    # row as unclassifiable instead.
    if any(dim <= 0 for dim in actual):
        return 'unknown'

    ratios = [predicted / measured for predicted, measured in zip(ai, actual)]

    # High variance across the per-slot ratios suggests a twist.
    if np.std(ratios) > ratio_std_threshold:
        return 'twisted'
    return 'normal'

# Label every row with the twist check and report how many rows got each label.
twist_labels = df.apply(check_dimension_twist, axis=1)
df['dim_twist'] = twist_labels
print("Dimension Twist Analysis:")
twist_counts = df['dim_twist'].value_counts()
print(twist_counts)

In [None]:
# Rows labelled 'twisted', restricted to the columns that let a reviewer
# compare actual and AI dimensions next to their per-dimension errors.
twist_review_cols = [
    'product_version_id', 'title_origin', 'category',
    'actual_max', 'actual_mid', 'actual_min',
    'ai_max', 'ai_mid', 'ai_min',
    'max_error', 'mid_error', 'min_error',
    'volume_error',
    'thumbnail_urls',
]
is_twisted = df['dim_twist'].eq('twisted')
twisted_items = df.loc[is_twisted, twist_review_cols]
print(f"Twisted dimension items: {len(twisted_items)}")
twisted_items.head(20)

## 5. Error Distribution by Category

In [None]:
# Categorize by error level
def categorize_error(error):
    """Bucket a signed relative error by its magnitude.

    Args:
        error: Signed error as a fraction (0.5 means 50%). May be missing
            (NaN or None).

    Returns:
        'unknown' for a missing error, otherwise one of 'over_100%',
        '50%_to_100%', '10%_to_50%' or 'under_10%'. Boundaries belong to the
        lower bucket (exactly 1.0 is '50%_to_100%', exactly 0.1 is
        'under_10%').
    """
    # NaN fails every comparison below and would otherwise be reported as
    # 'under_10%', counting missing data as a good prediction.
    if pd.isna(error):
        return 'unknown'

    abs_err = abs(error)
    if abs_err > 1.0:  # > 100%
        return 'over_100%'
    if abs_err > 0.5:  # 50% ~ 100%
        return '50%_to_100%'
    if abs_err > 0.1:  # 10% ~ 50%
        return '10%_to_50%'
    return 'under_10%'  # <= 10%

# Attach a magnitude bucket to each row for both error columns.
for metric in ('weight', 'volume'):
    df[f'{metric}_error_level'] = df[f'{metric}_error'].apply(categorize_error)

# Report how many rows landed in each bucket.
weight_level_counts = df['weight_error_level'].value_counts()
volume_level_counts = df['volume_error_level'].value_counts()
print("Weight Error Distribution:")
print(weight_level_counts)
print("\nVolume Error Distribution:")
print(volume_level_counts)

### 5.1 Over 100% Error Items by Category

In [None]:
# Rows whose signed volume error is above +100%, tallied per category.
over_100_vol_pos = df.loc[df['volume_error'].gt(1.0)]
n_items = len(over_100_vol_pos)
per_category = over_100_vol_pos['category'].value_counts()
print(f"Volume Error > +100%: {n_items} items")
print("\nBy Category:")
print(per_category)

In [None]:
# Negative counterpart of the "+100%" bucket. The cut-off is -50% because, per
# the original note, -50% means actual is 2x the AI value, and a -100% error
# would be impossible.
over_100_vol_neg = df.loc[df['volume_error'].lt(-0.5)]
n_items = len(over_100_vol_neg)
per_category = over_100_vol_neg['category'].value_counts()
print(f"Volume Error < -50%: {n_items} items")
print("\nBy Category:")
print(per_category)

In [None]:
# Rows whose signed weight error is above +100%, tallied per category.
over_100_wt_pos = df.loc[df['weight_error'].gt(1.0)]
n_items = len(over_100_wt_pos)
per_category = over_100_wt_pos['category'].value_counts()
print(f"Weight Error > +100%: {n_items} items")
print("\nBy Category:")
print(per_category)

In [None]:
# Negative counterpart of the "+100%" weight bucket: rows below -50%
# (presumably the same 2x convention as the volume cell; verify).
over_100_wt_neg = df.loc[df['weight_error'].lt(-0.5)]
n_items = len(over_100_wt_neg)
per_category = over_100_wt_neg['category'].value_counts()
print(f"Weight Error < -50%: {n_items} items")
print("\nBy Category:")
print(per_category)

### 5.2 50% ~ 100% Error Items by Category

In [None]:
# Rows with signed volume error in (+50%, +100%], tallied per category.
vol_err = df['volume_error']
mid_vol_pos = df.loc[vol_err.gt(0.5) & vol_err.le(1.0)]
n_items = len(mid_vol_pos)
per_category = mid_vol_pos['category'].value_counts()
print(f"Volume Error +50% ~ +100%: {n_items} items")
print("\nBy Category:")
print(per_category)

In [None]:
# Negative counterpart of the "+50% ~ +100%" bucket: rows with signed volume
# error in [-50%, -33%), tallied per category.
vol_err = df['volume_error']
mid_vol_neg = df.loc[vol_err.lt(-0.33) & vol_err.ge(-0.5)]
n_items = len(mid_vol_neg)
per_category = mid_vol_neg['category'].value_counts()
print(f"Volume Error -33% ~ -50%: {n_items} items")
print("\nBy Category:")
print(per_category)

In [None]:
# Rows with signed weight error in (+50%, +100%], tallied per category.
wt_err = df['weight_error']
mid_wt_pos = df.loc[wt_err.gt(0.5) & wt_err.le(1.0)]
n_items = len(mid_wt_pos)
per_category = mid_wt_pos['category'].value_counts()
print(f"Weight Error +50% ~ +100%: {n_items} items")
print("\nBy Category:")
print(per_category)

In [None]:
# Negative counterpart of the "+50% ~ +100%" weight bucket: rows with signed
# weight error in [-50%, -33%), tallied per category.
wt_err = df['weight_error']
mid_wt_neg = df.loc[wt_err.lt(-0.33) & wt_err.ge(-0.5)]
n_items = len(mid_wt_neg)
per_category = mid_wt_neg['category'].value_counts()
print(f"Weight Error -33% ~ -50%: {n_items} items")
print("\nBy Category:")
print(per_category)

### 5.3 Under 10% Error Items by Category (Good predictions)

In [None]:
# Under 10% VOLUME error - POSITIVE (includes exact matches)
# Rows with signed volume error in [0%, +10%]. The lower bound is inclusive so
# that an error of exactly 0 is counted as a good prediction; a strict `> 0`
# here combined with `< 0` in the negative cell would drop those rows from
# both tallies.
good_vol_pos = df[(df['volume_error'] >= 0) & (df['volume_error'] <= 0.1)]
print(f"Volume Error 0% ~ +10%: {len(good_vol_pos)} items")
print("\nBy Category:")
print(good_vol_pos['category'].value_counts())

In [None]:
# Rows with signed volume error in [-10%, 0%), tallied per category.
vol_err = df['volume_error']
good_vol_neg = df.loc[vol_err.lt(0) & vol_err.ge(-0.1)]
n_items = len(good_vol_neg)
per_category = good_vol_neg['category'].value_counts()
print(f"Volume Error -10% ~ 0%: {n_items} items")
print("\nBy Category:")
print(per_category)

In [None]:
# Under 10% WEIGHT error - POSITIVE (includes exact matches)
# Rows with signed weight error in [0%, +10%]. The lower bound is inclusive so
# that an error of exactly 0 is counted as a good prediction; a strict `> 0`
# here combined with `< 0` in the negative cell would drop those rows from
# both tallies.
good_wt_pos = df[(df['weight_error'] >= 0) & (df['weight_error'] <= 0.1)]
print(f"Weight Error 0% ~ +10%: {len(good_wt_pos)} items")
print("\nBy Category:")
print(good_wt_pos['category'].value_counts())

In [None]:
# Rows with signed weight error in [-10%, 0%), tallied per category.
wt_err = df['weight_error']
good_wt_neg = df.loc[wt_err.lt(0) & wt_err.ge(-0.1)]
n_items = len(good_wt_neg)
per_category = good_wt_neg['category'].value_counts()
print(f"Weight Error -10% ~ 0%: {n_items} items")
print("\nBy Category:")
print(per_category)

## 6. Summary Statistics by Category

In [None]:
# Per-category summary of the signed errors, rounded to 4 decimals.
# The row count is taken once, from the weight_error column.
weight_stats = ['mean', 'std', 'min', 'max', 'count']
volume_stats = ['mean', 'std', 'min', 'max']
stats_by_column = {
    'weight_error': weight_stats,
    'volume_error': volume_stats,
}
category_stats = df.groupby('category').agg(stats_by_column).round(4)

print("Error Statistics by Category:")
category_stats

In [None]:
# Add absolute-error columns so that over- and underestimates do not cancel
# out when averaged per category.
for metric in ('weight_error', 'volume_error'):
    df[f'abs_{metric}'] = df[metric].abs()

# Mean absolute error and number of rows per category, ordered so the
# category with the highest mean absolute volume error comes first.
per_category_abs = df.groupby('category').agg(
    abs_weight_error=('abs_weight_error', 'mean'),
    abs_volume_error=('abs_volume_error', 'mean'),
    count=('product_version_id', 'count'),
)
worst_categories = per_category_abs.sort_values('abs_volume_error', ascending=False)

print("Categories by Average Absolute Error (worst first):")
worst_categories

## 7. Visualizations

In [None]:
import matplotlib.pyplot as plt

# 2x2 overview figure: signed-error histograms on the top row, a
# weight-vs-volume scatter and the volume error-level pie on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax1, ax2), (ax3, ax4) = axes

# Histograms of the signed errors, clipped to [-2, 2] so extreme values do
# not stretch the x-axis; the dashed red line marks zero error.
histogram_specs = (
    (ax1, 'weight_error', 'steelblue', 'Weight Error Distribution (clipped to ±200%)'),
    (ax2, 'volume_error', 'darkorange', 'Volume Error Distribution (clipped to ±200%)'),
)
for panel, column, colour, heading in histogram_specs:
    df[column].clip(-2, 2).hist(bins=50, ax=panel, color=colour, edgecolor='white')
    panel.axvline(x=0, color='red', linestyle='--', linewidth=1)
    panel.set_title(heading)
    panel.set_xlabel('Error (signed)')
    panel.set_ylabel('Count')

# Scatter of weight error against volume error, using the same clipping,
# with dashed guides through the origin.
clipped_weight = df['weight_error'].clip(-2, 2)
clipped_volume = df['volume_error'].clip(-2, 2)
ax3.scatter(clipped_weight, clipped_volume, alpha=0.3, s=10)
ax3.axhline(y=0, color='red', linestyle='--', linewidth=0.5)
ax3.axvline(x=0, color='red', linestyle='--', linewidth=0.5)
ax3.set_title('Weight Error vs Volume Error')
ax3.set_xlabel('Weight Error')
ax3.set_ylabel('Volume Error')

# Share of rows in each volume error-level bucket.
error_counts = df['volume_error_level'].value_counts()
ax4.pie(
    error_counts.values,
    labels=error_counts.index,
    autopct='%1.1f%%',
    startangle=90,
)
ax4.set_title('Volume Error Level Distribution')

plt.tight_layout()
plt.show()

In [None]:
# Side-by-side box plots of signed weight and volume error for the ten
# categories with the most rows.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax1, ax2 = axes

# Restrict the data to the ten most frequent categories.
top_cats = df['category'].value_counts().head(10).index
df_top = df.loc[df['category'].isin(top_cats)]

boxplot_specs = (
    (ax1, 'weight_error', 'Weight Error by Category (Top 10)', 'Weight Error'),
    (ax2, 'volume_error', 'Volume Error by Category (Top 10)', 'Volume Error'),
)
for panel, column, heading, y_label in boxplot_specs:
    df_top.boxplot(column=column, by='category', ax=panel)
    panel.set_title(heading)
    panel.set_xlabel('Category')
    panel.set_ylabel(y_label)
    panel.set_ylim(-2, 2)
    # Blank the figure-level title after each grouped boxplot call.
    plt.suptitle('')
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 8. Export Analysis Results

In [None]:
# Write each review table to its own CSV (without the index column) and list
# the files that were produced.
export_targets = {
    'top200_volume_error_positive.csv': top_volume_positive,
    'top200_volume_error_negative.csv': top_volume_negative,
    'top200_weight_error_positive.csv': top_weight_positive,
    'top200_weight_error_negative.csv': top_weight_negative,
    'top200_combined_error.csv': top_combined,
    'twisted_dimensions.csv': twisted_items,
}
for csv_name, frame in export_targets.items():
    frame.to_csv(csv_name, index=False)

print("Exported CSV files:")
for csv_name in export_targets:
    print(f"- {csv_name}")

In [None]:
# Download exported files
from google.colab import files

# Trigger a browser download for each exported CSV, in export order.
exported_csvs = (
    'top200_volume_error_positive.csv',
    'top200_volume_error_negative.csv',
    'top200_weight_error_positive.csv',
    'top200_weight_error_negative.csv',
    'top200_combined_error.csv',
    'twisted_dimensions.csv',
)
for csv_name in exported_csvs:
    files.download(csv_name)