# 05 - Visualization & Report

Generate publication-ready visualizations and summary reports.

**Key Outputs:**
1. Stacked area charts of import composition
2. Line charts of key country trends
3. Heatmaps of country x year import shares
4. Summary statistics tables

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
import sys

sys.path.insert(0, str(Path.cwd().parent / 'src'))
from data_loader import DATA_PROCESSED, DATA_EXPORTS

# Directory for generated report files: a 'reports' folder next to the
# current working directory (i.e. under the working directory's parent).
REPORTS_DIR = Path.cwd().parent.joinpath('reports')
REPORTS_DIR.mkdir(exist_ok=True)  # no error when the folder already exists

print(f"Reports will be saved to: {REPORTS_DIR}")

## Visualization Functions

In [None]:
def create_stacked_area_chart(df, top_n=10, title="US Import Sources Over Time"):
    """Create a stacked area chart of the top countries' share of imports.

    The ``top_n`` countries ranked by total ``value_real`` each get their own
    band; every remaining country is folded into a single "Other" band that
    is stacked last.

    Args:
        df: DataFrame with 'country', 'year', 'value_real' and 'share_pct'
            columns.
        top_n: Number of countries to show individually.
        title: Chart title.

    Returns:
        A plotly ``go.Figure`` with one stacked trace per country group.
    """
    # Rank countries by their total real import value across all years.
    top_countries = df.groupby('country')['value_real'].sum().nlargest(top_n).index.tolist()
    top_set = set(top_countries)  # O(1) membership test for the row-wise mapping

    # Collapse every country outside the top N into "Other".
    df_viz = df.copy()
    df_viz['country_group'] = df_viz['country'].apply(
        lambda name: name if name in top_set else 'Other'
    )

    # Sum shares per (year, group) and pivot to one column per group.
    grouped = df_viz.groupby(['year', 'country_group'])['share_pct'].sum().reset_index()
    pivot = grouped.pivot(index='year', columns='country_group', values='share_pct').fillna(0)

    # Top countries first (in rank order), "Other" last. "Other" is appended
    # only when that column exists, so data with no more than top_n countries
    # still keeps all of its country columns. Written as an explicit `if`
    # because `a + b if cond else []` parses as `(a + b) if cond else []`.
    # A country literally named "Other" is skipped here to avoid selecting the
    # same column twice.
    cols = [c for c in top_countries if c in pivot.columns and c != 'Other']
    if 'Other' in pivot.columns:
        cols.append('Other')
    pivot = pivot[cols]

    # One filled, line-less trace per group; a shared stackgroup stacks them.
    fig = go.Figure()
    colors = px.colors.qualitative.Set3

    for i, col in enumerate(pivot.columns):
        fig.add_trace(go.Scatter(
            x=pivot.index, y=pivot[col], name=col,
            stackgroup='one', mode='none',
            fillcolor=colors[i % len(colors)]  # cycle the palette if groups outnumber colors
        ))

    fig.update_layout(
        title=title, xaxis_title='Year', yaxis_title='Share of Imports (%)',
        legend=dict(orientation='h', yanchor='bottom', y=1.02)
    )
    return fig

def create_trend_lines(df, countries, title="Import Share Trends"):
    """Draw import-share trends over time, one line per requested country.

    Args:
        df: DataFrame with 'country', 'year' and 'share_pct' columns.
        countries: Country names to keep; rows for other countries are dropped.
        title: Chart title.

    Returns:
        A plotly express line figure colored by country.
    """
    keep_rows = df['country'].isin(countries)
    selected = df.loc[keep_rows]

    trend_fig = px.line(
        selected,
        x='year',
        y='share_pct',
        color='country',
        title=title,
    )
    axis_titles = {'xaxis_title': 'Year', 'yaxis_title': 'Share of Imports (%)'}
    trend_fig.update_layout(**axis_titles)
    return trend_fig

def create_heatmap(df, countries, title="Import Shares by Year and Country"):
    """Build a country-by-year heatmap of import shares.

    Args:
        df: DataFrame with 'country', 'year' and 'share_pct' columns.
        countries: Country names to include as heatmap rows.
        title: Chart title.

    Returns:
        A plotly ``go.Figure`` holding a single heatmap trace.
    """
    in_scope = df['country'].isin(countries)

    # Rows are countries, columns are years; shares are summed per cell and
    # country/year pairs with no data are shown as 0.
    share_grid = (
        df[in_scope]
        .pivot_table(index='country', columns='year', values='share_pct', aggfunc='sum')
        .fillna(0)
    )

    heat_trace = go.Heatmap(
        z=share_grid.values,
        x=share_grid.columns,
        y=share_grid.index,
        colorscale='Blues',
        colorbar={'title': 'Share %'},
    )
    heat_fig = go.Figure(data=heat_trace)
    heat_fig.update_layout(title=title, xaxis_title='Year', yaxis_title='Country')
    return heat_fig

# Status message emitted once the chart helper definitions above have run.
print("Visualization functions defined")

## Generate Visualizations

Run these cells after loading processed data to generate charts.

In [None]:
# Load data and generate charts (uncomment once the processed data is available)
# imports_df = pd.read_csv(DATA_PROCESSED / 'imports_processed.csv')

# Chart 1: Stacked area of import sources
# fig1 = create_stacked_area_chart(imports_df)
# fig1.write_html(REPORTS_DIR / 'import_sources_stacked.html')
# fig1.show()

# Chart 2: Key country trends  
# key_countries = ['China', 'Mexico', 'Canada', 'Vietnam', 'Japan', 'Germany']
# fig2 = create_trend_lines(imports_df, key_countries)
# fig2.write_html(REPORTS_DIR / 'key_country_trends.html')
# fig2.show()

# Chart 3: Heatmap
# fig3 = create_heatmap(imports_df, key_countries)
# fig3.write_html(REPORTS_DIR / 'country_heatmap.html')
# fig3.show()

# Printed unconditionally: the chart-generation statements in this cell are
# commented out, so this only signals that the cell itself ran.
print("Chart generation code ready")

## Export Processed Data

In [None]:
# Export final datasets (uncomment once the processed data is available)
# DATA_EXPORTS.mkdir(exist_ok=True)
# 
# # Full processed dataset
# imports_df.to_csv(DATA_EXPORTS / 'us_imports_1995_2025.csv', index=False)
# 
# # Summary by year
# yearly_summary = imports_df.groupby('year').agg({
#     'value_real': 'sum',
#     'country': 'nunique'
# }).reset_index()
# yearly_summary.columns = ['year', 'total_imports_real', 'num_countries']
# yearly_summary.to_csv(DATA_EXPORTS / 'yearly_summary.csv', index=False)
# 
# # Top 20 countries each year (sort + head keeps the 'year' column in the output)
# top_by_year = (
#     imports_df.sort_values(['year', 'value_real'], ascending=[True, False])
#     .groupby('year')
#     .head(20)[['year', 'country', 'value_real', 'share_pct']]
#     .reset_index(drop=True)
# )
# top_by_year.to_csv(DATA_EXPORTS / 'top_countries_by_year.csv', index=False)
# 
# print("Data exported to data/exports/")