In [208]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import warnings
warnings.filterwarnings('ignore')

In [209]:
# Loading inflation data with pre-calculated MoM and YoY changes
if_df_analysis = pd.read_csv('inflation_analysis_results.csv')

# Convert date column to datetime
if_df_analysis['date'] = pd.to_datetime(if_df_analysis['date'])

print(f"Data loaded: {len(if_df_analysis)} rows")
print(f"Date range: {if_df_analysis['date'].min()} to {if_df_analysis['date'].max()}")
print(f"\nColumns: {list(if_df_analysis.columns)}")

Data loaded: 8409 rows
Date range: 2025-01-01 00:00:00 to 2026-01-01 00:00:00

Columns: ['base_year', 'series', 'year', 'month', 'state', 'sector', 'division', 'group', 'class', 'sub_class', 'item', 'code', 'index', 'inflation', 'imputation', 'date', 'mom_change', 'yoy_change']


In [210]:
# Quick data overview
print("Data shape:", if_df_analysis.shape)
print("\nSample data:")
print(if_df_analysis.head(10))
print("\nData types:")
print(if_df_analysis.dtypes)

Data shape: (8409, 18)

Sample data:
   base_year   series  year      month      state    sector       division  \
0     2024.0  Current  2025    January  All India  Combined  CPI (General)   
1     2024.0  Current  2025   February  All India  Combined  CPI (General)   
2     2024.0  Current  2025      March  All India  Combined  CPI (General)   
3     2024.0  Current  2025      April  All India  Combined  CPI (General)   
4     2024.0  Current  2025        May  All India  Combined  CPI (General)   
5     2024.0  Current  2025       June  All India  Combined  CPI (General)   
6     2024.0  Current  2025       July  All India  Combined  CPI (General)   
7     2024.0  Current  2025     August  All India  Combined  CPI (General)   
8     2024.0  Current  2025  September  All India  Combined  CPI (General)   
9     2024.0  Current  2025    October  All India  Combined  CPI (General)   

  group class sub_class item code   index inflation imputation       date  \
0     *     *         *    

# Visuals


In [211]:
## Plan Implementation

### Part 1: Division-Level YoY Change Tables
#We'll start with the highest level of aggregation - divisions where all sub-categories are marked with '*'

In [212]:
# Filter for Division-level data (highest aggregation)
# At division level: group, class, sub_class, and item should all be '*'
division_level = if_df_analysis[
    (if_df_analysis['state'] == 'All India') &
    (if_df_analysis['sector'] == 'Combined') &
    (if_df_analysis['group'] == '*') & 
    (if_df_analysis['class'] == '*') & 
    (if_df_analysis['sub_class'] == '*') & 
    (if_df_analysis['item'] == '*')
].copy()

print(f"Division-level records found: {len(division_level)}")
print(f"Unique divisions: {division_level['division'].nunique()}")
print(f"\nDivisions in data:")
print(division_level['division'].unique())

Division-level records found: 260
Unique divisions: 20

Divisions in data:
['CPI (General)' 'Clothing and footwear' 'Education services'
 'Food and beverages'
 'Furnishings, household equipment and routine household maintenance'
 'Health' 'Housing, water, electricity, gas and other fuels'
 'Information and communication' 'Paan, tobacco and intoxicants'
 'Personal care, social protection and miscellaneous goods and services'
 'Recreation, sport and culture' 'Restaurants and accommodation services'
 'Transport' 'CPI Excluding Food and Beverages'
 'CPI (Excluding Food and Beverages)' 'Core CPI' 'Core (Ex Gold)'
 'Core (Ex Gold and Silver)'
 'CPI (Excluding Paan Tobacco and Intoxicants)'
 'CPI (Ex Veggies and Cereals)']


In [213]:
# Create Division YoY Change Table - Latest month only
latest_date = division_level['date'].max()
print(f"Latest data available for: {latest_date.strftime('%B %Y')}")

# Filter for latest month
division_latest = division_level[division_level['date'] == latest_date].copy()

# Create clean table with relevant columns
division_yoy_table = division_latest[['division', 'index', 'mom_change', 'yoy_change']].copy()
division_yoy_table = division_yoy_table.sort_values('yoy_change', ascending=False)
division_yoy_table = division_yoy_table.reset_index(drop=True)

# Rename columns for better readability
division_yoy_table.columns = ['Division', 'Index Value', 'MoM Change (%)', 'YoY Change (%)']

# Round values for clean display
division_yoy_table['Index Value'] = division_yoy_table['Index Value'].round(2)
division_yoy_table['MoM Change (%)'] = division_yoy_table['MoM Change (%)'].round(2)
division_yoy_table['YoY Change (%)'] = division_yoy_table['YoY Change (%)'].round(2)

print(f"\n{division_yoy_table.to_string(index=False)}")

# Save to CSV
output_filename = f'division_yoy_{latest_date.strftime("%Y%m")}.csv'
division_yoy_table.to_csv(output_filename, index=False)
print(f"\n‚úì Table saved to: {output_filename}")

Latest data available for: January 2026

                                                             Division  Index Value  MoM Change (%)  YoY Change (%)
Personal care, social protection and miscellaneous goods and services       122.63            5.23           19.02
                                         CPI (Ex Veggies and Cereals)       105.42            0.63            3.80
                                                             Core CPI       105.05            0.67            3.41
                                                   Education services       105.33            0.08            3.35
                                   CPI (Excluding Food and Beverages)       104.66            0.61            3.14
                                     CPI Excluding Food and Beverages       104.72            0.58            3.12
                         CPI (Excluding Paan Tobacco and Intoxicants)       104.47            0.35            3.07
                                       

## Part 2: Interactive Month-on-Month Heatmaps

Visualize MoM inflation changes across different hierarchy levels with dropdown selection

In [214]:
# Prepare data for different hierarchy levels
# We'll create datasets for Division, Group, Class, and Sub-class levels

# Division level (already have this)
div_heatmap_data = division_level.copy()

# Group level: division + group (rest are *)
group_level = if_df_analysis[
    (if_df_analysis['state'] == 'All India') &
    (if_df_analysis['sector'] == 'Combined') &
    (if_df_analysis['group'] != '*') & 
    (if_df_analysis['class'] == '*') & 
    (if_df_analysis['sub_class'] == '*') & 
    (if_df_analysis['item'] == '*')
].copy()

# Class level: division + group + class (sub_class and item are *)
class_level = if_df_analysis[
    (if_df_analysis['state'] == 'All India') &
    (if_df_analysis['sector'] == 'Combined') &
    (if_df_analysis['class'] != '*') & 
    (if_df_analysis['sub_class'] == '*') & 
    (if_df_analysis['item'] == '*')
].copy()

# Sub-class level: division + group + class + sub_class (item is *)
subclass_level = if_df_analysis[
    (if_df_analysis['state'] == 'All India') &
    (if_df_analysis['sector'] == 'Combined') &
    (if_df_analysis['sub_class'] != '*') & 
    (if_df_analysis['item'] == '*')
].copy()

print(f"Division level: {len(div_heatmap_data)} records, {div_heatmap_data['division'].nunique()} divisions")
print(f"Group level: {len(group_level)} records, {group_level['group'].nunique()} groups")
print(f"Class level: {len(class_level)} records, {class_level['class'].nunique()} classes")
print(f"Sub-class level: {len(subclass_level)} records, {subclass_level['sub_class'].nunique()} sub-classes")

Division level: 260 records, 20 divisions
Group level: 499 records, 43 groups
Class level: 1146 records, 92 classes
Sub-class level: 1980 records, 162 sub-classes


In [215]:
# Create function to generate heatmap for each level
def create_mom_heatmap_data(df, category_column, label_column=None):
    """
    Create pivot table for MoM heatmap
    
    Parameters:
    df: dataframe with the data
    category_column: column to use for categories (division, group, class, etc.)
    label_column: optional additional column for labeling (e.g., division for groups)
    """
    # Get last 12 months of data
    recent_dates = sorted(df['date'].unique())[-12:]
    df_recent = df[df['date'].isin(recent_dates)].copy()
    
    # Create label for rows
    if label_column and label_column in df_recent.columns:
        df_recent['row_label'] = df_recent[label_column] + ' - ' + df_recent[category_column]
    else:
        df_recent['row_label'] = df_recent[category_column]
    
    # Create pivot table
    pivot = df_recent.pivot_table(
        index='row_label',
        columns='date',
        values='mom_change',
        aggfunc='first'
    )
    
    # Format column names
    pivot.columns = [col.strftime('%b %Y') for col in pivot.columns]
    
    return pivot

# Create pivot tables for each level
div_pivot = create_mom_heatmap_data(div_heatmap_data, 'division')
group_pivot = create_mom_heatmap_data(group_level, 'group', 'division')
class_pivot = create_mom_heatmap_data(class_level, 'class', 'group')
subclass_pivot = create_mom_heatmap_data(subclass_level, 'sub_class', 'class')

print("Heatmap data prepared:")
print(f"Division heatmap: {div_pivot.shape}")
print(f"Group heatmap: {group_pivot.shape}")
print(f"Class heatmap: {class_pivot.shape}")
print(f"Sub-class heatmap: {subclass_pivot.shape}")

Heatmap data prepared:
Division heatmap: (20, 12)
Group heatmap: (43, 12)
Class heatmap: (92, 12)
Sub-class heatmap: (162, 12)


In [216]:
# Create interactive heatmap with dropdown menu and checkboxes
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Function to create heatmap trace with text annotations
def create_heatmap_trace(pivot_data, name, visible=False):
    # Create text annotations for each cell (show the percentage value)
    text_values = [[f"{val:.2f}%" if not pd.isna(val) else "" 
                    for val in row] 
                   for row in pivot_data.values]
    
    return go.Heatmap(
        z=pivot_data.values,
        x=pivot_data.columns,
        y=pivot_data.index,
        text=text_values,
        texttemplate="%{text}",
        textfont={"size": 8},
        colorscale=[
            [0, '#006400'],      # Dark green - strong deflation
            [0.3, '#90EE90'],    # Light green - deflation
            [0.45, '#FFFFFF'],   # White - near zero
            [0.55, '#FFE4B5'],   # Light orange/beige - low inflation
            [0.7, '#FF8C00'],    # Dark orange - medium inflation
            [1, '#DC143C']       # Crimson red - high inflation
        ],
        zmid=0,
        colorbar=dict(
            title="MoM Change (%)"
        ),
        hovertemplate='<b>%{y}</b><br>%{x}<br>MoM: %{z:.2f}%<extra></extra>',
        name=name,
        visible=visible
    )

# Create traces for each level
traces = [
    create_heatmap_trace(div_pivot, 'Division', visible=True),
    create_heatmap_trace(group_pivot, 'Group', visible=False),
    create_heatmap_trace(class_pivot, 'Class', visible=False),
    create_heatmap_trace(subclass_pivot, 'Sub-class', visible=False)
]

# Create figure
fig = go.Figure(data=traces)

# Add dropdown menu for hierarchy levels
fig.update_layout(
    updatemenus=[
        # Dropdown for hierarchy level
        dict(
            buttons=[
                dict(
                    label="Division Level",
                    method="update",
                    args=[{"visible": [True, False, False, False]},
                          {"title": "Month-on-Month Inflation Heatmap - Division Level"}]
                ),
                dict(
                    label="Group Level",
                    method="update",
                    args=[{"visible": [False, True, False, False]},
                          {"title": "Month-on-Month Inflation Heatmap - Group Level"}]
                ),
                dict(
                    label="Class Level",
                    method="update",
                    args=[{"visible": [False, False, True, False]},
                          {"title": "Month-on-Month Inflation Heatmap - Class Level"}]
                ),
                dict(
                    label="Sub-class Level",
                    method="update",
                    args=[{"visible": [False, False, False, True]},
                          {"title": "Month-on-Month Inflation Heatmap - Sub-class Level"}]
                ),
            ],
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.15,
            xanchor="left",
            y=1.15,
            yanchor="top",
            bgcolor="white",
            bordercolor="gray",
            borderwidth=1
        ),
    ],
    title=dict(
        text="Month-on-Month Inflation Heatmap - Division Level<br><sub>Use dropdown to change hierarchy level. Click on category names in legend to show/hide.</sub>",
        font=dict(size=16, family='Arial Black')
    ),
    xaxis=dict(
        title="Month",
        tickangle=-45,
        tickfont=dict(size=10)
    ),
    yaxis=dict(
        title="Category",
        tickfont=dict(size=9)
    ),
    height=800,
    width=1400,
    margin=dict(l=300, r=100, t=120, b=100),
    showlegend=False
)

# Add instructions
print("=" * 80)
print("INTERACTIVE HEATMAP CONTROLS:")
print("=" * 80)
print("1. Use the DROPDOWN menu (top-left) to switch between hierarchy levels")
print("2. Values are displayed directly on the heatmap cells")
print("3. Hover over cells for detailed information")
print("4. Color scale:")
print("   üü¢ Green = Deflation (negative inflation)")
print("   ‚ö™ White = Near zero inflation")
print("   üü° Beige/Light Orange = Low inflation (0-2%)")
print("   üü† Orange = Medium inflation (2-4%)")
print("   üî¥ Red = High inflation (>4%)")
print("=" * 80)

fig.show()

INTERACTIVE HEATMAP CONTROLS:
1. Use the DROPDOWN menu (top-left) to switch between hierarchy levels
2. Values are displayed directly on the heatmap cells
3. Hover over cells for detailed information
4. Color scale:
   üü¢ Green = Deflation (negative inflation)
   ‚ö™ White = Near zero inflation
   üü° Beige/Light Orange = Low inflation (0-2%)
   üü† Orange = Medium inflation (2-4%)
   üî¥ Red = High inflation (>4%)


### Advanced Heatmap with Category Filtering

Create custom heatmaps by selecting specific divisions or categories

In [217]:
# Function to create filtered heatmap for specific categories
def create_filtered_heatmap(pivot_data, title, categories_to_include=None, categories_to_exclude=None):
    """
    Create a heatmap with specific categories included or excluded
    
    Parameters:
    pivot_data: pivot table with categories as index
    title: title for the heatmap
    categories_to_include: list of categories to include (if None, include all)
    categories_to_exclude: list of categories to exclude (if None, exclude none)
    """
    # Filter data
    filtered_data = pivot_data.copy()
    
    if categories_to_include is not None:
        # Only include specified categories
        filtered_data = filtered_data[filtered_data.index.isin(categories_to_include)]
    
    if categories_to_exclude is not None:
        # Exclude specified categories
        filtered_data = filtered_data[~filtered_data.index.isin(categories_to_exclude)]
    
    # Create text annotations
    text_values = [[f"{val:.2f}%" if not pd.isna(val) else "" 
                    for val in row] 
                   for row in filtered_data.values]
    
    # Create heatmap
    fig = go.Figure(data=go.Heatmap(
        z=filtered_data.values,
        x=filtered_data.columns,
        y=filtered_data.index,
        text=text_values,
        texttemplate="%{text}",
        textfont={"size": 9},
        colorscale=[
            [0, '#006400'],      # Dark green - strong deflation
            [0.3, '#90EE90'],    # Light green - deflation
            [0.45, '#FFFFFF'],   # White - near zero
            [0.55, '#FFE4B5'],   # Light orange/beige - low inflation
            [0.7, '#FF8C00'],    # Dark orange - medium inflation
            [1, '#DC143C']       # Crimson red - high inflation
        ],
        zmid=0,
        colorbar=dict(title="MoM Change (%)"),
        hovertemplate='<b>%{y}</b><br>%{x}<br>MoM: %{z:.2f}%<extra></extra>'
    ))
    
    fig.update_layout(
        title=dict(text=title, font=dict(size=16)),
        xaxis=dict(title="Month", tickangle=-45, tickfont=dict(size=10)),
        yaxis=dict(title="Category", tickfont=dict(size=10)),
        height=max(400, len(filtered_data) * 25),  # Dynamic height based on rows
        width=1400,
        margin=dict(l=300, r=100, t=100, b=100)
    )
    
    return fig

print("‚úì Filtering function created!")
print("\nExample usage:")
print("  # Include only specific divisions")
print("  fig = create_filtered_heatmap(div_pivot, 'Selected Divisions',")
print("      categories_to_include=['Food and beverages', 'Housing'])")
print("  fig.show()")
print("\n  # Exclude specific divisions")
print("  fig = create_filtered_heatmap(div_pivot, 'Excluding CPI General',")
print("      categories_to_exclude=['CPI (General)'])")
print("  fig.show()")

‚úì Filtering function created!

Example usage:
  # Include only specific divisions
  fig = create_filtered_heatmap(div_pivot, 'Selected Divisions',
      categories_to_include=['Food and beverages', 'Housing'])
  fig.show()

  # Exclude specific divisions
  fig = create_filtered_heatmap(div_pivot, 'Excluding CPI General',
      categories_to_exclude=['CPI (General)'])
  fig.show()


In [218]:
# Example: Show available divisions to choose from
print("=" * 80)
print("AVAILABLE DIVISIONS:")
print("=" * 80)
for i, div in enumerate(div_pivot.index, 1):
    print(f"{i}. {div}")
print("=" * 80)

# Example: Create heatmap excluding CPI General (to see individual divisions better)
fig_filtered = create_filtered_heatmap(
    div_pivot, 
    'Division-Level MoM Inflation (Excluding CPI General)',
    categories_to_exclude=['CPI (General)']
)
fig_filtered.show()

AVAILABLE DIVISIONS:
1. CPI (Ex Veggies and Cereals)
2. CPI (Excluding Food and Beverages)
3. CPI (Excluding Paan Tobacco and Intoxicants)
4. CPI (General)
5. CPI Excluding Food and Beverages
6. Clothing and footwear
7. Core (Ex Gold and Silver)
8. Core (Ex Gold)
9. Core CPI
10. Education services
11. Food and beverages
12. Furnishings, household equipment and routine household maintenance
13. Health
14. Housing, water, electricity, gas and other fuels
15. Information and communication
16. Paan, tobacco and intoxicants
17. Personal care, social protection and miscellaneous goods and services
18. Recreation, sport and culture
19. Restaurants and accommodation services
20. Transport


In [219]:
# Example: Focus on specific divisions of interest (e.g., Food, Housing, Transport)
selected_divisions = ['CPI (General)', 'CPI (Excluding Food and Beverages)','Core (Ex Gold and Silver)', 'CPI (Ex Veggies and Cereals)','Core (Ex Gold)', 'Core CPI']

fig_selected = create_filtered_heatmap(
    div_pivot,
    'Key Divisions: Food, Housing & Transport MoM Inflation',
    categories_to_include=selected_divisions #Or Categories to Exclude=selected_divisions to exclude them instead
)
fig_selected.show()

In [220]:
# Export heatmap data to CSV files for each level
div_pivot.to_csv('heatmap_division_mom.csv')
group_pivot.to_csv('heatmap_group_mom.csv')
class_pivot.to_csv('heatmap_class_mom.csv')
subclass_pivot.to_csv('heatmap_subclass_mom.csv')

print("‚úì Heatmap data exported:")
print("  - heatmap_division_mom.csv")
print("  - heatmap_group_mom.csv")
print("  - heatmap_class_mom.csv")
print("  - heatmap_subclass_mom.csv")

‚úì Heatmap data exported:
  - heatmap_division_mom.csv
  - heatmap_group_mom.csv
  - heatmap_class_mom.csv
  - heatmap_subclass_mom.csv


## Part 3: Interactive Bar Charts for YoY Changes

Create bar charts to visualize Year-over-Year inflation trends

In [221]:
# Show available divisions for selection
print("=" * 80)
print("AVAILABLE DIVISIONS FOR BAR CHART:")
print("=" * 80)
divisions_list = division_latest['division'].tolist()
for i, div in enumerate(divisions_list, 1):
    yoy_val = division_latest[division_latest['division'] == div]['yoy_change'].values[0]
    print(f"{i}. {div} (YoY: {yoy_val:.2f}%)")
print("=" * 80)
print("\nSelect divisions for the bar chart in the next cell")

AVAILABLE DIVISIONS FOR BAR CHART:
1. CPI (General) (YoY: 2.74%)
2. Clothing and footwear (YoY: 2.97%)
3. Education services (YoY: 3.35%)
4. Food and beverages (YoY: 2.12%)
5. Furnishings, household equipment and routine household maintenance (YoY: 1.45%)
6. Health (YoY: 2.18%)
7. Housing, water, electricity, gas and other fuels (YoY: 1.53%)
8. Information and communication (YoY: 0.16%)
9. Paan, tobacco and intoxicants (YoY: 2.87%)
10. Personal care, social protection and miscellaneous goods and services (YoY: 19.02%)
11. Recreation, sport and culture (YoY: 2.32%)
12. Restaurants and accommodation services (YoY: 2.86%)
13. Transport (YoY: 0.09%)
14. CPI Excluding Food and Beverages (YoY: 3.12%)
15. CPI (Excluding Food and Beverages) (YoY: 3.14%)
16. Core CPI (YoY: 3.41%)
17. Core (Ex Gold) (YoY: 2.87%)
18. Core (Ex Gold and Silver) (YoY: 1.89%)
19. CPI (Excluding Paan Tobacco and Intoxicants) (YoY: 3.07%)
20. CPI (Ex Veggies and Cereals) (YoY: 3.80%)

Select divisions for the bar chart

In [222]:
# Function to create YoY bar chart with selectable divisions
def create_yoy_bar_chart(divisions_to_include=None, divisions_to_exclude=None, 
                         sort_by='value', title=None):
    """
    Create an interactive bar chart for YoY inflation changes
    
    Parameters:
    divisions_to_include: list of divisions to include (if None, include all)
    divisions_to_exclude: list of divisions to exclude (if None, exclude none)
    sort_by: 'value' (sort by YoY value) or 'name' (alphabetical)
    title: custom title (if None, auto-generate)
    """
    # Filter data
    data = division_latest.copy()
    
    if divisions_to_include is not None:
        data = data[data['division'].isin(divisions_to_include)]
    
    if divisions_to_exclude is not None:
        data = data[~data['division'].isin(divisions_to_exclude)]
    
    # Sort data
    if sort_by == 'value':
        data = data.sort_values('yoy_change', ascending=True)  # Ascending for horizontal bar
    else:
        data = data.sort_values('division')
    
    # Create color based on YoY value
    colors = []
    for val in data['yoy_change']:
        if pd.isna(val):
            colors.append('gray')
        elif val < 0:
            colors.append('#006400')  # Dark green for deflation
        elif val < 2:
            colors.append('#90EE90')  # Light green for low inflation
        elif val < 4:
            colors.append('#FFE4B5')  # Beige for moderate
        elif val < 6:
            colors.append('#FF8C00')  # Orange for medium-high
        else:
            colors.append('#DC143C')  # Red for high inflation
    
    # Create bar chart
    fig = go.Figure(data=[
        go.Bar(
            x=data['yoy_change'],
            y=data['division'],
            orientation='h',
            marker=dict(
                color=colors,
                line=dict(color='black', width=0.5)
            ),
            text=[f"{val:.2f}%" if not pd.isna(val) else "N/A" for val in data['yoy_change']],
            textposition='outside',
            hovertemplate='<b>%{y}</b><br>YoY: %{x:.2f}%<br>Index: %{customdata[0]:.2f}<br>MoM: %{customdata[1]:.2f}%<extra></extra>',
            customdata=data[['index', 'mom_change']].values
        )
    ])
    
    # Auto-generate title if not provided
    if title is None:
        title = f'Year-over-Year Inflation by Division - {latest_date.strftime("%B %Y")}'
    
    fig.update_layout(
        title=dict(text=title, font=dict(size=16, family='Arial Black')),
        xaxis=dict(
            title='YoY Change (%)',
            tickfont=dict(size=11),
            gridcolor='lightgray'
        ),
        yaxis=dict(
            title='',
            tickfont=dict(size=10)
        ),
        height=max(400, len(data) * 30),  # Dynamic height
        width=1200,
        margin=dict(l=250, r=150, t=100, b=80),
        plot_bgcolor='white',
        showlegend=False
    )
    
    # Add zero line
    fig.add_vline(x=0, line_dash="dash", line_color="gray", line_width=1)
    
    return fig

print("‚úì YoY bar chart function created!")
print("\nUsage examples:")
print("  # All divisions")
print("  fig = create_yoy_bar_chart()")
print("  fig.show()")
print("\n  # Selected divisions")
print("  fig = create_yoy_bar_chart(divisions_to_include=['Food and beverages', 'Housing'])")
print("  fig.show()")
print("\n  # Exclude divisions")
print("  fig = create_yoy_bar_chart(divisions_to_exclude=['CPI (General)'])")
print("  fig.show()")

‚úì YoY bar chart function created!

Usage examples:
  # All divisions
  fig = create_yoy_bar_chart()
  fig.show()

  # Selected divisions
  fig = create_yoy_bar_chart(divisions_to_include=['Food and beverages', 'Housing'])
  fig.show()

  # Exclude divisions
  fig = create_yoy_bar_chart(divisions_to_exclude=['CPI (General)'])
  fig.show()


In [223]:
# Example 1: Bar chart with all divisions
fig_all = create_yoy_bar_chart()
fig_all.show()

In [224]:
# Example 2: Select your own divisions
# Modify the list below to include the divisions you want to visualize

selected_divisions_bar = [
'CPI (General)', 'CPI (Excluding Food and Beverages)','Core (Ex Gold and Silver)', 'CPI (Ex Veggies and Cereals)','Core (Ex Gold)', 'Core CPI'
]

fig_selected_bar = create_yoy_bar_chart(
    divisions_to_include=selected_divisions_bar,
    title='YoY Inflation: Selected Key Divisions'
)
fig_selected_bar.show()

## Part 4: Inflation Dispersion Analysis

### Plan for Dispersion Chart & Stacked Area Chart

**Goal:** Visualize how inflation is distributed across different items in the CPI basket, weighted by their importance.

**Data Requirements:**
1. **Item-level inflation data** - Get the most granular level (where `item != '*'`)
2. **Item weights** - Load from `weights_new/items.csv` to understand relative importance
3. **Time series data** - For stacked area chart showing evolution over time
4. **Latest month data** - Focus on current inflation dispersion

**Visualization Approach:**

#### 1. **Bubble Chart (Dispersion)**
   - X-axis: Item name or category
   - Y-axis: YoY inflation rate (%)
   - Bubble size: Weight of item in CPI basket
   - Color: Inflation intensity (using red-green scale)
   - Shows current month dispersion

#### 2. **Stacked Area Chart (Time Evolution)**
   - X-axis: Time (months)
   - Y-axis: Cumulative contribution to overall inflation
   - Areas: Different divisions stacked
   - Shows how different categories contribute to total inflation over time
   - Color-coded by division
   
#### 3. **Distribution Histogram**
   - Show how many items fall into different inflation buckets
   - Weight-adjusted distribution (not just count)
   
**Key Metrics to Display:**
   - Weighted median inflation
   - Weighted standard deviation (measure of dispersion)
   - % of basket experiencing deflation vs high inflation
   - Top contributors to overall inflation
   - Contribution breakdown over time

**Implementation Steps:**
1. Load item weights data
2. Filter for item-level inflation (most granular)
3. Merge inflation data with weights
4. Create interactive bubble chart (dispersion)
5. Create stacked area chart (time evolution of division contributions)
6. Calculate dispersion statistics
7. Create histogram showing distribution

In [225]:
# Step 1: Load item weights data
items_weights = pd.read_csv('../weights_new/items.csv')

print(f"Item weights loaded: {len(items_weights)} items")
print(f"\nColumns: {list(items_weights.columns)}")
print(f"\nSample weights data:")
print(items_weights.head(10))

Item weights loaded: 358 items

Columns: ['Item_Code', 'Item_Name', 'Subclass_Code', 'Weight', 'Include_in_CPI']

Sample weights data:
       Item_Code                       Item_Name Subclass_Code    Weight  \
0  01.1.1.1.1.01                            Rice      01.1.1.1  2.013186   
1  01.1.1.1.1.02                           Wheat      01.1.1.1  0.767549   
2  01.1.1.1.1.03          Jowar and its products      01.1.1.1  0.062233   
3  01.1.1.1.1.04           Ragi and its products      01.1.1.1  0.029888   
4  01.1.1.1.1.05          Bajra and its products      01.1.1.1  0.045496   
5  01.1.1.1.1.06          Maize and its products      01.1.1.1  0.036469   
6  01.1.1.1.1.07  Small millets and its products      01.1.1.1  0.006308   
7  01.1.1.1.1.08         Barley and its products      01.1.1.1  0.005306   
8  01.1.1.1.1.09      Other cereals and products      01.1.1.1  0.030317   
9  01.1.1.1.1.10            Other wheat products      01.1.1.1  0.006537   

   Include_in_CPI  
0       

In [226]:
# Step 2: Filter for item-level data (most granular)
item_level = if_df_analysis[
    (if_df_analysis['state'] == 'All India') &
    (if_df_analysis['sector'] == 'Combined') &
    (if_df_analysis['item'] != '*')
].copy()

print(f"Item-level records: {len(item_level)}")
print(f"Unique items: {item_level['item'].nunique()}")
print(f"Date range: {item_level['date'].min()} to {item_level['date'].max()}")

# Get latest month for dispersion analysis
latest_items = item_level[item_level['date'] == latest_date].copy()
print(f"\nLatest month ({latest_date.strftime('%B %Y')}): {len(latest_items)} items")

Item-level records: 4524
Unique items: 358
Date range: 2025-01-01 00:00:00 to 2026-01-01 00:00:00

Latest month (January 2026): 358 items


In [227]:
# Step 3: Merge inflation data with weights
# The weights file uses Item_Code, so we need to merge on item name
# Rename columns for easier merging
items_weights_clean = items_weights.rename(columns={
    'Item_Name': 'item',
    'Weight': 'weight'
})

# Merge on item name
dispersion_data = latest_items.merge(
    items_weights_clean[['item', 'weight']],
    on='item',
    how='left'
)

print(f"Merged data: {len(dispersion_data)} items")
print(f"Items with weights: {dispersion_data['weight'].notna().sum()}")
print(f"Items without weights: {dispersion_data['weight'].isna().sum()}")

# Check weight distribution
print(f"\nWeight statistics:")
print(dispersion_data['weight'].describe())
print(f"\nTotal weight: {dispersion_data['weight'].sum():.2f}")

Merged data: 358 items
Items with weights: 357
Items without weights: 1

Weight statistics:
count    357.000000
mean       0.279818
std        0.761770
min        0.000359
25%        0.031960
50%        0.096819
75%        0.261506
max       10.884970
Name: weight, dtype: float64

Total weight: 99.89


In [228]:
# Step 4: Calculate dispersion metrics
# Remove items without YoY data or weights
dispersion_clean = dispersion_data[
    dispersion_data['yoy_change'].notna() & 
    dispersion_data['weight'].notna()
].copy()

print(f"Items for dispersion analysis: {len(dispersion_clean)}")

# Calculate weighted metrics
total_weight = dispersion_clean['weight'].sum()
dispersion_clean['weight_normalized'] = dispersion_clean['weight'] / total_weight

# Weighted mean
weighted_mean = (dispersion_clean['yoy_change'] * dispersion_clean['weight_normalized']).sum()

# Weighted median
dispersion_sorted = dispersion_clean.sort_values('yoy_change')
dispersion_sorted['cumulative_weight'] = dispersion_sorted['weight_normalized'].cumsum()
weighted_median_idx = (dispersion_sorted['cumulative_weight'] >= 0.5).idxmax()
weighted_median = dispersion_sorted.loc[weighted_median_idx, 'yoy_change']

# Weighted standard deviation
weighted_variance = ((dispersion_clean['yoy_change'] - weighted_mean) ** 2 * 
                     dispersion_clean['weight_normalized']).sum()
weighted_std = np.sqrt(weighted_variance)

# Basket composition by inflation zones
deflation_weight = dispersion_clean[dispersion_clean['yoy_change'] < 0]['weight'].sum() / total_weight * 100
low_inflation_weight = dispersion_clean[(dispersion_clean['yoy_change'] >= 0) & 
                                        (dispersion_clean['yoy_change'] < 2)]['weight'].sum() / total_weight * 100
moderate_weight = dispersion_clean[(dispersion_clean['yoy_change'] >= 2) & 
                                   (dispersion_clean['yoy_change'] < 4)]['weight'].sum() / total_weight * 100
high_weight = dispersion_clean[(dispersion_clean['yoy_change'] >= 4) & 
                               (dispersion_clean['yoy_change'] < 6)]['weight'].sum() / total_weight * 100
extreme_weight = dispersion_clean[dispersion_clean['yoy_change'] >= 6]['weight'].sum() / total_weight * 100

print("=" * 80)
print(f"DISPERSION METRICS - {latest_date.strftime('%B %Y')}")
print("=" * 80)
print(f"Weighted Mean Inflation: {weighted_mean:.2f}%")
print(f"Weighted Median Inflation: {weighted_median:.2f}%")
print(f"Weighted Std Deviation: {weighted_std:.2f}% (Dispersion measure)")
print(f"\nBASKET COMPOSITION BY INFLATION ZONES:")
print(f"  Deflation (<0%):        {deflation_weight:6.2f}% of basket")
print(f"  Low (0-2%):             {low_inflation_weight:6.2f}% of basket")
print(f"  Moderate (2-4%):        {moderate_weight:6.2f}% of basket")
print(f"  High (4-6%):            {high_weight:6.2f}% of basket")
print(f"  Extreme (>6%):          {extreme_weight:6.2f}% of basket")
print("=" * 80)

Items for dispersion analysis: 227
DISPERSION METRICS - January 2026
Weighted Mean Inflation: 2.59%
Weighted Median Inflation: 1.93%
Weighted Std Deviation: 14.56% (Dispersion measure)

BASKET COMPOSITION BY INFLATION ZONES:
  Deflation (<0%):         14.37% of basket
  Low (0-2%):              40.24% of basket
  Moderate (2-4%):         30.71% of basket
  High (4-6%):              6.23% of basket
  Extreme (>6%):            8.45% of basket


In [229]:
# Step 5: Create Bubble Chart (Dispersion Visualization)
# Sort by division for better organization
dispersion_plot = dispersion_clean.sort_values(['division', 'yoy_change'])

# Assign colors based on YoY inflation
def get_inflation_color(val):
    if pd.isna(val):
        return 'gray'
    elif val < 0:
        return '#006400'  # Dark green
    elif val < 2:
        return '#90EE90'  # Light green
    elif val < 4:
        return '#FFE4B5'  # Beige
    elif val < 6:
        return '#FF8C00'  # Orange
    else:
        return '#DC143C'  # Red

dispersion_plot['color'] = dispersion_plot['yoy_change'].apply(get_inflation_color)

# Create bubble chart
fig_bubble = go.Figure()

# Add trace for each division to allow filtering
for division in dispersion_plot['division'].unique():
    div_data = dispersion_plot[dispersion_plot['division'] == division]
    
    fig_bubble.add_trace(go.Scatter(
        x=div_data.index,
        y=div_data['yoy_change'],
        mode='markers',
        name=division,
        marker=dict(
            size=div_data['weight'] * 50,  # Scale up for visibility
            color=div_data['color'],
            line=dict(color='black', width=0.5),
            opacity=0.7
        ),
        text=div_data['item'],
        customdata=np.column_stack((
            div_data['weight'],
            div_data['mom_change'],
            div_data['index']
        )),
        hovertemplate='<b>%{text}</b><br>' +
                      'Division: ' + division + '<br>' +
                      'YoY: %{y:.2f}%<br>' +
                      'MoM: %{customdata[1]:.2f}%<br>' +
                      'Weight: %{customdata[0]:.4f}<br>' +
                      'Index: %{customdata[2]:.2f}<extra></extra>'
    ))

fig_bubble.update_layout(
    title=dict(
        text=f"Inflation Dispersion: Item-Level Analysis - {latest_date.strftime('%B %Y')}<br>" +
             f"<sub>Bubble size = Item weight | Color = Inflation intensity | " +
             f"Weighted Std Dev: {weighted_std:.2f}%</sub>",
        font=dict(size=16, family='Arial Black')
    ),
    xaxis=dict(
        title='Items (grouped by division)',
        showticklabels=False,
        gridcolor='lightgray'
    ),
    yaxis=dict(
        title='YoY Inflation (%)',
        gridcolor='lightgray',
        zeroline=True,
        zerolinecolor='black',
        zerolinewidth=2
    ),
    height=700,
    width=1400,
    plot_bgcolor='white',
    hovermode='closest',
    showlegend=True,
    legend=dict(
        title="Division<br>(Click to show/hide)",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02
    )
)

# Add reference lines for inflation zones
fig_bubble.add_hline(y=0, line_dash="dash", line_color="green", line_width=1, 
                     annotation_text="0%", annotation_position="right")
fig_bubble.add_hline(y=2, line_dash="dot", line_color="orange", line_width=1,
                     annotation_text="2%", annotation_position="right")
fig_bubble.add_hline(y=4, line_dash="dot", line_color="red", line_width=1,
                     annotation_text="4%", annotation_position="right")
fig_bubble.add_hline(y=weighted_median, line_dash="solid", line_color="blue", line_width=2,
                     annotation_text=f"Weighted Median: {weighted_median:.2f}%", 
                     annotation_position="left")

print("‚úì Bubble chart created!")
print("\nINTERACTION:")
print("  - Click on division names in legend to show/hide")
print("  - Hover over bubbles for detailed item information")
print("  - Larger bubbles = higher weight in CPI basket")

fig_bubble.show()

‚úì Bubble chart created!

INTERACTION:
  - Click on division names in legend to show/hide
  - Hover over bubbles for detailed item information
  - Larger bubbles = higher weight in CPI basket


In [230]:
# Step 6: Create Stacked Area Chart (Item-Level Contributions Over Time)
# Merge item weights with time series data
item_with_weights = item_level.merge(
    items_weights_clean[['item', 'weight']],
    on='item',
    how='left'
)

# Remove items without weights
item_with_weights = item_with_weights[item_with_weights['weight'].notna()].copy()

# Calculate contribution (MoM inflation √ó weight)
# This shows how much each item contributes to overall MoM change
item_with_weights['contribution'] = item_with_weights['mom_change'] * item_with_weights['weight'] / 100

print(f"Item-level time series prepared:")
print(f"  Total records: {len(item_with_weights)}")
print(f"  Unique items: {item_with_weights['item'].nunique()}")
print(f"  Date range: {item_with_weights['date'].min()} to {item_with_weights['date'].max()}")

# Since we have many items, let's show top contributors
# Get items with highest average absolute contribution
item_avg_contribution = item_with_weights.groupby('item')['contribution'].agg(
    lambda x: abs(x).mean()
).sort_values(ascending=False)

# Select top 20 items by average contribution
top_items = item_avg_contribution.head(20).index.tolist()

print(f"\nTop 20 items by average contribution (will be shown in stacked chart):")
for i, item in enumerate(top_items, 1):
    avg_contrib = item_avg_contribution[item]
    print(f"  {i}. {item[:50]:<50} (Avg: {avg_contrib:.4f})")

# Filter data for top items
top_items_data = item_with_weights[item_with_weights['item'].isin(top_items)].copy()

# Prepare data for stacked area chart
stacked_data = top_items_data.pivot_table(
    index='date',
    columns='item',
    values='contribution',
    aggfunc='first'
).fillna(0)

print(f"\nStacked area chart data shape: {stacked_data.shape}")
print(f"  Months: {len(stacked_data)}")
print(f"  Items: {len(stacked_data.columns)}")

Item-level time series prepared:
  Total records: 4511
  Unique items: 357
  Date range: 2025-01-01 00:00:00 to 2026-01-01 00:00:00

Top 20 items by average contribution (will be shown in stacked chart):
  1. Tomato                                             (Avg: 0.0921)
  2. Potato                                             (Avg: 0.0430)
  3. Apple                                              (Avg: 0.0408)
  4. Onion                                              (Avg: 0.0408)
  5. Chicken                                            (Avg: 0.0378)
  6. Cauliflower                                        (Avg: 0.0344)
  7. Mango                                              (Avg: 0.0342)
  8. Garlic                                             (Avg: 0.0300)
  9. Silver jewellery                                   (Avg: 0.0272)
  10. Palak and other leafy vegetables                   (Avg: 0.0223)
  11. Peas                                               (Avg: 0.0218)
  12. Lady's finger     

In [231]:
# Create stacked area chart for top items
fig_stacked = go.Figure()

# Define color palette for items
colors = px.colors.qualitative.Plotly + px.colors.qualitative.Set2 + px.colors.qualitative.Pastel + px.colors.qualitative.Bold

# Add area trace for each item
for i, item in enumerate(stacked_data.columns):
    # Truncate long item names for legend
    display_name = item[:40] + '...' if len(item) > 40 else item
    
    fig_stacked.add_trace(go.Scatter(
        x=stacked_data.index,
        y=stacked_data[item],
        mode='lines',
        name=display_name,
        stackgroup='one',  # This creates the stacked effect
        fillcolor=colors[i % len(colors)],
        line=dict(width=0.5, color=colors[i % len(colors)]),
        hovertemplate='<b>' + item + '</b><br>' +
                      'Date: %{x|%b %Y}<br>' +
                      'Contribution: %{y:.4f}<br>' +
                      '<extra></extra>'
    ))

fig_stacked.update_layout(
    title=dict(
        text='Top 20 Item Contributions to Overall Inflation Over Time<br>' +
             '<sub>Stacked area shows cumulative contribution (MoM% √ó Weight) | Top items by average contribution</sub>',
        font=dict(size=16, family='Arial Black')
    ),
    xaxis=dict(
        title='Month',
        tickformat='%b %Y',
        tickangle=-45,
        gridcolor='lightgray'
    ),
    yaxis=dict(
        title='Cumulative Contribution to CPI Inflation',
        gridcolor='lightgray'
    ),
    height=700,
    width=1400,
    plot_bgcolor='white',
    hovermode='x unified',
    showlegend=True,
    legend=dict(
        title="Item<br>(Click to show/hide)",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02,
        bgcolor='rgba(255,255,255,0.8)',
        font=dict(size=9)
    )
)

print("=" * 80)
print("STACKED AREA CHART INTERACTION:")
print("=" * 80)
print("1. Hover over chart to see breakdown by item at each time point")
print("2. Click item names in legend to show/hide specific items")
print("3. Total height shows cumulative contribution from top 20 items")
print("4. Wider sections = larger contribution to overall inflation")
print("5. Showing top 20 items by average absolute contribution")
print("=" * 80)

fig_stacked.show()

STACKED AREA CHART INTERACTION:
1. Hover over chart to see breakdown by item at each time point
2. Click item names in legend to show/hide specific items
3. Total height shows cumulative contribution from top 20 items
4. Wider sections = larger contribution to overall inflation
5. Showing top 20 items by average absolute contribution


In [232]:
# Step 7: Create Distribution Histogram (Inflation Buckets)
# Create inflation buckets
bins = [-20, -2, 0, 2, 4, 6, 8, 10, 20]
labels = ['<-2%', '-2-0%', '0-2%', '2-4%', '4-6%', '6-8%', '8-10%', '>10%']

dispersion_clean['inflation_bucket'] = pd.cut(
    dispersion_clean['yoy_change'], 
    bins=bins, 
    labels=labels,
    include_lowest=True
)

# Calculate count and weight for each bucket
bucket_stats = dispersion_clean.groupby('inflation_bucket', observed=True).agg({
    'item': 'count',
    'weight': 'sum'
}).reset_index()

bucket_stats.columns = ['Inflation Range', 'Item Count', 'Total Weight']
bucket_stats['Weight %'] = bucket_stats['Total Weight'] / bucket_stats['Total Weight'].sum() * 100

print("=" * 80)
print("INFLATION DISTRIBUTION:")
print("=" * 80)
print(bucket_stats.to_string(index=False))
print("=" * 80)

# Create histogram
fig_hist = go.Figure()

# Add item count bars
fig_hist.add_trace(go.Bar(
    x=bucket_stats['Inflation Range'],
    y=bucket_stats['Item Count'],
    name='Item Count',
    marker_color='lightblue',
    yaxis='y',
    hovertemplate='Range: %{x}<br>Items: %{y}<extra></extra>'
))

# Add weight percentage bars
fig_hist.add_trace(go.Bar(
    x=bucket_stats['Inflation Range'],
    y=bucket_stats['Weight %'],
    name='Weight %',
    marker_color='coral',
    yaxis='y2',
    hovertemplate='Range: %{x}<br>Weight: %{y:.2f}%<extra></extra>'
))

fig_hist.update_layout(
    title=dict(
        text=f'Inflation Distribution Across Items - {latest_date.strftime("%B %Y")}<br>' +
             '<sub>Blue = Item count | Coral = Basket weight (%)</sub>',
        font=dict(size=16, family='Arial Black')
    ),
    xaxis=dict(
        title='Inflation Range (YoY %)',
        tickangle=-45
    ),
    yaxis=dict(
        title='Number of Items',
        side='left',
        gridcolor='lightgray'
    ),
    yaxis2=dict(
        title='Basket Weight (%)',
        side='right',
        overlaying='y',
        gridcolor='lightgray'
    ),
    height=600,
    width=1200,
    plot_bgcolor='white',
    barmode='group',
    legend=dict(
        x=0.7,
        y=1.1,
        orientation='h'
    )
)

print("\n‚úì Distribution histogram created!")
print("\nINTERPRETATION:")
print("  - Blue bars show how many items fall in each inflation range")
print("  - Coral bars show what % of the CPI basket is in each range")
print("  - Different heights indicate weight concentration vs item distribution")

fig_hist.show()

INFLATION DISTRIBUTION:
Inflation Range  Item Count  Total Weight  Weight %
           <-2%          16      3.219060  4.936050
          -2-0%          24      4.400215  6.747213
           0-2%          63     27.863441 42.725312
           2-4%          84     21.283905 32.636368
           4-6%          17      4.318281  6.621576
           6-8%           4      1.513060  2.320100
          8-10%           2      0.230687  0.353731
           >10%           9      2.386652  3.659651

‚úì Distribution histogram created!

INTERPRETATION:
  - Blue bars show how many items fall in each inflation range
  - Coral bars show what % of the CPI basket is in each range
  - Different heights indicate weight concentration vs item distribution


In [233]:
# Step 8: Top Contributors Analysis
# Calculate contribution score (YoY √ó Weight)
dispersion_clean['contribution_score'] = dispersion_clean['yoy_change'] * dispersion_clean['weight']

# Get top 20 contributors (positive and negative)
top_contributors = dispersion_clean.nlargest(20, 'contribution_score')[
    ['division', 'group', 'class', 'item', 'yoy_change', 'mom_change', 'weight', 'contribution_score']
].copy()

bottom_contributors = dispersion_clean.nsmallest(20, 'contribution_score')[
    ['division', 'group', 'class', 'item', 'yoy_change', 'mom_change', 'weight', 'contribution_score']
].copy()

print("=" * 100)
print(f"TOP 20 INFLATION CONTRIBUTORS - {latest_date.strftime('%B %Y')}")
print("=" * 100)
print(f"{'Item':<40} {'Division':<20} {'YoY%':>8} {'Weight':>8} {'Score':>10}")
print("-" * 100)
for idx, row in top_contributors.iterrows():
    print(f"{row['item'][:38]:<40} {row['division'][:18]:<20} {row['yoy_change']:>7.2f}% {row['weight']:>8.4f} {row['contribution_score']:>10.4f}")
print("=" * 100)

print("\n" + "=" * 100)
print(f"TOP 20 DEFLATION CONTRIBUTORS (Reducing Inflation) - {latest_date.strftime('%B %Y')}")
print("=" * 100)
print(f"{'Item':<40} {'Division':<20} {'YoY%':>8} {'Weight':>8} {'Score':>10}")
print("-" * 100)
for idx, row in bottom_contributors.iterrows():
    print(f"{row['item'][:38]:<40} {row['division'][:18]:<20} {row['yoy_change']:>7.2f}% {row['weight']:>8.4f} {row['contribution_score']:>10.4f}")
print("=" * 100)

# Export to CSV
top_contributors.to_csv('top_inflation_contributors.csv', index=False)
bottom_contributors.to_csv('top_deflation_contributors.csv', index=False)
print("\n‚úì Contributor analysis exported to CSV files")

TOP 20 INFLATION CONTRIBUTORS - January 2026
Item                                     Division                 YoY%   Weight      Score
----------------------------------------------------------------------------------------------------
Silver jewellery                         Personal care, soc    159.68%   0.3127    49.9231
Tomato                                   Food and beverages     64.79%   0.4961    32.1403
Gold /diamond /platinum jewellery        Personal care, soc     46.77%   0.6230    29.1361
House Rent                               Housing, water, el      1.93%  10.8850    21.0285
Chicken                                  Food and beverages     10.42%   1.5752    16.4191
Milk: liquid                             Food and beverages      2.88%   5.2552    15.1228
Brinjal                                  Food and beverages     25.14%   0.2912     7.3210
Fish and prawn                           Food and beverages      6.74%   1.0223     6.8943
Green chillies                     

In [234]:
# Summary: Key Insights from Dispersion Analysis
print("=" * 100)
print(f"DISPERSION ANALYSIS SUMMARY - {latest_date.strftime('%B %Y')}")
print("=" * 100)
print(f"\nüìä OVERALL STATISTICS:")
print(f"   Total items analyzed: {len(dispersion_clean)}")
print(f"   Weighted Mean Inflation: {weighted_mean:.2f}%")
print(f"   Weighted Median Inflation: {weighted_median:.2f}%")
print(f"   Dispersion (Weighted Std Dev): {weighted_std:.2f}%")
print(f"\nüéØ BASKET COMPOSITION:")
print(f"   Deflation (<0%):       {deflation_weight:6.2f}% of basket")
print(f"   Low (0-2%):            {low_inflation_weight:6.2f}% of basket")
print(f"   Moderate (2-4%):       {moderate_weight:6.2f}% of basket")
print(f"   High (4-6%):           {high_weight:6.2f}% of basket")
print(f"   Extreme (>6%):         {extreme_weight:6.2f}% of basket")
print(f"\nüìà VISUALIZATIONS CREATED:")
print(f"   1. Bubble Chart - Shows dispersion across all items")
print(f"   2. Stacked Area Chart - Division contributions over time")
print(f"   3. Distribution Histogram - Item count vs basket weight by inflation range")
print(f"\nüíæ EXPORTS:")
print(f"   - top_inflation_contributors.csv")
print(f"   - top_deflation_contributors.csv")
print("=" * 100)

# Interpretation guide
if weighted_std < 2:
    dispersion_level = "LOW - Inflation is concentrated"
elif weighted_std < 4:
    dispersion_level = "MODERATE - Mixed inflation patterns"
else:
    dispersion_level = "HIGH - Wide variation across items"

print(f"\nüí° INTERPRETATION:")
print(f"   Dispersion Level: {dispersion_level}")
print(f"   Mean vs Median: {abs(weighted_mean - weighted_median):.2f}% difference")
if abs(weighted_mean - weighted_median) > 1:
    print(f"   ‚Üí Inflation distribution is SKEWED (few extreme items pulling average)")
else:
    print(f"   ‚Üí Inflation distribution is BALANCED (mean ‚âà median)")

DISPERSION ANALYSIS SUMMARY - January 2026

üìä OVERALL STATISTICS:
   Total items analyzed: 227
   Weighted Mean Inflation: 2.59%
   Weighted Median Inflation: 1.93%
   Dispersion (Weighted Std Dev): 14.56%

üéØ BASKET COMPOSITION:
   Deflation (<0%):        14.37% of basket
   Low (0-2%):             40.24% of basket
   Moderate (2-4%):        30.71% of basket
   High (4-6%):             6.23% of basket
   Extreme (>6%):           8.45% of basket

üìà VISUALIZATIONS CREATED:
   1. Bubble Chart - Shows dispersion across all items
   2. Stacked Area Chart - Division contributions over time
   3. Distribution Histogram - Item count vs basket weight by inflation range

üíæ EXPORTS:
   - top_inflation_contributors.csv
   - top_deflation_contributors.csv

üí° INTERPRETATION:
   Dispersion Level: HIGH - Wide variation across items
   Mean vs Median: 0.66% difference
   ‚Üí Inflation distribution is BALANCED (mean ‚âà median)
