In [130]:
import pandas as pandas
import matplotlib.pyplot as plt


In [131]:
#Extracting all the weights and saving them

item_weights_df = pandas.read_excel('CPI_2024_Weights.xlsx', sheet_name='5.3d', header=3 )
state_wise_weights_df = pandas.read_excel('CPI_2024_Weights.xlsx', sheet_name='5.3c', header=5 )
group_wise_weights_df = pandas.read_excel('CPI_2024_Weights.xlsx', sheet_name='5.3b', header=5 )

In [132]:
print(item_weights_df.columns)

Index(['State', 'State Name', 'Sector*', 'Item Name ', 'Item Code',
       'Subclass Code', 'Subclass Name', 'Class Code', 'Class Name',
       'Group Code', 'Group Name', 'Division Code', 'Division Name',
       'Share in All India**', 'Share within State***'],
      dtype='object')


In [133]:
# Normalize column names - strip whitespace and special characters
item_weights_df.columns = item_weights_df.columns.str.strip().str.replace('*', '', regex=False).str.replace(' ', '_')
state_wise_weights_df.columns = state_wise_weights_df.columns.str.strip().str.replace('*', '', regex=False).str.replace(' ', '_')
group_wise_weights_df.columns = group_wise_weights_df.columns.str.strip().str.replace('*', '', regex=False).str.replace(' ', '_')

print("Normalized columns for item_weights_df:")
print(item_weights_df.columns.tolist())


Normalized columns for item_weights_df:
['State', 'State_Name', 'Sector', 'Item_Name', 'Item_Code', 'Subclass_Code', 'Subclass_Name', 'Class_Code', 'Class_Name', 'Group_Code', 'Group_Name', 'Division_Code', 'Division_Name', 'Share_in_All_India', 'Share_within_State']


In [134]:
# Group by Item Code - SUM the shares across all states to get aggregated index
item_grouped = item_weights_df.groupby('Item_Code').agg({
    'Item_Name': 'first',
    'Share_in_All_India': 'sum',  # Sum shares across all states for aggregated index
    'State': lambda x: x.unique().tolist()  # List of all states
}).reset_index()

item_grouped.columns = ['Item_Code', 'Item_Name', 'Aggregated_Index', 'States']
print("Item Level Grouping:")
print(item_grouped.head())
print(f"\nTotal unique items: {len(item_grouped)}")


Item Level Grouping:
       Item_Code               Item_Name  Aggregated_Index  \
0  01.1.1.1.1.01                    Rice          2.013186   
1  01.1.1.1.1.02                   Wheat          0.767549   
2  01.1.1.1.1.03  Jowar and its products          0.062233   
3  01.1.1.1.1.04   Ragi and its products          0.029888   
4  01.1.1.1.1.05  Bajra and its products          0.045496   

                                              States  
0  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
1  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
2  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
3  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
4  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  

Total unique items: 358


In [135]:
# Group by Subclass Code - SUM the shares
subclass_grouped = item_weights_df.groupby('Subclass_Code').agg({
    'Subclass_Name': 'first',
    'Item_Code': lambda x: x.unique().tolist(),  # Items under this subclass
    'Share_in_All_India': 'sum',  # Sum shares across all states
    'State': lambda x: x.unique().tolist()  # States with this subclass
}).reset_index()

subclass_grouped.columns = ['Subclass_Code', 'Subclass_Name', 'Items', 'Aggregated_Index', 'States']
print("Subclass Level Grouping:")
print(subclass_grouped.head())
print(f"\nTotal unique subclasses: {len(subclass_grouped)}")


Subclass Level Grouping:
  Subclass_Code                                      Subclass_Name  \
0      01.1.1.1                                       Cereals (ND)   
1      01.1.1.2                              Flour of cereals (ND)   
2      01.1.1.3                     Bread and bakery products (ND)   
3      01.1.1.4                             Breakfast cereals (ND)   
4      01.1.1.5  Macaroni, noodles, couscous and similar pasta ...   

                                               Items  Aggregated_Index  \
0  [01.1.1.1.1.01, 01.1.1.1.1.02, 01.1.1.1.1.03, ...          3.023830   
1      [01.1.1.2.1.01, 01.1.1.2.1.02, 01.1.1.2.1.03]          0.812152   
2  [01.1.1.3.1.01, 01.1.1.3.1.02, 01.1.1.3.1.03, ...          1.479720   
3      [01.1.1.4.1.01, 01.1.1.4.1.03, 01.1.1.4.1.02]          0.194872   
4                     [01.1.1.5.1.01, 01.1.1.5.1.02]          0.260873   

                                              States  
0  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  

In [136]:
# Group by Class Code - SUM the shares
class_grouped = item_weights_df.groupby('Class_Code').agg({
    'Class_Name': 'first',
    'Subclass_Code': lambda x: x.unique().tolist(),  # Subclasses under this class
    'Share_in_All_India': 'sum',  # Sum shares across all states
    'State': lambda x: x.unique().tolist()  # States with this class
}).reset_index()

class_grouped.columns = ['Class_Code', 'Class_Name', 'Subclasses', 'Aggregated_Index', 'States']
print("Class Level Grouping:")
print(class_grouped.head())
print(f"\nTotal unique classes: {len(class_grouped)}")


Class Level Grouping:
  Class_Code                                Class_Name  \
0     01.1.1          Cereals and cereal products (ND)   
1     01.1.2       Meat, fresh, chilled or frozen (ND)   
2     01.1.3               Fish and other seafood (ND)   
3     01.1.4  Milk, other dairy products and eggs (ND)   
4     01.1.5                        Oils and fats (ND)   

                                          Subclasses  Aggregated_Index  \
0  [01.1.1.1, 01.1.1.2, 01.1.1.3, 01.1.1.4, 01.1....          5.945631   
1                                         [01.1.2.2]          2.515552   
2                                         [01.1.3.1]          1.022319   
3  [01.1.4.1, 01.1.4.3, 01.1.4.5, 01.1.4.6, 01.1....          7.282137   
4                     [01.1.5.1, 01.1.5.2, 01.1.5.3]          2.742921   

                                              States  
0  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
1  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
2  [1.0, 2.0, 3.0, 

In [137]:
# Group by Group Code - SUM the shares
group_grouped = item_weights_df.groupby('Group_Code').agg({
    'Group_Name': 'first',
    'Class_Code': lambda x: x.unique().tolist(),  # Classes under this group
    'Share_in_All_India': 'sum',  # Sum shares across all states
    'State': lambda x: x.unique().tolist()  # States with this group
}).reset_index()

group_grouped.columns = ['Group_Code', 'Group_Name', 'Classes', 'Aggregated_Index', 'States']
print("Group Level Grouping:")
print(group_grouped.head())
print(f"\nTotal unique groups: {len(group_grouped)}")


Group Level Grouping:
   Group_Code                                      Group_Name  \
0         1.1                                            Food   
1         1.2                                       Beverages   
2         1.3  Services for processing primary goods for food   
3         2.1                             Alcoholic beverages   
4         2.3                                Paan and tobacco   

                                             Classes  Aggregated_Index  \
0  [01.1.1, 01.1.2, 01.1.3, 01.1.4, 01.1.5, 01.1....         34.777703   
1  [01.2.1, 01.2.2, 01.2.3, 01.2.9, 01.2.4, 01.2....          1.772434   
2                                           [01.3.0]          0.202970   
3                   [02.1.1, 02.1.3, 02.1.9, 02.1.2]          1.445843   
4                                           [02.3.0]          1.543608   

                                              States  
0  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
1  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0

In [138]:
# Group by Division Code - SUM the shares
division_grouped = item_weights_df.groupby('Division_Code').agg({
    'Division_Name': 'first',
    'Group_Code': lambda x: x.unique().tolist(),  # Groups under this division
    'Share_in_All_India': 'sum',  # Sum shares across all states
    'State': lambda x: x.unique().tolist()  # States with this division
}).reset_index()

division_grouped.columns = ['Division_Code', 'Division_Name', 'Groups', 'Aggregated_Index', 'States']
print("Division Level Grouping:")
print(division_grouped.head())
print(f"\nTotal unique divisions: {len(division_grouped)}")


Division Level Grouping:
   Division_Code                                      Division_Name  \
0            1.0                                 Food and beverages   
1            2.0                      Paan, tobacco and intoxicants   
2            3.0                              Clothing and footwear   
3            4.0   Housing, water, electricity, gas and other fuels   
4            5.0  Furnishings, household equipment and routine h...   

                           Groups  Aggregated_Index  \
0                 [1.1, 1.2, 1.3]         36.753106   
1                      [2.1, 2.3]          2.989451   
2                      [3.1, 3.2]          6.383257   
3            [4.1, 4.3, 4.4, 4.5]         17.664573   
4  [5.1, 5.2, 5.3, 5.4, 5.5, 5.6]          4.469393   

                                              States  
0  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
1  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...  
2  [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...

In [139]:
# ===== EXPORT HIERARCHY TO CSV AND JSON =====
import os
import sys

# Add current directory to path to import export_hierarchy module
sys.path.insert(0, os.getcwd())

from export_hierarchy import CPIHierarchyExporter

# Create weights directory if it doesn't exist
weights_dir = 'weights'
if not os.path.exists(weights_dir):
    os.makedirs(weights_dir)

# Initialize exporter and export all data
exporter = CPIHierarchyExporter(output_dir=weights_dir)

# Export all hierarchy levels to CSV and JSON
summary = exporter.export_all(
    item_weights_df=item_weights_df,
    division_grouped=division_grouped,
    group_grouped=group_grouped,
    class_grouped=class_grouped,
    subclass_grouped=subclass_grouped,
    item_grouped=item_grouped
)

print("\n✓ Export completed successfully!")
print(f"\nFiles created in '{weights_dir}/' directory:")
print(f"  CSV Files:")
print(f"    - divisions.csv ({summary['divisions']} rows)")
print(f"    - groups.csv ({summary['groups']} rows)")
print(f"    - classes.csv ({summary['classes']} rows)")
print(f"    - subclasses.csv ({summary['subclasses']} rows)")
print(f"    - items.csv ({summary['items']} rows)")
print(f"  JSON File:")
print(f"    - cpi_hierarchy.json (complete 5-level hierarchy)")



EXPORTING CSV FILES
✓ divisions.csv - 12 rows
✓ groups.csv - 43 rows
✓ classes.csv - 92 rows
✓ groups.csv - 43 rows
✓ classes.csv - 92 rows
✓ subclasses.csv - 162 rows
✓ subclasses.csv - 162 rows
✓ items.csv - 358 rows

EXPORTING JSON FILE
✓ items.csv - 358 rows

EXPORTING JSON FILE
✓ cpi_hierarchy.json - 12 divisions, 92832 items

EXPORT SUMMARY

CSV Files:
  • divisions.csv: 12 divisions
  • groups.csv: 43 groups
  • classes.csv: 92 classes
  • subclasses.csv: 162 subclasses
  • items.csv: 358 items

JSON File:
  • cpi_hierarchy.json: Complete hierarchical structure
    - Divisions: 12
    - Total Weight: 100.00

Output Directory: /Users/nakshatragupta/Documents/Coding/inflation-2024-Series/weights

✓ Export completed successfully!

Files created in 'weights/' directory:
  CSV Files:
    - divisions.csv (12 rows)
    - groups.csv (43 rows)
    - classes.csv (92 rows)
    - subclasses.csv (162 rows)
    - items.csv (358 rows)
  JSON File:
    - cpi_hierarchy.json (complete 5-level hi

In [140]:
# ===== INDEX SIMULATION USING LASPEYRES METHOD =====
# Base Year: 2024 (Index = 100)
# Using 2024 weights (Aggregated_Index) as fixed weights for Laspeyres index

import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Parameters
base_year = 2024
num_periods = 24  # Simulate 24 months (2 years from base)
months = pandas.date_range(start='2024-01', periods=num_periods, freq='M')

# Simulate monthly inflation rates for each item (in percentage)
# Using realistic ranges: 0.1% to 1.5% monthly inflation
inflation_rates = np.random.uniform(0.1, 1.5, size=len(item_grouped))

# Create a dataframe to store price relatives for each item over time
# Price relative = (Current Price / Base Price) * 100
price_relatives = pandas.DataFrame()
price_relatives['Item_Code'] = item_grouped['Item_Code'].values
price_relatives['Item_Name'] = item_grouped['Item_Name'].values
price_relatives['Weight'] = item_grouped['Aggregated_Index'].values

# Generate price relatives for each month
# Starting from 100 (base) and compounding monthly inflation
for i, month in enumerate(months):
    cumulative_inflation = (1 + inflation_rates / 100) ** (i + 1)
    price_relatives[f'Price_Relative_{month.strftime("%Y-%m")}'] = cumulative_inflation * 100

print("Sample Price Relatives for Items (first 5 items):")
print(price_relatives.iloc[:5, :7])


Sample Price Relatives for Items (first 5 items):
       Item_Code               Item_Name    Weight  Price_Relative_2024-01  \
0  01.1.1.1.1.01                    Rice  2.013186              100.624356   
1  01.1.1.1.1.02                   Wheat  0.767549              101.431000   
2  01.1.1.1.1.03  Jowar and its products  0.062233              101.124792   
3  01.1.1.1.1.04   Ragi and its products  0.029888              100.938122   
4  01.1.1.1.1.05  Bajra and its products  0.045496              100.318426   

   Price_Relative_2024-02  Price_Relative_2024-03  Price_Relative_2024-04  
0              101.252611              101.884787              102.520911  
1              102.882478              104.354726              105.848042  
2              102.262235              103.412472              104.575646  
3              101.885044              102.840850              103.805623  
4              100.637866              100.958323              101.279801  


In [141]:
# Calculate Laspeyres Index for All India
# Laspeyres = (Sum of (Price Relative * Weight)) / (Sum of Weights) * 100

laspeyres_indices = []
months_list = []

for i, month in enumerate(months):
    col_name = f'Price_Relative_{month.strftime("%Y-%m")}'
    
    # Laspeyres formula: Sum(Price_Relative * Weight) / Sum(Weights) * 100
    weighted_sum = (price_relatives['Price_Relative_' + month.strftime("%Y-%m")] * price_relatives['Weight']).sum()
    total_weight = price_relatives['Weight'].sum()
    laspeyres_index = (weighted_sum / total_weight)
    
    laspeyres_indices.append(laspeyres_index)
    months_list.append(month)

# Create All India Index DataFrame
all_india_index = pandas.DataFrame({
    'Month': months_list,
    'Month_Str': [m.strftime("%Y-%m") for m in months_list],
    'Laspeyres_Index': laspeyres_indices,
    'YoY_Change_%': [((laspeyres_indices[i] - 100) / 100 * 100) if i == 0 else 
                      ((laspeyres_indices[i] - 100) / 100 * 100) for i in range(len(laspeyres_indices))]
})

print("All India Laspeyres Index (2024=100):")
print(all_india_index.head(10))
print("\n...")
print(all_india_index.tail(5))


All India Laspeyres Index (2024=100):
       Month Month_Str  Laspeyres_Index  YoY_Change_%
0 2024-01-31   2024-01       100.806244      0.806244
1 2024-02-29   2024-02       101.620547      1.620547
2 2024-03-31   2024-03       102.442998      2.442998
3 2024-04-30   2024-04       103.273688      3.273688
4 2024-05-31   2024-05       104.112707      4.112707
5 2024-06-30   2024-06       104.960148      4.960148
6 2024-07-31   2024-07       105.816103      5.816103
7 2024-08-31   2024-08       106.680667      6.680667
8 2024-09-30   2024-09       107.553935      7.553935
9 2024-10-31   2024-10       108.436005      8.436005

...
        Month Month_Str  Laspeyres_Index  YoY_Change_%
19 2025-08-31   2025-08       117.762789     17.762789
20 2025-09-30   2025-09       118.748368     18.748368
21 2025-10-31   2025-10       119.743997     19.743997
22 2025-11-30   2025-11       120.749789     20.749789
23 2025-12-31   2025-12       121.765858     21.765858


In [142]:
# Calculate Laspeyres Indices at different hierarchy levels

# 1. Division Level Indices
division_indices = []
for _, division_row in division_grouped.iterrows():
    division_code = division_row['Division_Code']
    division_name = division_row['Division_Name']
    division_weight = division_row['Aggregated_Index']
    
    # Get all items in this division
    items_in_division = item_weights_df[item_weights_df['Division_Code'] == division_code]
    item_codes = items_in_division['Item_Code'].unique()
    
    # Calculate indices for this division
    division_monthly_indices = []
    for i, month in enumerate(months):
        col_name = f'Price_Relative_{month.strftime("%Y-%m")}'
        weighted_sum = price_relatives[price_relatives['Item_Code'].isin(item_codes)][col_name].mul(
            price_relatives[price_relatives['Item_Code'].isin(item_codes)]['Weight']
        ).sum()
        total_weight = price_relatives[price_relatives['Item_Code'].isin(item_codes)]['Weight'].sum()
        
        if total_weight > 0:
            laspeyres = (weighted_sum / total_weight)
        else:
            laspeyres = 100
        division_monthly_indices.append(laspeyres)
    
    division_indices.append({
        'Division_Code': division_code,
        'Division_Name': division_name,
        'Weight': division_weight,
        'Monthly_Indices': division_monthly_indices
    })

print("Division Level Indices (Sample - First Division):")
print(f"Division: {division_indices[0]['Division_Name']}")
print(f"Indices: {division_indices[0]['Monthly_Indices'][:6]}")


Division Level Indices (Sample - First Division):
Division: Food and beverages
Indices: [100.66314810541772, 101.33240400080173, 102.00783429543894, 102.68950639266822, 103.37748849984779, 104.07184963845172]


In [143]:
# Create comprehensive index output files

# 1. Save All India Index
all_india_index.to_excel(f'{weights_dir}/all_india_laspeyres_index.xlsx', index=False)

# 2. Create Division Level Indices DataFrame
division_indices_df = pandas.DataFrame()
for div in division_indices:
    row_data = {
        'Division_Code': div['Division_Code'],
        'Division_Name': div['Division_Name'],
        'Weight': div['Weight']
    }
    for j, month in enumerate(months):
        row_data[months[j].strftime("%Y-%m")] = div['Monthly_Indices'][j]
    division_indices_df = pandas.concat([division_indices_df, pandas.DataFrame([row_data])], ignore_index=True)

division_indices_df.to_excel(f'{weights_dir}/division_laspeyres_indices.xlsx', index=False)

# 3. Item level indices
item_indices_df = price_relatives.copy()
item_indices_df['Weight'] = item_indices_df['Weight']

item_indices_df.to_excel(f'{weights_dir}/item_laspeyres_indices.xlsx', index=False)

print("✓ Index simulation completed and saved!")
print(f"\nFiles saved to '{weights_dir}' directory:")
print(f"  1. all_india_laspeyres_index.xlsx - All India Index (2024=100)")
print(f"  2. division_laspeyres_indices.xlsx - Division-level Indices")
print(f"  3. item_laspeyres_indices.xlsx - Item-level Price Relatives")
print(f"\nPeriod: {months[0].strftime('%B %Y')} to {months[-1].strftime('%B %Y')}")
print(f"Base Year: 2024 (Index = 100)")
print(f"Methodology: Laspeyres Index with 2024 weights")


✓ Index simulation completed and saved!

Files saved to 'weights' directory:
  1. all_india_laspeyres_index.xlsx - All India Index (2024=100)
  2. division_laspeyres_indices.xlsx - Division-level Indices
  3. item_laspeyres_indices.xlsx - Item-level Price Relatives

Period: January 2024 to December 2025
Base Year: 2024 (Index = 100)
Methodology: Laspeyres Index with 2024 weights


In [144]:
# ===== CREATE JSON CONFIGURATION FOR CPI VARIANTS =====
import json

# Create hierarchical structure JSON from our data
# This will have Division -> Group -> Class -> Items with Include/Exclude toggles

hierarchy_json = {
    "metadata": {
        "base_year": 2024,
        "total_items": len(item_grouped),
        "total_weight": float(item_grouped['Aggregated_Index'].sum()),
        "last_updated": pandas.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
    },
    "divisions": []
}

# Build the hierarchy - normalize all codes to strings for consistent lookup
for _, div_row in division_grouped.iterrows():
    division_code = str(div_row['Division_Code']).strip()
    division_name = div_row['Division_Name']
    division_weight = float(div_row['Aggregated_Index'])
    
    # Get all groups in this division - normalize comparison
    groups_in_div = item_weights_df[
        item_weights_df['Division_Code'].astype(str) == division_code
    ]['Group_Code'].unique()
    
    division_obj = {
        "Division_Code": division_code,
        "Division_Name": division_name,
        "Weight": division_weight,
        "Include": True,  # Default: include all
        "Groups": []
    }
    
    # Add groups
    for group_code in groups_in_div:
        group_code = str(group_code).strip()
        group_data = group_grouped[
            group_grouped['Group_Code'].astype(str) == group_code
        ]
        
        if len(group_data) == 0:
            continue
            
        group_data = group_data.iloc[0]
        group_name = group_data['Group_Name']
        group_weight = float(group_data['Aggregated_Index'])
        
        # Get all classes in this group
        classes_in_group = item_weights_df[
            item_weights_df['Group_Code'].astype(str) == group_code
        ]['Class_Code'].unique()
        
        group_obj = {
            "Group_Code": group_code,
            "Group_Name": group_name,
            "Weight": group_weight,
            "Include": True,
            "Classes": []
        }
        
        # Add classes
        for class_code in classes_in_group:
            class_code = str(class_code).strip()
            class_data = class_grouped[
                class_grouped['Class_Code'].astype(str) == class_code
            ]
            
            if len(class_data) == 0:
                continue
                
            class_data = class_data.iloc[0]
            class_name = class_data['Class_Name']
            class_weight = float(class_data['Aggregated_Index'])
            
            # Get items in this class
            items_in_class = item_weights_df[
                item_weights_df['Class_Code'].astype(str) == class_code
            ]['Item_Code'].unique()
            
            class_obj = {
                "Class_Code": class_code,
                "Class_Name": class_name,
                "Weight": class_weight,
                "Include": True,
                "Item_Count": len(items_in_class)
            }
            group_obj["Classes"].append(class_obj)
        
        division_obj["Groups"].append(group_obj)
    
    hierarchy_json["divisions"].append(division_obj)

# Save the hierarchy JSON
hierarchy_json_path = f'{weights_dir}/cpi_hierarchy.json'
with open(hierarchy_json_path, 'w') as f:
    json.dump(hierarchy_json, f, indent=2)

print(f"✓ Hierarchy JSON created: {hierarchy_json_path}")
print(f"\nStructure:")
print(f"  - Divisions: {len(hierarchy_json['divisions'])}")
print(f"  - Total Weight: {hierarchy_json['metadata']['total_weight']:.2f}")

# Print structure of first division
first_div = hierarchy_json['divisions'][0]
print(f"\nSample: {first_div['Division_Name']}")
print(f"  Groups: {len(first_div['Groups'])}")
for grp in first_div['Groups'][:2]:
    print(f"    - {grp['Group_Name']} ({len(grp['Classes'])} classes)")


✓ Hierarchy JSON created: weights/cpi_hierarchy.json

Structure:
  - Divisions: 12
  - Total Weight: 100.00

Sample: Food and beverages
  Groups: 3
    - Food (9 classes)
    - Beverages (7 classes)


In [145]:
# ===== FUNCTIONS TO CALCULATE CPI VARIANTS FROM JSON CONFIG =====

def get_selected_item_codes(hierarchy_config):
    """
    Extract item codes from hierarchy based on Include flags
    hierarchy_config: Dict with divisions/groups/classes and their Include flags
    Returns: List of Item_Codes to include
    """
    selected_items = set()
    
    for division in hierarchy_config['divisions']:
        if not division['Include']:
            continue
        
        div_code = str(division['Division_Code']).strip()
        
        for group in division['Groups']:
            if not group['Include']:
                continue
            
            group_code = str(group['Group_Code']).strip()
            
            for class_item in group['Classes']:
                if not class_item['Include']:
                    continue
                
                class_code = str(class_item['Class_Code']).strip()
                
                # Get items for this class - normalize comparison
                items = item_weights_df[
                    (item_weights_df['Division_Code'].astype(str) == div_code) &
                    (item_weights_df['Group_Code'].astype(str) == group_code) &
                    (item_weights_df['Class_Code'].astype(str) == class_code)
                ]['Item_Code'].unique()
                
                selected_items.update(items)
    
    return list(selected_items)

def calculate_cpi_index(item_codes, price_relatives_df, variant_name="CPI"):
    """
    Calculate Laspeyres index for selected items
    item_codes: List of item codes to include
    price_relatives_df: DataFrame with price relatives and weights
    Returns: Dict with index values for each month
    """
    # Filter price relatives to selected items
    selected_data = price_relatives_df[price_relatives_df['Item_Code'].isin(item_codes)].copy()
    
    if len(selected_data) == 0:
        print(f"Warning: No items found for {variant_name}")
        return None
    
    results = {
        'Variant': variant_name,
        'Items_Count': len(selected_data),
        'Total_Weight': float(selected_data['Weight'].sum()),
        'Monthly_Data': []
    }
    
    # Calculate index for each month
    for month in months:
        month_str = month.strftime("%Y-%m")
        col_name = f'Price_Relative_{month_str}'
        
        if col_name not in selected_data.columns:
            continue
        
        weighted_sum = (selected_data[col_name] * selected_data['Weight']).sum()
        total_weight = selected_data['Weight'].sum()
        index_value = weighted_sum / total_weight if total_weight > 0 else 100
        
        results['Monthly_Data'].append({
            'Month': month_str,
            'Index': float(index_value)
        })
    
    return results

def create_variant_comparison(variants_dict):
    """
    Create a comparison DataFrame from multiple CPI variants
    variants_dict: Dict of {variant_name: variant_results}
    Returns: DataFrame with all variants for comparison
    """
    comparison_data = {}
    
    for variant_name, variant_result in variants_dict.items():
        if variant_result is None:
            continue
        
        indices = [m['Index'] for m in variant_result['Monthly_Data']]
        comparison_data[variant_name] = indices
    
    # Add month column
    month_strs = [m.strftime("%Y-%m") for m in months]
    comparison_df = pandas.DataFrame({
        'Month': month_strs,
        **comparison_data
    })
    
    return comparison_df

print("✓ CPI calculation functions created:")
print("  - get_selected_item_codes()")
print("  - calculate_cpi_index()")
print("  - create_variant_comparison()")


✓ CPI calculation functions created:
  - get_selected_item_codes()
  - calculate_cpi_index()
  - create_variant_comparison()


In [146]:
# ===== CREATE AND TEST CPI VARIANTS FROM HEADLINE CPI =====
# Strategy: Start with Headline CPI, then create variants by excluding divisions

# Load the base hierarchy
with open(hierarchy_json_path, 'r') as f:
    base_hierarchy = json.load(f)

# Print available divisions for reference
print("=" * 80)
print("AVAILABLE DIVISIONS (for exclusion)")
print("=" * 80)
for i, div in enumerate(base_hierarchy['divisions']):
    print(f"{i+1}. {div['Division_Name']:50s} | Weight: {div['Weight']:6.2f}%")

print("\n" + "=" * 80)
print("CREATING CPI VARIANTS FROM HEADLINE")
print("=" * 80)

# Variant 1: Headline CPI (all divisions included)
headline_config = json.loads(json.dumps(base_hierarchy))  # Deep copy with all Include=True

# Variant 2: Core CPI - Exclude Food and Paan/tobacco
core_config = json.loads(json.dumps(base_hierarchy))
excluded_divs_core = ['Food and beverages', 'Paan, tobacco and intoxicants']
for division in core_config['divisions']:
    if division['Division_Name'] in excluded_divs_core:
        division['Include'] = False

# Variant 3: Ex-Food CPI
ex_food_config = json.loads(json.dumps(base_hierarchy))
for division in ex_food_config['divisions']:
    if division['Division_Name'] == 'Food and beverages':
        division['Include'] = False

# Variant 4: Ex-Housing CPI
ex_housing_config = json.loads(json.dumps(base_hierarchy))
for division in ex_housing_config['divisions']:
    if 'Housing' in division['Division_Name'] or 'electricity' in division['Division_Name'] or 'gas' in division['Division_Name']:
        division['Include'] = False

# Variant 5: Ex-Transport CPI
ex_transport_config = json.loads(json.dumps(base_hierarchy))
for division in ex_transport_config['divisions']:
    if division['Division_Name'] == 'Transport':
        division['Include'] = False

# Calculate indices for all variants
cpi_variants = {}
variant_metadata = {}

variants_to_calculate = {
    'Headline CPI': headline_config,
    'Core CPI (ex-Food & Tobacco)': core_config,
    'CPI ex-Food': ex_food_config,
    'CPI ex-Housing': ex_housing_config,
    'CPI ex-Transport': ex_transport_config
}

for variant_name, config in variants_to_calculate.items():
    selected_items = get_selected_item_codes(config)
    result = calculate_cpi_index(selected_items, price_relatives, variant_name)
    cpi_variants[variant_name] = result
    
    if result:
        # Calculate weight contribution
        included_weight = result['Total_Weight']
        weight_percent = (included_weight / 100) * 100  # % of original total weight
        
        variant_metadata[variant_name] = {
            'Items': result['Items_Count'],
            'Weight': included_weight,
            'Weight_Percent': weight_percent
        }
        
        print(f"\n{variant_name}:")
        print(f"  Items included: {result['Items_Count']}")
        print(f"  Total weight: {result['Total_Weight']:.2f} (out of 100)")
        print(f"  Coverage: {weight_percent:.1f}%")
        if result['Monthly_Data']:
            print(f"  Latest Index: {result['Monthly_Data'][-1]['Index']:.2f}")

# Create comparison DataFrame
comparison_df = create_variant_comparison(cpi_variants)

print("\n" + "=" * 80)
print("CPI VARIANTS COMPARISON (Laspeyres Index, Base 2024=100)")
print("=" * 80)
print(comparison_df.to_string(index=False))

# Save comparison to Excel
comparison_df.to_excel(f'{weights_dir}/cpi_variants_comparison.xlsx', index=False)

# Also save metadata
metadata_df = pandas.DataFrame(variant_metadata).T
metadata_df.to_excel(f'{weights_dir}/cpi_variants_metadata.xlsx')

print(f"\n✓ Comparison saved to: {weights_dir}/cpi_variants_comparison.xlsx")
print(f"✓ Metadata saved to: {weights_dir}/cpi_variants_metadata.xlsx")


AVAILABLE DIVISIONS (for exclusion)
1. Food and beverages                                 | Weight:  36.75%
2. Paan, tobacco and intoxicants                      | Weight:   2.99%
3. Clothing and footwear                              | Weight:   6.38%
4. Housing, water, electricity, gas and other fuels   | Weight:  17.66%
5. Furnishings, household equipment and routine household maintenance | Weight:   4.47%
6. Health                                             | Weight:   6.10%
7. Transport                                          | Weight:   8.80%
8. Information and communication                      | Weight:   3.61%
9. Recreation, sport and culture                      | Weight:   1.52%
10. Education services                                 | Weight:   3.33%
11. Restaurants and accommodation services             | Weight:   3.35%
12. Personal care, social protection and miscellaneous goods and services | Weight:   5.04%

CREATING CPI VARIANTS FROM HEADLINE

Headline CPI:
  Items in

In [147]:
# ===== ENHANCED: ITEM-LEVEL EXCLUSION CONFIGURATION =====
# This creates a comprehensive JSON config that allows exclusion at ALL levels:
# Division -> Group -> Class -> Item

# Build item-level hierarchy JSON
item_hierarchy_json = {
    "metadata": {
        "base_year": 2024,
        "total_items": len(item_grouped),
        "total_weight": float(item_grouped['Aggregated_Index'].sum()),
        "last_updated": pandas.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
        "description": "Hierarchical CPI structure with Include/Exclude at each level"
    },
    "divisions": []
}

# Build the complete hierarchy with items
for _, div_row in division_grouped.iterrows():
    division_code = str(div_row['Division_Code']).strip()
    division_name = div_row['Division_Name']
    division_weight = float(div_row['Aggregated_Index'])
    
    groups_in_div = item_weights_df[
        item_weights_df['Division_Code'].astype(str) == division_code
    ]['Group_Code'].unique()
    
    division_obj = {
        "Division_Code": division_code,
        "Division_Name": division_name,
        "Weight": division_weight,
        "Include": True,
        "Groups": []
    }
    
    for group_code in groups_in_div:
        group_code = str(group_code).strip()
        group_data = group_grouped[
            group_grouped['Group_Code'].astype(str) == group_code
        ]
        
        if len(group_data) == 0:
            continue
            
        group_data = group_data.iloc[0]
        group_name = group_data['Group_Name']
        group_weight = float(group_data['Aggregated_Index'])
        
        classes_in_group = item_weights_df[
            item_weights_df['Group_Code'].astype(str) == group_code
        ]['Class_Code'].unique()
        
        group_obj = {
            "Group_Code": group_code,
            "Group_Name": group_name,
            "Weight": group_weight,
            "Include": True,
            "Classes": []
        }
        
        for class_code in classes_in_group:
            class_code = str(class_code).strip()
            class_data = class_grouped[
                class_grouped['Class_Code'].astype(str) == class_code
            ]
            
            if len(class_data) == 0:
                continue
                
            class_data = class_data.iloc[0]
            class_name = class_data['Class_Name']
            class_weight = float(class_data['Aggregated_Index'])
            
            # Get items in this class
            items_in_class = item_weights_df[
                item_weights_df['Class_Code'].astype(str) == class_code
            ]
            
            class_obj = {
                "Class_Code": class_code,
                "Class_Name": class_name,
                "Weight": class_weight,
                "Include": True,
                "Items": []
            }
            
            # Add items
            for _, item_row in items_in_class.iterrows():
                item_code = str(item_row['Item_Code']).strip()
                item_name = item_row['Item_Name']
                item_weight = float(item_row['Share_in_All_India'])
                
                item_obj = {
                    "Item_Code": item_code,
                    "Item_Name": item_name,
                    "Weight": item_weight,
                    "Include": True
                }
                class_obj["Items"].append(item_obj)
            
            group_obj["Classes"].append(class_obj)
        
        division_obj["Groups"].append(group_obj)
    
    item_hierarchy_json["divisions"].append(division_obj)

# Save the enhanced hierarchy JSON
item_hierarchy_json_path = f'{weights_dir}/cpi_item_hierarchy.json'
with open(item_hierarchy_json_path, 'w') as f:
    json.dump(item_hierarchy_json, f, indent=2)

print(f"✓ Item-level hierarchy JSON created: {item_hierarchy_json_path}")
print(f"\nStructure Summary:")
print(f"  - Divisions: {len(item_hierarchy_json['divisions'])}")
print(f"  - Total Items: {sum(len(c['Items']) for d in item_hierarchy_json['divisions'] for g in d['Groups'] for c in g['Classes'])}")
print(f"  - Total Weight: {item_hierarchy_json['metadata']['total_weight']:.2f}")

# Show sample structure
first_div = item_hierarchy_json['divisions'][0]
first_group = first_div['Groups'][0] if first_div['Groups'] else None
if first_group:
    first_class = first_group['Classes'][0] if first_group['Classes'] else None
    if first_class:
        print(f"\nSample Structure:")
        print(f"  Division: {first_div['Division_Name']}")
        print(f"  └─ Group: {first_group['Group_Name']}")
        print(f"     └─ Class: {first_class['Class_Name']}")
        print(f"        └─ Items: {len(first_class['Items'])} items")
        if first_class['Items']:
            print(f"           Sample: {first_class['Items'][0]['Item_Name']} (Weight: {first_class['Items'][0]['Weight']:.4f})")


✓ Item-level hierarchy JSON created: weights/cpi_item_hierarchy.json

Structure Summary:
  - Divisions: 12
  - Total Items: 23208
  - Total Weight: 100.00

Sample Structure:
  Division: Food and beverages
  └─ Group: Food
     └─ Class: Cereals and cereal products (ND)
        └─ Items: 1580 items
           Sample: Rice (Weight: 0.0212)


In [148]:
# ===== ENHANCED: ITEM-LEVEL CPI CALCULATION =====

def get_selected_items_enhanced(hierarchy_config):
    """
    Extract item codes from hierarchy based on Include flags at ALL levels
    Hierarchy_config: Dict with divisions/groups/classes/items and their Include flags
    Returns: List of Item_Codes to include and their weights
    """
    selected_items = []
    
    for division in hierarchy_config['divisions']:
        if not division.get('Include', True):
            continue
        
        for group in division.get('Groups', []):
            if not group.get('Include', True):
                continue
            
            for class_item in group.get('Classes', []):
                if not class_item.get('Include', True):
                    continue
                
                # Item-level exclusion
                for item in class_item.get('Items', []):
                    if item.get('Include', True):
                        selected_items.append({
                            'Item_Code': item['Item_Code'],
                            'Item_Name': item['Item_Name'],
                            'Weight': item['Weight'],
                            'Division': division['Division_Name'],
                            'Group': group['Group_Name'],
                            'Class': class_item['Class_Name']
                        })
    
    return selected_items

def calculate_cpi_index_enhanced(selected_items_list, price_relatives_df, variant_name="CPI"):
    """
    Calculate Laspeyres index for selected items (with item-level granularity)
    selected_items_list: List of dicts with Item_Code and Weight
    price_relatives_df: DataFrame with price relatives and weights
    Returns: Dict with index values for each month
    """
    if len(selected_items_list) == 0:
        print(f"Warning: No items found for {variant_name}")
        return None
    
    # Get item codes from selection
    item_codes = [item['Item_Code'] for item in selected_items_list]
    
    # Filter price relatives to selected items
    selected_data = price_relatives_df[price_relatives_df['Item_Code'].isin(item_codes)].copy()
    
    if len(selected_data) == 0:
        print(f"Warning: No items found for {variant_name}")
        return None
    
    total_weight = selected_data['Weight'].sum()
    
    results = {
        'Variant': variant_name,
        'Items_Count': len(selected_data),
        'Total_Weight': float(total_weight),
        'Items_Excluded': len(price_relatives_df) - len(selected_data),
        'Monthly_Data': []
    }
    
    # Calculate index for each month
    for month in months:
        month_str = month.strftime("%Y-%m")
        col_name = f'Price_Relative_{month_str}'
        
        if col_name not in selected_data.columns:
            continue
        
        weighted_sum = (selected_data[col_name] * selected_data['Weight']).sum()
        index_value = weighted_sum / total_weight if total_weight > 0 else 100
        
        # Calculate MoM change
        if len(results['Monthly_Data']) > 0:
            prev_index = results['Monthly_Data'][-1]['Index']
            mom_change = ((index_value - prev_index) / prev_index) * 100
        else:
            mom_change = 0.0
        
        results['Monthly_Data'].append({
            'Month': month_str,
            'Index': float(index_value),
            'MoM_Change_%': float(mom_change)
        })
    
    return results

# Test the enhanced functions
print("✓ Enhanced CPI calculation functions created:")
print("  - get_selected_items_enhanced()")
print("  - calculate_cpi_index_enhanced()")
print("\nThese functions support exclusion at ALL hierarchy levels:")
print("  • Division level")
print("  • Group level")
print("  • Class level")
print("  • Item level (NEW!)")


✓ Enhanced CPI calculation functions created:
  - get_selected_items_enhanced()
  - calculate_cpi_index_enhanced()

These functions support exclusion at ALL hierarchy levels:
  • Division level
  • Group level
  • Class level
  • Item level (NEW!)


# Data Templates for Real CPI Data

## Template 1: Weights Data Format (Replace CPI_2024_Weights.xlsx)

Your Excel file should have the following structure with 3 sheets:

### Sheet 5.3d: Item-Level Weights
| Item_Code | Item_Name | Subclass_Code | Subclass_Name | Class_Code | Class_Name | Group_Code | Group_Name | Division_Code | Division_Name | State | Share_in_All_India |
|-----------|-----------|---------------|---------------|-----------|-----------|-----------|-----------|---------------|---------------|-------|-------------------|
| 01.1.1.1 | Rice | 01.1.1 | Cereals | 01.1 | Food | 01 | Food Group | 01 | Food and beverages | All-India | 2.45 |
| 01.1.1.2 | Wheat | 01.1.1 | Cereals | 01.1 | Food | 01 | Food Group | 01 | Food and beverages | All-India | 1.23 |

**Key Requirements:**
- Codes should be hierarchical (e.g., 01, 01.1, 01.1.1, 01.1.1.1)
- Share_in_All_India should sum to 100 across all items
- State column can be individual states or "All-India"

### Sheet 5.3c: State-Wise Group Weights
| Division_Code | Division_Name | Group_Code | Group_Name | State | Share |
|---------------|---------------|-----------|-----------|-------|--------|
| 01 | Food and beverages | 01 | Food Group | Tamil Nadu | 2.10 |

### Sheet 5.3b: Group-Level Weights (Optional)
| Division_Code | Division_Name | Group_Code | Group_Name | Share |
|---------------|---------------|-----------|-----------|--------|
| 01 | Food and beverages | 01 | Food Group | 25.30 |

---

## Template 2: Price Data Format

Create price data in one of these formats:

### Option A: Monthly Price Relatives (Recommended)
File: `price_relatives_data.xlsx`

| Item_Code | Item_Name | 2024-01 | 2024-02 | 2024-03 | ... | 2025-12 |
|-----------|-----------|---------|---------|---------|-----|---------|
| 01.1.1.1 | Rice | 100.00 | 100.45 | 100.82 | ... | 115.32 |
| 01.1.1.2 | Wheat | 100.00 | 100.23 | 100.58 | ... | 112.45 |

Where each cell contains the price relative (Current Price / Base Year Price) × 100

### Option B: Absolute Prices
File: `item_prices.xlsx`

| Item_Code | Item_Name | Date | Price | Unit |
|-----------|-----------|------|-------|------|
| 01.1.1.1 | Rice | 2024-01-31 | 50.00 | Per KG |
| 01.1.1.1 | Rice | 2024-02-29 | 50.23 | Per KG |

The system will calculate price relatives automatically

### Option C: Inflation Rates (Simplest)
File: `inflation_rates.xlsx`

| Item_Code | Item_Name | Monthly_Inflation_% | Notes |
|-----------|-----------|---------------------|-------|
| 01.1.1.1 | Rice | 0.45 | Average monthly inflation |
| 01.1.1.2 | Wheat | 0.23 | Average monthly inflation |

The system will compound these to create price relatives

---

## Template 3: Real Data Loading Script

Once you have real data, use this code:


In [149]:
# REAL DATA LOADING - Option A: Monthly Price Relatives

def load_real_data_option_a(weights_file, prices_file):
    """
    Load real weights and price relatives data
    
    Args:
        weights_file: Path to weights Excel file (with sheets 5.3d, 5.3c, 5.3b)
        prices_file: Path to price relatives file
    """
    print("Loading real data - Option A (Price Relatives)...")
    
    # Load weights
    weights_df = pandas.read_excel(weights_file, sheet_name='5.3d', header=3)
    weights_df.columns = weights_df.columns.str.strip().str.replace('*', '', regex=False).str.replace(' ', '_')
    
    # Load price relatives
    price_relatives_df = pandas.read_excel(prices_file)
    price_relatives_df['Item_Code'] = price_relatives_df['Item_Code'].astype(str).str.strip()
    
    # Aggregate weights
    item_grouped = weights_df.groupby('Item_Code').agg({
        'Item_Name': 'first',
        'Share_in_All_India': 'sum',
        'State': lambda x: x.unique().tolist()
    }).reset_index()
    item_grouped.columns = ['Item_Code', 'Item_Name', 'Weight', 'States']
    
    # Match weights with price relatives
    price_relatives_df = price_relatives_df.merge(
        item_grouped[['Item_Code', 'Weight']],
        on='Item_Code',
        how='inner'
    )
    
    print(f"✓ Loaded {len(price_relatives_df)} items")
    print(f"✓ Total weight: {price_relatives_df['Weight'].sum():.2f}")
    print(f"✓ Date range: {price_relatives_df.columns[2]} to {price_relatives_df.columns[-2]}")
    
    return price_relatives_df, weights_df

# REAL DATA LOADING - Option B: Absolute Prices

def load_real_data_option_b(weights_file, prices_file):
    """
    Load real weights and absolute price data, convert to price relatives
    
    Args:
        weights_file: Path to weights Excel file
        prices_file: Path to absolute prices file with columns: Item_Code, Item_Name, Date, Price, Unit
    """
    print("Loading real data - Option B (Absolute Prices)...")
    
    # Load weights
    weights_df = pandas.read_excel(weights_file, sheet_name='5.3d', header=3)
    weights_df.columns = weights_df.columns.str.strip().str.replace('*', '', regex=False).str.replace(' ', '_')
    
    # Load prices
    prices_df = pandas.read_excel(prices_file)
    prices_df['Date'] = pandas.to_datetime(prices_df['Date'])
    prices_df['Item_Code'] = prices_df['Item_Code'].astype(str).str.strip()
    
    # Calculate base price (first date) for each item
    base_prices = prices_df.groupby('Item_Code')['Price'].first().reset_index()
    base_prices.columns = ['Item_Code', 'Base_Price']
    
    # Merge and calculate price relatives
    prices_df = prices_df.merge(base_prices, on='Item_Code')
    prices_df['Price_Relative'] = (prices_df['Price'] / prices_df['Base_Price']) * 100
    
    # Pivot to get monthly columns
    prices_df['Month'] = prices_df['Date'].dt.strftime('%Y-%m')
    price_relatives_df = prices_df.pivot_table(
        index='Item_Code',
        columns='Month',
        values='Price_Relative',
        aggfunc='first'
    ).reset_index()
    
    # Add item names and weights
    item_grouped = weights_df.groupby('Item_Code').agg({
        'Item_Name': 'first',
        'Share_in_All_India': 'sum'
    }).reset_index()
    item_grouped.columns = ['Item_Code', 'Item_Name', 'Weight']
    
    price_relatives_df = price_relatives_df.merge(
        item_grouped,
        on='Item_Code',
        how='inner'
    )
    
    print(f"✓ Loaded {len(price_relatives_df)} items")
    print(f"✓ Total weight: {price_relatives_df['Weight'].sum():.2f}")
    
    return price_relatives_df, weights_df

# REAL DATA LOADING - Option C: Inflation Rates

def load_real_data_option_c(weights_file, inflation_file, num_periods=24):
    """
    Load weights and inflation rates, generate price relatives
    
    Args:
        weights_file: Path to weights Excel file
        inflation_file: Path to inflation rates file (Item_Code, Inflation_%)
        num_periods: Number of months to simulate (default 24)
    """
    print("Loading real data - Option C (Inflation Rates)...")
    
    # Load weights
    weights_df = pandas.read_excel(weights_file, sheet_name='5.3d', header=3)
    weights_df.columns = weights_df.columns.str.strip().str.replace('*', '', regex=False).str.replace(' ', '_')
    
    # Load inflation rates
    inflation_df = pandas.read_excel(inflation_file)
    inflation_df['Item_Code'] = inflation_df['Item_Code'].astype(str).str.strip()
    
    # Aggregate weights
    item_grouped = weights_df.groupby('Item_Code').agg({
        'Item_Name': 'first',
        'Share_in_All_India': 'sum'
    }).reset_index()
    item_grouped.columns = ['Item_Code', 'Item_Name', 'Weight']
    
    # Merge with inflation
    item_grouped = item_grouped.merge(inflation_df, on='Item_Code', how='inner')
    
    # Generate price relatives by compounding inflation
    months_gen = pandas.date_range(start='2024-01', periods=num_periods, freq='M')
    
    price_relatives_df = pandas.DataFrame({
        'Item_Code': item_grouped['Item_Code'],
        'Item_Name': item_grouped['Item_Name'],
        'Weight': item_grouped['Weight']
    })
    
    for i, month in enumerate(months_gen):
        month_str = month.strftime("%Y-%m")
        cumulative_inflation = (1 + item_grouped['Monthly_Inflation_%'] / 100) ** (i + 1)
        price_relatives_df[month_str] = cumulative_inflation * 100
    
    print(f"✓ Loaded {len(price_relatives_df)} items")
    print(f"✓ Total weight: {price_relatives_df['Weight'].sum():.2f}")
    print(f"✓ Generated {num_periods} months of price relatives")
    
    return price_relatives_df, weights_df

print("✓ Real data loading functions created:")
print("  - load_real_data_option_a() - For pre-calculated price relatives")
print("  - load_real_data_option_b() - For absolute prices")
print("  - load_real_data_option_c() - For inflation rates")


✓ Real data loading functions created:
  - load_real_data_option_a() - For pre-calculated price relatives
  - load_real_data_option_b() - For absolute prices
  - load_real_data_option_c() - For inflation rates


In [150]:
# ===== EXAMPLE VARIANTS WITH ITEM-LEVEL EXCLUSIONS =====

# Load the item-level hierarchy
with open(item_hierarchy_json_path, 'r') as f:
    base_item_hierarchy = json.load(f)

print("=" * 80)
print("CREATING CPI VARIANTS WITH ITEM-LEVEL CONTROL")
print("=" * 80)

# Variant 1: Headline CPI (all items included)
headline_item_config = json.loads(json.dumps(base_item_hierarchy))

# Variant 2: Core CPI (ex-Food and Paan/Tobacco)
core_item_config = json.loads(json.dumps(base_item_hierarchy))
for division in core_item_config['divisions']:
    if division['Division_Name'] in ['Food and beverages', 'Paan, tobacco and intoxicants']:
        division['Include'] = False

# Variant 3: Custom - Exclude specific items
# Example: Exclude seasonal vegetables, petroleum products
custom_config = json.loads(json.dumps(base_item_hierarchy))
excluded_item_patterns = ['Seasonal vegetables', 'Petrol', 'Diesel']

for division in custom_config['divisions']:
    for group in division['Groups']:
        for class_item in group['Classes']:
            for item in class_item['Items']:
                for pattern in excluded_item_patterns:
                    if pattern.lower() in item['Item_Name'].lower():
                        item['Include'] = False
                        print(f"  Excluded: {item['Item_Name']}")

# Calculate variants
cpi_item_variants = {}

variants_item_to_calc = {
    'Headline CPI': headline_item_config,
    'Core CPI (ex-Food & Tobacco)': core_item_config,
    'Custom CPI (ex-Seasonal Items & Fuel)': custom_config
}

for variant_name, config in variants_item_to_calc.items():
    selected_items = get_selected_items_enhanced(config)
    result = calculate_cpi_index_enhanced(selected_items, price_relatives, variant_name)
    cpi_item_variants[variant_name] = result
    
    if result:
        print(f"\n{variant_name}:")
        print(f"  Items included: {result['Items_Count']}")
        print(f"  Items excluded: {result['Items_Excluded']}")
        print(f"  Total weight: {result['Total_Weight']:.2f}")
        if result['Monthly_Data']:
            print(f"  Latest Index: {result['Monthly_Data'][-1]['Index']:.2f}")
            print(f"  MoM Change: {result['Monthly_Data'][-1]['MoM_Change_%']:.4f}%")

# Create comparison with MoM changes
comparison_data = {'Month': []}
for month in months:
    comparison_data['Month'].append(month.strftime("%Y-%m"))

for variant_name, result in cpi_item_variants.items():
    if result:
        comparison_data[f'{variant_name}'] = [m['Index'] for m in result['Monthly_Data']]
        comparison_data[f'{variant_name} MoM%'] = [m['MoM_Change_%'] for m in result['Monthly_Data']]

comparison_enhanced_df = pandas.DataFrame(comparison_data)

print("\n" + "=" * 80)
print("CPI VARIANTS WITH MOM CHANGES")
print("=" * 80)
print(comparison_enhanced_df.to_string(index=False))

# Save enhanced comparison
comparison_enhanced_df.to_excel(f'{weights_dir}/cpi_variants_with_mom.xlsx', index=False)
print(f"\n✓ Comparison saved to: {weights_dir}/cpi_variants_with_mom.xlsx")


CREATING CPI VARIANTS WITH ITEM-LEVEL CONTROL
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Excluded: Diesel
  Excluded: Petrol
  Ex