# Setup and Data

In [None]:
from inflation_analysis import calculate_price_indexes
from tabulate import tabulate
import matplotlib.pyplot as plt

# Parameters
start_year = 2019
end_year = 2022
# Number of categories kept in each "top"/"bottom" ranking further down.
# The ranking cell reads this name, so it must be defined before that cell
# runs; adjust the value to taste.
top_n = 5

# NOTE(review): absolute, machine-specific path -- edit when running elsewhere.
data_folder="/Users/roykisluk/Downloads/Consumer_Expenditure_Survey/"
# The first year of the range is used as the base year for the indexes.
base_year = start_year
# Inclusive range of years, iterated by the later cells.
years=range(start_year, end_year+1)

# The four results are used below as: per-category frames for the secondary and
# primary levels (columns 'Year', 'prodcode', 'description', 'price_index' are
# read from them), and a mapping-like object iterated with .items() for the
# yearly index table. combined_df is not used in the cells shown here.
combined_df, combined_secondary_df, combined_primary_df, yearly_price_index = calculate_price_indexes(start_year, end_year, base_year, cex_data_folder=data_folder)

# Output

## Yearly Price Indexes

In [None]:
# Render the yearly price indexes as a two-column grid table
print("Yearly Price Index:")
yearly_rows = yearly_price_index.items()  # (year, index) pairs, one per row
yearly_table = tabulate(
    yearly_rows,
    headers=["Year", "Price Index"],
    tablefmt="grid",
)
print(yearly_table)

## Top Increases and Decreases

In [None]:
# Rank categories by price index for each year and print the results.
# The top_n largest and smallest rows are stored per year (keyed by year) for
# both the secondary and primary frames; tables are printed for every year
# except the base year.
df_top_secondary = {}
df_bottom_secondary = {}
df_top_primary = {}
df_bottom_primary = {}
for year in years:
    # Restrict the secondary frame to the current year, then rank it.
    secondary_rows = combined_secondary_df[combined_secondary_df['Year'] == year]
    df_top_secondary[year] = secondary_rows.nlargest(top_n, 'price_index')
    df_bottom_secondary[year] = secondary_rows.nsmallest(top_n, 'price_index')

    # Same for the primary frame.
    primary_rows = combined_primary_df[combined_primary_df['Year'] == year]
    df_top_primary[year] = primary_rows.nlargest(top_n, 'price_index')
    df_bottom_primary[year] = primary_rows.nsmallest(top_n, 'price_index')

    # Rankings are still stored for the base year, but not printed.
    if year == base_year:
        continue

    print(f"Year: {year}")
    sections = (
        ("Top price indexes (Secondary):", df_top_secondary[year]),
        ("Bottom price indexes (Secondary):", df_bottom_secondary[year]),
        ("Top price indexes (Primary):", df_top_primary[year]),
        ("Bottom price indexes (Primary):", df_bottom_primary[year]),
    )
    for heading, table in sections:
        print(heading)
        print(tabulate(table, headers='keys', tablefmt='psql'))

In [None]:
# Extract prodcodes for the last year
last_year = end_year

# Product codes that ranked highest/lowest in the last year; the plots below
# follow these same codes across every year present in the combined frames.
top_secondary_prodcodes = df_top_secondary[last_year]['prodcode'].unique()
bottom_secondary_prodcodes = df_bottom_secondary[last_year]['prodcode'].unique()
top_primary_prodcodes = df_top_primary[last_year]['prodcode'].unique()
bottom_primary_prodcodes = df_bottom_primary[last_year]['prodcode'].unique()


def _plot_price_indexes_over_time(source_df, prodcodes, title):
    """Plot one price-index line per product code and show the figure.

    Args:
        source_df: Frame with 'prodcode', 'description', 'Year' and
            'price_index' columns.
        prodcodes: Product codes to keep; all other rows are ignored.
        title: Title of the figure.

    Returns:
        The rows of ``source_df`` whose 'prodcode' is in ``prodcodes``.
    """
    filtered_df = source_df[source_df['prodcode'].isin(prodcodes)]

    plt.figure(figsize=(12, 8))
    for prodcode in filtered_df['prodcode'].unique():
        df_prodcode = filtered_df[filtered_df['prodcode'] == prodcode]
        # Label with the first non-missing description. Taking row 0 blindly
        # would label the line "nan" whenever only the first row lacks one.
        known_descriptions = df_prodcode['description'].dropna()
        if known_descriptions.empty:
            description = f'Prodcode {prodcode}'
        else:
            description = known_descriptions.iloc[0]
        plt.plot(df_prodcode['Year'], df_prodcode['price_index'], label=description)
    plt.xlabel('Year')
    plt.ylabel('Price Index')
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.show()

    return filtered_df


# Plot top secondary categories price indexes over time
filtered_top_secondary_df = _plot_price_indexes_over_time(
    combined_secondary_df,
    top_secondary_prodcodes,
    'Top Secondary Categories Price Indexes Over Time',
)

# Plot bottom secondary categories price indexes over time
filtered_bottom_secondary_df = _plot_price_indexes_over_time(
    combined_secondary_df,
    bottom_secondary_prodcodes,
    'Bottom Secondary Categories Price Indexes Over Time',
)

# Plot top primary categories price indexes over time
filtered_top_primary_df = _plot_price_indexes_over_time(
    combined_primary_df,
    top_primary_prodcodes,
    'Top Primary Categories Price Indexes Over Time',
)