In [5]:
#Inflation Data Analysis

# Data imports & config
!pip install wbgapi
import wbgapi as wb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

%matplotlib inline
sns.set(style="darkgrid")
plt.rcParams['figure.figsize'] = (12,6)

# create outputs folder
os.makedirs("../outputs", exist_ok=True)


<class 'OSError'>: Not available

In [None]:
# Fetch data from World Bank using wbgapi
indicator = 'FP.CPI.TOTL.ZG'   # Inflation (annual %)
countries = ['USA', 'CHN', 'PAK']   # World Bank country codes

# Fetch yearly data for 1990-2024 (or latest available)
df_wb = wb.data.DataFrame(indicator, countries, time=range(1990, 2025))

# For a single indicator wbgapi returns one row per economy and one column per
# period (labelled like 'YR1990' by default), not MultiIndex columns. After
# transposing, rows are periods and columns are already the country codes.
df_wb = df_wb.T

# Pin the column order to `countries` so later cells never depend on the order
# in which the API happens to return economies. Raises KeyError if a requested
# country is missing from the response.
df_wb = df_wb[countries]

# Strip the 'YR' prefix (a no-op if the labels are already plain years) and
# store years as ints so they can be used as numeric x positions when plotting.
df_wb.index = df_wb.index.astype(str).str.replace('YR', '', regex=False).astype(int)
df_wb = df_wb.sort_index()
df_wb.head()


In [None]:
# Cleaning & preview
# Map country codes to readable names by key rather than by position, so a
# different column order in df_wb can never attach the wrong label to a series.
# rename() returns a new frame, so df_wb itself is left untouched.
COUNTRY_NAMES = {'USA': 'United States', 'CHN': 'China', 'PAK': 'Pakistan'}
df = df_wb.rename(columns=COUNTRY_NAMES).rename_axis(columns=None)

# Check missing data (reported before any filling so the gaps stay visible)
print("Shape:", df.shape)
print("Missing values by country:\n", df.isna().sum())

# Forward-fill small gaps, then back-fill anything still missing at the start.
# .ffill()/.bfill() replace fillna(method=...), which is deprecated in recent pandas.
df = df.ffill().bfill()

df.tail()


In [None]:
# Line chart: one line per column of `df`, drawn on an explicitly created axes
fig, ax = plt.subplots()
df.plot(ax=ax, title="Inflation (Annual %) — USA, China, Pakistan (1990–2024)")
ax.set(xlabel="Year", ylabel="Inflation (annual %)")
fig.tight_layout()
# Save before show() so the file is written from the fully laid-out figure
fig.savefig("../outputs/inflation_trends_line.png", dpi=150)
plt.show()

In [6]:
# Rolling mean & volatility over a 3-row window of `df`
# (the first two rows of each result are NaN because the window is incomplete)
rolling_window = df.rolling(window=3)
rolling_mean = rolling_window.mean()
rolling_std = rolling_window.std()

# Two stacked panels sharing the x axis: mean on top, standard deviation below
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(12, 10), sharex=True)
panels = [
    (rolling_mean, "3-Year Rolling Mean of Inflation", "Inflation (%)"),
    (rolling_std, "3-Year Rolling Std Dev (Volatility) of Inflation", "Std Dev (%)"),
]
for panel_ax, (panel_data, panel_title, panel_ylabel) in zip(ax, panels):
    panel_data.plot(ax=panel_ax, title=panel_title)
    panel_ax.set_ylabel(panel_ylabel)

fig.tight_layout()
fig.savefig("../outputs/rolling_stats.png", dpi=150)
plt.show()


<class 'NameError'>: name 'df' is not defined

In [None]:
# Average inflation bar chart: column-wise mean of `df`, highest bar first
avg_inflation = df.mean()
ranked_avg = avg_inflation.sort_values(ascending=False)
ax = ranked_avg.plot.bar(title="Average Inflation (1990–2024)")
ax.set_ylabel("Average annual inflation (%)")
ax.figure.tight_layout()
ax.figure.savefig("../outputs/avg_inflation_bar.png", dpi=150)
plt.show()


In [None]:
# Heatmap of year-to-year inflation; `df` is transposed so that its columns
# become the heatmap rows and its index runs along the x axis
fig, ax = plt.subplots(figsize=(10, 12))
sns.heatmap(
    df.T,
    annot=False,
    cmap="coolwarm",
    cbar_kws={'label': 'Inflation (%)'},
    ax=ax,
)
ax.set_title("Inflation heatmap (country rows, year columns)")
ax.set_xlabel("Year")
ax.set_ylabel("Country")
fig.tight_layout()
fig.savefig("../outputs/inflation_heatmap.png", dpi=150)
plt.show()

In [None]:
# Annotate events: overlay dashed vertical markers with rotated labels on the
# inflation lines
fig, ax = plt.subplots(figsize=(12, 6))
df.plot(ax=ax)

# (x position, label) for each marker; add a tuple here to annotate another event
events = [(2008, '2008 Crisis'), (2020, '2020 COVID')]
for event_year, event_label in events:
    ax.axvline(event_year, color='gray', linestyle='--', alpha=0.6)
    # Label sits slightly right of the marker, at 90% of the current upper y limit
    ax.text(event_year + 0.2, ax.get_ylim()[1] * 0.9, event_label, rotation=90)

ax.set_title("Inflation trends with major events annotated")
fig.tight_layout()
fig.savefig("../outputs/inflation_events.png", dpi=150)
plt.show()


In [None]:
# Summary stats and export
# describe() is transposed so each column of `df` becomes one row of statistics
summary = df.describe().T
summary['cv'] = summary['std'] / summary['mean']  # coefficient of variation
display(summary)

# Save summary and cleaned data to CSV.
# Create both target folders first: to_csv does not create missing directories,
# and "../data" is not created anywhere else in this notebook.
# `os` is imported in the setup cell at the top of the notebook.
os.makedirs("../data", exist_ok=True)
os.makedirs("../outputs", exist_ok=True)
df.to_csv("../data/inflation_wb_1990_2024.csv")
summary.to_csv("../outputs/inflation_summary_stats.csv")
