# 02 Exploratory Data Analysis

This notebook generates descriptive statistics and plots to explore the merged panel dataset.


In [None]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the merged panel from disk; the 'date' column is parsed into datetimes
# at load time so it can be used directly on a time axis later on.
merged = pd.read_csv('../data/merged_panel.csv', parse_dates=['date'])

# Descriptive statistics for every column (include='all' keeps the
# non-numeric columns in the summary as well).
summary_stats = merged.describe(include='all')
display(summary_stats)

# Visualise missingness: build a boolean frame (True where a value is missing)
# and render it as a heatmap without a colour bar.
missing_mask = merged.isna()
missing_fig, missing_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(missing_mask, cbar=False, ax=missing_ax)
missing_ax.set_title('Missing Values Heatmap')
plt.show()

# Plot trends of bond yield and ND-GAIN over time for a sample of countries.
# Each country gets its own row: bond yield on the left y-axis, ND-GAIN on a
# twinned right y-axis, with all rows sharing the date axis.
# Missing country codes are dropped first: NaN never compares equal to itself,
# so a NaN "country" would select no rows and yield an empty panel.
sample_countries = merged['iso3c'].dropna().unique()[:5]  # first 5 countries for example

if len(sample_countries) == 0:
    # plt.subplots() rejects a grid with zero rows, so skip plotting entirely.
    print('No countries available to plot.')
else:
    # squeeze=False guarantees an array of Axes even when only one country is
    # present (otherwise a bare Axes is returned and cannot be iterated).
    fig, axes = plt.subplots(
        len(sample_countries), 1,
        figsize=(10, 3 * len(sample_countries)),
        sharex=True,
        squeeze=False,
    )
    axes = axes.ravel()  # (n, 1) grid -> flat sequence, one Axes per country

    for ax, country in zip(axes, sample_countries):
        # Sort chronologically so the line is drawn in date order rather than
        # in whatever order the rows happen to appear in the file.
        data = merged[merged['iso3c'] == country].sort_values('date')
        yield_lines = ax.plot(data['date'], data['bond_yield'], label='Bond Yield')
        ax.set_ylabel('Yield')
        ax2 = ax.twinx()
        gain_lines = ax2.plot(data['date'], data['nd_gain'], color='orange', label='ND-GAIN')
        ax2.set_ylabel('ND-GAIN')
        ax.set_title(f'Trends for {country}')
        # The two series live on different Axes, so merge their handles into a
        # single legend; it is attached to the twin so it is drawn on top.
        handles = yield_lines + gain_lines
        ax2.legend(handles, [h.get_label() for h in handles], loc='upper left')

    # Label the bottom primary Axes explicitly: the "current" Axes at this
    # point is the last twin, whose x-axis is hidden, so plt.xlabel() would
    # set a label that is never rendered.
    axes[-1].set_xlabel('Date')
    plt.tight_layout()
    plt.show()

# Correlation matrix
# Select every numeric column via the generic 'number' selector rather than
# listing float64/int64 by name, so columns stored at other numeric widths
# (e.g. float32, int32) are not silently left out.
numeric_cols = merged.select_dtypes(include='number').columns
correlation = merged[numeric_cols].corr()
plt.figure(figsize=(12,8))
# Correlations are bounded by [-1, 1]; pinning the colour range and centring
# the diverging colormap at 0 makes the neutral colour mean "no correlation"
# instead of whatever the midpoint of the observed values happens to be.
sns.heatmap(correlation, annot=False, cmap='coolwarm', vmin=-1, vmax=1, center=0)
plt.title('Correlation Matrix')
plt.show()
