# Exercise 1.5 | Panel Data (Wide Format)

In [None]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Base URL for the course data; each load below appends a CSV file name to it,
# so the trailing slash must stay.
file_path = "https://tayweid.github.io/econ-0150/parts/part-1-5/data/"

## Reshaping: Wide to Long and Back

Practice converting between wide and long formats using `melt()` and `pivot()`.

In [None]:
# Read the wide-format coffee table; the CSV's first column becomes the row index.
percap = pd.read_csv(
    file_path + 'Coffee_Per_Cap.csv',
    index_col=0,
)

# Preview the country code next to a handful of year columns (first five rows).
percap.loc[:, ['Code', '1999', '2004', '2009', '2014', '2019']].head(n=5)

In [None]:
# Wide -> long: `Code` is kept as the identifier column. Every other column is
# unpivoted: its header lands in `Year` and its cell value in `Consumption`.
# The original row index is not carried over into the melted frame.
percap_long = pd.melt(
    percap,
    id_vars=['Code'],
    var_name='Year',
    value_name='Consumption',
)

# Show the first ten long-format rows.
percap_long.head(n=10)

In [None]:
# Long -> wide: one row per `Code`, one column per distinct `Year` value,
# cells filled from `Consumption`. pivot() raises a ValueError if any
# (Code, Year) pair appears more than once.
percap_wide = pd.pivot(
    percap_long,
    index='Code',
    columns='Year',
    values='Consumption',
)

# Preview the rebuilt wide table.
percap_wide.head(n=5)

## Per Capita Transformation

Transform total GDP to GDP per capita to compare countries.

In [None]:
# Read the 2019 GDP / population table (default integer row index).
gdp_pop = pd.read_csv(
    file_path + 'GDP_Population_2019.csv',
)

# Preview the first five rows.
gdp_pop.head(n=5)

In [None]:
# Add a GDP-per-capita column, in dollars per person.
# Units as stated for this dataset: GDP in trillions, Population in millions.
#   dollars per person = (GDP * 1e12) / (Population * 1e6)
#                      =  GDP * 1e6  /  Population
# Re-running this cell is safe: it only reads `GDP` and `Population` and
# overwrites the derived column.
gdp_pop['GDP_per_capita'] = (
    gdp_pop['GDP'].mul(1e6).div(gdp_pop['Population'])
)

In [None]:
# Rank countries by GDP per capita, highest first, and show the top ten.
# The `GDP` column stays visible in the output, so the per-capita ranking can
# be read against total GDP row by row.
(
    gdp_pop
    .sort_values(by='GDP_per_capita', ascending=False)
    .head(n=10)
)

## Multi-Boxplots

Use boxplots to compare distributions of coffee consumption across years.

In [None]:
# Re-read the coffee table so this section starts from a fresh, unmodified
# frame; the CSV's first column is used as the row index.
percap = pd.read_csv(
    file_path + 'Coffee_Per_Cap.csv',
    index_col=0,
)

# Preview the country code with three of the year columns.
percap.loc[:, ['Code', '1999', '2009', '2019']].head(n=5)

In [None]:
# Wide Format Multi-Boxplot: one horizontal box per selected year column.
# The frame is passed as `data=` by keyword so the call does not depend on
# seaborn's positional-argument order, which has changed between releases.
# whis=(0, 100) puts the whiskers at the 0th and 100th percentiles (min and
# max), so no observations are drawn as separate outlier points.
sns.boxplot(
    data=percap[['1999', '2004', '2009', '2014', '2019']],
    orient='h',
    whis=(0, 100),
)
plt.xlabel('Coffee Consumption (kg per capita)')
# Trailing semicolon keeps the Text(...) repr out of the cell output.
plt.title('Coffee Importing Countries');

## Scatterplots: Comparing Years

Use scatterplots to track how individual countries changed between 1999 and 2019.

In [None]:
# Wide Format Scatterplot: one point per row of `percap`, 1999 on the x-axis
# and 2019 on the y-axis. The frame is passed as `data=` by keyword so the call
# does not depend on seaborn's positional-argument order, which has changed
# between releases.
sns.scatterplot(data=percap, x='1999', y='2019')
plt.xlabel('1999 Coffee Consumption (kg per capita)')
plt.ylabel('2019 Coffee Consumption (kg per capita)')
# Trailing semicolon keeps the Text(...) repr out of the cell output.
plt.title('Coffee Consumption Per Capita');

In [None]:
# Scatterplot with 45-degree line: points above the dashed line rose between
# 1999 and 2019, points below it fell.
# One shared upper bound drives both axis limits and the reference line, so
# the line always spans the visible square. Points above this bound would fall
# outside the plot.
axis_max = 25

# `data=` is passed by keyword so the call does not depend on seaborn's
# positional-argument order, which has changed between releases.
sns.scatterplot(data=percap, x='1999', y='2019')
plt.plot(
    [0, axis_max], [0, axis_max],
    color='red', linestyle='--', label='No change',
)
plt.xlabel('1999 Coffee Consumption (kg per capita)')
plt.ylabel('2019 Coffee Consumption (kg per capita)')
plt.title('Coffee Consumption Per Capita')
plt.xlim(0, axis_max)
plt.ylim(0, axis_max)
plt.grid(True)
# Trailing semicolon keeps the Legend repr out of the cell output.
plt.legend();