# World Bank Data Analysis

This notebook demonstrates how to fetch and analyze international economic data from the World Bank.

**Compatible with Google Colaboratory**

## Setup

In [None]:
# Uncomment the line below when running on Google Colab to install the
# packages this notebook imports (pandas, numpy, matplotlib, seaborn, wbdata).
# !pip install pandas numpy matplotlib seaborn wbdata

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Apply matplotlib's 'seaborn-v0_8-darkgrid' style sheet to the figures drawn below.
# NOTE(review): the 'seaborn-v0_8-' prefixed style name presumably requires a
# recent matplotlib release — confirm the minimum supported version.
plt.style.use('seaborn-v0_8-darkgrid')
# Make seaborn's 'husl' palette the default colour palette for subsequent plots.
sns.set_palette('husl')

## 1. Fetch GDP Data for Multiple Countries

In [None]:
import wbdata

# Countries to compare, identified by three-letter country code
countries = ['USA', 'CHN', 'JPN', 'DEU', 'GBR', 'IND', 'BRA']

# Display name for each country code above
country_names = {
    'USA': 'United States',
    'CHN': 'China',
    'JPN': 'Japan',
    'DEU': 'Germany',
    'GBR': 'United Kingdom',
    'IND': 'India',
    'BRA': 'Brazil'
}

# World Bank indicator code to download
indicator = 'NY.GDP.MKTP.CD'  # GDP (current US$)

# (start, end) window of observations to request
date_range = (datetime(2000, 1, 1), datetime(2023, 12, 31))

# Fetch data.
# Only the network call is guarded, so the connectivity hint is printed for
# fetch failures and nothing else. The exception is re-raised on purpose: the
# following cells all read `gdp_data`, and swallowing the error here would let
# them fail later with an unrelated NameError (or silently reuse a stale
# `gdp_data` left in the kernel by an earlier run).
try:
    gdp_data = wbdata.get_dataframe({indicator: 'GDP'}, country=countries, date=date_range)
except Exception as e:
    print(f"Error fetching data: {e}")
    print("Note: World Bank API may have connectivity issues. Try again later.")
    raise
else:
    print("GDP data fetched successfully!")
    print(f"\nData shape: {gdp_data.shape}")
    print("\nFirst few rows:")
    print(gdp_data.head())

## 2. Process and Reshape Data

In [None]:
# Move the index levels into ordinary columns; the pivot below needs
# 'country' and 'date' to be addressable as columns.
gdp_pivot = gdp_data.reset_index()

# Wide layout: one row per date, one column per country, cells hold the GDP
# value. Rows are put in ascending date order straight away.
gdp_wide = gdp_pivot.pivot(index='date', columns='country', values='GDP').sort_index()

# Relabel the columns through country_names; labels without an entry are kept as-is
gdp_wide.columns = [country_names.get(label, label) for label in gdp_wide.columns]

print("\nProcessed GDP data:")
print(gdp_wide.tail())

## 3. Visualize GDP Trends

In [None]:
# Draw every country's GDP series on one shared axes, scaled from USD to trillions
fig, ax = plt.subplots(figsize=(14, 8))

for name in gdp_wide.columns:
    ax.plot(gdp_wide.index, gdp_wide[name] / 1e12, label=name, linewidth=2)

# Labelling and cosmetics
ax.set_title('GDP Comparison (Current US$)', fontsize=16, fontweight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('GDP (Trillions USD)', fontsize=12)
ax.legend(loc='best', fontsize=10)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 4. GDP Growth Rates

In [None]:
# Calculate year-over-year growth rates, in percent.
# fill_method=None leaves missing observations as NaN. The implicit default
# forward-fills gaps first, which reports a spurious 0% growth for any year a
# country has no value, and that default is deprecated in pandas >= 2.1.
# The input is the current-US$ series, so these rates are not inflation-adjusted.
gdp_growth = gdp_wide.pct_change(fill_method=None) * 100

# Plot growth rates
plt.figure(figsize=(14, 8))

for country in gdp_growth.columns:
    plt.plot(gdp_growth.index, gdp_growth[country], label=country, linewidth=2)

# Zero line separates expansion from contraction
plt.axhline(y=0, color='black', linestyle='--', alpha=0.5)
plt.title('GDP Growth Rates (Year-over-Year)', fontsize=16, fontweight='bold')
plt.xlabel('Year', fontsize=12)
plt.ylabel('Growth Rate (%)', fontsize=12)
plt.legend(loc='best', fontsize=10)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Average growth rates (NaN entries, including the first year, are skipped by mean())
print("\nAverage GDP Growth Rates (2000-2023):")
print(gdp_growth.mean().sort_values(ascending=False))

## 5. GDP Per Capita Comparison

In [None]:
# Fetch GDP per capita data
gdp_per_capita_indicator = 'NY.GDP.PCAP.CD'

# Only the network call is guarded. A failed fetch is reported and the rest of
# the cell is skipped (nothing later in the notebook needs this data), while
# errors in the reshaping/plotting code below surface with a full traceback
# instead of being mislabelled as a fetch problem.
try:
    gdp_pc_data = wbdata.get_dataframe({gdp_per_capita_indicator: 'GDP_PC'},
                                       country=countries, date=date_range)
except Exception as e:
    print(f"Error fetching GDP per capita data: {e}")
else:
    # Process data: index levels -> columns, then one column per country,
    # rows in ascending date order
    gdp_pc_pivot = gdp_pc_data.reset_index()
    gdp_pc_wide = gdp_pc_pivot.pivot(index='date', columns='country', values='GDP_PC')
    gdp_pc_wide.columns = [country_names.get(col, col) for col in gdp_pc_wide.columns]
    gdp_pc_wide = gdp_pc_wide.sort_index()

    # Plot
    plt.figure(figsize=(14, 8))
    for country in gdp_pc_wide.columns:
        plt.plot(gdp_pc_wide.index, gdp_pc_wide[country], label=country, linewidth=2)

    plt.title('GDP Per Capita Comparison', fontsize=16, fontweight='bold')
    plt.xlabel('Year', fontsize=12)
    plt.ylabel('GDP Per Capita (USD)', fontsize=12)
    plt.legend(loc='best', fontsize=10)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # "Latest" means the last year that has a value for at least one country;
    # trailing years with no reported values at all are ignored.
    gdp_pc_reported = gdp_pc_wide.dropna(how='all')
    if gdp_pc_reported.empty:
        print("\nNo GDP per capita observations were returned.")
    else:
        print("\nLatest GDP Per Capita (USD):")
        print(gdp_pc_reported.iloc[-1].sort_values(ascending=False))

## 6. Multiple Indicators Analysis

In [None]:
# Fetch multiple indicators for a single country.
# Maps World Bank indicator code -> column label used in the resulting frame.
indicators = {
    'NY.GDP.MKTP.KD.ZG': 'GDP Growth',
    'SL.UEM.TOTL.ZS': 'Unemployment',
    'FP.CPI.TOTL.ZG': 'Inflation',
    'NE.TRD.GNFS.ZS': 'Trade (% of GDP)'
}

# Focus on USA
country = 'USA'
# Label used in titles/headings; derived from `country` so that changing the
# code above cannot leave the plots labelled with the wrong country.
country_label = country_names.get(country, country)

# Only the network call is guarded: a failed fetch is reported and the cell
# stops, while plotting errors surface normally instead of being reported as
# fetch errors.
try:
    us_data = wbdata.get_dataframe(indicators, country=[country], date=date_range)
except Exception as e:
    print(f"Error fetching indicator data: {e}")
else:
    us_data = us_data.sort_index()

    # Plot all indicators, one panel each on a 2x2 grid
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    axes = axes.flatten()

    # zip pairs panels with columns and stops at the shorter of the two, so a
    # column count different from the panel count cannot index past the grid.
    for ax, col in zip(axes, us_data.columns):
        ax.plot(us_data.index, us_data[col], linewidth=2, color='blue')
        ax.set_title(f'{country_label} - {col}', fontsize=12, fontweight='bold')
        ax.set_xlabel('Year', fontsize=10)
        ax.set_ylabel('Value', fontsize=10)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"\nSummary Statistics for {country_label}:")
    print(us_data.describe())

## 7. Country Comparison - Bar Chart

In [None]:
# Get latest GDP values.
# The last row of gdp_wide is not necessarily a year with data: the requested
# window may extend past what has been published. Use the last year for which
# at least one country has a value, and leave out countries with no value for
# that year so the chart and ranking never show NaN entries.
reported_gdp = gdp_wide.dropna(how='all')
if reported_gdp.empty:
    raise ValueError("gdp_wide contains no GDP observations to rank")
latest_year = reported_gdp.index[-1]
latest_gdp = reported_gdp.iloc[-1].dropna().sort_values(ascending=False)

# Create bar chart (values scaled from USD to trillions)
plt.figure(figsize=(12, 7))
colors = sns.color_palette('husl', len(latest_gdp))
plt.bar(range(len(latest_gdp)), latest_gdp / 1e12, color=colors)
plt.xticks(range(len(latest_gdp)), latest_gdp.index, rotation=45, ha='right')
plt.title(f'GDP Comparison - Latest Available Data ({latest_year})',
          fontsize=16, fontweight='bold')
plt.xlabel('Country', fontsize=12)
plt.ylabel('GDP (Trillions USD)', fontsize=12)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

# Loop variables are named so they do not overwrite the notebook-level
# `country` set in the multiple-indicators cell.
print("\nGDP Rankings (Latest):")
for rank, (country_label, gdp_value) in enumerate(latest_gdp.items(), 1):
    print(f"{rank}. {country_label}: ${gdp_value/1e12:.2f} trillion")

## 8. Explore Available Indicators

In [None]:
# Search for indicators related to 'education'.
# The indicator-search entry point differs between wbdata releases: use
# `search_indicators` when the installed version provides it, otherwise fall
# back to `get_indicators(query=...)`.
try:
    search_indicators = getattr(wbdata, 'search_indicators', None)
    if search_indicators is not None:
        education_indicators = search_indicators('education')
    else:
        education_indicators = wbdata.get_indicators(query='education')

    print("Sample Education Indicators:")
    # `entry` (not `indicator`) so the GDP indicator code defined in the fetch
    # cell is not overwritten by this loop. Names are truncated to 80 characters.
    for i, entry in enumerate(education_indicators[:10], 1):
        print(f"{i}. {entry['id']}: {entry['name'][:80]}")
except Exception as e:
    print(f"Error searching indicators: {e}")

## Summary

This notebook demonstrated:
- Fetching World Bank data for multiple countries
- Comparing GDP and GDP per capita across countries
- Analyzing growth rates
- Working with multiple economic indicators
- Creating comparative visualizations

The World Bank provides thousands of indicators covering:
- Economic data (GDP, trade, investment)
- Social indicators (education, health, poverty)
- Environmental data (emissions, energy use)
- Infrastructure and development

Explore more at: https://data.worldbank.org/