In [1]:
# --------------------------------------------
# Pakistan Remittances (1974–Present)
# Source: World Bank Open Data API
# Indicator: BX.TRF.PWKR.CD.DT
# Units: Current US Dollars
# --------------------------------------------

import requests
import pandas as pd

# 1. World Bank API URL for Pakistan remittances
#    (per_page=1000 is intended to return the whole annual series in one page)
url = (
    "https://api.worldbank.org/v2/country/PAK/"
    "indicator/BX.TRF.PWKR.CD.DT"
    "?format=json&per_page=1000"
)

# 2. Request data from the API
#    A timeout keeps a stalled connection from hanging the kernel forever,
#    and raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# The observations are expected at index [1] of the JSON payload. Validate that
# shape explicitly so an unexpected reply (for example an API error message)
# fails with a readable error rather than an IndexError/TypeError.
payload = response.json()
if not (isinstance(payload, list) and len(payload) > 1 and payload[1]):
    raise ValueError(f"Unexpected World Bank API response: {payload!r}")
data = payload[1]

# First year of the output table (matches the "1974-present" file name below).
START_YEAR = 1974

# 3-6. Build the tidy series in one chain so no intermediate slice is mutated:
#      - load the JSON records into a DataFrame
#      - keep only the 'date' and 'value' fields
#      - rename them to 'Year' / 'Remittances_USD'
#      - cast Year to integer and sort chronologically
df = (
    pd.DataFrame(data)[['date', 'value']]
    .rename(columns={'date': 'Year', 'value': 'Remittances_USD'})
    .astype({'Year': int})
    .sort_values('Year')
)

# 7. Create a full year range from START_YEAR to the latest available year
full_years = pd.DataFrame({
    'Year': range(START_YEAR, df['Year'].max() + 1)
})

# 8. Left-merge onto the full range so every year from START_YEAR onward gets
#    a row; years with no reported value stay NaN. Rows before START_YEAR are
#    dropped because they have no match in full_years.
df_final = full_years.merge(df, on='Year', how='left')

# 9. Write the completed table to CSV (row index omitted)
output_csv = "../06_datasets/pakistan_remittances_1974_present_worldbank.csv"
df_final.to_csv(output_csv, index=False)

# 10. Print both ends of the table as a quick sanity check
for heading, preview in (
    ("First 10 rows:", df_final.head(10)),
    ("\nLast 10 rows:", df_final.tail(10)),
):
    print(heading)
    print(preview)


First 10 rows:
   Year  Remittances_USD
0  1974              NaN
1  1975              NaN
2  1976     4.117369e+08
3  1977     8.720866e+08
4  1978     1.309297e+09
5  1979     1.501617e+09
6  1980     2.047622e+09
7  1981     2.067108e+09
8  1982     2.588122e+09
9  1983     2.940240e+09

Last 10 rows:
    Year  Remittances_USD
41  2015     1.930600e+10
42  2016     1.981900e+10
43  2017     1.985600e+10
44  2018     2.119300e+10
45  2019     2.225200e+10
46  2020     2.608900e+10
47  2021     3.131200e+10
48  2022     3.017600e+10
49  2023     2.655800e+10
50  2024     3.491400e+10
