In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('./WEOApr2021all.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
df.columns

Index(['WEO Country Code', 'ISO', 'WEO Subject Code', 'Country',
       'Subject Descriptor', 'Subject Notes', 'Units', 'Scale',
       'Country/Series-specific Notes', '1980', '1981', '1982', '1983', '1984',
       '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002',
       '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
       '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021', '2022', '2023', '2024', '2025', '2026', 'Estimates Start After',
       'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60',
       'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64'],
      dtype='object')

In [4]:
df.shape

(8775, 65)

# Data Filtering and Preprocessing

## Choosing the required subjects

* Not all subjects are important for us.
* So to choose subjects first we are going to go through notes of each subject **to decide which subject to choose**.

In [5]:
subjects = dict()
for index, row in df.iterrows():
    if row['WEO Subject Code'] not in subjects:
        subjects[row['WEO Subject Code']] = { 
            'descriptior' : row['Subject Descriptor'], 
            'notes' : row['Subject Notes'],
            'unit' : row['Units']
        }

for key in subjects.keys():
    print()
    value = subjects[key]
    print(key)
    print('descriptior :', value['descriptior'])
    print('notes :', value['unit'])


NGDP_R
descriptior : Gross domestic product, constant prices
notes : National currency

NGDP_RPCH
descriptior : Gross domestic product, constant prices
notes : Percent change

NGDP
descriptior : Gross domestic product, current prices
notes : National currency

NGDPD
descriptior : Gross domestic product, current prices
notes : U.S. dollars

PPPGDP
descriptior : Gross domestic product, current prices
notes : Purchasing power parity

NGDP_D
descriptior : Gross domestic product, deflator
notes : Index

NGDPRPC
descriptior : Gross domestic product per capita, constant prices
notes : National currency

NGDPRPPPPC
descriptior : Gross domestic product per capita, constant prices
notes : Purchasing power parity

NGDPPC
descriptior : Gross domestic product per capita, current prices
notes : National currency

NGDPDPC
descriptior : Gross domestic product per capita, current prices
notes : U.S. dollars

PPPPC
descriptior : Gross domestic product per capita, current prices
notes : Purchasing power

In [6]:
# Choosen subjects

choosen_subjects = {
    'NGDP_R' : 'GDP (Constant)',
    'NGDP' : 'GDP (Current)',
    'NGDPD' : 'GDP (Current)', # USD
    'NGDPRPC' : 'GDP Per Capita (Constant)',
    'NGDPDPC' : 'GDP Per Capita (Current)', #USD
    'PPPSH' : 'GDP (PPP)',
    'NID_NGDP' : 'Total Investment',
    'NGSD_NGDP' : 'Gross national savings',
    'PCPI' : 'Inflation',
    'PCPIPCH' : 'Inflation (Avg Consumer Prices)',
    'TM_RPCH' : 'Imports',
    'TX_RPCH' : 'Exports',
    'LUR' : 'Unemployment rate',
    'LP' : 'Population',
    'GGR' : 'Goverment Revenue',
    'GGX' : 'Goverment Expenditure',
    'GGXCNL' : 'Goverment Borrowing',
    'GGXWDN' : 'Goverment Debt',
}

gdp_subjects = {
    'NGDP_R' : 'GDP (Constant)',
    'NGDPD' : 'GDP (Current)', # USD
    'NGDPRPC' : 'GDP Per Capita (Constant)',
    'NGDPDPC' : 'GDP Per Capita (Current)', #USD
    'PPPSH' : 'GDP (PPP)',
}

economic_subjects = {
    'NID_NGDP' : 'Total Investment',
    'NGSD_NGDP' : 'Gross national savings',
    'PCPI' : 'Inflation',
    'PCPIPCH' : 'Inflation (Avg Consumer Prices)',
    'TM_RPCH' : 'Imports',
    'TX_RPCH' : 'Exports',
    'LUR' : 'Unemployment rate',
    'LP' : 'Population',
}

gov_subjects = {
    'GGR' : 'Goverment Revenue',
    'GGX' : 'Goverment Expenditure',
    'GGXCNL' : 'Goverment Borrowing',
    'GGXWDN' : 'Goverment Debt',
}

* Filtering the choosen subjects
* Also data of some initial years is not required

In [7]:
df_subject_code = df[df['WEO Subject Code'].isin(choosen_subjects.keys())]
df = df_subject_code.iloc[:, :56]
drop_columns = ['Subject Notes', 'Subject Descriptor', 'Country/Series-specific Notes'] + list(map(str, range(1980, 2002)))
df = df.drop(columns=drop_columns)
df

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Units,Scale,2002,2003,2004,2005,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026
0,512,AFG,NGDP_R,Afghanistan,National currency,Billions,453.484,492.903,496.209,554.91,...,1255.29,1270.22,1319.9,1253.91,1304.06,1363.06,1424.4,1481.5,1540.94,1602.59
2,512,AFG,NGDP,Afghanistan,National currency,Billions,178.756,220.013,246.21,304.926,...,1285.46,1327.69,1469.6,1470.21,1599.35,1745.26,1896.29,2051.19,2218.82,2399.92
3,512,AFG,NGDPD,Afghanistan,U.S. dollars,Billions,4.367,4.553,5.146,6.167,...,18.91,18.401,18.876,19.132,19.938,21.201,22.01,22.754,23.896,24.553
6,512,AFG,NGDPRPC,Afghanistan,National currency,Units,24240.97,25307.04,24519.48,26491.22,...,42265.57,40196.7,40990.73,38065.69,38698.25,39539.58,40389.89,41064.53,41751.8,42445.96
9,512,AFG,NGDPDPC,Afghanistan,U.S. dollars,Units,233.433,233.755,254.259,294.396,...,636.693,582.323,586.204,580.817,591.667,614.991,624.1,630.714,647.452,650.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8757,698,ZWE,LP,Zimbabwe,Persons,Millions,11.632,11.64,11.73,11.83,...,14.437,14.642,14.905,15.189,15.492,15.817,16.164,16.481,16.79,17.095
8758,698,ZWE,GGR,Zimbabwe,National currency,Billions,,,,,...,3.737,3.502,3.87,5.491,22.971,182.586,387.562,494.002,557.901,640.767
8760,698,ZWE,GGX,Zimbabwe,National currency,Billions,,,,,...,4.097,4.863,6.144,7.497,25.314,170.271,406.388,516.652,583.264,658.805
8762,698,ZWE,GGXCNL,Zimbabwe,National currency,Billions,,,,,...,-0.36,-1.361,-2.274,-2.006,-2.344,12.315,-18.825,-22.65,-25.363,-18.038


## Removing Null Values

In [8]:
# null elements
df.isna().sum()

WEO Country Code       0
ISO                    0
WEO Subject Code       0
Country                0
Units                  0
Scale               1560
2002                 430
2003                 406
2004                 366
2005                 349
2006                 342
2007                 337
2008                 330
2009                 326
2010                 321
2011                 316
2012                 315
2013                 313
2014                 301
2015                 291
2016                 290
2017                 287
2018                 291
2019                 296
2020                 304
2021                 334
2022                 334
2023                 341
2024                 343
2025                 343
2026                 343
dtype: int64

### Data Formatting

* Filling Nan data with '0'
* Converting data from string to float

In [9]:
# what fields are causing error for float to string conversion
for _ in df['2002']:
    try :
        st = float(_)
    except :
        print("Error for", _ , type(_))

Error for -- <class 'str'>
Error for -- <class 'str'>
Error for -- <class 'str'>
Error for -- <class 'str'>
Error for -- <class 'str'>
Error for -- <class 'str'>
Error for -- <class 'str'>
Error for -- <class 'str'>
Error for -- <class 'str'>
Error for -- <class 'str'>


In [10]:
# Checking how many fields are "--"
(df == "--").sum()

WEO Country Code     0
ISO                  0
WEO Subject Code     0
Country              0
Units                0
Scale                0
2002                10
2003                 9
2004                 8
2005                 8
2006                 8
2007                 9
2008                 7
2009                 7
2010                 9
2011                 8
2012                 7
2013                 8
2014                 7
2015                 7
2016                 8
2017                 8
2018                 8
2019                 8
2020                 9
2021                 8
2022                 8
2023                 7
2024                 7
2025                 7
2026                 7
dtype: int64

In [11]:
df.replace('--','0', inplace=True) 
df[list(map(str, range(2002, 2027)))] = df[list(map(str, range(2002, 2027)))].fillna('0')
df[list(map(str, range(2002, 2027)))] = df[list(map(str, range(2002, 2027)))].astype(float)
df.dtypes

WEO Country Code      int64
ISO                  object
WEO Subject Code     object
Country              object
Units                object
Scale                object
2002                float64
2003                float64
2004                float64
2005                float64
2006                float64
2007                float64
2008                float64
2009                float64
2010                float64
2011                float64
2012                float64
2013                float64
2014                float64
2015                float64
2016                float64
2017                float64
2018                float64
2019                float64
2020                float64
2021                float64
2022                float64
2023                float64
2024                float64
2025                float64
2026                float64
dtype: object

In [12]:
country_gdp = df[df['WEO Subject Code'] == 'NGDPD'][['ISO', 'Country', 'Units', 'Scale' ,'2021']]
country_gdp

Unnamed: 0,ISO,Country,Units,Scale,2021
3,AFG,Afghanistan,U.S. dollars,Billions,19.938
48,ALB,Albania,U.S. dollars,Billions,17.138
93,DZA,Algeria,U.S. dollars,Billions,151.459
138,AGO,Angola,U.S. dollars,Billions,66.493
183,ATG,Antigua and Barbuda,U.S. dollars,Billions,1.376
...,...,...,...,...,...
8553,VNM,Vietnam,U.S. dollars,Billions,354.868
8598,WBG,West Bank and Gaza,U.S. dollars,Billions,16.481
8643,YEM,Yemen,U.S. dollars,Billions,25.095
8688,ZMB,Zambia,U.S. dollars,Billions,18.955


#### Top 20 countries by GDP 

for year 2021

In [13]:
# country_gdp.isna().sum()
country_gdp.sort_values(by='2021', ascending=False)[:20]

Unnamed: 0,ISO,Country,Units,Scale,2021
8328,USA,United States,U.S. dollars,Billions,22675.27
1578,CHN,China,U.S. dollars,Billions,16642.32
3783,JPN,Japan,U.S. dollars,Billions,5378.14
2838,DEU,Germany,U.S. dollars,Billions,4319.29
8283,GBR,United Kingdom,U.S. dollars,Billions,3124.65
3423,IND,India,U.S. dollars,Billions,3049.7
2658,FRA,France,U.S. dollars,Billions,2938.27
3693,ITA,Italy,U.S. dollars,Billions,2106.29
1398,CAN,Canada,U.S. dollars,Billions,1883.49
4008,KOR,Korea,U.S. dollars,Billions,1806.71


In [14]:
# These are not g20 countries still roughly choosen
g20_countries = country_gdp.sort_values(by='2021', ascending=False)[:19]['ISO']
g20_countries

8328    USA
1578    CHN
3783    JPN
2838    DEU
8283    GBR
3423    IND
2658    FRA
3693    ITA
1398    CAN
4008    KOR
6393    RUS
363     AUS
1038    BRA
7158    ESP
4998    MEX
3468    IDN
5493    NLD
7518    CHE
6618    SAU
Name: ISO, dtype: object

In [15]:
country_gdp_constant = df[df['WEO Subject Code'] == 'NGDP'][['ISO', 'Country', 'Units', 'Scale' ,'2021']]
country_gdp_constant.sort_values(by='2021', ascending=False)[:20]

Unnamed: 0,ISO,Country,Units,Scale,2021
3512,IRN,Islamic Republic of Iran,National currency,Billions,47731850.96
3467,IDN,Indonesia,National currency,Billions,16427918.39
8552,VNM,Vietnam,National currency,Billions,8447685.12
4007,KOR,Korea,National currency,Billions,2002883.99
1622,COL,Colombia,National currency,Billions,1086308.71
8417,UZB,Uzbekistan,National currency,Billions,668939.45
3782,JPN,Japan,National currency,Billions,560059.19
3557,IRQ,Iraq,National currency,Billions,276562.29
6032,PRY,Paraguay,National currency,Billions,262388.41
3422,IND,India,National currency,Billions,228476.39


## Normalizing data

Most of the data is currently in the National Curreny wanted to convert it into USD.

In [24]:
x, y = df[df['WEO Subject Code'].isin(['NGDP','NGDPD'])].shape
x, y

(390, 31)

## Getting the currency exhange rate for each year

In [42]:
new_df_columns = ['WEO Country Code', 'ISO', 'Country',
       '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019', '2020', '2021', '2022', '2023', '2024', '2025', '2026']
years = ['2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019', '2020', '2021', '2022', '2023', '2024', '2025', '2026']



curr_ngdp = {}
curr_country = None

arr = []

# count = 0
for row_num, row in df[df['WEO Subject Code'].isin(['NGDP','NGDPD'])].iterrows():
    new_row = {}
    #print(row_num, row['WEO Subject Code'], curr_country, row['Country'])
    if row['WEO Subject Code'] == 'NGDP':
        for year in years:
            curr_ngdp[year] = row[year]
        curr_country = row['Country']
    elif row['WEO Subject Code'] == 'NGDPD' and curr_country == row['Country']:
        for col in ['WEO Country Code', 'ISO', 'Country']:
            new_row[col] = row[col]
        for year in years:
            if row[year] == 0:
                new_row[year] = 0
            else:
                new_row[year] = curr_ngdp[year]/row[year]
        
        arr.append(new_row)
#     count += 1
#     if(count > 3):
#         break

ExcDF = pd.DataFrame(arr)
ExcDF

Unnamed: 0,WEO Country Code,ISO,Country,2002,2003,2004,2005,2006,2007,2008,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026
0,512,AFG,Afghanistan,40.933364,48.322644,47.844928,49.444787,49.937473,49.964353,50.258231,...,67.977790,72.153144,77.855478,76.845599,80.216170,82.319702,86.155838,90.146348,92.853197,97.744471
1,914,ALB,Albania,140.408004,120.787382,102.666110,99.871212,98.104204,90.430645,83.897213,...,118.796445,107.989701,109.873658,108.441936,102.549306,102.037118,101.406266,100.951021,100.905807,100.902816
2,612,DZA,Algeria,79.681471,77.394495,72.065114,73.276614,72.646825,69.292472,64.582663,...,110.901432,116.625705,119.414859,126.853507,137.438317,151.034069,162.274403,174.351226,187.327360,201.269489
3,614,AGO,Angola,43.529046,74.605064,83.541525,87.158854,80.368072,76.706708,75.033036,...,165.915627,252.856320,364.826624,578.247277,716.150422,788.641073,821.363212,838.655648,847.551173,867.376525
4,311,ATG,Antigua and Barbuda,2.698160,2.698598,2.698913,2.700587,2.700086,2.700992,2.701023,...,2.700272,2.700312,2.699759,2.699281,2.699855,2.700000,2.699177,2.699674,2.700565,2.699413
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,582,VNM,Vietnam,15265.329758,15505.784843,15737.747030,15854.845554,15989.039395,16083.172744,16445.179925,...,22715.608646,23019.613573,23227.320422,23214.877780,23805.147604,24290.656684,24742.656543,25189.390756,25638.145593,26094.924356
191,487,WBG,West Bank and Gaza,4.737627,4.554183,4.482077,4.487515,4.456058,4.107806,3.588235,...,3.599578,3.590465,3.564433,3.442232,3.205691,3.194957,3.186979,3.181809,3.175747,3.169172
192,474,YEM,Yemen,175.629851,183.444558,184.849293,191.758307,197.242826,199.004203,199.763294,...,374.251945,493.005620,558.590039,740.999007,784.500100,927.454116,1007.098138,1095.967113,1166.561266,1234.043222
193,754,ZMB,Zambia,4.398426,4.733170,4.778974,4.465002,3.601630,4.001636,3.745060,...,9.517353,10.458156,12.889828,18.331318,22.902084,26.323953,28.849339,30.624421,32.104079,33.487141


In [51]:
df_usd = df.copy()

for i, row in df[df['Units'] == 'National currency'].iterrows():
    for year in list(map(str, range(2002, 2027))):
        exc = ExcDF[ExcDF['Country'] == row['Country']][year].iloc[0]
        val = row[year]
        # print(type(exc), exc)
        # print(exc.iloc[0])
        if exc != 0:
            # print(i, year, val/exc)
            df_usd.at[i, year] = val/exc
            df_usd.at[i, 'Units'] = 'U.S. dollars'
        else:
            df_usd.at[i, year] = -1
    
df_usd

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Units,Scale,2002,2003,2004,2005,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026
0,512,AFG,NGDP_R,Afghanistan,U.S. dollars,Billions,11.078591,10.200249,10.371193,11.222821,...,18.466179,17.604500,16.953207,16.317265,16.256822,16.558126,16.532832,16.434387,16.595444,16.395710
2,512,AFG,NGDP,Afghanistan,U.S. dollars,Billions,4.367000,4.553000,5.146000,6.167000,...,18.910000,18.401000,18.876000,19.132000,19.938000,21.201000,22.010000,22.754000,23.896000,24.553000
3,512,AFG,NGDPD,Afghanistan,U.S. dollars,Billions,4.367000,4.553000,5.146000,6.167000,...,18.910000,18.401000,18.876000,19.132000,19.938000,21.201000,22.010000,22.754000,23.896000,24.553000
6,512,AFG,NGDPRPC,Afghanistan,U.S. dollars,Units,592.205666,523.709750,512.478145,535.773774,...,621.755581,557.102544,526.497700,495.352896,482.424553,480.317337,468.800383,455.531821,449.653876,434.254332
9,512,AFG,NGDPDPC,Afghanistan,U.S. dollars,Units,233.433000,233.755000,254.259000,294.396000,...,636.693000,582.323000,586.204000,580.817000,591.667000,614.991000,624.100000,630.714000,647.452000,650.300000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8757,698,ZWE,LP,Zimbabwe,Persons,Millions,11.632000,11.640000,11.730000,11.830000,...,14.437000,14.642000,14.905000,15.189000,15.492000,15.817000,16.164000,16.481000,16.790000,17.095000
8758,698,ZWE,GGR,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,2.981374,1.718253,0.459814,0.105966,0.259216,1.823934,3.422930,3.884284,4.085171,4.490508
8760,698,ZWE,GGX,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,3.268581,2.386026,0.730000,0.144678,0.285656,1.700914,3.589201,4.062378,4.270889,4.616919
8762,698,ZWE,GGXCNL,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,-0.287208,-0.667773,-0.270186,-0.038712,-0.026451,0.123020,-0.166262,-0.178094,-0.185718,-0.126411


In [53]:
df_usd['Units'].value_counts()

U.S. dollars                    1755
Percent change                   585
Percent of GDP                   390
Percent                          195
Index                            195
Percent of total labor force     195
Persons                          195
Name: Units, dtype: int64

In [46]:
country_gdp_constant = df[df['WEO Subject Code'] == 'NGDP'][['ISO', 'Country', 'Units', 'Scale' ,'2023']]
country_gdp_constant.sort_values(by='2023', ascending=False)[:20]

Unnamed: 0,ISO,Country,Units,Scale,2023
8328,USA,United States,U.S. dollars,Billions,24892.61
1578,CHN,China,U.S. dollars,Billions,19423.48
3783,JPN,Japan,U.S. dollars,Billions,5977.36
2838,DEU,Germany,U.S. dollars,Billions,4757.65
3423,IND,India,U.S. dollars,Billions,3591.03
8283,GBR,United Kingdom,U.S. dollars,Billions,3490.27
2658,FRA,France,U.S. dollars,Billions,3242.5
3693,ITA,Italy,U.S. dollars,Billions,2295.73
1398,CAN,Canada,U.S. dollars,Billions,2142.96
4008,KOR,Korea,U.S. dollars,Billions,1967.85


In [47]:
country_gdp_constant = df[df['WEO Subject Code'] == 'NGDPD'][['ISO', 'Country', 'Units', 'Scale' ,'2023']]
country_gdp_constant.sort_values(by='2023', ascending=False)[:20]

Unnamed: 0,ISO,Country,Units,Scale,2023
8328,USA,United States,U.S. dollars,Billions,24892.61
1578,CHN,China,U.S. dollars,Billions,19423.48
3783,JPN,Japan,U.S. dollars,Billions,5977.36
2838,DEU,Germany,U.S. dollars,Billions,4757.65
3423,IND,India,U.S. dollars,Billions,3591.03
8283,GBR,United Kingdom,U.S. dollars,Billions,3490.27
2658,FRA,France,U.S. dollars,Billions,3242.5
3693,ITA,Italy,U.S. dollars,Billions,2295.73
1398,CAN,Canada,U.S. dollars,Billions,2142.96
4008,KOR,Korea,U.S. dollars,Billions,1967.85


# Saving the data

In [54]:
df_usd

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Units,Scale,2002,2003,2004,2005,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026
0,512,AFG,NGDP_R,Afghanistan,U.S. dollars,Billions,11.078591,10.200249,10.371193,11.222821,...,18.466179,17.604500,16.953207,16.317265,16.256822,16.558126,16.532832,16.434387,16.595444,16.395710
2,512,AFG,NGDP,Afghanistan,U.S. dollars,Billions,4.367000,4.553000,5.146000,6.167000,...,18.910000,18.401000,18.876000,19.132000,19.938000,21.201000,22.010000,22.754000,23.896000,24.553000
3,512,AFG,NGDPD,Afghanistan,U.S. dollars,Billions,4.367000,4.553000,5.146000,6.167000,...,18.910000,18.401000,18.876000,19.132000,19.938000,21.201000,22.010000,22.754000,23.896000,24.553000
6,512,AFG,NGDPRPC,Afghanistan,U.S. dollars,Units,592.205666,523.709750,512.478145,535.773774,...,621.755581,557.102544,526.497700,495.352896,482.424553,480.317337,468.800383,455.531821,449.653876,434.254332
9,512,AFG,NGDPDPC,Afghanistan,U.S. dollars,Units,233.433000,233.755000,254.259000,294.396000,...,636.693000,582.323000,586.204000,580.817000,591.667000,614.991000,624.100000,630.714000,647.452000,650.300000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8757,698,ZWE,LP,Zimbabwe,Persons,Millions,11.632000,11.640000,11.730000,11.830000,...,14.437000,14.642000,14.905000,15.189000,15.492000,15.817000,16.164000,16.481000,16.790000,17.095000
8758,698,ZWE,GGR,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,2.981374,1.718253,0.459814,0.105966,0.259216,1.823934,3.422930,3.884284,4.085171,4.490508
8760,698,ZWE,GGX,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,3.268581,2.386026,0.730000,0.144678,0.285656,1.700914,3.589201,4.062378,4.270889,4.616919
8762,698,ZWE,GGXCNL,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,-0.287208,-0.667773,-0.270186,-0.038712,-0.026451,0.123020,-0.166262,-0.178094,-0.185718,-0.126411


In [56]:
gdp_df = df_usd[df_usd['WEO Subject Code'].isin(gdp_subjects.keys())]
gdp_df

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Units,Scale,2002,2003,2004,2005,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026
0,512,AFG,NGDP_R,Afghanistan,U.S. dollars,Billions,11.078591,10.200249,10.371193,11.222821,...,18.466179,17.604500,16.953207,16.317265,16.256822,16.558126,16.532832,16.434387,16.595444,16.395710
3,512,AFG,NGDPD,Afghanistan,U.S. dollars,Billions,4.367000,4.553000,5.146000,6.167000,...,18.910000,18.401000,18.876000,19.132000,19.938000,21.201000,22.010000,22.754000,23.896000,24.553000
6,512,AFG,NGDPRPC,Afghanistan,U.S. dollars,Units,592.205666,523.709750,512.478145,535.773774,...,621.755581,557.102544,526.497700,495.352896,482.424553,480.317337,468.800383,455.531821,449.653876,434.254332
9,512,AFG,NGDPDPC,Afghanistan,U.S. dollars,Units,233.433000,233.755000,254.259000,294.396000,...,636.693000,582.323000,586.204000,580.817000,591.667000,614.991000,624.100000,630.714000,647.452000,650.300000
12,512,AFG,PPPSH,Afghanistan,Percent,,0.036000,0.037000,0.036000,0.038000,...,0.061000,0.060000,0.061000,0.060000,0.059000,0.059000,0.059000,0.060000,0.060000,0.061000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8730,698,ZWE,NGDP_R,Zimbabwe,U.S. dollars,Billions,17.637000,14.775000,13.849000,12.822000,...,15.307363,9.743308,2.184654,0.326446,0.196745,0.181139,0.164151,0.149796,0.142984,0.140266
8733,698,ZWE,NGDPD,Zimbabwe,U.S. dollars,Billions,10.735000,9.574000,9.465000,9.046000,...,21.890000,21.093000,19.273000,21.038000,26.085000,29.363000,29.319000,29.979000,30.880000,32.042000
8736,698,ZWE,NGDPRPC,Zimbabwe,U.S. dollars,Units,1516.260000,1269.350000,1180.630000,1083.860000,...,1060.290393,665.447597,146.570327,21.491891,12.699677,11.452010,10.155070,9.088951,8.515798,8.205060
8739,698,ZWE,NGDPDPC,Zimbabwe,U.S. dollars,Units,922.878000,822.472000,806.898000,764.632000,...,1516.280000,1440.590000,1293.010000,1385.040000,1683.720000,1856.440000,1813.840000,1819.050000,1839.170000,1874.330000


In [57]:
gdp_df['WEO Subject Code'].value_counts()

NGDP_R     195
NGDPD      195
NGDPRPC    195
NGDPDPC    195
PPPSH      195
Name: WEO Subject Code, dtype: int64

In [58]:
eco_df = df_usd[df_usd['WEO Subject Code'].isin(economic_subjects.keys())]
eco_df

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Units,Scale,2002,2003,2004,2005,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026
14,512,AFG,NID_NGDP,Afghanistan,Percent of GDP,,27.243,30.102,35.354,37.048,...,18.477,18.039,18.192,16.420,17.416,18.336,18.482,19.306,18.541,20.065
15,512,AFG,NGSD_NGDP,Afghanistan,Percent of GDP,,61.151,59.718,72.570,67.274,...,26.063,30.197,29.897,27.132,27.391,26.613,26.222,26.565,23.941,23.739
16,512,AFG,PCPI,Afghanistan,Index,,31.263,42.413,49.351,54.566,...,110.998,111.693,114.264,120.671,126.780,132.479,137.745,143.253,148.984,154.945
17,512,AFG,PCPIPCH,Afghanistan,Percent change,,0.000,35.663,16.358,10.569,...,4.976,0.626,2.302,5.607,5.062,4.495,3.975,3.999,4.000,4.001
21,512,AFG,TM_RPCH,Afghanistan,Percent change,,0.000,0.000,0.000,37.300,...,15.309,-7.929,4.442,-4.551,-7.862,2.892,4.918,2.197,1.139,0.471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8747,698,ZWE,PCPIPCH,Zimbabwe,Percent change,,-34.445,-8.565,113.569,-31.522,...,0.907,10.607,255.292,557.210,99.254,24.677,12.804,8.679,5.552,3.296
8751,698,ZWE,TM_RPCH,Zimbabwe,Percent change,,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
8753,698,ZWE,TX_RPCH,Zimbabwe,Percent change,,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
8755,698,ZWE,LUR,Zimbabwe,Percent of total labor force,,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [59]:
eco_df['WEO Subject Code'].value_counts()

NID_NGDP     195
NGSD_NGDP    195
PCPI         195
PCPIPCH      195
TM_RPCH      195
TX_RPCH      195
LUR          195
LP           195
Name: WEO Subject Code, dtype: int64

In [61]:
gov_df = df_usd[df_usd['WEO Subject Code'].isin(gov_subjects.keys())]
gov_df

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Units,Scale,2002,2003,2004,2005,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026
28,512,AFG,GGR,Afghanistan,U.S. dollars,Billions,0.298900,0.447368,0.652295,0.908609,...,5.116804,5.625160,5.085358,5.174935,5.189078,5.705621,6.239531,6.347778,6.203147,6.349280
30,512,AFG,GGX,Afghanistan,U.S. dollars,Billions,0.303200,0.543058,0.775443,0.968110,...,5.244095,5.324453,5.285087,5.645554,5.687544,6.025095,6.402921,6.507008,6.422181,6.587104
32,512,AFG,GGXCNL,Afghanistan,U.S. dollars,Billions,-0.004300,-0.095690,-0.123127,-0.059480,...,-0.127292,0.300708,-0.199729,-0.470619,-0.498466,-0.319486,-0.163402,-0.159230,-0.219034,-0.237814
38,512,AFG,GGXWDN,Afghanistan,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
73,914,ALB,GGR,Albania,U.S. dollars,Billions,1.101041,1.384449,1.795675,2.047968,...,3.622979,4.161406,4.189485,3.927484,4.632318,4.957970,5.307709,5.662261,6.042784,6.463001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8723,754,ZMB,GGXWDN,Zambia,U.S. dollars,Billions,0.000000,0.683686,0.677342,0.898096,...,5.601557,10.820837,9.621075,8.080052,8.983768,10.517873,13.620693,16.600575,20.048885,22.100991
8758,698,ZWE,GGR,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,2.981374,1.718253,0.459814,0.105966,0.259216,1.823934,3.422930,3.884284,4.085171,4.490508
8760,698,ZWE,GGX,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,3.268581,2.386026,0.730000,0.144678,0.285656,1.700914,3.589201,4.062378,4.270889,4.616919
8762,698,ZWE,GGXCNL,Zimbabwe,U.S. dollars,Billions,0.000000,0.000000,0.000000,0.000000,...,-0.287208,-0.667773,-0.270186,-0.038712,-0.026451,0.123020,-0.166262,-0.178094,-0.185718,-0.126411


In [62]:
gov_df['WEO Subject Code'].value_counts()

GGR       195
GGX       195
GGXCNL    195
GGXWDN    195
Name: WEO Subject Code, dtype: int64

In [65]:
gov_df.to_csv("gov_data.csv", index=True, header=True, encoding="utf-8")
gdp_df.to_csv("gdp_data.csv", index=True, header=True, encoding="utf-8")
eco_df.to_csv("eco_data.csv", index=True, header=True, encoding="utf-8")