# Task 1
In this task I have to find the 10 countries with the biggest GDP per capita growth in the last decade.
Because the IMF dataset is the October 2020 edition and most estimates start after 2019, I will use the period from 2009 to 2019.

In [1]:
# Template code to load data
import pandas as pd

# NOTE: unpickling can execute arbitrary code, so IMF_DATA.pkl must come from a trusted source.
imf_data = pd.read_pickle("IMF_DATA.pkl")

# Keep only the rows that carry GDP per capita data.
# `na=False` makes rows with a missing 'Subject Descriptor' count as non-matching.
pattern = r'\bGross domestic product per capita.*'
is_gdp_per_capita = imf_data['Subject Descriptor'].str.contains(pattern, na=False)

# GDP per capita comes in three distinct flavours: U.S. dollars, PPP and national currency.
# Only the series denominated in U.S. dollars is used for this task.
# The label is matched literally (regex=False) so the dots in "U.S." are not treated as
# regex wildcards, and `na=False` keeps the mask strictly boolean if 'Units' is missing.
usd_pattern = r'U.S. dollars'
is_usd = imf_data['Units'].str.contains(usd_pattern, regex=False, na=False)

gdp_data = imf_data[is_gdp_per_capita & is_usd]

In [2]:
# Estimated (predicted) values are neutralised here, because they would interfere with calculating
# which countries' GDP per capita grew the most.
def remove_predictions(row, start_year=2009, end_year=2019):
    """Return a copy of ``row`` with estimated values in the analysis window neutralised.

    The last year holding actual (non-estimated) data is read from the row's
    'Estimates Start After' entry.

    * If that year lies before ``start_year``, every value from ``start_year``
      to ``end_year`` (inclusive) is set to 0.
    * If it lies inside the window (before ``end_year``), every later value up
      to ``end_year`` is overwritten with the value of that last actual year.
    * Otherwise the values are returned as they are.

    Args:
        row: One record as a pandas Series, labelled by year plus an
            'Estimates Start After' entry.
        start_year: First year of the analysis window (default 2009).
        end_year: Last year of the analysis window (default 2019).

    Returns:
        A new Series; ``row`` itself is left untouched.

    Raises:
        ValueError / TypeError: if 'Estimates Start After' cannot be converted
            by ``int()`` (for example when it is NaN or None).
    """
    last_actual_year = int(row['Estimates Start After'])

    # Work on a copy: pandas does not support functions that mutate the object
    # handed to them by DataFrame.apply.
    cleaned = row.copy()

    if last_actual_year < start_year:
        # The whole window consists of estimates.
        cleaned.loc[start_year:end_year] = 0
    elif last_actual_year < end_year:
        # Carry the last actual value forward over the estimated years.
        cleaned.loc[last_actual_year + 1:end_year] = cleaned.loc[last_actual_year]

    return cleaned

In [3]:
# Replace every estimated value with either the last actual data point or 0 (row by row),
# index the result by country and keep only the 2009-2020 year columns.
final_gdp_data = (
    gdp_data
    .apply(remove_predictions, axis=1)
    .set_index('Country')
    .loc[:, 2009:2020]
)

In [4]:
# Function to calculate the percentage change of GDP per capita over the period from start to end
def change_of_gdp(row, start_year=2009, end_year=2019):
    """Return a copy of ``row`` extended with its 'Period Growth Rate' in percent.

    The growth rate is ``(end - start) / start * 100`` using the values stored
    under ``start_year`` and ``end_year``. A start value of 0 yields a growth
    rate of 0.0 instead of dividing by zero.

    Args:
        row: One record as a pandas Series labelled by year.
        start_year: Label of the first year of the period (default 2009).
        end_year: Label of the last year of the period (default 2019).

    Returns:
        A new Series with an added 'Period Growth Rate' entry; ``row`` itself
        is left untouched.
    """
    start_value = row.loc[start_year]

    if start_value == 0.0:
        growth = 0.0
    else:
        growth = (row.loc[end_year] - start_value) / start_value * 100

    # Work on a copy: pandas does not support functions that mutate the object
    # handed to them by DataFrame.apply.
    result = row.copy()
    result.loc['Period Growth Rate'] = growth
    return result

In [5]:
# Append the 'Period Growth Rate' entry to every country's record
final_gdp_data = final_gdp_data.apply(change_of_gdp, axis="columns")

In [6]:
# The ten countries whose GDP per capita grew the most over 2009-2019, in percent
top_10_rows = final_gdp_data.nlargest(n=10, columns='Period Growth Rate')
top_10_countries = top_10_rows['Period Growth Rate']
print(top_10_countries)

Country
China         160.129732
Mongolia      148.989541
Lao P.D.R.    145.522083
Moldova       135.802453
Vietnam       130.544381
Ethiopia      121.236349
Nepal         116.221326
Panama        110.212430
Cambodia      104.757880
Kenya         104.112348
Name: Period Growth Rate, dtype: float64
