## Imports

In [11]:
import requests
import pandas as pd
import numpy as np 
import import_ipynb
import functions as f

## Extract

In [12]:
df_countries = pd.read_csv('https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv', usecols=['alpha-2', 'alpha-3', 'name'])
df_countries_detailed = pd.read_csv('./Raw/countries_detailed.csv', index_col=[0])

## Transform

In [13]:
# rename columns

df_countries.columns = f.fixColumnNames(df_countries)

In [14]:
# join both dataframes to bring df_countries's 'alpha-2' column in df_countries_detailed

df_merged = pd.merge(df_countries_detailed, df_countries[['Alpha2', 'Alpha3']], left_on='IsoCode', right_on='Alpha3').drop(columns=['Alpha3'])

In [15]:
# check for missing values in 'Alpha2'

df_merged.loc[df_merged['Alpha2'].isna()]

Unnamed: 0,Continent,Location,IsoCode,Population,PopulationDensity,MedianAge,GdpPerCapita,HumanDevelopmentIndex,LifeExpectancy,Aged65Older,Aged70Older,Alpha2
137,Africa,Namibia,NAM,2587344.0,3.078,22.0,9541.808,0.646,63.71,3.552,2.085,


In [16]:
# set 'Namibia' short code to 'NA' in 'Alpha2'

df_merged.loc[df_merged['Alpha2'].isna() == True, 'Alpha2'] = 'NA'

In [17]:
df_merged.head()

Unnamed: 0,Continent,Location,IsoCode,Population,PopulationDensity,MedianAge,GdpPerCapita,HumanDevelopmentIndex,LifeExpectancy,Aged65Older,Aged70Older,Alpha2
0,Asia,Afghanistan,AFG,39835428.0,54.422,18.6,1803.987,0.511,64.83,2.581,1.337,AF
1,Europe,Albania,ALB,2872934.0,104.871,38.0,11803.431,0.795,78.57,13.188,8.643,AL
2,Africa,Algeria,DZA,44616626.0,17.348,29.1,13913.839,0.748,76.88,6.211,3.857,DZ
3,Europe,Andorra,AND,77354.0,163.755,,,0.868,83.73,,,AD
4,Africa,Angola,AGO,33933611.0,23.89,16.8,5819.495,0.581,61.15,2.405,1.362,AO


In [18]:
# reorder columns

f.reindexCols(df_merged, [0, 1, 2, 11, 3, 4, 5, 6, 7, 8, 9, 10])

Unnamed: 0,Continent,Location,IsoCode,Alpha2,Population,PopulationDensity,MedianAge,GdpPerCapita,HumanDevelopmentIndex,LifeExpectancy,Aged65Older,Aged70Older
0,Asia,Afghanistan,AFG,AF,39835428.0,54.422,18.6,1803.987,0.511,64.83,2.581,1.337
1,Europe,Albania,ALB,AL,2872934.0,104.871,38.0,11803.431,0.795,78.57,13.188,8.643
2,Africa,Algeria,DZA,DZ,44616626.0,17.348,29.1,13913.839,0.748,76.88,6.211,3.857
3,Europe,Andorra,AND,AD,77354.0,163.755,,,0.868,83.73,,
4,Africa,Angola,AGO,AO,33933611.0,23.890,16.8,5819.495,0.581,61.15,2.405,1.362
...,...,...,...,...,...,...,...,...,...,...,...,...
217,Asia,Vietnam,VNM,VN,98168829.0,308.127,32.6,6171.884,0.704,75.40,7.150,4.718
218,Oceania,Wallis and Futuna,WLF,WF,11094.0,,,,,79.94,,
219,Asia,Yemen,YEM,YE,30490639.0,53.508,20.3,1479.147,0.470,66.12,2.922,1.583
220,Africa,Zambia,ZMB,ZM,18920657.0,22.995,17.7,3689.251,0.584,63.89,2.480,1.542


## Load

In [19]:
# save as parquet to preserve data types

df_merged.to_parquet('./Processed/COUNTRIES_DETAILED.parquet', index=False)