**Human Development Index**

In [None]:
%pip install pycountry_convert
import numpy as np
import pandas as pd
import plotly.express as px
import pycountry_convert as pycc

Import data

In [None]:
# Load the HDI table; 'HDI.csv' is a relative path, so it is resolved
# against the kernel's current working directory.
df_hdi_full = pd.read_csv(filepath_or_buffer='HDI.csv')
# Preview the first four rows.
df_hdi_full.head(n=4)

All column headers

In [None]:
# Display the column labels of the loaded table.
df_hdi_full.columns

Count the NaN values in each column, then drop rows that have no ISO3 code

In [None]:
# Number of NaN entries in each column.
df_hdi_full.isna().sum()

In [5]:
# Discard, in place, every row whose ISO3 code is missing.
df_hdi_full.dropna(subset=['ISO3'], inplace=True)

Remove countries that have all values as NaN

In [None]:
# Every column except the first one (assumed to be the country-code column
# -- TODO confirm against the CSV layout).
default_cols = df_hdi_full.columns[1:].tolist()
# Drop, in place, the rows in which all of those columns are NaN.
df_hdi_full.dropna(subset=default_cols, how='all', inplace=True)
df_hdi_full

Convert each country's ISO alpha-3 code to the corresponding alpha-2 code

In [None]:
# Translate every alpha-3 code in the table to its alpha-2 equivalent.
country_a3 = df_hdi_full['ISO3'].tolist()
country_a2 = [pycc.country_alpha3_to_country_alpha2(code) for code in country_a3]

# Relabel the code column and overwrite its contents with the alpha-2 codes.
df_hdi_full.rename(columns={'ISO3': 'ISO2'}, inplace=True)
df_hdi_full['ISO2'] = country_a2
df_hdi_full.head()

Rename the column headers to `ISO2` followed by the years 1990–2021

In [None]:
# New labels: the code column followed by one label per year, 1990..2021.
# The assignment is positional, so the frame must have exactly this many
# columns in this order.
new_heads = ['ISO2'] + [str(year) for year in range(1990, 2022)]
df_hdi_full.columns = new_heads
df_hdi_full.head()

Add a new column holding each country's continent

In [9]:
# Display name for each continent code returned by pycountry_convert.
# NOTE(review): 'OC' is labelled 'Australia' here; confirm that this is the
# intended display name for that code.
continents = {
    'NA': 'North America',
    'SA': 'South America',
    'AS': 'Asia',
    'OC': 'Australia',
    'AF': 'Africa',
    'EU': 'Europe'
}

# Build one continent name per entry of country_a2, in the same order, so the
# list can be attached to the table as a column.
continent_list = []
for c in country_a2:
    try:
        continent_code = pycc.country_alpha2_to_continent_code(c)
    except KeyError:
        # pycountry_convert has no continent mapping for this country code.
        # Only the lookup failure is caught, so unrelated errors (and
        # KeyboardInterrupt) are no longer silently swallowed.
        continent_list.append('Unknown')
    else:
        # A continent code missing from `continents` is also reported as
        # 'Unknown' instead of raising an unhandled KeyError.
        continent_list.append(continents.get(continent_code, 'Unknown'))

# How many countries could not be assigned a continent.
continent_list.count('Unknown')

1

In [None]:
# Attach the continent names as a 'Continent' column (row order must match
# the order in which continent_list was built).
df_hdi_full = df_hdi_full.assign(Continent=continent_list)
df_hdi_full.head()

In [None]:
# Keep only the rows whose continent was resolved.
has_known_continent = df_hdi_full['Continent'].ne('Unknown')
df_hdi_full = df_hdi_full.loc[has_known_continent]
df_hdi_full.head()

List of all unique continents

In [12]:
# Distinct continent names, in order of first appearance in the table.
continent_in_df = df_hdi_full['Continent'].unique().tolist()

Get average for each continent

In [None]:
# Mean of every year column per continent, with 'Continent' kept as a regular
# column (as_index=False).
# numeric_only=True keeps the text column ISO2 out of the aggregation:
# pandas >= 2.0 raises a TypeError when asked to average a string column,
# whereas older versions dropped it with a warning.
# NOTE(review): this assumes the year columns are numeric; a year column read
# as text would be excluded as well -- confirm with df_hdi_full.dtypes.
df_continent = df_hdi_full.groupby('Continent', as_index=False).mean(numeric_only=True)
df_continent

Plot the average HDI of each continent over time

In [14]:
# Reshape to one row per year and one column per continent.
# 'Continent' is moved into the index *before* transposing so that only the
# averaged values are transposed; transposing with the continent names still
# present as data would turn every resulting column into object dtype.
# After the transpose the year labels form the (unnamed) index, which
# reset_index() exposes as a column called 'index'; it is then renamed 'Year'.
# This cell expects df_continent to still contain a 'Continent' column, i.e.
# to be the per-continent averages computed in the previous step.
df_continent = (
    df_continent
    .set_index('Continent')
    .transpose()
    .reset_index()
    .rename(columns={'index': 'Year'})
)

In [None]:
# One line per continent: 'Year' on the x-axis and each continent column as
# its own series. The `labels` mapping renames the axis / legend titles that
# plotly express generates for the series values and the series names.
fig = px.line(
    df_continent,
    x='Year',
    y=continent_in_df,
    title='Average progression of HDI over different continents',
    labels={'value': 'HDI value', 'variable': 'Continents'},
)
fig.show()