# Human Development Index and Components

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline

## Read Data

In [2]:
data = pd.read_csv('dataset/Human.csv', encoding='latin-1')

In [3]:
data.head()

Unnamed: 0,HDI rank,Country,HUMAN DEVELOPMENT,Human Development Index (HDI),Life expectancy at birth,Expected years of schooling,Mean years of schooling,Gross national income (GNI) per capita,GNI per capita rank minus HDI rank,HDI rank.1,Unnamed: 10,Unnamed: 11
0,1,Switzerland,VERY HIGH,0.962,84.0,16.5,13.9,66933,5,3,,
1,2,Norway,VERY HIGH,0.961,83.2,18.2,13.0,64660,6,1,,
2,3,Iceland,VERY HIGH,0.959,82.7,19.2,13.8,55782,11,2,,
3,4,"Hong Kong, China (SAR)",VERY HIGH,0.952,85.5,17.3,12.2,62607,6,4,,
4,5,Australia,VERY HIGH,0.951,84.5,21.1,12.7,49238,18,5,,


In [4]:
data.shape

(195, 12)

## Analysis Data

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 12 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   HDI rank                                195 non-null    int64  
 1   Country                                 195 non-null    object 
 2   HUMAN DEVELOPMENT                       195 non-null    object 
 3   Human Development Index (HDI)           195 non-null    object 
 4   Life expectancy at birth                195 non-null    float64
 5   Expected years of schooling             195 non-null    object 
 6   Mean years of schooling                 195 non-null    object 
 7   Gross national income (GNI) per capita  195 non-null    object 
 8   GNI per capita rank minus HDI rank      195 non-null    object 
 9   HDI rank.1                              195 non-null    object 
 10  Unnamed: 10                             0 non-null      float6

In [6]:
data.describe()

Unnamed: 0,HDI rank,Life expectancy at birth,Unnamed: 10,Unnamed: 11
count,195.0,195.0,0.0,0.0
mean,97.815385,71.277949,,
std,56.467551,7.746484,,
min,1.0,52.5,,
25%,49.5,65.7,,
50%,97.0,71.7,,
75%,146.0,76.7,,
max,195.0,85.9,,


### Missed Value

In [7]:
data.isnull().values.sum()

390

In [8]:
data.isnull().sum()

HDI rank                                    0
Country                                     0
HUMAN DEVELOPMENT                           0
Human Development Index (HDI)               0
Life expectancy at birth                    0
Expected years of schooling                 0
Mean years of schooling                     0
Gross national income (GNI) per capita      0
GNI per capita rank minus HDI rank          0
HDI rank.1                                  0
Unnamed: 10                               195
Unnamed: 11                               195
dtype: int64

In [9]:
data.drop(['Unnamed: 10', 'Unnamed: 11'], axis=1, inplace=True)

In [10]:
data.duplicated(keep='first').sum()

0

In [11]:
data.duplicated(keep='last').sum()

0

In [16]:
data

Unnamed: 0,HDI rank,Country,HUMAN DEVELOPMENT,Human Development Index (HDI),Life expectancy at birth,Expected years of schooling,Mean years of schooling,Gross national income (GNI) per capita,GNI per capita rank minus HDI rank,HDI rank.1
0,1,Switzerland,VERY HIGH,0.962,84.0,16.5,13.9,66933,5,3
1,2,Norway,VERY HIGH,0.961,83.2,18.2,13.0,64660,6,1
2,3,Iceland,VERY HIGH,0.959,82.7,19.2,13.8,55782,11,2
3,4,"Hong Kong, China (SAR)",VERY HIGH,0.952,85.5,17.3,12.2,62607,6,4
4,5,Australia,VERY HIGH,0.951,84.5,21.1,12.7,49238,18,5
...,...,...,...,...,...,...,...,...,...,...
190,191,South Sudan,LOW,0.385,55.0,5.5,5.7,768,-1,191
191,192,Korea (Democratic People's Rep. of),OTHER,..,73.3,10.8,..,..,..,..
192,193,Monaco,OTHER,..,85.9,..,..,..,..,..
193,194,Nauru,OTHER,..,63.6,11.7,..,17730,..,..


In [12]:
data.interpolate(method='linear', limit_direction='forward', inplace=True, axis=0)

In [23]:
data.loc[data['Mean years of schooling'] == ".."]

Unnamed: 0,HDI rank,Country,HUMAN DEVELOPMENT,Human Development Index (HDI),Life expectancy at birth,Expected years of schooling,Mean years of schooling,Gross national income (GNI) per capita,GNI per capita rank minus HDI rank,HDI rank.1
191,192,Korea (Democratic People's Rep. of),OTHER,..,73.3,10.8,..,..,..,..
192,193,Monaco,OTHER,..,85.9,..,..,..,..,..
193,194,Nauru,OTHER,..,63.6,11.7,..,17730,..,..
194,195,Somalia,OTHER,..,55.3,..,..,1018,..,..


In [35]:
for col in data:
    if any(data[col] == ".."):
        print(data[col].name)

AttributeError: 'Series' object has no attribute 'columns'

### Convert data type

In [17]:
def df_convert_dtype(df, convert_from, convert_to):
    cols = df.select_dtypes(include=[convert_from]).columns
    for col in cols:
        df[col] = df[col].values.astype(convert_to)
    return df

In [None]:
data.merge()

In [20]:
numerical_data = data.iloc[:, 3:]

Unnamed: 0,Human Development Index (HDI),Life expectancy at birth,Expected years of schooling,Mean years of schooling,Gross national income (GNI) per capita,GNI per capita rank minus HDI rank,HDI rank.1
0,0.962,84.0,16.5,13.9,66933,5,3
1,0.961,83.2,18.2,13.0,64660,6,1
2,0.959,82.7,19.2,13.8,55782,11,2
3,0.952,85.5,17.3,12.2,62607,6,4
4,0.951,84.5,21.1,12.7,49238,18,5
...,...,...,...,...,...,...,...
190,0.385,55.0,5.5,5.7,768,-1,191
191,..,73.3,10.8,..,..,..,..
192,..,85.9,..,..,..,..,..
193,..,63.6,11.7,..,17730,..,..


In [22]:
numerical_data_cols = numerical_data.columns

In [26]:
for col in numerical_data:
    data[col] = pd.to_numeric(data[col])

ValueError: Unable to parse string ".." at position 191