In [1]:
import pandas as pd
from functools import reduce
import re
import numpy as np

In [2]:
country_ids = pd.read_csv('country_ids.csv')

In [3]:
def read_human_dev_report(file, sheet, skiprows, skipfooter):
    xls = pd.ExcelFile(file)
    df = pd.read_excel(xls, sheet, 
                       header = 0,
                       skiprows = skiprows,
                       skipfooter = skipfooter)
    
    return df

In [4]:
def fix_df(df, col_names):
    df.columns = df.columns.str.strip()
    df = df[col_names]
    
    return df

In [5]:
# clean up country names to match Read Country Code file
to_replace_ = ['Palestine, State of', 'Hong Kong, China (SAR)', 'Russian Federation', 
               'Korea (Republic of)', 'Iran (Islamic Republic of)']
replace_with_ = ['Palestine', 'Hong Kong', 'Russia', 
                 'South Korea', 'Iran']

def fix_country_names(df, col_name):
    df[col_name] = df[col_name].replace(to_replace_, replace_with_)
    df[col_name] = df[col_name].replace('[\(].*[\)]', '', regex=True)
    
    return df

In [6]:
# copy human development report from countries to countries with cities and the UK
def update_country_cities(df, df_new_col, df_UK_col):
    df.loc['Canada, Ontario', df_new_col] = df.loc['Canada', df_new_col]
    df.loc['Canada, Quebec', df_new_col] = df.loc['Canada', df_new_col]
    df.loc['Russia, Moscow', df_new_col] = df.loc['Russia', df_new_col]
    df.loc['United Arab Emirates, Abu Dhabi', df_new_col] = df.loc['United Arab Emirates', df_new_col]
    df.loc['United Arab Emirates, Dubai', df_new_col] = df.loc['United Arab Emirates', df_new_col]
    df.loc['England', df_new_col] = df_UK_col
    df.loc['Northern Ireland', df_new_col] = df_UK_col

    return df

In [7]:
# Table 1. Human Development Index and its components 
hdi = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Table 1', 4, 41)

hdi_col_names = ['Unnamed: 1', 'Human Development Index (HDI)', 'Life expectancy at birth',
             'Expected years of schooling', 'Mean years of schooling', 'Gross national income (GNI) per capita',
             'HDI rank']

hdi = fix_df(hdi, hdi_col_names)

hdi = hdi.rename(columns={'Unnamed: 1': 'Country'})

In [8]:
hdi

Unnamed: 0,Country,Human Development Index (HDI),Life expectancy at birth,Expected years of schooling,Mean years of schooling,Gross national income (GNI) per capita,HDI rank
0,Country,Value,(years),(years),(years),(2017 PPP $),
1,,2019,2019,2019,2019,2019,2018
2,VERY HIGH HUMAN DEVELOPMENT,,,,,,
3,Norway,0.957,82.4,18.0662,12.8978,66494.3,1
4,Ireland,0.955,82.31,18.7053,12.6663,68370.6,3
...,...,...,...,...,...,...,...
220,Small island developing states,0.728,72.0046,12.3092,8.72368,16824.8,—
221,,,,,,,
222,Organisation for Economic Co-operation and Dev...,0.9,80.3522,16.286,11.9761,44967,—
223,,,,,,,


In [9]:
# Table 4. Gender Development Index
gdi = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Table 4', 3, 35)

gdi_col_names = ['Unnamed: 1', 'Gender Development Index', 'Unnamed: 4', 'Human Development Index',
                 'Unnamed: 8', 'Life expectancy at birth', 'Unnamed: 12', 'Expected years of schooling',
                 'Unnamed: 16', 'Mean years of schooling', 'Unnamed: 20', 'Estimated  gross national income per capita',
                 'Unnamed: 24']

gdi = fix_df(gdi, gdi_col_names)

gdi_rename_dict = {'Unnamed: 1': 'Country',
                   'Unnamed: 4': 'GDI Group',
                   'Human Development Index': 'HDI Female',
                   'Unnamed: 8': 'HDI Male',
                   'Life expectancy at birth': 'Life expectancy at birth Female',
                   'Unnamed: 12': 'Life expectancy at birth Male',
                   'Expected years of schooling': 'Expected years of schooling Female',
                   'Unnamed: 16': 'Expected years of schooling Male',
                   'Mean years of schooling': 'Mean years of schooling Female',
                   'Unnamed: 20': 'Mean years of schooling Male',
                   'Estimated gross national income per capita': 'GNI Female',
                   'Unnamed: 24': 'GNI Male'}

gdi = gdi.rename(columns=gdi_rename_dict)

In [10]:
gdi

Unnamed: 0,Country,Gender Development Index,GDI Group,HDI Female,HDI Male,Life expectancy at birth Female,Life expectancy at birth Male,Expected years of schooling Female,Expected years of schooling Male,Mean years of schooling Female,Mean years of schooling Male,Estimated gross national income per capita,GNI Male
0,,,,Value,,(years),,(years),,(years),,(2017 PPP $),
1,Country,Value,Group,Female,Male,Female,Male,Female,Male,Female,Male,Female,Male
2,,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019
3,VERY HIGH HUMAN DEVELOPMENT,,,,,,,,,,,,
4,Norway,0.989939,1,0.948956,0.958601,84.377,80.447,18.7595,17.4136,12.977,12.8166,58548.4,74279.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...
221,Small island developing states,0.959154,—,0.718401,0.748994,74.1144,69.9724,12.9414,12.6875,8.49178,9.18303,12281.1,21333.8
222,,,,,,,,,,,,,
223,Organisation for Economic Co-operation and Dev...,0.978219,—,0.887348,0.907106,82.9409,77.724,16.5545,16.0037,11.8622,12.087,34593.4,55679.2
224,,,,,,,,,,,,,


In [11]:
# Table 5. Gender Inequality Index
gii = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Table 5', 3, 41)

gii_col_names = ['Unnamed: 1', 'Gender Inequality Index', 'Unnamed: 4', 'Maternal mortality ratio', 
                 'Adolescent birth rate', 'Share of seats in parliament', 'Population with at least some secondary education', 
                 'Unnamed: 14', 'Labour force participation rate', 'Unnamed: 18']

gii = fix_df(gii, gii_col_names)

gii_rename_dict = {'Unnamed: 4': 'GII Rank',
                   'Unnamed: 1': 'Country',
                   'Share of seats in parliament': 'Share of seats in parliament held by women',
                   'Population with at least some secondary education': 'Share at least some secondary education Female',
                   'Unnamed: 14': 'Share at least some secondary education Male',
                   'Labour force participation rate': 'Labour force participation rate Female',
                   'Unnamed: 18': 'Labour force participation rate Male'}

gii = gii.rename(columns=gii_rename_dict)

In [12]:
gii

Unnamed: 0,Country,Gender Inequality Index,GII Rank,Maternal mortality ratio,Adolescent birth rate,Share of seats in parliament held by women,Share at least some secondary education Female,Share at least some secondary education Male,Labour force participation rate Female,Labour force participation rate Male
0,,Value,Rank,"(deaths per 100,000 live births)","(births per 1,000 women ages 15–19)",(% held by women),(% ages 25 and older),,(% ages 15 and older),
1,Country,,,,,,Female,Male,Female,Male
2,,2019,2019,2017,2015-2020,2019,2015–2019,2015–2019,2019,2019
3,VERY HIGH HUMAN DEVELOPMENT,,,,,,,,,
4,Norway,0.045,6,2,5.141,40.8284,95.3928,94.86,60.37,67.192
...,...,...,...,...,...,...,...,...,...,...
212,Regions,,,,,,,,,
213,Arab States,0.518,—,135.38,46.7762,17.9915,49.3221,55.7774,20.6571,73.0391
214,East Asia and the Pacific,0.324,—,73.0728,22.1456,20.1752,69.3692,76.5283,59.2462,76.5069
215,Europe and Central Asia,0.256,—,19.8915,27.7675,23.1272,79.8655,88.0947,45.0142,69.9611


In [13]:
# Table 9. Education achievements
ea = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Table 9', 4, 23)

ea_col_names = ['Unnamed: 1', 'Youth\n(% ages 15–24)', 'Unnamed: 6', 'Population with at least some secondary education',
                'Pre-primary', 'Primary', 'Secondary', 'Tertiary', 'Primary school dropout rate',
                'Survival rate to the last grade of lower secondary general education', 'Government expenditure on education']

ea = fix_df(ea, ea_col_names)

ea_rename_dict = {'Unnamed: 1': 'Country',
                  'Youth\n(% ages 15–24)': 'Youth literacy rate Female',
                  'Unnamed: 6': 'Youth literacy rate Male',
                  'Survival rate to the last grade of lower secondary general education': 'Lower secondary retention'}

ea = ea.rename(columns=ea_rename_dict)

In [14]:
ea

Unnamed: 0,Country,Youth literacy rate Female,Youth literacy rate Male,Population with at least some secondary education,Pre-primary,Primary,Secondary,Tertiary,Primary school dropout rate,Lower secondary retention,Government expenditure on education
0,Country,Female,Male,(% ages 25 and older),(% of preschool-age children),(% of primary school–age population),(% of secondary school–age population),(% of tertiary school–age population),(% of primary school cohort),(%),(% of GDP)
1,,2008-2018,2008-2018,2015-2019,2014-2019,2014-2019,2014-2019,2014-2019,2008-2018,2008-2018,2013-2018
2,VERY HIGH HUMAN DEVELOPMENT,,,,,,,,,,
3,Norway,..,..,94.9208,95.3653,100.26,116.874,81.9924,0.8978,98.7565,7.9761
4,Ireland,..,..,81.1196,163.44,100.853,125.307,77.7806,..,99.4177,3.71973
...,...,...,...,...,...,...,...,...,...,...,...
220,Small island developing states,88.6202,88.321,59.5062,..,104.749,77.3819,..,..,..,..
221,,,,,,,,,,,
222,Organisation for Economic Co-operation and Dev...,..,..,85.4057,80.6191,102.569,106.52,72.4034,..,..,4.83567
223,,,,,,,,,,,


In [15]:
# Table 13. Human and capital mobility
hcm = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Table 13', 5, 47)

hcm_col_names = ['Unnamed: 1', 'Exports and imports', 'Stock of immigrants', 'International student mobility',
               'International inbound tourists', 'Total', 'Female']

hcm = fix_df(hcm, hcm_col_names)

hcm_rename_dict = {'Unnamed: 1': 'Country',
                   'Total': 'Share of internet users',
                   'Female': 'Share of female internet users'}               

hcm = hcm.rename(columns=hcm_rename_dict)

In [16]:
hcm

Unnamed: 0,Country,Exports and imports,Stock of immigrants,International student mobility,International inbound tourists,Share of internet users,Share of female internet users
0,Country,(% of GDP),(% of population),(% of total tertiary enrolment),(thousands),(% of population),(% of female population)
1,,2019,2019,2010-2017,2018,2018,2018
2,Very high human development,,,,,,
3,Norway,72.1465,16.1329,-3.07912,5688,96.4917,97
4,Ireland,239.215,17.0725,2.23969,10926,84.5223,85.7
...,...,...,...,...,...,...,...
220,Small island developing states,203.132,6.20709,2.56106,51408.2,50.0993,..
221,,,,,,,
222,Organisation for Economic Co-operation and Dev...,55.825,10.5898,4.52689,791285,82.6607,78.4586
223,,,,,,,


In [17]:
# Dashboard 1. Quality of human development
qhd = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Dashboard 1', 8, 35)

qhd_col_names = ['Unnamed: 1', 'Lost health expectancy', 'Physicians', 'Hospital beds', 'Pupil–teacher ratio, primary school',
                 'Schools with access to the Internet', 'Unnamed: 14', 'Unnamed: 16', 'Unnamed: 18', 'Unnamed: 20', 
                 'Unnamed: 22', 'Unnamed: 24', 'Unnamed: 26', 'Unnamed: 28']

qhd = fix_df(qhd, qhd_col_names)

qhd_rename_dict = {'Unnamed: 1': 'Country',
                   'Physicians': 'Physicians per 10,000 people',
                   'Hospital beds': 'Hospital beds per 10,000 people',
                   'Schools with access to the Internet': 'Primary schools with access to the Internet',
                   'Unnamed: 14': 'Secondary schools with access to the Internet',
                   'Unnamed: 16': 'PISA Reading',
                   'Unnamed: 18': 'PISA Math',
                   'Unnamed: 20': 'PISA Science',
                   'Unnamed: 22': 'Share of vulnerable employment',
                   'Unnamed: 24': 'Share of rural population with access to electricity',
                   'Unnamed: 26': 'Share of population with safe drinking water',
                   'Unnamed: 28': 'Share of population with sanitation services'}

qhd = qhd.rename(columns=qhd_rename_dict)

In [18]:
qhd

Unnamed: 0,Country,Lost health expectancy,"Physicians per 10,000 people","Hospital beds per 10,000 people","Pupil–teacher ratio, primary school",Primary schools with access to the Internet,Secondary schools with access to the Internet,PISA Reading,PISA Math,PISA Science,Share of vulnerable employment,Share of rural population with access to electricity,Share of population with safe drinking water,Share of population with sanitation services
0,,,,,,Primary schools,Secondary schools,Programme for International Student Assessment...,,,Vulnerable employment,Rural population with access to electricity,Population using safely managed drinking-water...,Population using safely managed sanitation ser...
1,Country,(%),"(per 10,000 people)",,(pupils per teacher),,,Reading,Mathematics,Science,(% of total employment),(%),,
2,,2019,2010–2018,2010–2019,2010–2019,2010–2019,2010–2019,2018,2018,2018,2019,2018,2017,2017
3,Very high human development,,,,,,,,,,,,,
4,Norway,14.5554,29.164,35.3,8.59418,100,100,499.451,500.964,490.413,4.906,100,98,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221,Small island developing states,12.7277,23.1128,25.1943,18.9996,—,—,—,—,—,40.5134,62.5455,..,..
222,,,,,,,,,,,,,,
223,Organisation for Economic Co-operation and Dev...,14.4986,29.1675,46.7569,15.4605,—,—,—,—,—,12.7567,99.988,91.8787,84.3057
224,,,,,,,,,,,,,,


In [19]:
# Dashboard 2. Life-course gender gap 
lgg = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Dashboard 2', 9, 36)

lgg_col_names = ['Unnamed: 1', 'Unnamed: 2', 'Gross enrolment ratio', 'Unnamed: 6', 'Unnamed: 8',
                 'Unnamed: 10', 'Unnamed: 12', 'Unnamed: 14', 'Unnamed: 16', 'Unnamed: 18', 'Unnamed: 20',
                 'Unnamed: 22', 'Unnamed: 24', 'Unnamed: 26']

lgg = fix_df(lgg, lgg_col_names)

lgg_rename_dict = {'Unnamed: 1': 'Country',
                   'Unnamed: 2': 'Male to female births',
                   'Gross enrolment ratio': 'Gender enrolment ratio pre-primary',
                   'Unnamed: 6': 'Gender enrolment ratio primary',
                   'Unnamed: 8': 'Gender enrolment ratio secondary',
                   'Unnamed: 10': 'Youth unemployment rate gender ratio',
                   'Unnamed: 12': 'At least some secondary education gender ratio',
                   'Unnamed: 14': 'Total unemployment rate gender ratio',
                   'Unnamed: 16': 'Share of female employment in nonagriculture',
                   'Unnamed: 18': 'Share of seats held by women in parliament',
                   'Unnamed: 20': 'Share of seats held by women in local government',
                   'Unnamed: 22': 'Share of hour women spent on unpaid domestic chores and care work',
                   'Unnamed: 24': 'Time spent on unpaid domestic chores and care work gender ratio',
                   'Unnamed: 26': 'Old-age pension recipients gender ratio'}

lgg = lgg.rename(columns=lgg_rename_dict)

In [20]:
lgg

Unnamed: 0,Country,Male to female births,Gender enrolment ratio pre-primary,Gender enrolment ratio primary,Gender enrolment ratio secondary,Youth unemployment rate gender ratio,At least some secondary education gender ratio,Total unemployment rate gender ratio,Share of female employment in nonagriculture,Share of seats held by women in parliament,Share of seats held by women in local government,Share of hour women spent on unpaid domestic chores and care work,Time spent on unpaid domestic chores and care work gender ratio,Old-age pension recipients gender ratio
0,,Sex ratio at birth,(female to male ratio),,,Youth unemployment rate,Population with at least some secondary education,Total unemployment rate,"Share of employment in nonagriculture, female",Share of seats held by women,,Time spent on unpaid domestic chores and care ...,,Old-age pension recipients
1,,,,,,,,,,In parliament,In local government,Women ages 15 and older,,
2,Country,(male to female births),Pre-primary,Primary,Secondary,(female to male ratio),(female to male ratio),(female to male ratio),(% of total employment in nonagriculture),(%),,(% of 24-hour day),(female to male ratio),(female to male ratio)
3,,2015–2020,2014–2019,2014–2019,2014–2019,2019,2015–2019,2019,2019,2019,2017–2019,2008–2018,2008–2018,2014–2019
4,Very high human development,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,Small island developing states,1.05663,..,0.949199,0.996369,1.5619,0.94215,1.47375,43.7605,25.0699,—,—,—,..
223,,,,,,,,,,,,,,
224,Organisation for Economic Co-operation and Dev...,1.05232,0.99892,0.995047,1.00193,1.02855,0.965681,1.12482,44.9911,30.8327,—,—,—,0.911424
225,,,,,,,,,,,,,,


In [21]:
# Dashboard 3. Women's empowerment
we = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Dashboard 3', 9, 32)

we_col_names = ['Unnamed: 1', 'Women married by age 18', 'Share of graduates in science,\ntechnology,\nengineering\nand\nmathematics programmes at tertiary level, female',
                'Share of graduates from science,\ntechnology,\nengineering\nand\nmathematics programmes in tertiary education who are female',
                'Female share of employment in senior and middle management',
                'Women with account at financial institution or with mobile money-service provider',
                'Mandatory paid maternity leave', 'Intimate partner', 'Nonintimate partner']

we = fix_df(we, we_col_names)

In [22]:
we_rename_dict = {'Unnamed: 1': 'Country',
                  'Women married by age 18': 'Share of women married by age 18',
                  'Share of graduates in science,\ntechnology,\nengineering\nand\nmathematics programmes at tertiary level, female': 'Share of female in STEM tertiary education programs',
                  'Share of graduates from science,\ntechnology,\nengineering\nand\nmathematics programmes in tertiary education who are female': 'Share of female STEM tertiary education graduates',
                  'Mandatory paid maternity leave': 'Days of mandatory paid maternity leave',
                  'Intimate partner': 'Share of violence against women ever experienced by intimate partner',
                  'Nonintimate partner': 'Share of violence against women ever experienced by nonintimate partner'}
            
we = we.rename(columns=we_rename_dict)

In [23]:
we

Unnamed: 0,Country,Share of women married by age 18,Share of female in STEM tertiary education programs,Share of female STEM tertiary education graduates,Female share of employment in senior and middle management,Women with account at financial institution or with mobile money-service provider,Days of mandatory paid maternity leave,Share of violence against women ever experienced by intimate partner,Share of violence against women ever experienced by nonintimate partner
0,Country,(% of women ages 20–24 who are married or in u...,(%),(%),(%),(% of female population ages 15 and older),(days),(% of female population ages 15 and older),
1,,2005–2019,2009–2019,2009–2019,2009–2019,2017,2019,2005–2019,2005–2019
2,Very high human development,,,,,,,,
3,Norway,..,10.8837,28.4643,32.75,100,..,27,..
4,Ireland,..,14.1037,29.0019,31.26,95.307,182,15,5
...,...,...,...,...,...,...,...,...,...
220,Small island developing states,23.9381,..,..,..,..,82.3448,..,..
221,,,,,,,,,
222,Organisation for Economic Co-operation and Dev...,..,12.9292,32.5817,36.7172,84.5985,122.194,..,..
223,,,,,,,,,


In [24]:
# Dashboard 5. Socioeconomic sustainability
ses = read_human_dev_report('Human Development Reports/2020_statistical_annex_all.xlsx', 'Dashboard 5', 9, 52)

ses_col_names = ['Unnamed: 1', 'Skilled labour force', 'Ratio of education and health expenditure to military expenditure']

ses = fix_df(ses, ses_col_names)

In [25]:
ses_rename_dict = {'Unnamed: 1': 'Country',
                   'Skilled labour force': 'Share of skilled labour force'}

ses = ses.rename(columns=ses_rename_dict)

In [26]:
ses

Unnamed: 0,Country,Share of skilled labour force,Ratio of education and health expenditure to military expenditure
0,Country,(% of labour force),
1,,2010–2019,2010–2017
2,Very high human development,,
3,Norway,84.2811,11.4477
4,Ireland,85.047,33.4942
...,...,...,...
220,Small island developing states,46.4152,..
221,,,
222,Organisation for Economic Co-operation and Dev...,81.4971,7.81891
223,,,


In [27]:
# All the dfs we created
all_dfs = [hdi, gdi, ea, hcm, qhd, lgg, we, ses]

In [28]:
hdr_df = reduce(lambda left, right: pd.merge(left, right, on='Country', how='outer'), all_dfs).reset_index()

In [29]:
row_to_remove = ['Country', 'Regions', 'Human development groups', 'VERY HIGH HUMAN DEVELOPMENT',
                 'HIGH HUMAN DEVELOPMENT', 'MEDIUM HUMAN DEVELOPMENT', 'LOW HUMAN DEVELOPMENT',
                 'OTHER COUNTRIES OR TERRITORIES']

In [30]:
hdr_df = hdr_df[~hdr_df['Country'].isin(row_to_remove)]

In [31]:
hdr_df = hdr_df[~hdr_df['Country'].isnull()]

In [33]:
hdr_df = pd.merge(hdr_df, gii, on='Country', how='left')

In [37]:
hdr_new = hdr_df.iloc[0:195]

In [38]:
# clean up country names to match Read Country Code file
to_replace_ = ['Palestine, State of', 'Hong Kong, China (SAR)', 'Russian Federation', 
               'Korea (Republic of)', 'Iran (Islamic Republic of)']
replace_with_ = ['Palestine', 'Hong Kong', 'Russia', 
                 'South Korea', 'Iran']

def fix_country_names(df, col_name):
    df[col_name] = df[col_name].replace(to_replace_, replace_with_)
    df[col_name] = df[col_name].replace('[\(].*[\)]', '', regex=True)
    
    return df

In [39]:
# copy human development report from countries to countries with cities and the UK
def update_country_cities(df, df_new_col, df_UK_col):
    df.loc['Canada, Ontario', df_new_col] = df.loc['Canada', df_new_col]
    df.loc['Canada, Quebec', df_new_col] = df.loc['Canada', df_new_col]
    df.loc['Russia, Moscow', df_new_col] = df.loc['Russia', df_new_col]
    df.loc['United Arab Emirates, Abu Dhabi', df_new_col] = df.loc['United Arab Emirates', df_new_col]
    df.loc['United Arab Emirates, Dubai', df_new_col] = df.loc['United Arab Emirates', df_new_col]
    df.loc['England', df_new_col] = df_UK_col
    df.loc['Northern Ireland', df_new_col] = df_UK_col

    return df

In [40]:
# obtain columns and values for cities and UK

hdr_new_col = list(hdr_new.columns.values[2:])
hdr_new_UK_col = hdr_new.set_index('Country').loc['United Kingdom']

# merge columns from Read Country Code
hdr_new_1 = fix_country_names(hdr_new, 'Country')#.set_index('Country')
hdr_new_1 = country_ids.merge(hdr_new_1, how='left', on='Country')#.set_index('Country')
hdr_new_1 = hdr_new_1.set_index('Country')

# populate countries with cities and UK
hdr_new_2 = update_country_cities(hdr_new_1, hdr_new_col, hdr_new_UK_col)#.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col_name] = df[col_name].replace(to_replace_, replace_with_)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col_name] = df[col_name].replace('[\(].*[\)]', '', regex=True)


In [41]:
hdr_new_2 = hdr_new_2.replace('..', np.nan)

In [45]:
hdr_new_2 = hdr_new_2.reset_index().drop(columns='Unnamed: 0')

In [46]:
hdr_new_2

Unnamed: 0,Country,country_ID,grade_4,grade_8,index,Human Development Index (HDI),Life expectancy at birth,Expected years of schooling,Mean years of schooling,Gross national income (GNI) per capita,...,Ratio of education and health expenditure to military expenditure,Gender Inequality Index,GII Rank,Maternal mortality ratio,Adolescent birth rate,Share of seats in parliament held by women,Share at least some secondary education Female,Share at least some secondary education Male,Labour force participation rate Female,Labour force participation rate Male
0,Albania,8,yes,no,9680903.0,0.795,78.57,14.695620,10.145730,13998.29989,...,,0.181,42.0,15.0,19.642,29.508197,93.699746,92.497154,46.712,64.568
1,Armenia,51,yes,no,9680915.0,0.776,75.09,13.068610,11.301740,13894.01302,...,3.403877,0.245,54.0,26.0,21.490,23.484848,97.268054,97.151720,47.074,65.887
2,Australia,36,yes,yes,9680841.0,0.944,83.44,21.954330,12.724691,48084.84207,...,6.912926,0.097,25.0,6.0,11.715,36.563877,91.025070,90.944520,60.300,70.855
3,Austria,40,yes,no,9680851.0,0.922,81.54,16.092070,12.546144,56196.89869,...,21.732280,0.069,14.0,5.0,7.340,38.524590,100.000000,99.787054,55.059,66.573
4,Azerbaijan,31,yes,no,9680922.0,0.756,73.01,12.893640,10.575430,13783.67995,...,2.371953,0.323,73.0,26.0,55.838,16.806723,93.901900,97.500760,63.411,69.737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,Turkey,792,yes,yes,9680887.0,0.820,77.69,16.602666,8.106820,27700.89481,...,,0.306,68.0,17.0,26.559,17.449664,50.159190,72.237686,34.013,72.586
65,United Arab Emirates,784,yes,yes,9680864.0,0.890,77.97,14.344100,12.111220,67462.09530,...,,0.079,18.0,3.0,6.546,50.000000,76.022570,81.028510,52.391,93.389
66,"United Arab Emirates, Abu Dhabi",7842,yes,yes,,0.890,77.97,14.344100,12.111220,67462.09530,...,,0.079,18.0,3.0,6.546,50.000000,76.022570,81.028510,52.391,93.389
67,"United Arab Emirates, Dubai",7841,yes,yes,,0.890,77.97,14.344100,12.111220,67462.09530,...,,0.079,18.0,3.0,6.546,50.000000,76.022570,81.028510,52.391,93.389


In [36]:
hdr_df.to_csv('hdr_df_new.csv', index=False)