# Loading the Datasets

In [8]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Read both source tables from CSV files given by relative path.
happiness_data = pd.read_csv("WorldHappinessReport2019.csv")
hdi_data = pd.read_csv("human_development.csv")

# Print each table's column index so the join keys can be picked out by eye.
print("World Happiness Report Columns:", happiness_data.columns, sep="\n")
print("\nHuman Development Report Columns:", hdi_data.columns, sep="\n")

World Happiness Report Columns:
Index(['Overall rank', 'Country or region', 'Score', 'GDP per capita',
       'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption'],
      dtype='object')

Human Development Report Columns:
Index(['HDI Rank', 'Country', 'Human Development Index (HDI)',
       'Life Expectancy at Birth', 'Expected Years of Education',
       'Mean Years of Education', 'Gross National Income (GNI) per Capita',
       'GNI per Capita Rank Minus HDI Rank'],
      dtype='object')


# Merge the datasets using the 'Country or region' and 'Country' columns

In [9]:
# Join the two reports on country name. pd.merge defaults to an inner join,
# so only countries whose names are spelled identically in both tables are
# kept; every other row is dropped without any warning.
merged_data = pd.merge(
    happiness_data,
    hdi_data,
    left_on='Country or region',
    right_on='Country',
)
print(merged_data)

# Make the dropped rows visible: list the names on each side that found no
# exact match, so spelling differences between the reports can be reconciled
# before the analysis relies on the merged frame.
happiness_unmatched = happiness_data.loc[
    ~happiness_data['Country or region'].isin(hdi_data['Country']),
    'Country or region',
].tolist()
hdi_unmatched = hdi_data.loc[
    ~hdi_data['Country'].isin(happiness_data['Country or region']),
    'Country',
].tolist()

print(
    f"\nMerged rows: {len(merged_data)} "
    f"(happiness rows: {len(happiness_data)}, HDI rows: {len(hdi_data)})"
)
print(f"Happiness countries without an HDI match ({len(happiness_unmatched)}):")
print(happiness_unmatched)
print(f"HDI countries without a happiness match ({len(hdi_unmatched)}):")
print(hdi_unmatched)

     Overall rank         Country or region  Score  GDP per capita  \
0               1                   Finland  7.769           1.340   
1               2                   Denmark  7.600           1.383   
2               3                    Norway  7.554           1.488   
3               4                   Iceland  7.494           1.380   
4               5               Netherlands  7.488           1.396   
..            ...                       ...    ...             ...   
130           151                     Yemen  3.380           0.287   
131           152                    Rwanda  3.334           0.359   
132           154               Afghanistan  3.203           0.350   
133           155  Central African Republic  3.083           0.026   
134           156               South Sudan  2.853           0.306   

     Social support  Healthy life expectancy  Freedom to make life choices  \
0             1.587                    0.986                         0.596   
1  

# Check for the number of missing values in each column

In [10]:
# Per-column count of missing entries in the merged frame.
missing_values = merged_data.isna().sum()
print("\nMissing values in each column:", missing_values, sep="\n")


Missing values in each column:
Overall rank                              0
Country or region                         0
Score                                     0
GDP per capita                            0
Social support                            0
Healthy life expectancy                   0
Freedom to make life choices              0
Generosity                                0
Perceptions of corruption                 0
HDI Rank                                  0
Country                                   0
Human Development Index (HDI)             0
Life Expectancy at Birth                  0
Expected Years of Education               0
Mean Years of Education                   0
Gross National Income (GNI) per Capita    0
GNI per Capita Rank Minus HDI Rank        0
dtype: int64


# Select columns to normalize and apply min-max scaling

In [12]:
# Columns that are cleaned to float and then rescaled together.
columns_to_normalize = [
    'GDP per capita', 'Healthy life expectancy',
    'Life Expectancy at Birth', 'Mean Years of Education',
    'Gross National Income (GNI) per Capita',
]

# Strip comma separators from each selected column, then cast it to float
# so the scaler receives purely numeric input.
for column in columns_to_normalize:
    without_commas = merged_data[column].replace(',', '', regex=True)
    merged_data[column] = without_commas.astype(float)

# Fit one min-max scaler on the selected columns and overwrite them in place
# with the scaled values.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(merged_data[columns_to_normalize])
merged_data[columns_to_normalize] = scaled_values

print(merged_data)


     Overall rank         Country or region  Score  GDP per capita  \
0               1                   Finland  7.769        0.792521   
1               2                   Denmark  7.600        0.818456   
2               3                    Norway  7.554        0.881785   
3               4                   Iceland  7.494        0.816647   
4               5               Netherlands  7.488        0.826297   
..            ...                       ...    ...             ...   
130           151                     Yemen  3.380        0.157419   
131           152                    Rwanda  3.334        0.200844   
132           154               Afghanistan  3.203        0.195416   
133           155  Central African Republic  3.083        0.000000   
134           156               South Sudan  2.853        0.168878   

     Social support  Healthy life expectancy  Freedom to make life choices  \
0             1.587                 0.864154                         0.596   
1  