# Dataset and preprocessing

In [1]:
import pandas as pd
import re

#
df_happy = pd.read_csv('2019happy.csv')
df_happy.columns = df_happy.columns.str.strip()
df_happy = df_happy.rename(columns={
    'Country or region': 'Country',
    'Score':             'Happiness',
    'Social support':    'Social_support'
})
df_happy['Year'] = 2019

#
df = pd.read_csv('WDI_new.csv')
df.columns = df.columns.str.strip()
df = df.rename(columns={'Country Name':'Country','Series Code':'Series_Code'})

#
year_cols = [c for c in df.columns if re.match(r'\d{4} \[YR\d{4}\]', c)]
year_map = {c: c.split(' ')[0] for c in year_cols}
df = df.rename(columns=year_map)
df_long = df.melt(
    id_vars=['Country','Series_Code'],
    value_vars=list(year_map.values()),
    var_name='Year', value_name='Value'
)
df_long['Year'] = df_long['Year'].astype(int)
df_long['Value'] = pd.to_numeric(df_long['Value'], errors='coerce')

#
wanted = {
    "NY.GDP.PCAP.KN":    "GDP_per_capita",
    "NY.GNP.PCAP.KN":    "GNI_per_capita",
    "SI.POV.GINI":       "Gini_index",
    "SP.DYN.LE00.IN":    "Life_expectancy",
    "SL.UEM.TOTL.NE.ZS": "Unemployment_rate",
    "SE.XPD.TOTL.GB.ZS": "Edu_expenditure_pct" 
}
w19 = df_long[(df_long['Year']==2019) & 
             (df_long['Series_Code'].isin(wanted))]
wide = (
    w19.pivot_table(
        index=['Country','Year'],
        columns='Series_Code',
        values='Value',
        aggfunc='first'
    )
    .rename(columns=wanted)
    .reset_index()
)

# 
df_final = pd.merge(df_happy[['Country','Year','Happiness']],
                    wide, on=['Country','Year'], how='inner')

df_final.index = df_final.index + 1
display(df_final.head())

Unnamed: 0,Country,Year,Happiness,GDP_per_capita,GNI_per_capita,Edu_expenditure_pct,Gini_index,Unemployment_rate,Life_expectancy
1,Finland,2019,7.769,41368.22,41713.578984,10.522777,27.7,6.695,81.982927
2,Denmark,2019,7.6,384261.9,393851.72834,12.7429,27.7,5.018,81.45122
3,Norway,2019,7.554,618620.7,626366.192611,10.874369,27.7,3.684,82.958537
4,Iceland,2019,7.494,7584458.0,,16.135857,,3.507,83.163415
5,Netherlands,2019,7.488,44392.47,42744.127516,11.834926,29.2,3.379,82.112195
