# World Happiness Report

A deep-learning solution that predicts each country's happiness score from the World Happiness Report data for 2015–2019.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os

## Create Column Mappings & Initialize Variables

In [2]:
# Accumulator for the per-year DataFrames.
data_frames = []

# Shared column names that every yearly file is reduced to after renaming.
common_columns = [
    'country', 'rank', 'score',
    'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity',
]

# The raw CSV headers differ between years, so each header scheme is written
# once here and then attached to the years that use it.
_headers_2015_2016 = {
    'Country': 'country',
    'Happiness Rank': 'rank',
    'Happiness Score': 'score',
    'Economy (GDP per Capita)': 'gdp',
    'Family': 'family',
    'Health (Life Expectancy)': 'health',
    'Freedom': 'freedom',
    'Trust (Government Corruption)': 'corruption',
    'Generosity': 'generosity',
}

# 2017 uses dot-separated header names.
_headers_2017 = {
    'Country': 'country',
    'Happiness.Rank': 'rank',
    'Happiness.Score': 'score',
    'Economy..GDP.per.Capita.': 'gdp',
    'Family': 'family',
    'Health..Life.Expectancy.': 'health',
    'Freedom': 'freedom',
    'Generosity': 'generosity',
    'Trust..Government.Corruption.': 'corruption',
}

# 2018 and 2019 use 'Social support' for the column mapped to 'family'.
_headers_2018_2019 = {
    'Country or region': 'country',
    'Overall rank': 'rank',
    'Score': 'score',
    'GDP per capita': 'gdp',
    'Social support': 'family',
    'Healthy life expectancy': 'health',
    'Freedom to make life choices': 'freedom',
    'Generosity': 'generosity',
    'Perceptions of corruption': 'corruption',
}

# year -> {raw header: shared name}. Each year gets its own dict copy so that
# editing one year's mapping never affects another.
column_mapping = {
    '2015': dict(_headers_2015_2016),
    '2016': dict(_headers_2015_2016),
    '2017': dict(_headers_2017),
    '2018': dict(_headers_2018_2019),
    '2019': dict(_headers_2018_2019),
}

# Years to load, in the order they are keyed above.
years = list(column_mapping)

## Import Datasets and Map Columns

In [3]:
# Start from an empty list every time this cell runs; otherwise re-executing
# the cell would append a second copy of every year to `data_frames`.
data_frames = []

for year in years:
    file_path = f'./dataset/{year}.csv'
    raw_df = pd.read_csv(file_path)

    # Rename this year's headers to the shared names (returns a new frame
    # rather than mutating the one just read).
    df = raw_df.rename(columns=column_mapping[year])

    # Fail with the offending file named if the mapping did not produce
    # every shared column; plain indexing would raise KeyError without it.
    missing = [col for col in common_columns if col not in df.columns]
    if missing:
        raise KeyError(f'{file_path}: missing expected columns {missing}')

    # Keep only the shared columns, in the shared order.
    df = df[common_columns]

    print(list(df.columns))

    data_frames.append(df)

['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']


## Merge Data

In [5]:
# Stack the per-year frames into one table; ignore_index=True renumbers the
# rows instead of keeping each file's own index.
merged_raw = pd.concat(data_frames, ignore_index=True)

# DataFrame.info() prints its report itself and returns None, so it is called
# directly: wrapping it in print() emits an extra "None" line.
merged_raw.info()

# Drop every row that has a missing value in any column. The un-dropped frame
# stays available as `merged_raw` for inspection.
merged_data = merged_raw.dropna()
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 782 entries, 0 to 781
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     782 non-null    object 
 1   rank        782 non-null    int64  
 2   score       782 non-null    float64
 3   gdp         782 non-null    float64
 4   family      782 non-null    float64
 5   health      782 non-null    float64
 6   freedom     782 non-null    float64
 7   corruption  781 non-null    float64
 8   generosity  782 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 55.1+ KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 781 entries, 0 to 781
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     781 non-null    object 
 1   rank        781 non-null    int64  
 2   score       781 non-null    float64
 3   gdp         781 non-null    float64
 4   family      781 non-nu

## Define Target and Predictor Columns

In [None]:
# Column the model is asked to predict.
target_column = 'score'

# Input columns, listed in the order they are selected from the merged table.
predictor_columns = [
    'gdp',
    'family',
    'health',
    'freedom',
    'generosity',
    'corruption',
]

## Split the data

In [None]:
# Pull the predictors and the target out of the merged table as NumPy arrays.
X = merged_data[predictor_columns].to_numpy()
y = merged_data[target_column].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Creating the model