# World Happiness Report

Happiness Prediction Deep Learning Solution

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import torch.nn.init as init
import os

## Create Column Mappings & Initialize Variables

In [None]:
# Per-year DataFrames are collected in this list by the loading cell.
data_frames = []

# Canonical column names every yearly frame is reduced to after renaming.
common_columns = ['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']

# Raw CSV header -> canonical name.
# The 2015 and 2016 files share one header layout.
_headers_2015_2016 = {
    'Country': 'country',
    'Happiness Rank': 'rank',
    'Happiness Score': 'score',
    'Economy (GDP per Capita)': 'gdp',
    'Family': 'family',
    'Health (Life Expectancy)': 'health',
    'Freedom': 'freedom',
    'Trust (Government Corruption)': 'corruption',
    'Generosity': 'generosity',
}

# The 2017 file uses dot-separated headers.
_headers_2017 = {
    'Country': 'country',
    'Happiness.Rank': 'rank',
    'Happiness.Score': 'score',
    'Economy..GDP.per.Capita.': 'gdp',
    'Family': 'family',
    'Health..Life.Expectancy.': 'health',
    'Freedom': 'freedom',
    'Generosity': 'generosity',
    'Trust..Government.Corruption.': 'corruption',
}

# The 2018 and 2019 files share a second, renamed header layout.
_headers_2018_2019 = {
    'Country or region': 'country',
    'Overall rank': 'rank',
    'Score': 'score',
    'GDP per capita': 'gdp',
    'Social support': 'family',
    'Healthy life expectancy': 'health',
    'Freedom to make life choices': 'freedom',
    'Generosity': 'generosity',
    'Perceptions of corruption': 'corruption',
}

# Each year gets its own dict copy so that editing one year's mapping
# never leaks into another year's.
column_mapping = {
    '2015': dict(_headers_2015_2016),
    '2016': dict(_headers_2015_2016),
    '2017': dict(_headers_2017),
    '2018': dict(_headers_2018_2019),
    '2019': dict(_headers_2018_2019),
}

# Years to load, in the order the mappings are declared above.
years = list(column_mapping)

## Import Datasets and Map Columns

In [None]:
# Start from an empty list so that re-running this cell does not append the
# same yearly frames a second time (which would duplicate every row after
# the merge).
data_frames = []

for year in years:
    file_path = f'./dataset/{year}.csv'

    # Rename the year-specific headers to the common names, then keep only
    # the shared columns in the common order. A column that is missing after
    # renaming raises KeyError here rather than failing later.
    df = pd.read_csv(file_path).rename(columns=column_mapping[year])[common_columns]

    # Sanity check: every year should print the same list of column names.
    print(list(df.columns))

    data_frames.append(df)

['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']


## Merge Data

In [None]:
# Stack the yearly frames into a single table with a fresh 0..n-1 index.
merged_data = pd.concat(data_frames, ignore_index=True)

# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only added a stray "None" to the output.
merged_data.info()

# Drop every row that has a missing value in any column, then show the
# summary again to confirm the null counts.
merged_data = merged_data.dropna()
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 782 entries, 0 to 781
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     782 non-null    object 
 1   rank        782 non-null    int64  
 2   score       782 non-null    float64
 3   gdp         782 non-null    float64
 4   family      782 non-null    float64
 5   health      782 non-null    float64
 6   freedom     782 non-null    float64
 7   corruption  781 non-null    float64
 8   generosity  782 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 55.1+ KB
None
<class 'pandas.core.frame.DataFrame'>
Index: 781 entries, 0 to 781
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     781 non-null    object 
 1   rank        781 non-null    int64  
 2   score       781 non-null    float64
 3   gdp         781 non-null    float64
 4   family      781 non-null   

## Define Target and Training Columns

In [None]:
# Feature columns fed to the network; their order fixes the column order of X.
predictor_columns = [
    'gdp',
    'family',
    'health',
    'freedom',
    'generosity',
    'corruption',
]

# Column the model learns to predict.
target_column = 'score'

## Split the data

In [None]:
# Pull the feature matrix and the target vector out of the merged frame as
# NumPy arrays.
X, y = merged_data[predictor_columns].values, merged_data[target_column].values

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=69,
)

## Normalize data

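A robust scaler (`RobustScaler`) is used to normalize the features: each feature is centered on its median and scaled by its interquartile range, so the scaled values are not confined to the 0–1 range. The scaler is fit on the training set only and then applied unchanged to the test set.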

In [None]:
from sklearn.preprocessing import RobustScaler

# One scaler instance is shared by both splits.
scaler = RobustScaler()

# The scaling statistics are learned from the training split only (first
# element, evaluated first) and then reused as-is for the test split, so no
# information from the test set leaks into the preprocessing.
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)


## Convert data to PyTorch tensors

In [None]:
# Convert each NumPy split to a float32 tensor, the default dtype of the
# nn.Linear weights used by the model.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_test, y_test)
)

## Define the Neural Network

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

class HappinessPredictor(nn.Module):
    """Fully connected regression network that outputs one value per sample.

    Architecture: input -> 256 -> 128 -> 64 -> 32 -> 1. The first three
    hidden layers are each followed by batch normalization, ReLU and
    dropout (p=0.3); the fourth hidden layer uses ReLU only; the output
    layer is linear.

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        # Zero-argument super() resolves to this class. The previous call
        # named a non-existent ``ComplexHappinessPredictor`` and raised
        # NameError as soon as the model was instantiated.
        super().__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

        # Kaiming (He) uniform initialization for the weights of the layers
        # that feed a ReLU. Biases keep the nn.Linear defaults.
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            init.kaiming_uniform_(layer.weight, mode='fan_in', nonlinearity='relu')
        # The output layer is not followed by ReLU; it keeps the initializer's
        # default nonlinearity setting, as in the original code.
        init.kaiming_uniform_(self.fc5.weight, mode='fan_in')

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: Float tensor of shape (batch, input_size).

        Returns:
            Tensor of shape (batch, 1) with the predicted values.
        """
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.relu(self.fc4(x))
        x = self.fc5(x)
        return x



## Loss Function & Optimizer

In [None]:
import torch.optim as optim

# Seed PyTorch's global RNG before the model is built so that the random
# weight initialization (and the dropout masks drawn during training, when
# the cells are run in order) are repeatable from run to run.
torch.manual_seed(69)

# SGD hyperparameters
momentum = 0.9
learning_rate = 0.001

# Instantiate the model with one input unit per feature column
input_size = X_train_tensor.shape[1]
model = HappinessPredictor(input_size)

# Smooth L1 loss: quadratic for small errors and linear for large ones, so
# it is a Huber-style loss rather than plain Mean Absolute Error.
criterion = nn.SmoothL1Loss()

# SGD with momentum and a small L2 penalty (weight decay) on the parameters
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=1e-5)

## Training the Neural Network

In [None]:
num_epochs =6000

# Put the model in training mode explicitly. If the evaluation cell has
# already been run, the model is still in eval mode, and re-running this
# cell would otherwise train with dropout disabled and batch norm frozen.
model.train()

# The model outputs shape (N, 1); reshape the targets once, outside the
# loop, so the loss compares tensors of the same shape.
train_targets = y_train_tensor.view(-1, 1)

# Full-batch training: every epoch is a single optimizer step over the whole
# training set.
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, train_targets)
    loss.backward()
    optimizer.step()

    # Report the training loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/2000], Loss: 0.4090
Epoch [200/2000], Loss: 0.2626
Epoch [300/2000], Loss: 0.2139
Epoch [400/2000], Loss: 0.1992
Epoch [500/2000], Loss: 0.1737
Epoch [600/2000], Loss: 0.1652
Epoch [700/2000], Loss: 0.1680
Epoch [800/2000], Loss: 0.1491
Epoch [900/2000], Loss: 0.1362
Epoch [1000/2000], Loss: 0.1323
Epoch [1100/2000], Loss: 0.1304
Epoch [1200/2000], Loss: 0.1232
Epoch [1300/2000], Loss: 0.1188
Epoch [1400/2000], Loss: 0.1187
Epoch [1500/2000], Loss: 0.1098
Epoch [1600/2000], Loss: 0.1018
Epoch [1700/2000], Loss: 0.1009
Epoch [1800/2000], Loss: 0.0957
Epoch [1900/2000], Loss: 0.1018
Epoch [2000/2000], Loss: 0.1065


## Evaluate Model

In [None]:
# Inference mode: dropout is disabled and batch norm uses its running
# statistics instead of per-batch statistics.
model.eval()

# No gradients are needed for evaluation.
with torch.no_grad():
    test_outputs = model(X_test_tensor)

# Compare the held-out targets with the predictions as NumPy arrays.
y_true = y_test_tensor.numpy()
y_pred = test_outputs.numpy()
mse = mean_squared_error(y_true, y_pred)
print(f'Mean Squared Error on Test Data: {mse:.4f}')

Mean Squared Error on Test Data: 0.2604
