# World Happiness Report

Happiness Prediction Deep Learning Solution

## Data Preprocessing

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os

### Create Column Mappings & Initialize Variables

In [2]:
data_frames = []
common_columns = ['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
column_mapping = {
    '2015': {
        'Country': 'country',
        'Happiness Rank': 'rank',
        'Happiness Score': 'score',
        'Economy (GDP per Capita)': 'gdp',
        'Family': 'family',
        'Health (Life Expectancy)': 'health',
        'Freedom': 'freedom',
        'Trust (Government Corruption)': 'corruption',
        'Generosity': 'generosity'
    },
    '2016': {
        'Country': 'country',
        'Happiness Rank': 'rank',
        'Happiness Score': 'score',
        'Economy (GDP per Capita)': 'gdp',
        'Family': 'family',
        'Health (Life Expectancy)': 'health',
        'Freedom': 'freedom',
        'Trust (Government Corruption)': 'corruption',
        'Generosity': 'generosity'
    },
    '2017': {
        'Country':'country',
        'Happiness.Rank':'rank',
        'Happiness.Score':'score',
        'Economy..GDP.per.Capita.':'gdp',
        'Family':'family',
        'Health..Life.Expectancy.':'health',
        'Freedom':'freedom',
        'Generosity':'generosity',
        'Trust..Government.Corruption.':'corruption'
    },
    '2018': {
        'Country or region':'country',
        'Overall rank':'rank',
        'Score':'score',
        'GDP per capita':'gdp',
        'Social support':'family',
        'Healthy life expectancy':'health',
        'Freedom to make life choices':'freedom',
        'Generosity':'generosity',
        'Perceptions of corruption':'corruption'
    },
     '2019': {
        'Country or region':'country',
        'Overall rank':'rank',
        'Score':'score',
        'GDP per capita':'gdp',
        'Social support':'family',
        'Healthy life expectancy':'health',
        'Freedom to make life choices':'freedom',
        'Generosity':'generosity',
        'Perceptions of corruption':'corruption'
    }
} 

years = ['2015', '2016','2017','2018','2019']

### Import Datasets and Map Columns

In [3]:
for year in years:
    file_path = f'./dataset/{year}.csv'
    df = pd.read_csv(file_path)
    
    # Rename columns to common names
    df.rename(columns=column_mapping[year], inplace=True)
    
    df = df[common_columns]
    
    print(list(df.head()))
    
    data_frames.append(df)

['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']
['country', 'rank', 'score', 'gdp', 'family', 'health', 'freedom', 'corruption', 'generosity']


### Merge Data

In [4]:
merged_data = pd.concat(data_frames, ignore_index=True)
print(merged_data.info())
merged_data = merged_data.dropna()
print(merged_data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 782 entries, 0 to 781
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     782 non-null    object 
 1   rank        782 non-null    int64  
 2   score       782 non-null    float64
 3   gdp         782 non-null    float64
 4   family      782 non-null    float64
 5   health      782 non-null    float64
 6   freedom     782 non-null    float64
 7   corruption  781 non-null    float64
 8   generosity  782 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 55.1+ KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 781 entries, 0 to 781
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     781 non-null    object 
 1   rank        781 non-null    int64  
 2   score       781 non-null    float64
 3   gdp         781 non-null    float64
 4   family      781 non-nu

### Define Target and Training Columns

In [5]:
target_column = 'score'

predictor_columns = [ 'gdp', 'family', 'health', 'freedom', 'generosity', 'corruption']

### Split the data

In [6]:
# Split the data into training and testing sets
X = merged_data[predictor_columns].values
y = merged_data[target_column].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Creating the model

### Standardize the data

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

### Convert floats into tensors

In [8]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

### Define the Neural Network

In [9]:
class HappinessPredictor(nn.Module):
    def __init__(self, input_size):
        super(HappinessPredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

### Create the Model and define the loss function and optimizer

In [10]:
model = HappinessPredictor(input_size=len(predictor_columns))
# torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
criterion = nn.MSELoss()  # Mean Squared Error loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

### Training the neural network

In [11]:
num_epochs = 5000
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [100/5000], Loss: 2.6765
Epoch [200/5000], Loss: 2.0004
Epoch [300/5000], Loss: 1.7052
Epoch [400/5000], Loss: 1.4762
Epoch [500/5000], Loss: 1.3362
Epoch [600/5000], Loss: 1.2959
Epoch [700/5000], Loss: 1.2870
Epoch [800/5000], Loss: 1.2835
Epoch [900/5000], Loss: 1.2817
Epoch [1000/5000], Loss: 1.2805
Epoch [1100/5000], Loss: 1.2797
Epoch [1200/5000], Loss: 1.2791
Epoch [1300/5000], Loss: 1.2785
Epoch [1400/5000], Loss: 1.2781
Epoch [1500/5000], Loss: 1.2778
Epoch [1600/5000], Loss: 1.2775
Epoch [1700/5000], Loss: 1.2772
Epoch [1800/5000], Loss: 1.2770
Epoch [1900/5000], Loss: 1.2768
Epoch [2000/5000], Loss: 1.2766
Epoch [2100/5000], Loss: 1.2764
Epoch [2200/5000], Loss: 1.2763
Epoch [2300/5000], Loss: 1.2762
Epoch [2400/5000], Loss: 1.2761
Epoch [2500/5000], Loss: 1.2760
Epoch [2600/5000], Loss: 1.2759
Epoch [2700/5000], Loss: 1.2758
Epoch [2800/5000], Loss: 1.2758
Epoch [2900/5000], Loss: 1.2757
Epoch [3000/5000], Loss: 1.2756
Epoch [3100/5000], Loss: 1.2756
Epoch [3200/5000]

### Evaluate Model

In [12]:
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    
mse = mean_squared_error(y_test.numpy(), test_outputs.numpy())
print(f'Mean Squared Error on Test Data: {mse:.4f}')

Mean Squared Error on Test Data: 1.2488


 1.2551
 1.2371