## **IMPORT THE LIBRARIES**

In [1]:
import os
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

## LOAD THE DATASET AFTER CLEANING & TRANSOFRMATION

In [2]:
# import the MyDrive library from colab
from google.colab import drive
drive.mount('/content/drive')

# the directory owns this notebook and dataset
os.chdir('/content/drive/MyDrive/comp333/')

Mounted at /content/drive


In [3]:
# load the fully transformed data
dataset = pd.read_csv('final_transformed_dataset.csv')
print(dataset.columns)

Index(['Year', 'Atheists rate', 'Child mortality rate %',
       'Consumer price index', 'female labor force participation rate', 'GDP',
       'GDP per capita', 'Period life expectancy', 'Death rate', 'Population',
       ...
       'Entity_United States Virgin Islands', 'Entity_Uruguay',
       'Entity_Uzbekistan', 'Entity_Vanuatu', 'Entity_Venezuela',
       'Entity_Vietnam', 'Entity_Yemen', 'Entity_Zambia', 'Entity_Zimbabwe',
       'Birth rate(per 1000)'],
      dtype='object', length=230)


In [4]:
dataset.head()

Unnamed: 0,Year,Atheists rate,Child mortality rate %,Consumer price index,female labor force participation rate,GDP,GDP per capita,Period life expectancy,Death rate,Population,...,Entity_United States Virgin Islands,Entity_Uruguay,Entity_Uzbekistan,Entity_Vanuatu,Entity_Venezuela,Entity_Vietnam,Entity_Yemen,Entity_Zambia,Entity_Zimbabwe,Birth rate(per 1000)
0,0.0,0.0,0.022342,0.002414,0.441184,7.8e-05,0.193799,0.814113,0.047246,4e-05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.662
1,0.866667,0.0,0.016182,0.005016,0.513658,0.000151,0.233354,0.849185,0.031638,6.7e-05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.3
2,0.233333,0.0,0.02166,0.003254,0.466299,0.000116,0.229769,0.819308,0.041741,5.1e-05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.388
3,0.9,0.0,0.015907,0.004964,0.513834,0.000159,0.244937,0.853136,0.03157,6.8e-05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.53
4,0.333333,0.0,0.021312,0.003527,0.470444,0.000129,0.234609,0.820918,0.03942,5.6e-05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.427


# **BIRTH RATE PREDICTION BY COUNTRY**

In [5]:
X = dataset.iloc[:, :-1].values # input features
y = dataset.iloc[:, -1].values # target label : 'Birth rate(per 1000)'

In [6]:
# split the dataset into the train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

### Model 1 - Linear Regression

In [7]:
# train the Linear Regression model
from sklearn.linear_model import LinearRegression
model_1 = LinearRegression()
model_1.fit(X_train, y_train)

In [17]:
# model 1 evaluation
y_pred_1 = model_1.predict(X_test)
mse_1 = mean_squared_error(y_test, y_pred_1)

print(f'MSE from Model 1: {mse_1:.4f}')

MSE from Model 1: 3.9375


### Model 2 - SVM Regression

In [9]:
from sklearn.svm import SVR
model_2 = SVR(kernel = 'rbf')
model_2.fit(X_train, y_train)

In [18]:
# model 2 evaluation
y_pred_2 = model_2.predict(X_test)
mse_2 = mean_squared_error(y_test, y_pred_2)

print(f'MSE from Model 2: {mse_2:.4f}')

MSE from Model 2: 119.7355


### Model 3 - MLP(Multi Layer Perceptron)

In [11]:
# dataloader
train_dataset = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
test_dataset = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))

batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [12]:
# define model 3
class Model(nn.Module):
    def __init__(self, input_size):
        super(Model, self).__init__()
        self.fc = nn.Linear(input_size, 1)

    def forward(self, x):
        return self.fc(x)

In [13]:
input_size = X_train.shape[1]
model_3 = Model(input_size)

In [14]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model_3.parameters(), lr=0.001)

In [15]:
# training
num_epochs = 10

for epoch in range(num_epochs):
    model_3.train()
    total_loss = 0.0

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model_3(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {total_loss / len(train_loader):.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/10, Train Loss: 605.0936
Epoch 2/10, Train Loss: 510.1776
Epoch 3/10, Train Loss: 431.7811
Epoch 4/10, Train Loss: 364.7514
Epoch 5/10, Train Loss: 309.1211
Epoch 6/10, Train Loss: 264.5535
Epoch 7/10, Train Loss: 228.6176
Epoch 8/10, Train Loss: 202.0896
Epoch 9/10, Train Loss: 181.7824
Epoch 10/10, Train Loss: 168.0281


In [16]:
# testing
model_3.eval()
with torch.no_grad():
    y_pred = []
    for inputs, targets in test_loader:
        outputs = model_3(inputs)
        y_pred.append(outputs.numpy())
y_pred = np.concatenate(y_pred)

mse_3 = mean_squared_error(y_test, y_pred)
print(f'MSE from Model 3: {mse_3:.4f}')

MSE from Model 3: 201.8581


In [19]:
print(f'MSE from Model 1(Linear Regression): {mse_1:.4f}')
print(f'MSE from Model 2(SVM Regression): {mse_2:.4f}')
print(f'MSE from Model 3(MLP): {mse_3:.4f}')

MSE from Model 1: 3.9375
MSE from Model 2: 119.7355
MSE from Model 3: 201.8581


=> Best Model : **Model 1(Linear Regression)**