# Split and normalize the dataset, then fit and test regression models

In [None]:
import pandas as pd
from sklearn import preprocessing, linear_model
from sklearn.model_selection import train_test_split
import math
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
import lightgbm as lgb
import xgboost as xgb

In [None]:
# Read the cleaned dataset from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='cleaned_csv.csv')

# Trailing expression: the notebook displays the column labels.
df.columns

## Split the dataset
The target column is 'Life expectancy'.

In [None]:
# Target vector: the 'Life expectancy' column.
Y = df['Life expectancy']
# Feature matrix: every remaining column.
X = df.drop(columns=['Life expectancy'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

## Scale the dataset

In [None]:
# Min-max scale features and target.
#
# Both scalers are fitted on the TRAINING split only and then merely applied
# to the test split. Re-fitting on the test data would (a) scale the test
# features with different min/max values than the ones the models were
# trained on, and (b) overwrite y_scaler's statistics, so that later
# y_scaler.inverse_transform(...) calls would map predictions back to the
# original scale using test-set statistics instead of the training ones.
y_scaler = preprocessing.MinMaxScaler()
x_scaler = preprocessing.MinMaxScaler()

# Learn the scaling parameters from the training data and apply them.
X_train_scaled = x_scaler.fit_transform(X_train.values)
# The scaler expects a 2-D array, hence the reshape to a single column.
y_train_scaled = y_scaler.fit_transform(Y_train.values.reshape(-1, 1))

# Apply the already-fitted scalers to the test data (no re-fitting).
# Test values outside the training range may fall slightly outside [0, 1].
X_test_scaled = x_scaler.transform(X_test.values)
y_test_scaled = y_scaler.transform(Y_test.values.reshape(-1, 1))

## Models

### Lasso Regression

In [None]:
# Build and train the Lasso model in one step (fit returns the estimator),
# then predict the scaled target for the test features.
lasso_r = Lasso(alpha=0.01).fit(X_train_scaled, y_train_scaled)
predictions = lasso_r.predict(X_test_scaled)

#### Test Lasso Regression

In [None]:
# Lasso metrics on the scaled test set.
print('R2-squared:', lasso_r.score(X_test_scaled, y_test_scaled))

# Compute the MSE once and reuse it for the root-MSE line.
lasso_mse = mean_squared_error(y_test_scaled, predictions)
print('mean squared error:', lasso_mse)
print('root squared mean squared error:', math.sqrt(lasso_mse))

#### Test Lasso Regression using original scale

In [None]:
# Map the scaled Lasso predictions back to the original target scale.
ypred = y_scaler.inverse_transform(predictions.reshape(-1, 1))
print(Y_test.shape)

# Flatten the single-column result so it lines up with Y_test.
ypred = ypred.reshape(len(Y_test),)
print(ypred.shape)

# Errors on the original scale; the MSE is computed once and reused.
lasso_mse_original = mean_squared_error(Y_test, ypred)
print('mean squared error:', lasso_mse_original)
print('root squared mean squared error:', math.sqrt(lasso_mse_original))

### Ridge Regression

In [None]:
# Build and train the Ridge model in one step (fit returns the estimator),
# then predict the scaled target for the test features.
ridge_r = Ridge(alpha=0.01, random_state=938).fit(X_train_scaled, y_train_scaled)
predictions = ridge_r.predict(X_test_scaled)

#### Test Ridge Regression

In [None]:
# Ridge metrics on the scaled test set.
print('R2-squared:', ridge_r.score(X_test_scaled, y_test_scaled))

# Compute the MSE once and reuse it for the root-MSE line.
ridge_mse = mean_squared_error(y_test_scaled, predictions)
print('mean squared error:', ridge_mse)
print('root squared mean squared error:', math.sqrt(ridge_mse))

#### Test Ridge Regression using original scale

In [None]:
# Map the scaled Ridge predictions back to the original target scale.
ypred = y_scaler.inverse_transform(predictions.reshape(-1, 1))
print(Y_test.shape)

# Flatten the single-column result so it lines up with Y_test.
ypred = ypred.reshape(len(Y_test),)
print(ypred.shape)

# Errors on the original scale; the MSE is computed once and reused.
ridge_mse_original = mean_squared_error(Y_test, ypred)
print('mean squared error:', ridge_mse_original)
print('root squared mean squared error:', math.sqrt(ridge_mse_original))

### Random Forest

In [None]:
# Random forest with 10 trees and a fixed seed. The target is flattened
# to 1-D with ravel() before fitting; fit returns the fitted estimator.
rf_model = RandomForestRegressor(n_estimators=10, random_state=123).fit(
    X_train_scaled,
    y_train_scaled.ravel(),
)
predictions = rf_model.predict(X_test_scaled)

#### Test Random Forest

In [None]:
# Random forest metrics on the scaled test set.
print('R2-squared:', rf_model.score(X_test_scaled, y_test_scaled))

# Compute the MSE once and reuse it for the root-MSE line.
rf_mse = mean_squared_error(y_test_scaled, predictions)
print('mean squared error:', rf_mse)
print('root squared mean squared error:', math.sqrt(rf_mse))

#### Test Random Forest using original scale

In [None]:
# Map the scaled random forest predictions back to the original target scale.
ypred = y_scaler.inverse_transform(predictions.reshape(-1, 1))
print(Y_test.shape)

# Flatten the single-column result so it lines up with Y_test.
ypred = ypred.reshape(len(Y_test),)
print(ypred.shape)

# Errors on the original scale; the MSE is computed once and reused.
rf_mse_original = mean_squared_error(Y_test, ypred)
print('mean squared error:', rf_mse_original)
print('root squared mean squared error:', math.sqrt(rf_mse_original))

### LightGBM

In [None]:
# LightGBM regressor with library-default hyperparameters. The target is
# flattened to 1-D with ravel() before fitting; fit returns the estimator.
gbm = lgb.LGBMRegressor().fit(X_train_scaled, y_train_scaled.ravel())
predictions = gbm.predict(X_test_scaled)

#### Test LightGBM

In [None]:
# LightGBM metrics on the scaled test set.
print('R2-squared:', gbm.score(X_test_scaled, y_test_scaled))

# Compute the MSE once and reuse it for the root-MSE line.
gbm_mse = mean_squared_error(y_test_scaled, predictions)
print('mean squared error:', gbm_mse)
print('root squared mean squared error:', math.sqrt(gbm_mse))

#### Test LightGBM using original scale

In [None]:
# Map the scaled LightGBM predictions back to the original target scale.
ypred = y_scaler.inverse_transform(predictions.reshape(-1, 1))
print(Y_test.shape)

# Flatten the single-column result so it lines up with Y_test.
ypred = ypred.reshape(len(Y_test),)
print(ypred.shape)

# Errors on the original scale; the MSE is computed once and reused.
gbm_mse_original = mean_squared_error(Y_test, ypred)
print('mean squared error:', gbm_mse_original)
print('root squared mean squared error:', math.sqrt(gbm_mse_original))

### XGBoost

In [None]:
# XGBoost hyperparameters, collected in one place.
# NOTE(review): 'reg:logistic' expects labels in [0, 1]; the training target
# here is min-max scaled, which satisfies that. 'alpha' is presumably the
# L1 regularisation alias (reg_alpha) -- confirm against the XGBoost docs.
xgb_params = {
    'objective': 'reg:logistic',
    'colsample_bytree': 0.5,
    'learning_rate': 0.5,
    'max_depth': 20,
    'alpha': 10,
    'n_estimators': 10,
}
xg_reg = xgb.XGBRegressor(**xgb_params)

# Train on the scaled data (target flattened to 1-D) and predict the test set.
xg_reg.fit(X_train_scaled, y_train_scaled.ravel())
predictions = xg_reg.predict(X_test_scaled)

#### Test XGBoost

In [None]:
# XGBoost metrics on the scaled test set.
# The R2 line is reported here as well so this cell matches the metric cells
# of the other models (Lasso, Ridge, Random Forest, LightGBM).
print('R2-squared:', xg_reg.score(X_test_scaled, y_test_scaled))

# Compute the MSE once and reuse it for the root-MSE line.
xgb_mse = mean_squared_error(y_test_scaled, predictions)
print('mean squared error:', xgb_mse)
print('root squared mean squared error:', math.sqrt(xgb_mse))

#### Test XGBoost using original scale

In [None]:
# Map the scaled XGBoost predictions back to the original target scale and
# flatten the single-column result so it lines up with Y_test.
ypred = y_scaler.inverse_transform(predictions.reshape(-1, 1)).reshape(len(Y_test),)

# Errors on the original scale; the MSE is computed once and reused.
xgb_mse_original = mean_squared_error(Y_test, ypred)
print('mean squared error:', xgb_mse_original)
print('root squared mean squared error:', math.sqrt(xgb_mse_original))