In [175]:
%autosave 10

Autosaving every 10 seconds


##### Importing the libraries

In [176]:
import numpy as np
import pandas as pd

##### Loading and preparing the data

In [177]:
df = pd.read_csv('Life Expectancy Data - cleaned.csv')

In [178]:
df.head()

Unnamed: 0,Country,Year,Life expectancy,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,...,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling,Developed
0,Afghanistan,2015,65.0,263.0,62,0.01,71.279624,65.0,1154,19.1,...,8.16,65.0,0.1,584.25921,33736494.0,17.2,17.3,0.479,10.1,0
1,Afghanistan,2014,59.9,271.0,64,0.01,73.523582,62.0,492,18.6,...,8.18,62.0,0.1,612.696514,327582.0,17.5,17.5,0.476,10.0,0
2,Afghanistan,2013,59.9,268.0,66,0.01,73.219243,64.0,430,18.1,...,8.13,64.0,0.1,631.744976,31731688.0,17.7,17.7,0.47,9.9,0
3,Afghanistan,2012,59.5,272.0,69,0.01,78.184215,67.0,2787,17.6,...,8.52,67.0,0.1,669.959,3696958.0,17.9,18.0,0.463,9.8,0
4,Afghanistan,2011,59.2,275.0,71,0.01,7.097109,68.0,3013,17.2,...,7.87,68.0,0.1,63.537231,2978599.0,18.2,18.2,0.454,9.5,0


In [179]:
df.shape

(2938, 22)

In [180]:
# Variable to be predicted is'Life expectancy'
y = df['Life expectancy ']

In [181]:
country_list = df['Country'].unique().tolist()

In [182]:
# Creating dictonary for mapping 'Country' feature into numbers
country_mapping = dict()
i = 1

for country in country_list:
    country_mapping[country] = i
    i+=1

In [183]:
# Mapping 'Country' feature into numbers
df['Country'] = df['Country'].map(country_mapping)
df.head()

Unnamed: 0,Country,Year,Life expectancy,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,...,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling,Developed
0,1,2015,65.0,263.0,62,0.01,71.279624,65.0,1154,19.1,...,8.16,65.0,0.1,584.25921,33736494.0,17.2,17.3,0.479,10.1,0
1,1,2014,59.9,271.0,64,0.01,73.523582,62.0,492,18.6,...,8.18,62.0,0.1,612.696514,327582.0,17.5,17.5,0.476,10.0,0
2,1,2013,59.9,268.0,66,0.01,73.219243,64.0,430,18.1,...,8.13,64.0,0.1,631.744976,31731688.0,17.7,17.7,0.47,9.9,0
3,1,2012,59.5,272.0,69,0.01,78.184215,67.0,2787,17.6,...,8.52,67.0,0.1,669.959,3696958.0,17.9,18.0,0.463,9.8,0
4,1,2011,59.2,275.0,71,0.01,7.097109,68.0,3013,17.2,...,7.87,68.0,0.1,63.537231,2978599.0,18.2,18.2,0.454,9.5,0


In [184]:
features_to_remove = ['Life expectancy ']

In [185]:
# Features to be used for prediction
X = df.drop(features_to_remove, axis=1)
X.head()

Unnamed: 0,Country,Year,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,under-five deaths,...,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling,Developed
0,1,2015,263.0,62,0.01,71.279624,65.0,1154,19.1,83,...,8.16,65.0,0.1,584.25921,33736494.0,17.2,17.3,0.479,10.1,0
1,1,2014,271.0,64,0.01,73.523582,62.0,492,18.6,86,...,8.18,62.0,0.1,612.696514,327582.0,17.5,17.5,0.476,10.0,0
2,1,2013,268.0,66,0.01,73.219243,64.0,430,18.1,89,...,8.13,64.0,0.1,631.744976,31731688.0,17.7,17.7,0.47,9.9,0
3,1,2012,272.0,69,0.01,78.184215,67.0,2787,17.6,93,...,8.52,67.0,0.1,669.959,3696958.0,17.9,18.0,0.463,9.8,0
4,1,2011,275.0,71,0.01,7.097109,68.0,3013,17.2,97,...,7.87,68.0,0.1,63.537231,2978599.0,18.2,18.2,0.454,9.5,0


In [186]:
print(X.shape)
print(y.shape)

(2938, 21)
(2938,)


##### Splitting the data

In [187]:
# Splitting the data
from sklearn.model_selection import train_test_split

In [188]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=101)

In [189]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(2056, 21)
(2056,)
(882, 21)
(882,)


##### Training the Model and predicting values

In [190]:
# Using Lasso Regression
from sklearn.linear_model import Lasso

In [191]:
# Here, taking alpha = 0.000001
lasso_reg = Lasso(alpha=0.000001)

In [192]:
lasso_reg

Lasso(alpha=1e-06, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [193]:
lasso_reg.fit(X_train,y_train)

  positive)


Lasso(alpha=1e-06, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [194]:
# Precicting the values using Lasso Regression
predict_lasso_reg = lasso_reg.predict(X_test)

In [195]:
print(predict_lasso_reg[:10])
print()
print(y_test[:10].values)

[51.59674881 60.76079903 77.00130924 65.16574711 71.05534795 70.82793727
 72.4331568  78.18954943 64.54213685 79.7085556 ]

[62.5 53.6 83.3 64.3 73.5 72.7 68.2 81.1 59.7 81.4]


In [196]:
# Checking RMSE Accuracy of the model
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test,predict_lasso_reg)**(0.5)

3.760909963498413

In [197]:
# Checking the R Square Value Accuracy
from sklearn.metrics import r2_score
r2_score(y_test,predict_lasso_reg)

0.8365591580429219

## That's All, Thanks