## Linear Regression

Cleaning the data

In [2]:
import pandas as pd
import warnings

# Ignoring future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Cleaning data
data = pd.read_csv("Life Expectancy Data.csv")
data = data.groupby('Country').mean() # Grouping years by country
data = data.fillna(data.mean()) # Replacing NaN with mean values
data = data.drop(['Year'], axis=1)
data.sort_values(by = 'Life expectancy ', ascending = False)

# Loading data
X = data.drop(['Life expectancy '],axis = 1)
y = data['Life expectancy ']

Training linear regression model

In [3]:
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

# Training regression model
model = linear_model.LinearRegression()
trained_model = model.fit(X_train,y_train)
y_predicted = trained_model.predict(X_test)

Displaying coefficients

In [4]:
# Displaying coefficients
coefficients = model.coef_
factors = {}

for coef in range(len(coefficients)):
    factors[data.columns[coef]] = coefficients[coef]
    
factors = pd.DataFrame.from_dict(factors, orient='index').round(6)
factors.sort_values(by = 0, ascending = False)

Unnamed: 0,0
thinness 5-9 years,8.765762
thinness 1-19 years,1.176539
Polio,0.208102
Adult Mortality,0.206507
infant deaths,0.166296
Total expenditure,0.124722
Income composition of resources,0.039128
Measles,0.018567
HIV/AIDS,5.7e-05
Hepatitis B,4.3e-05


Calculating metrics

In [5]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

MAE = mean_absolute_error(y_test, y_predicted)
accuracy = mean_squared_error(y_test, y_predicted)

print('Mean Absolute Error = ', MAE)
print('R^2 = ', accuracy)

print(y_predicted)

Mean Absolute Error =  3.329454767420605
R^2 =  41.369504211683854
[52.74435026 66.27812001 71.86904339 76.41197649 42.49492716 51.98700062
 53.15029914 55.37077518 57.00987761 71.29544565 74.23725203 79.07516168
 76.09435311 66.91444201 71.46572301 54.47810865 72.07721495 80.53840394
 73.50539241 65.49583732 35.8136208  68.15101723 73.54634152 80.02931552
 79.85336781 68.63984296 65.04482439 21.87604123 55.937288   73.93352795
 76.5763802  79.94964136 65.98662221 80.03640351 61.36294068 68.43975541
 80.92770746 73.32014071 76.19402093 71.00601037 70.75087621 65.05992379
 77.05761634 60.36398504 71.42259159 75.23816903 83.00819408 71.35191652
 61.86390284 70.85288684 73.41291478 80.77187165 69.73395566 65.40471825
 71.28679739 79.76308282 77.82925449 62.42296089]
