In [47]:
# import the data

import pandas as pd

original = pd.read_csv('covid_data.csv', header = 0)
original

Unnamed: 0,new_deaths,date,total_cases,new_cases,population,population_density,median_age,aged_65_older,gdp_per_capita,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,hospital_beds_per_thousand,total_deaths
0,0,0,0,0,38928341,54.422,18.6,2.581,1803.987,597.029,9.59,6.05,31.4,0.500,0
1,0,0,0,0,98340,208.354,36.2,8.606,26382.287,242.648,10.55,7.10,35.7,3.600,0
2,0,0,0,0,7976985,104.700,19.1,2.538,1390.300,325.721,2.42,8.80,41.3,2.397,0
3,0,0,0,0,20903278,70.151,17.6,2.409,1703.102,269.048,2.42,1.60,23.9,0.400,0
4,0,0,18,2,5850343,7915.731,42.4,12.922,85535.383,92.243,10.99,5.20,28.3,2.400,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58319,0,304,621,4,39244,19347.500,29.4,6.224,11840.846,243.964,5.46,6.05,31.4,13.800,3
58320,1,304,3014,17,8278737,143.366,19.4,2.839,1429.813,280.033,6.15,0.90,14.2,0.700,65
58321,0,304,183,0,98340,208.354,36.2,8.606,26382.287,242.648,10.55,7.10,35.7,3.600,0
58322,2,304,8863,153,4649660,4.289,20.3,3.138,3597.633,232.347,2.42,6.05,31.4,2.397,179


In [48]:
original.dtypes

new_deaths                      int64
date                            int64
total_cases                     int64
new_cases                       int64
population                      int64
population_density            float64
median_age                    float64
aged_65_older                 float64
gdp_per_capita                float64
cardiovasc_death_rate         float64
diabetes_prevalence           float64
female_smokers                float64
male_smokers                  float64
hospital_beds_per_thousand    float64
total_deaths                    int64
dtype: object

In [49]:
total_rows=len(original.axes[0])
total_cols=len(original.axes[1])
print("Number of Rows: "+str(total_rows))
print("Number of Columns: "+str(total_cols))

Number of Rows: 58324
Number of Columns: 15


In [50]:
# Checks to see which columns have missing values

original.isnull().any()

new_deaths                    False
date                          False
total_cases                   False
new_cases                     False
population                    False
population_density            False
median_age                    False
aged_65_older                 False
gdp_per_capita                False
cardiovasc_death_rate         False
diabetes_prevalence           False
female_smokers                False
male_smokers                  False
hospital_beds_per_thousand    False
total_deaths                  False
dtype: bool

In [51]:
# Import the necessary sklearn packages

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn import preprocessing
import numpy as np

In [52]:
#Total Deaths
target = original.iloc[:, -1]

#New Deaths
target2 = original.iloc[:, 0]

temp = original.iloc[:, 1:14]

In [53]:
target

0          0
1          0
2          0
3          0
4          0
        ... 
58319      3
58320     65
58321      0
58322    179
58323    277
Name: total_deaths, Length: 58324, dtype: int64

In [54]:
target2

0        0
1        0
2        0
3        0
4        0
        ..
58319    0
58320    1
58321    0
58322    2
58323    0
Name: new_deaths, Length: 58324, dtype: int64

In [55]:
temp

Unnamed: 0,date,total_cases,new_cases,population,population_density,median_age,aged_65_older,gdp_per_capita,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,hospital_beds_per_thousand
0,0,0,0,38928341,54.422,18.6,2.581,1803.987,597.029,9.59,6.05,31.4,0.500
1,0,0,0,98340,208.354,36.2,8.606,26382.287,242.648,10.55,7.10,35.7,3.600
2,0,0,0,7976985,104.700,19.1,2.538,1390.300,325.721,2.42,8.80,41.3,2.397
3,0,0,0,20903278,70.151,17.6,2.409,1703.102,269.048,2.42,1.60,23.9,0.400
4,0,18,2,5850343,7915.731,42.4,12.922,85535.383,92.243,10.99,5.20,28.3,2.400
...,...,...,...,...,...,...,...,...,...,...,...,...,...
58319,304,621,4,39244,19347.500,29.4,6.224,11840.846,243.964,5.46,6.05,31.4,13.800
58320,304,3014,17,8278737,143.366,19.4,2.839,1429.813,280.033,6.15,0.90,14.2,0.700
58321,304,183,0,98340,208.354,36.2,8.606,26382.287,242.648,10.55,7.10,35.7,3.600
58322,304,8863,153,4649660,4.289,20.3,3.138,3597.633,232.347,2.42,6.05,31.4,2.397


# RMSE Values For Multilayer Perceptron and Linear Regression when Data has not been Manipulated

In [56]:
# We split the dataset into 80% training and 20% test

X_train, X_test, y_train, y_test = train_test_split(temp, target, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(temp, target2, test_size=0.2, random_state = 42)

In [57]:
%%time
# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths: 3525.775602597181
Wall time: 53.4 s


In [58]:
%%time
# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths:",score)

RMSE value using Multilayer Perceptron when predicting Total Deaths: 14854.013561844153
Wall time: 40.4 s


In [59]:
%%time
# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))

print("RMSE value using Linear Regression when predicting New Deaths:",score)

RMSE value using Linear Regression when predicting New Deaths: 157.5747021957956
Wall time: 205 ms


In [60]:
%%time
# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))

print("RMSE value using Linear Regression when predicting Total Deaths:",score)

RMSE value using Linear Regression when predicting Total Deaths: 9932.205715896125
Wall time: 207 ms


# RMSE Values For Multilayer Perceptron and Linear Regression when Data has been Normalized

In [61]:
# Normalize the data
norm = preprocessing.normalize(original)

#Total Deaths
ntarget = norm[:, -1]

#New Deaths
ntarget2 = norm[:, 0]

ntemp = norm[:, 1:14]

X_train, X_test, y_train, y_test = train_test_split(ntemp, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(ntemp, ntarget2, test_size=0.2, random_state = 42)

In [62]:
%%time
# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with data that has been normalized:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with data that has been normalized: 6.262769576032412e-05
Wall time: 59.4 s


In [63]:
%%time
# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with data that has been normalized:",score)

RMSE value using Multilayer Perceptron when predicting Total Deaths with data that has been normalized: 0.00011625435438224795
Wall time: 56.7 s


In [64]:
%%time
# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with data that has been normalized:",score)

RMSE value using Linear Regression when predicting New Deaths with data that has been normalized: 2.4086456177436592e-06
Wall time: 140 ms


In [65]:
%%time
# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with data that has been normalized:",score)

RMSE value using Linear Regression when predicting Total Deaths with data that has been normalized: 0.00011303578011964128
Wall time: 161 ms


# RMSE Values For Multilayer Perceptron and Linear Regression when Data has been Standardized

In [66]:
# Standardize the data
stand = preprocessing.scale(original)

#Total Deaths
starget = stand[:, -1]

#New Deaths
starget2 = stand[:, 0]

stemp = stand[:, 1:14]

X_train, X_test, y_train, y_test = train_test_split(stemp, starget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(stemp, starget2, test_size=0.2, random_state = 42)



In [67]:
%%time
# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with data that has been standardized:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with data that has been standardized: 0.22848062426499152
Wall time: 2min 2s


In [68]:
%%time
# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with data that has been standardized:",score)

RMSE value using Multilayer Perceptron when predicting Total Deaths with data that has been standardized: 0.03795370399189848
Wall time: 1min 16s


In [69]:
%%time
# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with data that has been standardized:",score)

RMSE value using Linear Regression when predicting New Deaths with data that has been standardized: 0.3726580168032033
Wall time: 152 ms


In [70]:
%%time
# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with data that has been standardized:",score)

RMSE value using Linear Regression when predicting Total Deaths with data that has been standardized: 0.18415289713599362
Wall time: 145 ms


# We will used each attribute as a training set to see which one produces the best RMSE values. The data used will be normalized as this had produced the best RMSE values when we were using the entire dataset.

In [71]:
# Normalize the data
norm = preprocessing.normalize(original)

#Total Deaths
ntarget = norm[:, -1]

#New Deaths
ntarget2 = norm[:, 0]

ntemp = norm[:, 1:14]

X_train, X_test, y_train, y_test = train_test_split(ntemp, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(ntemp, ntarget2, test_size=0.2, random_state = 42)

In [72]:
date = norm[:, 1:2]
total_cases = norm[:, 2:3]
new_cases = norm[:, 3:4]
pop = norm[:, 4:5]
pop_density = norm[:, 5:6]
median_age = norm[:, 6:7]
senior = norm[:, 7:8]
gdp = norm[:, 8:9]
cardio = norm[:, 9:10]
diabetes = norm[:, 10:11]
fm_smokers = norm[:, 11:12]
m_smokers = norm[:, 12:13]
hospital = norm[:, 13:14]

# RMSE Values Using Only the Date Attribute

In [73]:
%%time
X_train, X_test, y_train, y_test = train_test_split(date, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(date, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the date attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the date attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the date attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the date attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the date attribute: 2.8460024714049225e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the date attribute: 0.00016323239792745653
RMSE value using Linear Regression when predicting New Deaths with the date attribute: 2.844879876512312e-06
RMSE value using Linear Regression when predicting Total Deaths with the date attribute: 0.00016322307677124712
Wall time: 1min 27s


# RMSE Values Using Only the Total Cases Attribute

In [74]:
%%time
X_train, X_test, y_train, y_test = train_test_split(total_cases, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(total_cases, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the total_cases attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the total_cases attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the total_cases attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the total_cases attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the total_cases attribute: 2.709032886569258e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the total_cases attribute: 0.00011679586364409703
RMSE value using Linear Regression when predicting New Deaths with the total_cases attribute: 2.6487387862297387e-06
RMSE value using Linear Regression when predicting Total Deaths with the total_cases attribute: 0.00011713685605087092
Wall time: 1min 41s


# RMSE Values Using Only the New Cases Attribute

In [75]:
%%time
X_train, X_test, y_train, y_test = train_test_split(new_cases, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(new_cases, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the new_cases attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the new_cases attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the new_cases attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the new_cases attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the new_cases attribute: 2.830754212998051e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the new_cases attribute: 0.00015490030604336757
RMSE value using Linear Regression when predicting New Deaths with the new_cases attribute: 2.4053377506682844e-06
RMSE value using Linear Regression when predicting Total Deaths with the new_cases attribute: 0.00014850869157141474
Wall time: 59.9 s


# RMSE Values Using Only the Population Attribute

In [76]:
%%time
X_train, X_test, y_train, y_test = train_test_split(pop, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(pop, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the population attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the population attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the population attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the population attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the population attribute: 3.523182459079745e-05
RMSE value using Multilayer Perceptron when predicting Total Deaths with the population attribute: 0.0001636942110282686
RMSE value using Linear Regression when predicting New Deaths with the population attribute: 2.8443886357228086e-06
RMSE value using Linear Regression when predicting Total Deaths with the population attribute: 0.00016276360953152976
Wall time: 1min 57s


# RMSE Values Using Only the Population Density Attribute

In [77]:
%%time
X_train, X_test, y_train, y_test = train_test_split(pop_density, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(pop_density, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the population_density attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the population_density attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the population_denisty attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the population_density attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the population_density attribute: 2.8469837975330844e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the population_density attribute: 0.0001628024263154486
RMSE value using Linear Regression when predicting New Deaths with the population_denisty attribute: 2.8466976135140337e-06
RMSE value using Linear Regression when predicting Total Deaths with the population_density attribute: 0.00016324984655400178
Wall time: 1min 23s


# RMSE Values Using Only the Median Age Attribute

In [78]:
%%time
X_train, X_test, y_train, y_test = train_test_split(median_age, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(median_age, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the median_age attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the median_age attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the median_age attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the median_age attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the median_age attribute: 2.845341955388194e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the median_age attribute: 0.00016333610461222742
RMSE value using Linear Regression when predicting New Deaths with the median_age attribute: 2.844685332414574e-06
RMSE value using Linear Regression when predicting Total Deaths with the median_age attribute: 0.0001632503470438153
Wall time: 1min 9s


# RMSE Values Using Only the Aged 65 and Older Attribute

In [79]:
%%time
X_train, X_test, y_train, y_test = train_test_split(senior, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(senior, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the aged_65_older attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the aged_65_older attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the aged_65_older attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the aged_65_older attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the aged_65_older attribute: 2.8440960041121238e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the aged_65_older attribute: 0.0001632830497512589
RMSE value using Linear Regression when predicting New Deaths with the aged_65_older attribute: 2.8446727920222995e-06
RMSE value using Linear Regression when predicting Total Deaths with the aged_65_older attribute: 0.00016324907499908327
Wall time: 1min 1s


# RMSE Values Using Only the GDP Per Capita Attribute

In [80]:
%%time
X_train, X_test, y_train, y_test = train_test_split(gdp, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(gdp, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the gdp_per_capita attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the gdp_per_capita attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the gdp_per_capita attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the gdp_per_capita attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the gdp_per_capita attribute: 3.373529039939229e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the gdp_per_capita attribute: 0.00015912269138441024
RMSE value using Linear Regression when predicting New Deaths with the gdp_per_capita attribute: 2.844375169169821e-06
RMSE value using Linear Regression when predicting Total Deaths with the gdp_per_capita attribute: 0.00016247061336167458
Wall time: 2min 6s


# RMSE Values Using Only the Cardiovascular Death Rate Attribute

In [81]:
%%time
X_train, X_test, y_train, y_test = train_test_split(cardio, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(cardio, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the cardiovasc_death_rate attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the cardiovasc_death_rate attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the cardiovasc_death_rate attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the cardiovasc_death_rate attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the cardiovasc_death_rate attribute: 2.868951368860124e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the cardiovasc_death_rate attribute: 0.00016323394570591
RMSE value using Linear Regression when predicting New Deaths with the cardiovasc_death_rate attribute: 2.8443442838468134e-06
RMSE value using Linear Regression when predicting Total Deaths with the cardiovasc_death_rate attribute: 0.00016317939641208025
Wall time: 1min 28s


# RMSE Values Using Only the Diabetes Prevalence Attribute

In [82]:
%%time
X_train, X_test, y_train, y_test = train_test_split(diabetes, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(diabetes, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the diabetes_prevalence attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the diabetes_prevalence attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the diabetes_prevalence attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the diabetes_prevalence_rate attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the diabetes_prevalence attribute: 2.84407384356859e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the diabetes_prevalence attribute: 0.0001632506051050643
RMSE value using Linear Regression when predicting New Deaths with the diabetes_prevalence attribute: 2.8438066036338493e-06
RMSE value using Linear Regression when predicting Total Deaths with the diabetes_prevalence_rate attribute: 0.00016311781248187098
Wall time: 1min 8s


# RMSE Values Using Only the Female Smokers Attribute

In [83]:
%%time
X_train, X_test, y_train, y_test = train_test_split(fm_smokers, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(fm_smokers, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the female_smokers attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the female_smokers attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the female_smokers attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the female_smokers attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the female_smokers attribute: 2.8440973688591445e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the female_smokers attribute: 0.00016312550641492954
RMSE value using Linear Regression when predicting New Deaths with the female_smokers attribute: 2.842817458803983e-06
RMSE value using Linear Regression when predicting Total Deaths with the female_smokers attribute: 0.00016263097653592516
Wall time: 1min


# RMSE Values Using Only the Male Smokers Attribute

In [84]:
%%time
X_train, X_test, y_train, y_test = train_test_split(m_smokers, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(m_smokers, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the male_smokers attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the male_smokers attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the male_smokers attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the male_smokers attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the male_smokers attribute: 2.8551233760475724e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the male_smokers attribute: 0.00016338684785721514
RMSE value using Linear Regression when predicting New Deaths with the male_smokers attribute: 2.8447444979377836e-06
RMSE value using Linear Regression when predicting Total Deaths with the male_smokers attribute: 0.00016325423773686128
Wall time: 1min 13s


# RMSE Values Using Only the Hospital Beds Per Thousand Attribute

In [85]:
%%time
X_train, X_test, y_train, y_test = train_test_split(hospital, ntarget, test_size=0.2, random_state = 42)

X_train2, X_test2, y_train2, y_test2 = train_test_split(hospital, ntarget2, test_size=0.2, random_state = 42)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Multilayer Perceptron when predicting New Deaths with the hospital_beds_per_thousand attribute:",score)

# Multilayer Perceptron
mlp = MLPRegressor(max_iter=10000, early_stopping = True)

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = mlp, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
mlp_best_model = gs_random.best_estimator_

y_pred = mlp_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Multilayer Perceptron when predicting Total Deaths with the hospital_beds_per_thousand attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train2, y_train2)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test2)
score = np.sqrt(metrics.mean_squared_error(y_test2, y_pred))


print("RMSE value using Linear Regression when predicting New Deaths with the hospital_beds_per_thousand attribute:",score)

# Linear Regression
linr = LinearRegression()

# Uses the cross validation available in Grid Search
# Grid Search is run with the default parameters
gs_random = GridSearchCV(estimator = linr, param_grid = {}, scoring = 'neg_root_mean_squared_error', cv= 10)
gs_random.fit(X_train, y_train)
linr_best_model = gs_random.best_estimator_

y_pred = linr_best_model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))


print("RMSE value using Linear Regression when predicting Total Deaths with the hospital_beds_per_thousand attribute:",score)

RMSE value using Multilayer Perceptron when predicting New Deaths with the hospital_beds_per_thousand attribute: 2.844126161269132e-06
RMSE value using Multilayer Perceptron when predicting Total Deaths with the hospital_beds_per_thousand attribute: 0.00016329598450074255
RMSE value using Linear Regression when predicting New Deaths with the hospital_beds_per_thousand attribute: 2.8467883310372065e-06
RMSE value using Linear Regression when predicting Total Deaths with the hospital_beds_per_thousand attribute: 0.00016325491720478857
Wall time: 1min


# Correlation Matrix

In [86]:
cor_matrix = original.corr()
cor_matrix

Unnamed: 0,new_deaths,date,total_cases,new_cases,population,population_density,median_age,aged_65_older,gdp_per_capita,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,hospital_beds_per_thousand,total_deaths
new_deaths,1.0,0.049101,0.826616,0.896303,0.825857,-0.017026,0.039839,0.03794,0.020037,-0.048722,0.018501,0.010623,-0.012686,-0.008088,0.877596
date,0.049101,1.0,0.093152,0.082703,-0.000134,0.004635,0.001739,0.00138,0.002106,-0.000155,0.000118,-0.000368,-8.3e-05,-0.000242,0.084388
total_cases,0.826616,0.093152,1.0,0.967246,0.671062,-0.012432,0.02835,0.025024,0.01777,-0.033776,0.018595,0.004618,-0.009342,-0.008142,0.964175
new_cases,0.896303,0.082703,0.967246,1.0,0.734048,-0.01374,0.035635,0.034267,0.020637,-0.036714,0.017516,0.010968,-0.007743,-0.003731,0.955609
population,0.825857,-0.000134,0.671062,0.734048,1.0,-0.015459,0.015587,0.004666,-0.020285,-0.017235,0.019979,-0.044878,0.022763,-0.014873,0.746169
population_density,-0.017026,0.004635,-0.012432,-0.01374,-0.015459,1.0,0.056547,0.006649,0.101381,-0.077114,0.015029,-0.042626,-0.003877,0.311107,-0.015559
median_age,0.039839,0.001739,0.02835,0.035635,0.015587,0.056547,1.0,0.904423,0.620938,-0.325962,0.144414,0.638054,0.190283,0.590152,0.040378
aged_65_older,0.03794,0.00138,0.025024,0.034267,0.004666,0.006649,0.904423,1.0,0.493562,-0.318004,-0.066841,0.750615,0.110144,0.592442,0.040368
gdp_per_capita,0.020037,0.002106,0.01777,0.020637,-0.020285,0.101381,0.620938,0.493562,1.0,-0.451079,0.174984,0.364722,-0.07298,0.279676,0.025508
cardiovasc_death_rate,-0.048722,-0.000155,-0.033776,-0.036714,-0.017235,-0.077114,-0.325962,-0.318004,-0.451079,1.0,0.126128,-0.178104,0.365735,0.005006,-0.051005


In [87]:
labels = ["new_deaths", "total_deaths"]

In [88]:
cor_matrix[list(labels)].drop(index=list(labels))

Unnamed: 0,new_deaths,total_deaths
date,0.049101,0.084388
total_cases,0.826616,0.964175
new_cases,0.896303,0.955609
population,0.825857,0.746169
population_density,-0.017026,-0.015559
median_age,0.039839,0.040378
aged_65_older,0.03794,0.040368
gdp_per_capita,0.020037,0.025508
cardiovasc_death_rate,-0.048722,-0.051005
diabetes_prevalence,0.018501,0.016007
