In [1]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from numpy import mean
from numpy import absolute
from numpy import sqrt
import pandas as pd

Country = ['Iceland', 'Denmark', 'Costa Rica', 'New Zealand', 'Canada', 'United Kingdom', 'Luxembourg', 'Belgium', 'Ireland', 'Austria']

Population_By_Age_P = [15.47, 19.91, 8.76, 15.54, 18.98, 18.48, 15.37, 19.21, 19.87, 19.87]

Hospital_bed = [2.8, 2.6, 1.1, 2.6, 2.5, 2.5, 4.3, 5.6, 3.0, 7.3]

Covid_deaths = [211, 8304, 9214, 2544, 51397, 219721, 1215 , 33717, 8655, 21872]

#in Here we creat a pandas DataFrame that contains two predictor variables, x1 and x2, and a single response variable y
df = pd.DataFrame({'y': [211, 8304, 9214, 2544, 51397, 219721, 1215, 33717, 8655, 21872],
                   'x1': [15.47, 19.91, 8.76, 15.54, 18.98, 18.48, 15.37, 19.21, 19.87, 19.87],
                   'x2': [2.8, 2.6, 1.1, 2.6, 2.5, 2.5, 4.3, 5.6, 3.0, 7.3]})

#Now we need multiple linear regression to the data set to find out which linear regression is fit to the data 

X = df[['x1', 'x2']]
y = df['y']

#define cross-validation method to use
#(CV) object cv using the KFold class from scikit-learn. The KFold class implements k-fold cross-validation, which is a technique used to evaluate the performance of a machine learning model.
#n_splits is the numbers of fold, the data splits to 5 folds

#random_state: an integer that is used to seed the random number generator used to split the data. 
#Setting this parameter to a fixed value ensures that the same splits are used every time the code is run. 

#shuffle: a boolean that determines whether to shuffle the data before splitting it into folds. 
#Setting this parameter to True shuffles the data randomly before splitting it into folds, which can help to reduce bias in the model evaluation
cv = KFold(n_splits=5, random_state=1, shuffle=True)

#build multiple linear regression model
model = LinearRegression()

#use k-fold CV to evaluate model
#This code uses cross_val_score, function from scikit-learn's model_selection. module to perform k-fold cross-validation.

#The model object is the model to be evaluated, X is the feature matrix, y is the target variable. 

#scoring='neg_mean_absolute_error' specifies the scoring metric to be used (negative mean absolute error)

#cv is the number of folds to be used in the cross-validation

scores = cross_val_score(model, X, y, scoring='neg_mean_absolute_error',
                         cv=cv)

#Here is our mean absolute scores

error  = sqrt(mean(absolute(scores)))

print (error)

212.08931286114526
