In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA

## Modelling
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor, BaggingRegressor
from sklearn.model_selection import train_test_split, KFold, cross_val_score, learning_curve
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR


In [3]:
frequency_domain_features_train = pd.read_csv('frequency_domain_features_train.csv')

In [4]:
heart_rate_non_linear_features_train = pd.read_csv('heart_rate_non_linear_features_train.csv')

In [5]:
time_domain_features_train = pd.read_csv('time_domain_features_train.csv')

In [6]:
print(frequency_domain_features_train.shape)
print(heart_rate_non_linear_features_train.shape)
print(time_domain_features_train.shape)

(369289, 12)
(369289, 7)
(369289, 20)


In [7]:
data_train_temp = pd.merge(frequency_domain_features_train,heart_rate_non_linear_features_train, on = 'uuid')
print(data_train_temp.shape)

(369289, 18)


In [8]:
data_train = pd.merge(data_train_temp,time_domain_features_train, on = 'uuid')
print(data_train.shape)

(369289, 37)


In [9]:
data_train.head()

Unnamed: 0,uuid,VLF,VLF_PCT,LF,LF_PCT,LF_NU,HF,HF_PCT,HF_NU,TP,...,KURT,SKEW,MEAN_REL_RR,MEDIAN_REL_RR,SDRR_REL_RR,RMSSD_REL_RR,SDSD_REL_RR,SDRR_RMSSD_REL_RR,KURT_REL_RR,SKEW_REL_RR
0,89df2855-56eb-4706-a23b-b39363dd605a,2661.894136,72.203287,1009.249419,27.375666,98.485263,15.522603,0.421047,1.514737,3686.666157,...,-0.856554,0.335218,-0.000203,-0.000179,0.01708,0.007969,0.007969,2.143342,-0.856554,0.335218
1,80c795e4-aa56-4cc0-939c-19634b89cbb2,2314.26545,76.975728,690.113275,22.954139,99.695397,2.108525,0.070133,0.304603,3006.487251,...,-0.40819,-0.155286,-5.9e-05,0.000611,0.013978,0.004769,0.004769,2.930855,-0.40819,-0.155286
2,c2d5d102-967c-487d-88f2-8b005a449f3e,1373.887112,51.152225,1298.222619,48.335104,98.950472,13.769729,0.512671,1.049528,2685.879461,...,0.351789,-0.656813,-1.1e-05,-0.000263,0.018539,0.008716,0.008716,2.127053,0.351789,-0.656813
3,37eabc44-1349-4040-8896-0d113ad4811f,2410.357408,70.180308,1005.981659,29.290305,98.224706,18.181913,0.529387,1.775294,3434.52098,...,-0.504947,-0.386138,0.000112,0.000494,0.017761,0.00866,0.00866,2.050988,-0.504947,-0.386138
4,aa777a6a-7aa3-4f6e-aced-70f8691dd2b7,1151.17733,43.918366,1421.782051,54.24216,96.720007,48.215822,1.839473,3.279993,2621.175204,...,-0.548408,-0.154252,-0.0001,-0.002736,0.023715,0.013055,0.013055,1.816544,-0.548408,-0.154252


In [10]:
data_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 369289 entries, 0 to 369288
Data columns (total 37 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   uuid               369289 non-null  object 
 1   VLF                369289 non-null  float64
 2   VLF_PCT            369289 non-null  float64
 3   LF                 369289 non-null  float64
 4   LF_PCT             369289 non-null  float64
 5   LF_NU              369289 non-null  float64
 6   HF                 369289 non-null  float64
 7   HF_PCT             369289 non-null  float64
 8   HF_NU              369289 non-null  float64
 9   TP                 369289 non-null  float64
 10  LF_HF              369289 non-null  float64
 11  HF_LF              369289 non-null  float64
 12  SD1                369289 non-null  float64
 13  SD2                369289 non-null  float64
 14  sampen             369289 non-null  float64
 15  higuci             369289 non-null  float64
 16  da

In [11]:
df_train = data_train.drop(['uuid'],axis = 1)

In [12]:
df_train.describe()

Unnamed: 0,VLF,VLF_PCT,LF,LF_PCT,LF_NU,HF,HF_PCT,HF_NU,TP,LF_HF,...,KURT,SKEW,MEAN_REL_RR,MEDIAN_REL_RR,SDRR_REL_RR,RMSSD_REL_RR,SDSD_REL_RR,SDRR_RMSSD_REL_RR,KURT_REL_RR,SKEW_REL_RR
count,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,...,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0,369289.0
mean,2199.58017,64.289242,946.530252,34.095182,95.566718,39.245603,1.615576,4.433282,3185.356025,115.9772,...,0.523235,0.041628,-1.756602e-06,-0.000465,0.018571,0.009701,0.009701,2.006817,0.523235,0.041628
std,1815.773422,16.774844,574.17178,16.04029,4.123365,45.398869,1.761073,4.123365,1923.227187,360.855129,...,1.790348,0.699522,0.0001630256,0.000868,0.005455,0.003897,0.003897,0.375845,1.790348,0.699522
min,159.480176,19.031219,90.048557,2.165119,69.879083,0.061783,0.00215,0.012825,377.692795,2.319952,...,-1.89482,-2.136278,-0.001233914,-0.004425,0.008987,0.00322,0.00322,1.169342,-1.89482,-2.136278
25%,1001.18928,52.909877,545.449386,22.305936,93.645734,10.720312,0.346803,1.228054,1828.147788,14.737458,...,-0.352783,-0.359291,-7.28e-05,-0.000917,0.014261,0.006984,0.006984,1.749801,-0.352783,-0.359291
50%,1667.903111,66.350237,782.716291,32.047025,96.64314,24.841938,1.039513,3.35686,2796.856587,28.789747,...,0.040736,-0.060966,-9.33e-07,-0.000312,0.017318,0.008691,0.008691,1.934416,0.040736,-0.060966
75%,2654.121052,76.825032,1201.432256,44.647115,98.771946,45.272368,2.245115,6.354266,4052.260157,80.429614,...,0.722833,0.282417,6.91e-05,0.000131,0.021827,0.01146,0.01146,2.221232,0.722833,0.282417
max,12617.977191,97.738848,3291.548112,77.928847,99.987175,364.486936,13.095664,30.120917,13390.684098,7796.443096,...,64.088107,6.7778,0.001244098,0.002095,0.036571,0.026955,0.026955,3.724134,64.088107,6.7778


In [13]:
def missing_check(df):
    total = df.isnull().sum().sort_values(ascending=False)   # total number of null values
    percent = (df.isnull().sum()/df.isnull().count()).sort_values(ascending=False)  # percentage of values that are null
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])  # putting the above two together
    return missing_data # return the dataframe
missing_check(df_train)

Unnamed: 0,Total,Percent
SKEW_REL_RR,0,0.0
KURT_REL_RR,0,0.0
datasetId,0,0.0
higuci,0,0.0
sampen,0,0.0
SD2,0,0.0
SD1,0,0.0
HF_LF,0,0.0
LF_HF,0,0.0
TP,0,0.0


In [14]:
## User defined function to calculate statistics for the attributes
def stats_measure(x):
  print('Number of Unique Values')
  print(x.nunique())
  print(" ")
  print('Summary of the attribute')
  print(x.describe())
  print(" ")
  print('Skewness of the attribute')
  if x.dtype=='O':
    print('Categorical Variable')
  else:
    print(x.skew())
  print(" ")
  print('Kurtosis of the attribute')
  if x.dtype=='O':
    print('Categorical Variable')
  else:
    print(x.kurt())   


In [15]:
for p in df_train.columns:
  print('-'*100)  
  print(f'Statistical Measures of the attribute: ',p)
  col = df_train[p]
  stats_measure(col)
  print('-'*100)


----------------------------------------------------------------------------------------------------
Statistical Measures of the attribute:  VLF
Number of Unique Values
369289
 
Summary of the attribute
count    369289.000000
mean       2199.580170
std        1815.773422
min         159.480176
25%        1001.189280
50%        1667.903111
75%        2654.121052
max       12617.977191
Name: VLF, dtype: float64
 
Skewness of the attribute
1.9607349536265608
 
Kurtosis of the attribute
4.493563049525889
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
Statistical Measures of the attribute:  VLF_PCT
Number of Unique Values
369289
 
Summary of the attribute
count    369289.000000
mean         64.289242
std          16.774844
min          19.031219
25%          52.909877
50%          66.350237
75%          76.825032
max          97.738848
Na

In [16]:
# List the numerical and categorical columns
numeric_cols = df_train.select_dtypes(include = [np.number]).columns.tolist()
categorical_cols = df_train.select_dtypes(include = [np.object]).columns.tolist()
print('The numeric attributes are:', numeric_cols)
print('The categorical attributes are:', categorical_cols)

The numeric attributes are: ['VLF', 'VLF_PCT', 'LF', 'LF_PCT', 'LF_NU', 'HF', 'HF_PCT', 'HF_NU', 'TP', 'LF_HF', 'HF_LF', 'SD1', 'SD2', 'sampen', 'higuci', 'datasetId', 'MEAN_RR', 'MEDIAN_RR', 'SDRR', 'RMSSD', 'SDSD', 'SDRR_RMSSD', 'HR', 'pNN25', 'pNN50', 'KURT', 'SKEW', 'MEAN_REL_RR', 'MEDIAN_REL_RR', 'SDRR_REL_RR', 'RMSSD_REL_RR', 'SDSD_REL_RR', 'SDRR_RMSSD_REL_RR', 'KURT_REL_RR', 'SKEW_REL_RR']
The categorical attributes are: ['condition']


In [17]:
df_train['condition'].value_counts()

no stress        200082
interruption     105150
time pressure     64057
Name: condition, dtype: int64

In [18]:
dummy_var = pd.get_dummies(df_train['condition'],drop_first = True)
dummy_var.head()

Unnamed: 0,no stress,time pressure
0,1,0
1,0,0
2,0,0
3,1,0
4,1,0


In [19]:
df_train = pd.concat([df_train, dummy_var], axis = 1)
df_train = df_train.drop('condition', axis = 1)

In [20]:
df_train.head()

Unnamed: 0,VLF,VLF_PCT,LF,LF_PCT,LF_NU,HF,HF_PCT,HF_NU,TP,LF_HF,...,MEAN_REL_RR,MEDIAN_REL_RR,SDRR_REL_RR,RMSSD_REL_RR,SDSD_REL_RR,SDRR_RMSSD_REL_RR,KURT_REL_RR,SKEW_REL_RR,no stress,time pressure
0,2661.894136,72.203287,1009.249419,27.375666,98.485263,15.522603,0.421047,1.514737,3686.666157,65.018055,...,-0.000203,-0.000179,0.01708,0.007969,0.007969,2.143342,-0.856554,0.335218,1,0
1,2314.26545,76.975728,690.113275,22.954139,99.695397,2.108525,0.070133,0.304603,3006.487251,327.296635,...,-5.9e-05,0.000611,0.013978,0.004769,0.004769,2.930855,-0.40819,-0.155286,0,0
2,1373.887112,51.152225,1298.222619,48.335104,98.950472,13.769729,0.512671,1.049528,2685.879461,94.28091,...,-1.1e-05,-0.000263,0.018539,0.008716,0.008716,2.127053,0.351789,-0.656813,0,0
3,2410.357408,70.180308,1005.981659,29.290305,98.224706,18.181913,0.529387,1.775294,3434.52098,55.328701,...,0.000112,0.000494,0.017761,0.00866,0.00866,2.050988,-0.504947,-0.386138,1,0
4,1151.17733,43.918366,1421.782051,54.24216,96.720007,48.215822,1.839473,3.279993,2621.175204,29.487873,...,-0.0001,-0.002736,0.023715,0.013055,0.013055,1.816544,-0.548408,-0.154252,1,0


In [21]:
df_train_imputed = df_train.copy(deep = True)

In [22]:
for col in numeric_cols:
    Q3 = df_train_imputed[col].quantile(0.75)
    Q1 = df_train_imputed[col].quantile(0.25)
    IQR = Q3 - Q1
    upper_lim = Q3 + (3 * IQR)
    lower_lim = Q1 - (3 * IQR)
    df_train_imputed.loc[(df_train_imputed[col] < lower_lim) | (df_train_imputed[col] > upper_lim), col] = df_train_imputed[col].median()

#df_train_imputed.isnull().sum()

In [23]:
# Using SimpleImputer to fill missing values by median
print('Descriptive Stats before handling outliers: \n', '--'*30)
display(df_train.describe().T)

print('Descriptive Stats after handling outliers: \n', '--'*30)
display(df_train_imputed.describe().T)


Descriptive Stats before handling outliers: 
 ------------------------------------------------------------


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
VLF,369289.0,2199.58017,1815.773422,159.480176,1001.18928,1667.903,2654.121052,12617.977191
VLF_PCT,369289.0,64.289242,16.774844,19.031219,52.909877,66.35024,76.825032,97.738848
LF,369289.0,946.530252,574.17178,90.048557,545.449386,782.7163,1201.432256,3291.548112
LF_PCT,369289.0,34.095182,16.04029,2.165119,22.305936,32.04703,44.647115,77.928847
LF_NU,369289.0,95.566718,4.123365,69.879083,93.645734,96.64314,98.771946,99.987175
HF,369289.0,39.245603,45.398869,0.061783,10.720312,24.84194,45.272368,364.486936
HF_PCT,369289.0,1.615576,1.761073,0.00215,0.346803,1.039513,2.245115,13.095664
HF_NU,369289.0,4.433282,4.123365,0.012825,1.228054,3.35686,6.354266,30.120917
TP,369289.0,3185.356025,1923.227187,377.692795,1828.147788,2796.857,4052.260157,13390.684098
LF_HF,369289.0,115.9772,360.855129,2.319952,14.737458,28.78975,80.429614,7796.443096


Descriptive Stats after handling outliers: 
 ------------------------------------------------------------


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
VLF,369289.0,2005.718669,1426.877298,159.480176,1001.18928,1667.903,2496.617475,7612.783844
VLF_PCT,369289.0,64.289242,16.774844,19.031219,52.909877,66.35024,76.825032,97.738848
LF,369289.0,944.821855,571.037542,90.048557,545.449386,782.7163,1199.536481,3169.378593
LF_PCT,369289.0,34.095182,16.04029,2.165119,22.305936,32.04703,44.647115,77.928847
LF_NU,369289.0,95.69871,3.805403,78.269762,93.729698,96.64314,98.771946,99.987175
HF,369289.0,33.564026,32.819622,0.061783,10.720312,24.84194,41.551803,148.921411
HF_PCT,369289.0,1.508332,1.508504,0.00215,0.346803,1.039513,2.137243,7.938278
HF_NU,369289.0,4.30129,3.805403,0.012825,1.228054,3.35686,6.270302,21.730238
TP,369289.0,3147.309366,1836.098125,377.692795,1828.147788,2796.857,4024.752713,10724.569836
LF_HF,369289.0,47.087608,53.973996,2.319952,14.737458,28.78975,52.13228,277.505613


In [24]:
# Splitting the dataset into the Training and Test sets 
from sklearn.model_selection import train_test_split
X = df_train.drop(['HR'],axis = 1)
y = df_train['HR']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3, random_state = 42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((258502, 36), (110787, 36), (258502,), (110787,))

In [25]:
## Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)
X_train = pd.DataFrame(X_train, columns = X.columns)
X_test = pd.DataFrame(X_test, columns = X.columns)

In [57]:
X_train.head()

Unnamed: 0,VLF,VLF_PCT,LF,LF_PCT,LF_NU,HF,HF_PCT,HF_NU,TP,LF_HF,...,MEAN_REL_RR,MEDIAN_REL_RR,SDRR_REL_RR,RMSSD_REL_RR,SDSD_REL_RR,SDRR_RMSSD_REL_RR,KURT_REL_RR,SKEW_REL_RR,no stress,time pressure
0,-0.337147,0.959736,-1.054647,-1.048462,-1.815251,0.152738,0.408434,1.815251,-0.629175,-0.300201,...,-0.311869,0.672531,-0.766268,-0.070457,-0.070458,-1.276527,2.122184,2.903013,0.919629,-0.458159
1,-0.592396,-1.296683,0.902949,1.328997,0.209151,0.331519,0.24753,-0.209151,-0.282537,-0.246284,...,-1.070697,-2.609775,0.964367,0.87231,0.872309,-0.498063,-0.597993,-0.281476,0.919629,-0.458159
2,-0.36881,1.059505,-1.13979,-1.14801,-1.980925,0.064074,0.364668,1.980925,-0.686538,-0.301451,...,-0.834048,0.360071,-0.848841,-0.173726,-0.173726,-1.228314,2.78755,3.393158,0.919629,-0.458159
3,2.223109,1.296993,0.101254,-1.26197,0.906393,-0.709788,-0.862387,-0.906393,2.113033,0.071285,...,-0.109317,-0.03404,0.009207,-0.709902,-0.709898,1.801348,0.32904,-0.395905,-1.087395,2.182649
4,-0.230664,-0.644641,0.714834,0.412462,-1.961633,3.418022,2.38929,1.961633,0.075501,-0.301313,...,-0.706962,0.734184,1.813499,2.250219,2.250219,-1.237276,-0.483919,-1.140503,0.919629,-0.458159


### LR-unimputed

In [58]:
from sklearn import svm
from sklearn.ensemble import (RandomForestRegressor, GradientBoostingRegressor,AdaBoostRegressor,BaggingRegressor)
from sklearn.linear_model import LinearRegression
lr_model = LinearRegression()
lr_model.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [27]:
lr_model.score(X_test,y_test)

-5.250559028346631e+17

In [32]:
kfold = KFold(n_splits=10,random_state=42)
results = cross_val_score(lr_model,X,y,cv=kfold)
print(results.mean()*100.0,results.std()*100.0)



98.09214343204381 0.02246981216314609


But can't rely on LR as the score is -ve.

### Random forest regressor - unimputed

In [59]:
model=RandomForestRegressor()
model.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [61]:
from sklearn import metrics
y_pred = model.predict(X_test)
# performance on train data
print('Performance on training data using RFR:',model.score(X_train,y_train))
# performance on test data
print('Performance on testing data using RFR:',model.score(X_test,y_test))
#Evaluate the model using accuracy
acc_RFR=metrics.r2_score(y_test, y_pred)
print('Accuracy DT: ',acc_RFR)
print('MSE: ',metrics.mean_squared_error(y_test, y_pred))

Performance on training data using RFR: 0.9999982767016157
Performance on testing data using RFR: 0.9998146043562646
Accuracy DT:  0.9998146043562646
MSE:  0.01997927012183766


In [None]:
kfold = KFold(n_splits=10,random_state=42,shuffle=True)
results = cross_val_score(model,X,y,cv=kfold)
print(results.mean()*100.0,results.std()*100.0)

### Random forest regressor - imputed

In [26]:
X_imp = df_train_imputed.drop(['HR'],axis = 1)
y_imp = df_train_imputed['HR']
X_train_imp, X_test_imp, y_train_imp, y_test_imp = train_test_split(X_imp,y_imp,test_size = 0.3, random_state = 42)
X_train_imp.shape, X_test_imp.shape, y_train_imp.shape, y_test_imp.shape

((258502, 36), (110787, 36), (258502,), (110787,))

In [27]:
X_train_imp = sc.fit_transform(X_train_imp)
X_test_imp = sc.fit_transform(X_test_imp)
X_train_imp = pd.DataFrame(X_train_imp, columns = X.columns)
X_test_imp = pd.DataFrame(X_test_imp, columns = X.columns)

In [28]:
model_imp=RandomForestRegressor()
model_imp.fit(X_train_imp, y_train_imp)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [34]:
from sklearn import metrics
y_pred_imp = model_imp.predict(X_test_imp)
# performance on train data
print('Performance on training data using RFR:',model_imp.score(X_train_imp,y_train_imp))
# performance on test data
print('Performance on testing data using RFR:',model_imp.score(X_test_imp,y_test_imp))
#Evaluate the model using accuracy
acc_RFR_imp=metrics.r2_score(y_test_imp, y_pred_imp)
print('Accuracy DT: ',acc_RFR_imp)
print('MSE: ',metrics.mean_squared_error(y_test_imp, y_pred_imp))

Performance on training data using RFR: 0.9999987022564398
Performance on testing data using RFR: 0.9998831751337665
Accuracy DT:  0.9998831751337665
MSE:  0.01258970012671195


### Linear SVR
##### Imputed

In [35]:
from sklearn.svm import LinearSVR

regr = LinearSVR(random_state=0, tol=1e-5)
regr.fit(X_train,y_train)



LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
          intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
          random_state=0, tol=1e-05, verbose=0)

In [36]:
y_pred = regr.predict(X_test)
# performance on train data
print('Performance on training data using RFR:',regr.score(X_train,y_train))
# performance on test data
print('Performance on testing data using RFR:',regr.score(X_test,y_test))
#Evaluate the model using accuracy
acc_RFR=metrics.r2_score(y_test, y_pred)
print('Accuracy DT: ',acc_RFR)
print('MSE: ',metrics.mean_squared_error(y_test, y_pred))

Performance on training data using RFR: 0.9778520312903042
Performance on testing data using RFR: 0.9773735572335698
Accuracy DT:  0.9773735572335698
MSE:  2.4383518556230284


##### imputed

In [37]:
regr_imp = LinearSVR(random_state=0, tol=1e-5)
regr_imp.fit(X_train_imp,y_train_imp)



LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
          intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
          random_state=0, tol=1e-05, verbose=0)

In [38]:
y_pred_imp = regr_imp.predict(X_test_imp)
# performance on train data
print('Performance on training data using RFR:',regr_imp.score(X_train_imp,y_train_imp))
# performance on test data
print('Performance on testing data using RFR:',regr_imp.score(X_test_imp,y_test_imp))
#Evaluate the model using accuracy
acc_RFR_imp=metrics.r2_score(y_test_imp, y_pred_imp)
print('Accuracy DT: ',acc_RFR_imp)
print('MSE: ',metrics.mean_squared_error(y_test_imp, y_pred_imp))

Performance on training data using RFR: 0.9679473971511207
Performance on testing data using RFR: 0.9673769013779434
Accuracy DT:  0.9673769013779434
MSE:  3.515647328323491
