In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import seaborn as sns
import scipy.stats as stats

from sklearn import preprocessing
from sklearn import metrics
from sklearn import model_selection
from sklearn import feature_selection

from sklearn.preprocessing import StandardScaler

from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import VotingRegressor
#from xgboost import XGBRegressor


import warnings
warnings.filterwarnings('ignore')
from scipy.stats import zscore
from datetime import datetime
from IPython.display import display
pd.set_option('display.max_columns', None)

In [2]:
def drop_high_correlations(DataFrame, CorrelationThreshold):
    
    df_local = DataFrame.copy(deep=True)
    
    # Create the correlation matrix with absolute values
    corr_matrix = df_local.corr().abs()
    
    # Select upper triangle of correlation matrix
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
    
    # Find index of columns with correlation greater than CorrelationThreshold
    to_drop = [column for column in upper.columns if any(upper[column] > CorrelationThreshold)]
    
    # Drop the correlated columns identified above
    df_local.drop(to_drop, axis=1, inplace=True)
    
    return df_local

In [3]:
# Loading the datasets
#df_time_train = pd.read_csv('Data/Train/time_domain_features_train.csv')
#df_freq_train = pd.read_csv('Data/Train/frequency_domain_features_train.csv')
#df_nlf_train = pd.read_csv('Data/Train/heart_rate_non_linear_features_train.csv')

#df_time_test = pd.read_csv('Data/Test/time_domain_features_test.csv')
#df_freq_test = pd.read_csv('Data/Test/frequency_domain_features_test.csv')
#df_nlf_test = pd.read_csv('Data/Test/heart_rate_non_linear_features_test.csv')

# df_time = pd.read_csv('/content/drive/MyDrive/PGP-AIML/Hackathon Dec-2020/Data/Train/time_domain_features_train.csv')
# df_freq = pd.read_csv('/content/drive/MyDrive/PGP-AIML/Hackathon Dec-2020/Data/Train/frequency_domain_features_train.csv')
# df_nlf = pd.read_csv('/content/drive/MyDrive/PGP-AIML/Hackathon Dec-2020/Data/Train/heart_rate_non_linear_features_train.csv')

# df_time_test = pd.read_csv('/content/drive/MyDrive/PGP-AIML/Hackathon Dec-2020/Data/Test/time_domain_features_test.csv')
# df_freq_test = pd.read_csv('/content/drive/MyDrive/PGP-AIML/Hackathon Dec-2020/Data/Test/frequency_domain_features_test.csv')
# df_nlf_test = pd.read_csv('/content/drive/MyDrive/PGP-AIML/Hackathon Dec-2020/Data/Test/heart_rate_non_linear_features_test.csv')
df_time_train = pd.read_csv("../Files/Train Data/Train Data Zip/time_domain_features_train.csv")
df_freq_train = pd.read_csv("../Files/Train Data/Train Data Zip/frequency_domain_features_train.csv")
df_nlf_train = pd.read_csv("../Files/Train Data/Train Data Zip/heart_rate_non_linear_features_train.csv")

df_time_test = pd.read_csv('../Files/Test Data/Test Zip/time_domain_features_test.csv')
df_freq_test = pd.read_csv('../Files/Test Data/Test Zip/frequency_domain_features_test.csv')
df_nlf_test = pd.read_csv('../Files/Test Data/Test Zip/heart_rate_non_linear_features_test.csv')

df_test_y = pd.read_csv('C:\Piyush\Study\Great Learning\Hackathons\Files\SolutionFiles\RandomForestRegressor/0.018414562355024856__RandomForestRegressor__submission_0.2610179.csv')

In [4]:
# Merging all the datasets into one single datasets
df_train = pd.concat([df_time_train, df_freq_train, df_nlf_train], axis=1, sort=False)
df_test = pd.concat([df_time_test, df_freq_test, df_nlf_test], axis=1, sort=False)

In [5]:
df_train.columns

Index(['MEAN_RR', 'MEDIAN_RR', 'SDRR', 'RMSSD', 'SDSD', 'SDRR_RMSSD', 'HR',
       'pNN25', 'pNN50', 'KURT', 'SKEW', 'MEAN_REL_RR', 'MEDIAN_REL_RR',
       'SDRR_REL_RR', 'RMSSD_REL_RR', 'SDSD_REL_RR', 'SDRR_RMSSD_REL_RR',
       'KURT_REL_RR', 'SKEW_REL_RR', 'uuid', 'uuid', 'VLF', 'VLF_PCT', 'LF',
       'LF_PCT', 'LF_NU', 'HF', 'HF_PCT', 'HF_NU', 'TP', 'LF_HF', 'HF_LF',
       'uuid', 'SD1', 'SD2', 'sampen', 'higuci', 'datasetId', 'condition'],
      dtype='object')

In [6]:
# Reordering the columns of the dataframe
df_train = df_train[['MEAN_RR', 'MEDIAN_RR', 'SDRR', 'RMSSD', 'SDSD', 'SDRR_RMSSD',
       'pNN25', 'pNN50', 'KURT', 'SKEW', 'MEAN_REL_RR', 'MEDIAN_REL_RR',
       'SDRR_REL_RR', 'RMSSD_REL_RR', 'SDSD_REL_RR', 'SDRR_RMSSD_REL_RR',
       'KURT_REL_RR', 'SKEW_REL_RR', 'VLF', 'VLF_PCT', 'LF',
       'LF_PCT', 'LF_NU', 'HF', 'HF_PCT', 'HF_NU', 'TP', 'LF_HF', 'HF_LF',
       'SD1', 'SD2', 'sampen', 'higuci', 'datasetId', 'condition', 'HR']]


df_test = df_test[['MEAN_RR', 'MEDIAN_RR', 'SDRR', 'RMSSD', 'SDSD', 'SDRR_RMSSD',
       'pNN25', 'pNN50', 'KURT', 'SKEW', 'MEAN_REL_RR', 'MEDIAN_REL_RR',
       'SDRR_REL_RR', 'RMSSD_REL_RR', 'SDSD_REL_RR', 'SDRR_RMSSD_REL_RR',
       'KURT_REL_RR', 'SKEW_REL_RR', 'VLF', 'VLF_PCT', 'LF',
       'LF_PCT', 'LF_NU', 'HF', 'HF_PCT', 'HF_NU', 'TP', 'LF_HF', 'HF_LF',
       'SD1', 'SD2', 'sampen', 'higuci', 'datasetId', 'condition']]


In [7]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 369289 entries, 0 to 369288
Data columns (total 36 columns):
MEAN_RR              369289 non-null float64
MEDIAN_RR            369289 non-null float64
SDRR                 369289 non-null float64
RMSSD                369289 non-null float64
SDSD                 369289 non-null float64
SDRR_RMSSD           369289 non-null float64
pNN25                369289 non-null float64
pNN50                369289 non-null float64
KURT                 369289 non-null float64
SKEW                 369289 non-null float64
MEAN_REL_RR          369289 non-null float64
MEDIAN_REL_RR        369289 non-null float64
SDRR_REL_RR          369289 non-null float64
RMSSD_REL_RR         369289 non-null float64
SDSD_REL_RR          369289 non-null float64
SDRR_RMSSD_REL_RR    369289 non-null float64
KURT_REL_RR          369289 non-null float64
SKEW_REL_RR          369289 non-null float64
VLF                  369289 non-null float64
VLF_PCT              369289 non-nu

In [8]:
df_train.head()

Unnamed: 0,MEAN_RR,MEDIAN_RR,SDRR,RMSSD,SDSD,SDRR_RMSSD,pNN25,pNN50,KURT,SKEW,MEAN_REL_RR,MEDIAN_REL_RR,SDRR_REL_RR,RMSSD_REL_RR,SDSD_REL_RR,SDRR_RMSSD_REL_RR,KURT_REL_RR,SKEW_REL_RR,VLF,VLF_PCT,LF,LF_PCT,LF_NU,HF,HF_PCT,HF_NU,TP,LF_HF,HF_LF,SD1,SD2,sampen,higuci,datasetId,condition,HR
0,885.157845,853.76373,140.972741,15.554505,15.553371,9.063146,11.133333,0.533333,-0.856554,0.335218,-0.000203,-0.000179,0.01708,0.007969,0.007969,2.143342,-0.856554,0.335218,2661.894136,72.203287,1009.249419,27.375666,98.485263,15.522603,0.421047,1.514737,3686.666157,65.018055,0.01538,11.001565,199.061782,2.139754,1.163485,2,no stress,69.499952
1,939.425371,948.357865,81.317742,12.964439,12.964195,6.272369,5.6,0.0,-0.40819,-0.155286,-5.9e-05,0.000611,0.013978,0.004769,0.004769,2.930855,-0.40819,-0.155286,2314.26545,76.975728,690.113275,22.954139,99.695397,2.108525,0.070133,0.304603,3006.487251,327.296635,0.003055,9.170129,114.634458,2.174499,1.084711,2,interruption,64.36315
2,898.186047,907.00686,84.497236,16.305279,16.305274,5.182201,13.066667,0.2,0.351789,-0.656813,-1.1e-05,-0.000263,0.018539,0.008716,0.008716,2.127053,0.351789,-0.656813,1373.887112,51.152225,1298.222619,48.335104,98.950472,13.769729,0.512671,1.049528,2685.879461,94.28091,0.010607,11.533417,118.939253,2.13535,1.176315,2,interruption,67.450066
3,881.757865,893.46003,90.370537,15.720468,15.720068,5.748591,11.8,0.133333,-0.504947,-0.386138,0.000112,0.000494,0.017761,0.00866,0.00866,2.050988,-0.504947,-0.386138,2410.357408,70.180308,1005.981659,29.290305,98.224706,18.181913,0.529387,1.775294,3434.52098,55.328701,0.018074,11.119476,127.318597,2.178341,1.179688,2,no stress,68.809562
4,809.625331,811.184865,62.766242,19.213819,19.213657,3.266724,20.2,0.2,-0.548408,-0.154252,-0.0001,-0.002736,0.023715,0.013055,0.013055,1.816544,-0.548408,-0.154252,1151.17733,43.918366,1421.782051,54.24216,96.720007,48.215822,1.839473,3.279993,2621.175204,29.487873,0.033912,13.590641,87.718281,2.221121,1.249612,2,no stress,74.565728


In [9]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41033 entries, 0 to 41032
Data columns (total 35 columns):
MEAN_RR              41033 non-null float64
MEDIAN_RR            41033 non-null float64
SDRR                 41033 non-null float64
RMSSD                41033 non-null float64
SDSD                 41033 non-null float64
SDRR_RMSSD           41033 non-null float64
pNN25                41033 non-null float64
pNN50                41033 non-null float64
KURT                 41033 non-null float64
SKEW                 41033 non-null float64
MEAN_REL_RR          41033 non-null float64
MEDIAN_REL_RR        41033 non-null float64
SDRR_REL_RR          41033 non-null float64
RMSSD_REL_RR         41033 non-null float64
SDSD_REL_RR          41033 non-null float64
SDRR_RMSSD_REL_RR    41033 non-null float64
KURT_REL_RR          41033 non-null float64
SKEW_REL_RR          41033 non-null float64
VLF                  41033 non-null float64
VLF_PCT              41033 non-null float64
LF         

In [10]:
df_test.head()

Unnamed: 0,MEAN_RR,MEDIAN_RR,SDRR,RMSSD,SDSD,SDRR_RMSSD,pNN25,pNN50,KURT,SKEW,MEAN_REL_RR,MEDIAN_REL_RR,SDRR_REL_RR,RMSSD_REL_RR,SDSD_REL_RR,SDRR_RMSSD_REL_RR,KURT_REL_RR,SKEW_REL_RR,VLF,VLF_PCT,LF,LF_PCT,LF_NU,HF,HF_PCT,HF_NU,TP,LF_HF,HF_LF,SD1,SD2,sampen,higuci,datasetId,condition
0,934.665288,939.03173,82.139495,11.801781,11.801772,6.959924,3.933333,0.133333,-0.680262,-0.233075,1.6e-05,0.000288,0.012933,0.004578,0.004578,2.825038,-0.680262,-0.233075,1868.532278,76.511189,570.643114,23.366245,99.478197,2.993254,0.122565,0.521803,2442.168645,190.643094,0.005245,8.347898,115.862444,2.209659,1.100715,2,time pressure
1,817.06238,816.33879,55.492332,20.55881,20.558768,2.6992,24.6,0.533333,-0.034454,-0.051689,4e-05,-0.002749,0.025148,0.013921,0.013921,1.806517,-0.034454,-0.051689,568.742845,26.30135,1553.971621,71.862973,97.509212,39.69485,1.835677,2.490788,2162.409316,39.14794,0.025544,14.542096,77.118903,2.186132,1.290615,2,no stress
2,876.762022,894.19889,88.69082,13.853737,13.85373,6.401942,7.066667,0.533333,-0.206953,-0.58994,1e-05,-0.000172,0.015533,0.008149,0.008149,1.90618,-0.206953,-0.58994,2101.871207,75.836461,655.175895,23.639042,97.829386,14.536877,0.524497,2.170614,2771.583978,45.069921,0.022188,9.799336,125.044377,2.051571,1.226663,2,no stress
3,1038.640693,998.91429,213.72585,16.457194,16.454801,12.986774,10.8,1.866667,-0.820407,0.487198,-0.000238,-0.000464,0.016882,0.007587,0.007587,2.225116,-0.820407,0.487198,5757.544433,90.562305,592.913021,9.326123,98.817806,7.093235,0.111572,1.182194,6357.550689,83.588517,0.011963,11.639185,302.029812,2.08091,1.085143,2,time pressure
4,774.548508,778.90508,51.577855,10.273114,10.273049,5.020664,2.0,0.066667,1.738453,-0.005082,5.4e-05,-6e-06,0.013479,0.007811,0.007811,1.725606,1.738453,-0.005082,964.696325,70.256575,374.93953,27.305968,91.805057,33.468834,2.437457,8.194943,1373.104689,11.202647,0.089265,7.266567,72.579248,2.068728,1.252547,2,interruption


In [11]:
# Encoding the condition dimension using OneHotEncoder
from sklearn.preprocessing import OneHotEncoder

oe_condition = OneHotEncoder()
oe_condition_results_train = oe_condition.fit_transform(df_train[['condition']])
oe_condition_results_test = oe_condition.transform(df_test[['condition']])

In [12]:
# Adding the newly added columns (as a result of OneHotEncoding) to the original dataframe
df_train = df_train.join(pd.DataFrame(oe_condition_results_train.toarray(), columns=df_train['condition'].value_counts().index))
df_test = df_test.join(pd.DataFrame(oe_condition_results_test.toarray(), columns=df_test['condition'].value_counts().index))

In [13]:
df_train.columns

Index(['MEAN_RR', 'MEDIAN_RR', 'SDRR', 'RMSSD', 'SDSD', 'SDRR_RMSSD', 'pNN25',
       'pNN50', 'KURT', 'SKEW', 'MEAN_REL_RR', 'MEDIAN_REL_RR', 'SDRR_REL_RR',
       'RMSSD_REL_RR', 'SDSD_REL_RR', 'SDRR_RMSSD_REL_RR', 'KURT_REL_RR',
       'SKEW_REL_RR', 'VLF', 'VLF_PCT', 'LF', 'LF_PCT', 'LF_NU', 'HF',
       'HF_PCT', 'HF_NU', 'TP', 'LF_HF', 'HF_LF', 'SD1', 'SD2', 'sampen',
       'higuci', 'datasetId', 'condition', 'HR', 'no stress', 'interruption',
       'time pressure'],
      dtype='object')

In [14]:
df_train.head()

Unnamed: 0,MEAN_RR,MEDIAN_RR,SDRR,RMSSD,SDSD,SDRR_RMSSD,pNN25,pNN50,KURT,SKEW,MEAN_REL_RR,MEDIAN_REL_RR,SDRR_REL_RR,RMSSD_REL_RR,SDSD_REL_RR,SDRR_RMSSD_REL_RR,KURT_REL_RR,SKEW_REL_RR,VLF,VLF_PCT,LF,LF_PCT,LF_NU,HF,HF_PCT,HF_NU,TP,LF_HF,HF_LF,SD1,SD2,sampen,higuci,datasetId,condition,HR,no stress,interruption,time pressure
0,885.157845,853.76373,140.972741,15.554505,15.553371,9.063146,11.133333,0.533333,-0.856554,0.335218,-0.000203,-0.000179,0.01708,0.007969,0.007969,2.143342,-0.856554,0.335218,2661.894136,72.203287,1009.249419,27.375666,98.485263,15.522603,0.421047,1.514737,3686.666157,65.018055,0.01538,11.001565,199.061782,2.139754,1.163485,2,no stress,69.499952,0.0,1.0,0.0
1,939.425371,948.357865,81.317742,12.964439,12.964195,6.272369,5.6,0.0,-0.40819,-0.155286,-5.9e-05,0.000611,0.013978,0.004769,0.004769,2.930855,-0.40819,-0.155286,2314.26545,76.975728,690.113275,22.954139,99.695397,2.108525,0.070133,0.304603,3006.487251,327.296635,0.003055,9.170129,114.634458,2.174499,1.084711,2,interruption,64.36315,1.0,0.0,0.0
2,898.186047,907.00686,84.497236,16.305279,16.305274,5.182201,13.066667,0.2,0.351789,-0.656813,-1.1e-05,-0.000263,0.018539,0.008716,0.008716,2.127053,0.351789,-0.656813,1373.887112,51.152225,1298.222619,48.335104,98.950472,13.769729,0.512671,1.049528,2685.879461,94.28091,0.010607,11.533417,118.939253,2.13535,1.176315,2,interruption,67.450066,1.0,0.0,0.0
3,881.757865,893.46003,90.370537,15.720468,15.720068,5.748591,11.8,0.133333,-0.504947,-0.386138,0.000112,0.000494,0.017761,0.00866,0.00866,2.050988,-0.504947,-0.386138,2410.357408,70.180308,1005.981659,29.290305,98.224706,18.181913,0.529387,1.775294,3434.52098,55.328701,0.018074,11.119476,127.318597,2.178341,1.179688,2,no stress,68.809562,0.0,1.0,0.0
4,809.625331,811.184865,62.766242,19.213819,19.213657,3.266724,20.2,0.2,-0.548408,-0.154252,-0.0001,-0.002736,0.023715,0.013055,0.013055,1.816544,-0.548408,-0.154252,1151.17733,43.918366,1421.782051,54.24216,96.720007,48.215822,1.839473,3.279993,2621.175204,29.487873,0.033912,13.590641,87.718281,2.221121,1.249612,2,no stress,74.565728,0.0,1.0,0.0


In [15]:
# Removing the base columns of the principal components
# Removing Dataset ID column
# Removing 
df_train.drop(['VLF', 'LF', 'HF', 'datasetId', 'condition'], axis=1, inplace=True)
df_test.drop(['VLF', 'LF', 'HF', 'datasetId', 'condition'], axis=1, inplace=True)

In [16]:
df_train.columns

Index(['MEAN_RR', 'MEDIAN_RR', 'SDRR', 'RMSSD', 'SDSD', 'SDRR_RMSSD', 'pNN25',
       'pNN50', 'KURT', 'SKEW', 'MEAN_REL_RR', 'MEDIAN_REL_RR', 'SDRR_REL_RR',
       'RMSSD_REL_RR', 'SDSD_REL_RR', 'SDRR_RMSSD_REL_RR', 'KURT_REL_RR',
       'SKEW_REL_RR', 'VLF_PCT', 'LF_PCT', 'LF_NU', 'HF_PCT', 'HF_NU', 'TP',
       'LF_HF', 'HF_LF', 'SD1', 'SD2', 'sampen', 'higuci', 'HR', 'no stress',
       'interruption', 'time pressure'],
      dtype='object')

In [17]:
df_test.columns

Index(['MEAN_RR', 'MEDIAN_RR', 'SDRR', 'RMSSD', 'SDSD', 'SDRR_RMSSD', 'pNN25',
       'pNN50', 'KURT', 'SKEW', 'MEAN_REL_RR', 'MEDIAN_REL_RR', 'SDRR_REL_RR',
       'RMSSD_REL_RR', 'SDSD_REL_RR', 'SDRR_RMSSD_REL_RR', 'KURT_REL_RR',
       'SKEW_REL_RR', 'VLF_PCT', 'LF_PCT', 'LF_NU', 'HF_PCT', 'HF_NU', 'TP',
       'LF_HF', 'HF_LF', 'SD1', 'SD2', 'sampen', 'higuci', 'no stress',
       'interruption', 'time pressure'],
      dtype='object')

In [18]:
print(df_train.shape)
print(df_test.shape)

(369289, 34)
(41033, 33)


In [19]:
# Dropping one of the columns from all the high correlation pairs
df_train_lc = drop_high_correlations(df_train, 0.95)

In [20]:
df_train.shape

(369289, 34)

In [21]:
df_train_lc.drop('HR', axis=1).columns

Index(['MEAN_RR', 'SDRR', 'RMSSD', 'SDRR_RMSSD', 'pNN50', 'KURT', 'SKEW',
       'MEAN_REL_RR', 'MEDIAN_REL_RR', 'SDRR_REL_RR', 'RMSSD_REL_RR',
       'SDRR_RMSSD_REL_RR', 'VLF_PCT', 'LF_NU', 'HF_PCT', 'TP', 'LF_HF',
       'sampen', 'higuci', 'no stress', 'interruption', 'time pressure'],
      dtype='object')

In [22]:
df_train_lc.shape

(369289, 23)

In [23]:
df_test_lc = df_test[df_train_lc.drop('HR', axis=1).columns]

In [24]:
df_test_lc.shape

(41033, 22)

In [25]:
%%time
#sns.pairplot(data=df, diag_kind='kde')

Wall time: 0 ns


In [26]:
# Creating the training and testing sets
X_train = df_train_lc.drop('HR', axis=1)
X_test = df_test_lc
y_train = pd.DataFrame(df_train_lc['HR'])

In [27]:
print(X_train.shape)
print(X_test.shape)

(369289, 22)
(41033, 22)


In [28]:
# Applying power transform. standardize=True would apply the Standard Scaler also
pt = preprocessing.PowerTransformer(method='yeo-johnson', standardize=True)
X_train_PT = pt.fit_transform(X_train)
X_test_PT = pt.transform(X_test)

In [29]:
# Applying PCA
from sklearn.decomposition import PCA
# Limiting the number of components to 10
pca = PCA(n_components=10)
X_train_PCA = pca.fit_transform(X_train_PT)
X_test_PCA = pca.transform(X_test_PT)
print(pca.explained_variance_)
print(pca.explained_variance_ratio_)

[6.47605423 4.32359647 3.57966175 1.59392245 1.2054449  1.06034141
 1.04318135 0.89145007 0.5833419  0.43713352]
[0.2943653  0.19652658 0.16271146 0.07245082 0.0547928  0.04819721
 0.04741721 0.04052035 0.02651547 0.01986965]


#### Using BaggedSVM

In [None]:
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor
svbr = BaggingRegressor(base_estimator=SVR(), n_estimators=100, random_state=76).fit(X_train_PCA, y_train.values.ravel())
y_pred_svbr = svbr.predict(X_test_PCA)

print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

20/12/2020 17:55:19


#### Using Random forest

In [None]:
%%time
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
regr = RandomForestRegressor(criterion='mae', max_depth=5)
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
regr.fit(X_train_PCA, y_train.values.ravel())
y_pred_rf = regr.predict(X_test_PCA)
print('Random Forest Performance Parameters')
print('Training Score: {}'.format(regr.score(X_train_PCA, y_train)))
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

20/12/2020 17:56:29


In [None]:
regr.get_params(deep=True)

In [None]:
df_f = pd.DataFrame(y_pred_rf)

In [None]:
df_sub = pd.concat([df_nlf_test['uuid'], df_f], axis=1, sort=False)

In [None]:
df_sub.columns = ['uuid', 'HR']

In [None]:
df_sub.to_csv('RF_submission.csv',index=False)

#### Using ADABoost Regressor

In [None]:
from sklearn.ensemble import AdaBoostRegressor
adaregr = AdaBoostRegressor(n_estimators=100)
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
adaregr.fit(X_train_PCA, y_train.values.ravel())
y_pred_adab = adaregr.predict(X_test_PCA)
print('ADABoost Performance Parameters')
print('Training Score: {}'.format(adaregr.score(X_train_PCA, y_train)))
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

In [None]:
adaregr.get_params(deep=True)

#### Using Gradient Boosting Regressor

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
gbregr = GradientBoostingRegressor(min_samples_leaf=5)
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
gbregr.fit(X_train_PCA, y_train.values.ravel())
y_pred_gbr = gbregr.predict(X_test_PCA)
print('Gradient Boost Performance Parameters')
print('Training Score: {}'.format(gbregr.score(X_train_PCA, y_train)))
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

In [None]:
gbregr.get_params(deep=True)

#### Using XGBoost

In [None]:
import xgboost as xgb

In [None]:
xgbst = xgb.XGBRegressor(n_estimators=100, reg_lambda=1, gamma=0, max_depth=5)
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
xgbst.fit(X_train_PCA, y_train.values.ravel())
y_pred_xgb = xgbst.predict(X_test_PCA)
print('ADABoost Performance Parameters')
print('Training Score: {}'.format(xgbst.score(X_train_PCA, y_train)))
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

In [None]:
xgbst.get_params(deep=True)

In [None]:
y_pred_rf