In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
data = pd.read_csv("../Data/Biometric Data Analysis/train.csv").fillna(0)

In [3]:
data.rho.unique()
data.drop("id", axis=1, inplace=True)

In [4]:
# group by rho

data_rho_25 = data[data['rho'] == 25]
data_rho_20 = data[data['rho'] == 20]
data_rho_15 = data[data['rho'] == 15]
data_rho_10 = data[data['rho'] == 10]

## XG Boost

In [5]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [19]:
def XY(df):
    X = df.iloc[:,:-4]    
    
    y_hhb = df.iloc[:,-4]
    y_hbo2 = df.iloc[:,-3]
    y_ca = df.iloc[:,-2]
    y_na = df.iloc[:,-1]
    
    return X, y_hhb, y_hbo2, y_ca, y_na

### XG Boost Tunning with Hyperopt

https://medium.com/analytics-vidhya/hyperparameter-tuning-hyperopt-bayesian-optimization-for-xgboost-and-neural-network-8aedf278a1c9

In [20]:
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

In [36]:
def objective(space):
    print(space)
    clf = xgb.XGBRegressor(objective ='reg:squarederror', n_estimators =2200,colsample_bytree=space['colsample_bytree'],
                           learning_rate = .3,
                            max_depth = int(space['max_depth']),
                            min_child_weight = space['min_child_weight'],
                            subsample = space['subsample'],
                           gamma = space['gamma'],
                           reg_lambda = space['reg_lambda'],
                           reg_alpha = space['reg_alpha'])

    eval_set  = [(X_train, y_train), (X_test, y_test)]

    clf.fit(X_train, y_train,
            eval_set=eval_set, eval_metric="rmse",
            early_stopping_rounds=10,verbose=False)

    pred = clf.predict(X_test)
    mse_scr = mean_squared_error(y_test, pred)
    print ("SCORE:", np.sqrt(mse_scr))
    #change the metric if you like
    return {'loss':mse_scr, 'status': STATUS_OK }

space ={'max_depth': hp.quniform("x_max_depth", 4, 15, 1),
        'min_child_weight': hp.quniform ('x_min_child', 1, 10, 1),
        'subsample': hp.uniform ('x_subsample', 0.7, 1),
        'gamma' : hp.uniform ('x_gamma', 0.1,0.5),
        'colsample_bytree' : hp.uniform ('x_colsample_bytree', 0.7,1),
        'reg_lambda' : hp.uniform ('x_reg_lambda', 0,1),
        'reg_alpha' : hp.uniform ('x_reg_alpha', 0,1)
    }

In [37]:
X, y_hhb, y_hbo2, y_ca, y_na = XY(data_rho_25)

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, y_hhb, test_size=0.25, shuffle=1234)

trials = Trials()
best_hhb = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

X_train, X_test, y_train, y_test = train_test_split(X, y_hbo2, test_size=0.25, shuffle=1234)

trials = Trials()
best_hbo2 = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

X_train, X_test, y_train, y_test = train_test_split(X, y_ca, test_size=0.25, shuffle=1234)

trials = Trials()
best_ca = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

X_train, X_test, y_train, y_test = train_test_split(X, y_na, test_size=0.25, shuffle=1234)

trials = Trials()
best_na = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

{'colsample_bytree': 0.9553204200083195, 'gamma': 0.36897476068043245, 'max_depth': 6.0, 'min_child_weight': 8.0, 'reg_alpha': 0.6944384482658559, 'reg_lambda': 0.1334920176276707, 'subsample': 0.9570077640723701}
SCORE:                                                                                                                 
1.4200321999680545                                                                                                     
{'colsample_bytree': 0.7419918251794856, 'gamma': 0.18666128658433923, 'max_depth': 12.0, 'min_child_weight': 7.0, 'reg_alpha': 0.28368333207319774, 'reg_lambda': 0.3049170572066505, 'subsample': 0.9802391748182668}
SCORE:                                                                                                                 
1.4199335848230343                                                                                                     
{'colsample_bytree': 0.9862188854612, 'gamma': 0.4115564944082356, 'max_depth': 13.0, 'min

{'colsample_bytree': 0.9403171227940106, 'gamma': 0.3100740768281627, 'max_depth': 8.0, 'min_child_weight': 3.0, 'reg_alpha': 0.2492905550297504, 'reg_lambda': 0.5572475373202297, 'subsample': 0.9403381155666002}
SCORE:                                                                                                                 
1.431199053759652                                                                                                      
{'colsample_bytree': 0.7756545082881172, 'gamma': 0.24710684477009642, 'max_depth': 15.0, 'min_child_weight': 1.0, 'reg_alpha': 0.10187113195339226, 'reg_lambda': 0.3348904965620514, 'subsample': 0.7730207971186547}
SCORE:                                                                                                                 
1.4721896025618706                                                                                                     
{'colsample_bytree': 0.7022726504764706, 'gamma': 0.4316222755027675, 'max_depth': 15.0, 'm

{'colsample_bytree': 0.7105180321764235, 'gamma': 0.49883247523532503, 'max_depth': 7.0, 'min_child_weight': 4.0, 'reg_alpha': 0.378017150241842, 'reg_lambda': 0.9361449244309552, 'subsample': 0.7933827758916209}
SCORE:                                                                                                                 
1.45176592304656                                                                                                       
{'colsample_bytree': 0.7967510228752761, 'gamma': 0.4054353441684337, 'max_depth': 10.0, 'min_child_weight': 8.0, 'reg_alpha': 0.13733329806425518, 'reg_lambda': 0.833273372575996, 'subsample': 0.7790245676681401}
SCORE:                                                                                                                 
1.4246636903472112                                                                                                     
{'colsample_bytree': 0.7004804648902102, 'gamma': 0.35255567698073337, 'max_depth': 13.0, 'mi

{'colsample_bytree': 0.8323599291116848, 'gamma': 0.40733128555469605, 'max_depth': 5.0, 'min_child_weight': 5.0, 'reg_alpha': 0.7982355264953005, 'reg_lambda': 0.20501671236580982, 'subsample': 0.9599096040764435}
SCORE:                                                                                                                 
1.3996238858966534                                                                                                     
{'colsample_bytree': 0.8090396382026925, 'gamma': 0.39272498562314206, 'max_depth': 10.0, 'min_child_weight': 1.0, 'reg_alpha': 0.62548424314286, 'reg_lambda': 0.9393431116480964, 'subsample': 0.895870331229859}
SCORE:                                                                                                                 
1.418320035387506                                                                                                      
{'colsample_bytree': 0.7063392112441271, 'gamma': 0.4177705844015174, 'max_depth': 8.0, 'min_

{'colsample_bytree': 0.7579382577340219, 'gamma': 0.448684996447391, 'max_depth': 15.0, 'min_child_weight': 7.0, 'reg_alpha': 0.001781761539497162, 'reg_lambda': 0.5433731894423455, 'subsample': 0.8188251905788859}
SCORE:                                                                                                                 
1.3976466961354055                                                                                                     
{'colsample_bytree': 0.7833803346954437, 'gamma': 0.41418184061448904, 'max_depth': 14.0, 'min_child_weight': 4.0, 'reg_alpha': 0.2296422252525351, 'reg_lambda': 0.8096715645177412, 'subsample': 0.8782481635984298}
SCORE:                                                                                                                 
1.4605860820043048                                                                                                     
{'colsample_bytree': 0.7110406669615965, 'gamma': 0.3241463270076629, 'max_depth': 11.0, '

{'colsample_bytree': 0.9003675348008748, 'gamma': 0.4148245751691648, 'max_depth': 8.0, 'min_child_weight': 7.0, 'reg_alpha': 0.343812888427382, 'reg_lambda': 0.9977890121317716, 'subsample': 0.9431281586917596}
SCORE:                                                                                                                 
1.4673282746709642                                                                                                     
{'colsample_bytree': 0.9203581863710592, 'gamma': 0.13988486559703897, 'max_depth': 6.0, 'min_child_weight': 8.0, 'reg_alpha': 0.39088014859868697, 'reg_lambda': 0.9222277575150826, 'subsample': 0.9110392561311306}
SCORE:                                                                                                                 
1.42229558376141                                                                                                       
{'colsample_bytree': 0.8278244309435391, 'gamma': 0.38441755544103956, 'max_depth': 5.0, 'min

0.8121175797645593                                                                                                     
{'colsample_bytree': 0.7642722897187325, 'gamma': 0.45891075900274925, 'max_depth': 7.0, 'min_child_weight': 2.0, 'reg_alpha': 0.4104363750782326, 'reg_lambda': 0.9379118225521522, 'subsample': 0.9524348513734984}
SCORE:                                                                                                                 
0.8127160594143321                                                                                                     
{'colsample_bytree': 0.8028177208762711, 'gamma': 0.33162083424854705, 'max_depth': 13.0, 'min_child_weight': 8.0, 'reg_alpha': 0.6242933133802022, 'reg_lambda': 0.7703004793051847, 'subsample': 0.9188897188627678}
SCORE:                                                                                                                 
0.8428683526185606                                                                         

0.8078219295253755                                                                                                     
{'colsample_bytree': 0.7035134287161061, 'gamma': 0.27314514503661713, 'max_depth': 12.0, 'min_child_weight': 7.0, 'reg_alpha': 0.999288582038941, 'reg_lambda': 0.8367712806426627, 'subsample': 0.8924495312323206}
SCORE:                                                                                                                 
0.8462968582286651                                                                                                     
{'colsample_bytree': 0.8867919524167805, 'gamma': 0.37484994049126846, 'max_depth': 14.0, 'min_child_weight': 7.0, 'reg_alpha': 0.8549415569171394, 'reg_lambda': 0.5743882862059121, 'subsample': 0.9769881771467457}
SCORE:                                                                                                                 
0.8227606615656752                                                                         

0.8390052994088665                                                                                                     
{'colsample_bytree': 0.7506707174052305, 'gamma': 0.3402197137380725, 'max_depth': 9.0, 'min_child_weight': 9.0, 'reg_alpha': 0.5516781845433347, 'reg_lambda': 0.4359040937167458, 'subsample': 0.7939874630454679}
SCORE:                                                                                                                 
0.8574627779708387                                                                                                     
{'colsample_bytree': 0.8607724254488429, 'gamma': 0.31862615606686995, 'max_depth': 12.0, 'min_child_weight': 8.0, 'reg_alpha': 0.3464412570854215, 'reg_lambda': 0.8890484424872032, 'subsample': 0.7464039926687525}
SCORE:                                                                                                                 
0.8277523304454454                                                                          

0.8286530337234554                                                                                                     
{'colsample_bytree': 0.7219833415437927, 'gamma': 0.38452786295519115, 'max_depth': 15.0, 'min_child_weight': 7.0, 'reg_alpha': 0.3989945592131941, 'reg_lambda': 0.7950701224343683, 'subsample': 0.8553673212026023}
SCORE:                                                                                                                 
0.8172893198143182                                                                                                     
{'colsample_bytree': 0.7390542765916621, 'gamma': 0.3628924552998899, 'max_depth': 8.0, 'min_child_weight': 4.0, 'reg_alpha': 0.6739689918081757, 'reg_lambda': 0.0747451119098635, 'subsample': 0.8701097268909046}
SCORE:                                                                                                                 
0.854132614430493                                                                           

SCORE:                                                                                                                 
0.8340903167091797                                                                                                     
{'colsample_bytree': 0.7607310555546065, 'gamma': 0.34882186178454877, 'max_depth': 11.0, 'min_child_weight': 9.0, 'reg_alpha': 0.801207210661195, 'reg_lambda': 0.5057263970324177, 'subsample': 0.9419922202409798}
SCORE:                                                                                                                 
0.8079918960672663                                                                                                     
{'colsample_bytree': 0.8122308972811108, 'gamma': 0.4334504661114288, 'max_depth': 11.0, 'min_child_weight': 9.0, 'reg_alpha': 0.8085159732639704, 'reg_lambda': 0.19575771791666913, 'subsample': 0.9421570303498931}
SCORE:                                                                                     

SCORE:                                                                                                                 
0.8221114980483659                                                                                                     
{'colsample_bytree': 0.9641919078373461, 'gamma': 0.3545958787042476, 'max_depth': 15.0, 'min_child_weight': 8.0, 'reg_alpha': 0.6274769050775495, 'reg_lambda': 0.3730398567561962, 'subsample': 0.9808680359382133}
SCORE:                                                                                                                 
0.8042580295090482                                                                                                     
{'colsample_bytree': 0.9612003257440465, 'gamma': 0.3541254409298795, 'max_depth': 15.0, 'min_child_weight': 7.0, 'reg_alpha': 0.6170473663886424, 'reg_lambda': 0.39324681565754316, 'subsample': 0.8945087720928522}
SCORE:                                                                                     

{'colsample_bytree': 0.8374894036143516, 'gamma': 0.11772054146110125, 'max_depth': 8.0, 'min_child_weight': 8.0, 'reg_alpha': 0.7319845848991361, 'reg_lambda': 0.5627163635067222, 'subsample': 0.944969426340559}
SCORE:                                                                                                                 
2.763214697872158                                                                                                      
{'colsample_bytree': 0.9986974241979949, 'gamma': 0.38032854003996097, 'max_depth': 8.0, 'min_child_weight': 4.0, 'reg_alpha': 0.9173216405270236, 'reg_lambda': 0.1042579592432763, 'subsample': 0.7818330445258017}
SCORE:                                                                                                                 
2.7336708447079396                                                                                                     
{'colsample_bytree': 0.9683800468758572, 'gamma': 0.3119444228150262, 'max_depth': 14.0, 'min

{'colsample_bytree': 0.9121205432011129, 'gamma': 0.3468123602374936, 'max_depth': 4.0, 'min_child_weight': 2.0, 'reg_alpha': 0.6110923003118337, 'reg_lambda': 0.18558304927697847, 'subsample': 0.923176597690062}
SCORE:                                                                                                                 
2.6649350695925023                                                                                                     
{'colsample_bytree': 0.9435073907639336, 'gamma': 0.1975553569468526, 'max_depth': 7.0, 'min_child_weight': 4.0, 'reg_alpha': 0.36774780425624876, 'reg_lambda': 0.05797774462210872, 'subsample': 0.8248977992333131}
SCORE:                                                                                                                 
2.7621111257574076                                                                                                     
{'colsample_bytree': 0.8182820213513918, 'gamma': 0.12650565577771783, 'max_depth': 5.0, 'mi

{'colsample_bytree': 0.9011124854956216, 'gamma': 0.3996177674942555, 'max_depth': 8.0, 'min_child_weight': 1.0, 'reg_alpha': 0.3141894854828291, 'reg_lambda': 0.2946107618346326, 'subsample': 0.7086069730257054}
SCORE:                                                                                                                 
2.7656964758200195                                                                                                     
{'colsample_bytree': 0.7915034148062102, 'gamma': 0.18300645788880143, 'max_depth': 7.0, 'min_child_weight': 2.0, 'reg_alpha': 0.8033412840112438, 'reg_lambda': 0.7602622636141059, 'subsample': 0.9275110650229175}
SCORE:                                                                                                                 
2.7695247745497427                                                                                                     
{'colsample_bytree': 0.7636742323473855, 'gamma': 0.12672624480915073, 'max_depth': 4.0, 'min

{'colsample_bytree': 0.8214287764828097, 'gamma': 0.21223221931276975, 'max_depth': 4.0, 'min_child_weight': 5.0, 'reg_alpha': 0.3479981443931537, 'reg_lambda': 0.23391570706289544, 'subsample': 0.9135926043791193}
SCORE:                                                                                                                 
2.7060529114110516                                                                                                     
{'colsample_bytree': 0.9082298234056274, 'gamma': 0.20322450474232726, 'max_depth': 7.0, 'min_child_weight': 3.0, 'reg_alpha': 0.45038198454047285, 'reg_lambda': 0.08038671058393859, 'subsample': 0.9699952520152645}
SCORE:                                                                                                                 
2.7382095488805756                                                                                                     
{'colsample_bytree': 0.8045696198887012, 'gamma': 0.17124694342246907, 'max_depth': 6.0, 

{'colsample_bytree': 0.8503330168803153, 'gamma': 0.3874580302595394, 'max_depth': 4.0, 'min_child_weight': 3.0, 'reg_alpha': 0.08423792658841533, 'reg_lambda': 0.8090889329556633, 'subsample': 0.9130052409494824}
SCORE:                                                                                                                 
2.718758111831871                                                                                                      
{'colsample_bytree': 0.9042603605124832, 'gamma': 0.1672634700919391, 'max_depth': 11.0, 'min_child_weight': 8.0, 'reg_alpha': 0.6858875880225976, 'reg_lambda': 0.1600376723221911, 'subsample': 0.9489837522569791}
SCORE:                                                                                                                 
2.722484665439011                                                                                                      
{'colsample_bytree': 0.7677454558397456, 'gamma': 0.2746928288779809, 'max_depth': 6.0, 'min

1.8944750766572584                                                                                                     
{'colsample_bytree': 0.8677502447620238, 'gamma': 0.20903079762049515, 'max_depth': 14.0, 'min_child_weight': 10.0, 'reg_alpha': 0.12525259682497902, 'reg_lambda': 0.3474377199985399, 'subsample': 0.8407722018017346}
SCORE:                                                                                                                 
1.8936066074092512                                                                                                     
{'colsample_bytree': 0.7480686279405987, 'gamma': 0.14880824451796154, 'max_depth': 15.0, 'min_child_weight': 2.0, 'reg_alpha': 0.517532099205094, 'reg_lambda': 0.36918997528931263, 'subsample': 0.8265682469509874}
SCORE:                                                                                                                 
1.9696084068697488                                                                      

1.8806467702403598                                                                                                     
{'colsample_bytree': 0.7959350771318527, 'gamma': 0.32489521068705673, 'max_depth': 4.0, 'min_child_weight': 6.0, 'reg_alpha': 0.8397734188821324, 'reg_lambda': 0.07963391760498008, 'subsample': 0.9894429633682148}
SCORE:                                                                                                                 
1.8337805584717222                                                                                                     
{'colsample_bytree': 0.8325152055484424, 'gamma': 0.3628599294856716, 'max_depth': 7.0, 'min_child_weight': 4.0, 'reg_alpha': 0.7789674564611523, 'reg_lambda': 0.005842981612782655, 'subsample': 0.9287678192748428}
SCORE:                                                                                                                 
1.8669326646964186                                                                        

1.8114478360907897                                                                                                     
{'colsample_bytree': 0.8799137561734982, 'gamma': 0.2171121835858086, 'max_depth': 4.0, 'min_child_weight': 8.0, 'reg_alpha': 0.0719549118495267, 'reg_lambda': 0.9727617840643507, 'subsample': 0.9111126143003706}
SCORE:                                                                                                                 
1.8365433597787617                                                                                                     
{'colsample_bytree': 0.8337911294435829, 'gamma': 0.18373085861156144, 'max_depth': 11.0, 'min_child_weight': 9.0, 'reg_alpha': 0.008434715841920959, 'reg_lambda': 0.5163083906998996, 'subsample': 0.9736575516755399}
SCORE:                                                                                                                 
1.9146038517358699                                                                        

1.8884470553544492                                                                                                     
{'colsample_bytree': 0.7656603819034757, 'gamma': 0.2775547173077797, 'max_depth': 8.0, 'min_child_weight': 5.0, 'reg_alpha': 0.6411940556075034, 'reg_lambda': 0.16602003941821164, 'subsample': 0.9604315395712814}
SCORE:                                                                                                                 
1.8475226510389866                                                                                                     
{'colsample_bytree': 0.9805336066696674, 'gamma': 0.3134734700856103, 'max_depth': 4.0, 'min_child_weight': 3.0, 'reg_alpha': 0.7758989164261099, 'reg_lambda': 0.21457598947637102, 'subsample': 0.9328933727018511}
SCORE:                                                                                                                 
1.8259392446358524                                                                          

1.8179203593635447                                                                                                     
{'colsample_bytree': 0.8475568101923411, 'gamma': 0.21198579082003655, 'max_depth': 5.0, 'min_child_weight': 8.0, 'reg_alpha': 0.5042396784181412, 'reg_lambda': 0.6391978991467224, 'subsample': 0.7906222807828428}
SCORE:                                                                                                                 
1.8617151123675404                                                                                                     
{'colsample_bytree': 0.7393866481100643, 'gamma': 0.34441050869635803, 'max_depth': 6.0, 'min_child_weight': 7.0, 'reg_alpha': 0.2567607247663216, 'reg_lambda': 0.4515001380007797, 'subsample': 0.8763237926509359}
SCORE:                                                                                                                 
1.8391043256944262                                                                          

1.9302565931339288                                                                                                     
{'colsample_bytree': 0.7795327436717728, 'gamma': 0.4267003333825844, 'max_depth': 4.0, 'min_child_weight': 9.0, 'reg_alpha': 0.7458851503169025, 'reg_lambda': 0.3573914928476921, 'subsample': 0.9255213296800021}
SCORE:                                                                                                                 
1.7903244546901766                                                                                                     
{'colsample_bytree': 0.7835616128608737, 'gamma': 0.4903972162864186, 'max_depth': 6.0, 'min_child_weight': 10.0, 'reg_alpha': 0.7433090765885265, 'reg_lambda': 0.08753834958611341, 'subsample': 0.9901661661415246}
SCORE:                                                                                                                 
1.852963013155211                                                                           

In [42]:
def XG_Boost(df, num):
    X, y_hhb, y_hhbo2, y_ca, y_na = XY(df)
    
    # Split data
    X1_train, X1_test, y1_train, y1_test = train_test_split(X, y_hhb, test_size=0.2, shuffle=1234)
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y_hhbo2, test_size=0.2, shuffle=1234)
    X3_train, X3_test, y3_train, y3_test = train_test_split(X, y_ca, test_size=0.2, shuffle=1234)
    X4_train, X4_test, y4_train, y4_test = train_test_split(X, y_na, test_size=0.2, shuffle=1234)
    
    
    xg_reg_hbb = xgb.XGBRegressor(parameters=best_hhb)
    
    xg_reg_hbo2 = xgb.XGBRegressor(parameters=best_hbo2)
    
    xg_reg_ca = xgb.XGBRegressor(parameters=best_ca)
    
    xg_reg_na = xgb.XGBRegressor(parameters=best_na)
    
    xg_reg_hbb.fit(X1_train,y1_train)
    preds_hbb = xg_reg_hbb.predict(X1_test)

    xg_reg_hbo2.fit(X2_train,y2_train)
    preds_hbo2 = xg_reg_hbo2.predict(X2_test)

    xg_reg_ca.fit(X3_train,y3_train)
    preds_ca = xg_reg_ca.predict(X3_test)

    xg_reg_na.fit(X4_train,y4_train)
    preds_na = xg_reg_na.predict(X4_test)
    
    
    rmse_hbb = np.sqrt(mean_squared_error(y1_test, preds_hbb))
    rmse_hbo2 = np.sqrt(mean_squared_error(y2_test, preds_hbo2))
    rmse_ca = np.sqrt(mean_squared_error(y3_test, preds_ca))
    rmse_na = np.sqrt(mean_squared_error(y4_test, preds_na))

    print(num, " mm")
    print("RMSE - hbb  : %f" % (rmse_hbb))
    print("RMSE - hbo2 : %f" % (rmse_hbo2))
    print("RMSE - ca   : %f" % (rmse_ca))
    print("RMSE - na   : %f" % (rmse_na))
    
    
    return xg_reg_hbb, xg_reg_hbo2, xg_reg_ca, xg_reg_na

In [43]:
# data_rho_25, data_rho_20, data_rho_15, data_rho_10

hbb_25, hbo2_25, ca_25, na_25 = XG_Boost(data_rho_25, 25)

hbb_20, hbo2_20, ca_20, na_20 = XG_Boost(data_rho_20, 20)

hbb_15, hbo2_15, ca_15, na_15 = XG_Boost(data_rho_15, 15)

hbb_10, hbo2_10, ca_10, na_10 = XG_Boost(data_rho_10, 10)

25  mm
RMSE - hbb  : 1.440114
RMSE - hbo2 : 0.840065
RMSE - ca   : 2.657275
RMSE - na   : 1.921076
20  mm
RMSE - hbb  : 1.367592
RMSE - hbo2 : 0.835751
RMSE - ca   : 2.579431
RMSE - na   : 1.793540
15  mm
RMSE - hbb  : 1.428716
RMSE - hbo2 : 0.855677
RMSE - ca   : 2.484416
RMSE - na   : 1.659544
10  mm
RMSE - hbb  : 1.377497
RMSE - hbo2 : 0.901228
RMSE - ca   : 2.394575
RMSE - na   : 1.548741


In [44]:
test = pd.read_csv("../Data/Biometric Data Analysis/test.csv").fillna(0)

In [45]:
test_rho_25 = test[test['rho'] == 25]
test_rho_20 = test[test['rho'] == 20]
test_rho_15 = test[test['rho'] == 15]
test_rho_10 = test[test['rho'] == 10]

In [46]:
def prediction(hbb, hbo2, ca, na, df):
    
    test = df.iloc[:, 1:]
    
    preds_hbb = hbb.predict(test)
    preds_hbo2 = hbo2.predict(test)
    preds_ca = ca.predict(test)
    preds_na = na.predict(test)
    
    df_1 = df.copy()
    
    df_1['hhb'] = preds_hbb
    df_1['hbo2'] = preds_hbo2
    df_1['ca'] = preds_ca
    df_1['na'] = preds_na
    
    return df_1

In [47]:
# test_rho_25 - hbb_25, hbo2_25, ca_25, na_25
# test_rho_20 - hbb_20, hbo2_20, ca_20, na_20
# test_rho_15 - hbb_15, hbo2_15, ca_15, na_15
# test_rho_10 - hbb_10, hbo2_10, ca_10, na_10

rho_25 = prediction(hbb_25, hbo2_25, ca_25, na_25, test_rho_25)
rho_20 = prediction(hbb_20, hbo2_20, ca_20, na_20, test_rho_20)
rho_15 = prediction(hbb_15, hbo2_15, ca_15, na_15, test_rho_15)
rho_10 = prediction(hbb_10, hbo2_10, ca_10, na_10, test_rho_10)

In [48]:
temp = pd.concat([rho_25, rho_20, rho_15, rho_10]).sort_values('id')
id = temp.id
temp = temp.iloc[:, -4:]

In [49]:
submission = pd.concat([id, temp], axis=1)
submission.to_csv("submission/06.csv")

In [26]:
submission.head()

Unnamed: 0,id,hhb,hbo2,ca,na
0,10000,8.532439,5.101151,10.130732,3.09725
1,10001,5.707074,4.179231,7.486853,2.767264
2,10002,10.141157,4.541951,11.031622,2.940061
3,10003,8.519643,3.921633,9.055074,3.404695
4,10004,5.722478,3.620933,8.215435,2.713413


10000