In [2]:
import pandas as pd 
import numpy as np
from numpy import loadtxt
import warnings

from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import multiprocessing

from sklearn.metrics import mean_absolute_percentage_error as MAPE
from sklearn.metrics import mean_squared_error as MSE
import matplotlib.pyplot as plt

In [3]:
realdata = pd.read_csv(f"C:\\Users\\namil\\Documents\\stmi-lab-namila\\allmetrics1")
savedval = 0.111

def basemodel(participant, arr, start, end, topredict, removesnacks, nodinners):
    global realdata
    global savedval
    
    test = realdata
    
    #remove specified entries
    if nodinners:
        test = test[test['Meal Type'] != "dinner"]
    
    if removesnacks:
        if(len(test[test['snack in 3hrs'] == True]) == 0):
            arr.append(savedval)
            print(f"RMSREcopy:{savedval}")
            return savedval
        else:
            test = test[test['snack in 3hrs'] == False]

    valid = test[test['dexcom 3hr auc'] != "error"]
    valid = valid[valid['mets 3 hr auc'] != "error"]
    
    #for predicting macros
    #X = valid.loc[:, 'peakheight':'mets 3 hr avg'].to_numpy()
    X = valid.loc[:, start:end].to_numpy()
    #for predicting ppgr 
    #X = valid.iloc[:, 10:18].to_numpy()
    #X = valid.loc[:, 'mets 3 hr auc':'fat'].to_numpy()
    #X = valid.loc[:, 'calories':'fat'].to_numpy()
    
    Y = valid[topredict].to_numpy().astype(float)
    warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)


    #split into train and test sets
    seed = 7
    test_size = 0.25
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
    
    # HP TUNING
    # Define the parameter grid
    param_grid = {'alpha': [0.0, 0.5, 1.0, 2.0],
                  'learning_rate': [0.3, 0.1, 0.01, 0.001],  # Learning rate of the model
                  'max_depth': [2, 3, 4, 5, 6],  # Maximum depth of each tree
                  'n_estimators': [100, 200, 250],  # Number of trees in the ensemble
                  }
    # Define the XGBoost regressor
    model = XGBRegressor()
    # Set up the grid search cross-validation
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
    # Perform the grid search
    grid_search.fit(X_train, y_train)
    # print(grid_search.best_params_)
    
    model = XGBRegressor(alpha = grid_search.best_params_['alpha'],
                        learning_rate = grid_search.best_params_['learning_rate'],
                        max_depth = grid_search.best_params_['max_depth'],
                        n_estimators = grid_search.best_params_['n_estimators'])
    #make model
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)

#     plt.plot(y_test)
#     plt.plot(prediction)
#     plt.show()
    
#     # Fits the explainer
#     explainer = shap.Explainer(model.predict, X_test)
#     # Calculates the SHAP values - It takes some time
#     shap_values = explainer(X_train)
#     #shap.summary_plot(shap_values)
#     shap.plots.bar(shap_values)
    
    #sicong's normalized rmse
    errors = ((y_test - prediction)/y_test)**2
    rmsre = (np.average(errors))**.5
    print("RMSRE:", rmsre)
    
    arr.append(rmsre)
    return rmsre
    
def runfour(errors, start, end, topredict):
    global savedval
    savedval = basemodel(x, errors[0], start, end, topredict, 0, 1)
    basemodel(x, errors[1], start, end, topredict, 1, 1)
    savedval = basemodel(x, errors[2], start, end, topredict,  0, 0)
    basemodel(x, errors[3], start, end, topredict, 1, 0)

In [None]:
participants = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,27,28,29]
#participants = [26]
inputerrors = [[],[],[],[]]
inputerrors2 = [[],[],[],[]]

predpeakh = [[],[],[],[]]
predauc = [[],[],[],[]]
prediauc = [[],[],[],[]]
predpt = [[],[],[],[]]
predpd = [[],[],[],[]]
sbg = [[],[],[],[]]

for x in participants:
    print(x)
    
    global realdata
    realdata = pd.read_csv(f"C:\\Users\\namil\\Documents\\stmi-lab-namila\\allmetrics{x}")
    
    #predicting macros
    start = 'peakheight'
    end = 'mets 3 hr avg'
    
    pool = multiprocessing.Pool()
    pool.apply_async(runfour, (inputerrors, start, end, 'carbs'))
    pool.apply_async(runfour, (inputerrors2, start, end, 'calories'))
        
    #predicting ppgr
    start = 'mets 3 hr auc'
    end = 'fat'

    runfour(predpeakh, start, end, 'peakheight')
    runfour(predauc, start, end, 'dexcom 3hr auc')
    runfour(prediauc, start, end, 'iauc')
    runfour(predpt, start, end, 'peaktime')
    runfour(predpd, start, end, 'peakduration_40')
    runfour(sbg, start, end, 'startbg')
    
df = pd.DataFrame({'participants': participants, 
                  'carbs bl raw rmsre': inputerrors[0], 'carbs bl removed rmsre': inputerrors[1],
                  'carbs bld raw rmsre': inputerrors[2], 'carbs bld removed rmsre': inputerrors[3],
                  'calories bl raw rmsre': inputerrors2[0], 'calories bl removed rmsre': inputerrors2[1],
                  'calories bld raw rmsre': inputerrors2[2], 'calories bld removed rmsre': inputerrors2[3],
#                   'ph bl raw rmsre': predpeakh[0], 'ph bl removed rmsre': predpeakh[1],
#                   'ph bld raw rmsre': predpeakh[2], 'ph bld removed rmsre': predpeakh[3],
#                   'auc bl raw rmsre': predauc[0], 'auc bl removed rmsre': predauc[1],
#                   'auc bld raw rmsre': predauc[2], 'auc bld removed rmsre': predauc[3],
                  'iauc bl raw rmsre': prediauc[0], 'iauc bl removed rmsre': prediauc[1],
                  'iauc bld raw rmsre': prediauc[2], 'iauc bld removed rmsre': prediauc[3],
                  'pt bl raw rmsre': predpt[0], 'pt bl removed rmsre': predpt[1],
                  'pt bld raw rmsre': predpt[2], 'pt bld removed rmsre': predpt[3],
                  'pd40 bl raw rmsre': predpd[0], 'pd40 bl removed rmsre': predpd[1],
                  'pd40 bld raw rmsre': predpd[2], 'pd40 bld removed rmsre': predpd[3],
                  'sbg bl raw rmsre': sbg[0], 'sbg bl removed rmsre': sbg[1],
                  'sbg bld raw rmsre': sbg[2], 'sbg bld removed rmsre': sbg[3]
                  })
df.to_csv("tunedactivitynohr")

#print(f"carbs raw error: {np.average(rawerrors)} \ncarbs removed error: {np.average(removederrors)} \ncarbs bld raw error: {np.average(drawerrors)}\ncarbs bld removed: {np.average(dremovederrors)}")
print("done!")

1
RMSRE: 0.07155159207046084
RMSRE: 0.10337331343666169
RMSRE: 0.09772897662003471
RMSRE: 0.13419160323559137
RMSRE: 0.16880648991460795
RMSRE: 0.16705725317933554
RMSRE: 0.11176208695357338
RMSRE: 0.15934799425949892
RMSRE: 0.9891783498658072
RMSRE: 1.127345825147201
RMSRE: 0.8682737749819984
RMSRE: 0.9697616365772538
RMSRE: 4.4610925571281985
RMSRE: 0.5312843280687165
RMSRE: 2.6681503933037107
RMSRE: 2.0468969235455283
RMSRE: 0.23172324274876102
RMSRE: 0.2026621144694682
RMSRE: 0.20810368442653132
RMSRE: 0.10233938497397313
RMSRE: 0.14306836358990985
RMSRE: 0.16408864133599887
RMSRE: 0.1129024386883258
RMSRE: 0.1764066581836325
2
RMSRE: 0.222089153072149
RMSRE: 0.2524770299618984
RMSRE: 0.19993450638606716
RMSRE: 0.1776818656833931
RMSRE: 0.1888332433028632
RMSRE: 0.2099512401006849
RMSRE: 0.20868913210751117
RMSRE: 0.12301285666735436
RMSRE: 1.2209121370743927
RMSRE: 0.6014937413993955
RMSRE: 1.6033413003953423
RMSRE: 0.8029190173286797
RMSRE: 2.52114588113588
RMSRE: 0.5413494069057

RMSRE: 1.3213505821781653
RMSRE: 0.32309447034048505
RMSREcopy:0.32309447034048505
RMSRE: 0.2123395588217771
RMSRE: 0.5337474907437384
RMSRE: 0.5620829072961259
RMSREcopy:0.5620829072961259
RMSRE: 0.1549616270894436
RMSRE: 0.19855673544612104
14
RMSRE: 0.16651235485745694
RMSREcopy:0.16651235485745694
RMSRE: 0.12376615115334412
RMSRE: 0.12632840701212458
RMSRE: 0.1584896169158064
RMSREcopy:0.1584896169158064
RMSRE: 0.13472738191563102
RMSRE: 0.15621602653327452
RMSRE: 0.6662625029147069
RMSREcopy:0.6662625029147069
RMSRE: 1.0081405836542383
RMSRE: 1.7319885681203993
RMSRE: 1.4890627981254578
RMSREcopy:1.4890627981254578
