|                                                                    title                                                                    |          authors          |  N |     age group    |     data     |  window |           Features           |    methods   |           results           |
|:-------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------:|:--:|:----------------:|:------------:|:-------:|:----------------------------:|:------------:|:---------------------------:|
| Improving energy expenditure estimates from wearable devices: A machine learning approach                                                   | Driscoll et al., 2020     | 59 |      44(14)      | accel+, demo |  1 min  |                              |   RF regres  | RMSE=1-1.37 METS,   R2=0.85 |
| Comparison of linear and non-linear models for predicting energy expenditure from raw accelerometer data                                    | Montoye et al., 2017      |    |                  |              |  30sec  | 6 to 36-time domain and demo | LM, LMM, ANN |                             |
| A random forest classifier for the prediction of energy expenditure and type of physical activity from wrist and hip accelerometers         | Ellis et al., 2014        | 40 |     35.8(12)     |    accel+    |  1 min  |      45-time-freq on svm     |   RF regres  |        RMSE=1.09 METS       |
| An artificial neural network to estimate physical activity energy expenditure and identify physical activity type from an accelerometer     | Staudenmayer et al., 2009 | 48 |        35        |  accel, demo |  1 min  |               6              |      ANN     |     RMSE=1.22(0.08) METS    |
| Using Deep Learning for Energy Expenditure Estimation with Wearable Sensors                                                                 | Zhu et al., 2015          | 30 | 27.8(6.9)[19-45] | accel+, demo | 5.12sec |                              |      CNN     |                             |

In [None]:
# Show the current working directory (value is displayed as the cell output).
import os
os.getcwd()

In [None]:
import os
# Change the working directory and list its contents.
# NOTE(review): "..." looks like a redacted path — set it before running.
os.chdir("...")
os.listdir()

In [None]:
import datetime
import pandas as pd
import pickle
import random
import glob
import numpy as np
import matplotlib.pyplot as plt
# Display figures inline in Jupyter notebook
import seaborn as sns
# Use seaborn style defaults and set the default figure size
sns.set(rc={'figure.figsize':(11, 4)})
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from math import sqrt
import csv
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
def read_data(data, dataDir, withActivities= False,
              predAR_path='/dataDir/origEEm_predActivities/'):
    """Load one participant's recording into a single dataframe.

    INPUT:
        - data: participant id; the file ``dataDir + data + ".csv"`` is read
        - dataDir: path prefix of the raw CSV files. It is concatenated
          as-is, so it has to end with a path separator.
        - withActivities: when true, the predicted activity labels stored in
          ``predAR_path + data + "_ARpred_origEEm.csv"`` are left-merged on
          'time' and gaps are filled forward, then backward.
        - predAR_path: path prefix of the predicted-activity files. The
          default is the location that used to be hard-coded here.
    OUTPUT:
        Dataframe with the columns 'time', ankle_x/y/z, wrist_x/y/z,
        ['label'], 'EEm', 'time_cos' and 'participant'. 'time_cos' is a
        float copy of the raw 'time' column.
    """
    ankle = ['ankle_x', 'ankle_y', 'ankle_z']
    wrist = ['wrist_x', 'wrist_y', 'wrist_z']

    print('read data:', data)
    raw = pd.read_csv(dataDir+data+".csv", header = 0, index_col = None, low_memory=False)

    # Target values and their timestamps, kept aside as plain floats.
    cosmed_values = raw[['EEm', 'time']].to_numpy(dtype=float)

    # accel data
    df = raw[['time'] + ankle + wrist]

    if withActivities:
        # load the predicted activities
        predAR = pd.read_csv(predAR_path+data+"_ARpred_origEEm.csv",
                             header = 0, index_col = None, low_memory=False)
        predAR = predAR[['time','label']]

        # concat with activities; samples without a label inherit the
        # previous one, leading gaps take the first label that follows
        df = df.merge(predAR, left_on='time', right_on='time', how='left')
        df['label'] = df['label'].ffill().bfill()
    #eIF

    # The target block is placed positionally next to the accelerometer rows
    # and padded with NaN in case the merge above produced additional rows.
    padded = np.full((df.shape[0], 2), np.nan)
    padded[:cosmed_values.shape[0]] = cosmed_values
    df_cosmed = pd.DataFrame(padded, columns=['EEm','time_cos'])

    df = pd.concat([df, df_cosmed], axis=1)
    df['participant'] = data

    return df
#eDEF

In [None]:
# Feature construction
def featConstr(df, windowSize):
    """Aggregate raw accelerometer samples into per-window features.

    INPUT:
        - df: dataframe with the columns 'time' and 'time_cos' (epoch
          milliseconds), the six axis columns ankle_x/y/z and wrist_x/y/z,
          and 'EEm'. Any other column is ignored.
        - windowSize: the aggregation window in seconds to build the features
    OUTPUT:
        Dataframe indexed by window start time. For every axis it holds the
        mean, std, min, max and the 10/25/50/75/90th percentiles of the
        window, '<axis>_cov' (covariance between the samples of the window
        and the samples of the following window, paired by position) and
        'EEm' (window mean of the target). Rows containing any missing value
        are dropped; this always includes the last window, which has no
        following window to compute the covariance with.
    """
    axes = ['ankle_x', 'ankle_y', 'ankle_z', 'wrist_x', 'wrist_y', 'wrist_z']
    # (column suffix, progress label, resampler method, method arguments)
    aggregations = [('mean', 'Mean', 'mean', ()),
                    ('std', 'SD', 'std', ()),
                    ('min', 'Min', 'min', ()),
                    ('max', 'Max', 'max', ()),
                    ('q10', '10th percentile', 'quantile', (.1,)),
                    ('q25', '25th percentile', 'quantile', (.25,)),
                    ('q50', '50th percentile', 'quantile', (.5,)),
                    ('q75', '75th percentile', 'quantile', (.75,)),
                    ('q90', '90th percentile', 'quantile', (.9,))]

    print('Building features ...')
    # Prepare predictors/target sets, both indexed by timestamp
    preds = df[['time'] + axes].set_index('time')
    preds.index = pd.to_datetime(preds.index, unit = "ms")
    preds = preds.sort_index()

    cosmed = df[['time_cos', 'EEm']].set_index('time_cos')
    cosmed.index = pd.to_datetime(cosmed.index, unit = "ms")
    target = cosmed['EEm'][cosmed['EEm'].notnull()].sort_index()

    # A Timedelta rule avoids the deprecated 'S' frequency alias and is also
    # reused below as the width of the covariance windows.
    window = pd.Timedelta(seconds=float(windowSize))

    # compute features
    blocks = []
    for suffix, label, method, args in aggregations:
        print('=== ' + label + ' ...')
        block = getattr(preds.resample(window), method)(*args)
        block.columns = [axis + '_' + suffix for axis in axes]
        blocks.append(block)
    #eFOR
    features = pd.concat(blocks, axis=1)

    print('=== Covariance of adjacent window ...')
    covariances = {axis: [] for axis in axes}
    for start in features.index:
        middle = start + window
        end = middle + window
        # Label slices include both end points, so a sample lying exactly on
        # a window boundary belongs to both slices.
        current = preds.loc[start:middle].reset_index(drop=True)
        following = preds.loc[middle:end].reset_index(drop=True)
        for axis in axes:
            covariances[axis].append(current[axis].cov(following[axis]))
        #eFOR
    #eFOR

    for axis in axes:
        features[axis + '_cov'] = covariances[axis]
    #eFOR

    # add target
    features['EEm'] = target.resample(window).mean()

    # delete nan
    features = features.dropna()

    return features
#eDEF

In [None]:
def select_Data(name, randomSeed, numberForVal, df_gene):
    """Select a participant's data and build the test*, val and training sets.

    *test set is always the data from the selected (name) participant

    INPUT:
        - name: The id-name of test participant (e.g. GOTOV00)
        - randomSeed: the given random seed (seeds the global `random` module)
        - numberForVal: number of participants drawn from each of the two
          participant groups below as validation set
        - df_gene: the data frame including predictors (accel,activity data),
          and target data (EE); needs an 'ID' column
    OUTPUT: Returns train, val and test df for the given name. A set without
        any participant is returned as an empty frame with df_gene's columns.
    """
    print(name)
    random.seed(randomSeed)
    print('Random seed is:',randomSeed)

    candidates = [pid for pid in df_gene['ID'].unique() if pid != name]

    # Participant groups the validation participants are drawn from; the
    # test participant is never eligible.
    all_act_data = ['GOTOV08', 'GOTOV10', 'GOTOV11', 'GOTOV12', #'GOTOV16,'
                    'GOTOV17', 'GOTOV20', 'GOTOV21', 'GOTOV28',
                    'GOTOV29', 'GOTOV31', 'GOTOV33', 'GOTOV35']
    indoors_act_data = ['GOTOV22', 'GOTOV23', 'GOTOV13','GOTOV14',
                        'GOTOV24', 'GOTOV25', 'GOTOV26', 'GOTOV27',
                        'GOTOV30', 'GOTOV32', 'GOTOV34', 'GOTOV36']
    all_act_data = [pid for pid in all_act_data if pid != name]
    indoors_act_data = [pid for pid in indoors_act_data if pid != name]

    # Keep every drawn participant (previously only the first of each draw
    # was used, so numberForVal > 1 had no effect). For numberForVal == 1 the
    # selection is unchanged.
    validation_data = (random.sample(all_act_data, numberForVal)
                       + random.sample(indoors_act_data, numberForVal))

    print('Getting val and train data.....')
    train_parts, val_parts = [], []
    for pid in candidates:
        part = df_gene[df_gene['ID'] == pid]
        if pid in validation_data:
            val_parts.append(part)
            print('val_data:', pid)
        else:
            train_parts.append(part)
        #eIF
    #eFOR

    empty = df_gene.iloc[0:0]
    df_train = pd.concat(train_parts) if train_parts else empty
    df_val = pd.concat(val_parts) if val_parts else empty

    print('Getting test data.....')
    df_test = df_gene[df_gene['ID'] == name]

    print('Done creating all dataframes.....')
    return df_train, df_val, df_test
#eDEF

In [None]:
# dataDir: path prefix of the per-participant raw CSV files.
# saveDir: path prefix under which the feature tables are written.
# NOTE(review): '/...' looks like a redacted path — set it before running.
dataDir= '/...'
saveDir = 'sequences/features_MontoyePaper_replication/'

## Read data and build features

In [None]:
# Build the windowed feature table of every participant, save one CSV per
# participant and keep all of them in `features_bindAll`.
# NOTE(review): '/...' looks like a redacted path — set it before running.
dataDir= '/...'
saveDir = 'sequences/features_MontoyePaper_replication/'

w = 30 # window seconds
# participants with no cosmed data or with some accel data missing
excluded = {'GOTOV02', 'GOTOV03', 'GOTOV04', 'GOTOV19'}

os.makedirs(saveDir, exist_ok=True)

per_participant = []
# sorted() makes the row order of the combined table reproducible
for csv_path in sorted(glob.glob(dataDir+'*csv')):
    data = os.path.splitext(os.path.basename(csv_path))[0]
    if data in excluded:
        continue
    #eIF
    df = read_data(data, dataDir, withActivities= False)
    features = featConstr(df, windowSize=w)
    features['ID'] = data
    # save features
    print('Saving features.... ')
    features.to_csv(saveDir+data+'_features_'+str(w)+'sec.csv')
    per_participant.append(features)
#eFOR

# DataFrame.append no longer exists in pandas 2; concatenate once instead.
features_bindAll = pd.concat(per_participant) if per_participant else pd.DataFrame()

In [None]:
# Persist the combined feature table of all participants.
features_bindAll.to_csv(path_or_buf=''.join([saveDir, 'features_bindAll_', str(w), 'sec.csv']))

## Linear Model

In [None]:
# Reload the combined feature table and index it by the window timestamp.
w = 30
bind_all_csv = saveDir+'features_bindAll_'+str(w)+'sec.csv'
print(bind_all_csv)

features_bindAll = pd.read_csv(bind_all_csv).set_index('time')
features_bindAll.index = pd.to_datetime(features_bindAll.index)
features_bindAll

In [None]:
# Column subsets per device: every column whose name contains the device
# name, followed by the target 'EEm' and the participant 'ID'.
ankle_cols = [col for col in features_bindAll.columns if 'ankle' in col] + ['EEm', 'ID']
wrist_cols = [col for col in features_bindAll.columns if 'wrist' in col] + ['EEm', 'ID']



In [None]:
# Restrict the feature table to one device; any other value of `devices`
# (e.g. 'aw') keeps the table unchanged.
devices = 'aw'

device_columns = {'ankle': ankle_cols, 'wrist': wrist_cols}
if devices in device_columns:
    features_bindAll = features_bindAll[device_columns[devices]]
features_bindAll

In [None]:
# List the columns that remain after the device selection above.
features_bindAll.columns

In [None]:
# Per-participant details; the evaluation loop reads the columns
# 'participant', 'timeToSplit' and 'outEEm' from it.
EEm_details = pd.read_csv('EEm_details_per_partic.csv')


# the number of participants you want per group as validation set
# (the random seed is the number of the test participant's id)
numberForVal = 1
# randomSeed = i

data = features_bindAll

model_name = 'LM_'+devices+'_'+str(w)+'sec'
figures_folder = 'images/' + model_name + '/predPlot/'
csv_folder = figures_folder

os.makedirs(figures_folder, exist_ok=True)

# Result tables filled by the evaluation loop. The *_std columns are declared
# here as well, so the schema matches the rows the loop writes.
get_all_patients_rsquared = pd.DataFrame(columns=['participant', 'rsquared', 'inRsquared', 'outRsquared'])
get_all_patients_rms = pd.DataFrame(columns=['participant', 'rms', 'inRms', 'outRms'])
stats_EEm_results = pd.DataFrame(columns=['participant',
                                          'true_tot_mean', 'true_in_mean', 'true_out_mean',
                                          'pred_tot_mean', 'pred_in_mean', 'pred_out_mean',
                                          'true_tot_std', 'true_in_std', 'true_out_std',
                                          'pred_tot_std', 'pred_in_std', 'pred_out_std'])



In [None]:
# Leave-one-participant-out evaluation of the linear model: every participant
# 5..36 (except 19) is used once as test set, the model is fitted on the
# training participants returned by select_Data, and metrics and figures are
# produced for the whole recording and, when the participant has outdoor
# data, separately for the part before / after 'timeToSplit'.
# The result tables are rebuilt from scratch, so re-running the cell does not
# accumulate duplicate rows.

def _stack_frames(frames):
    """Concatenate the frames, or return an empty DataFrame if there are none."""
    return pd.concat(frames) if frames else pd.DataFrame()


def _segment_summary(frame):
    """Return R2, RMSE and mean/std of the 'True' and 'Predicted' columns."""
    return {'rsquared': r2_score(frame['True'], frame['Predicted']),
            'rms': sqrt(mean_squared_error(frame['True'], frame['Predicted'])),
            'true_mean': np.mean(frame['True']),
            'pred_mean': np.mean(frame['Predicted']),
            'true_std': np.std(frame['True']),
            'pred_std': np.std(frame['Predicted'])}


def _plot_series(frame, title, path):
    """Save a line plot of true and predicted values over the frame index."""
    plt.figure(figsize=(15,8))
    plt.plot(frame['True'], label='True_EE')
    plt.plot(frame['Predicted'], label='Predicted_EE')
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(path)
    plt.close()


def _plot_scatter(groups, frame, title, path):
    """Save a true-vs-predicted scatter of the groups plus the line fitted on frame."""
    fit = np.poly1d(np.polyfit(frame['True'], frame['Predicted'], 1))
    plt.figure(figsize=(15,8))
    plt.xlim([-1, int(np.max(frame['True'])+2)])
    plt.ylim([-1, int(np.max(frame['Predicted'])+2)])
    for part, colour, label in groups:
        plt.scatter(part['True'], part['Predicted'], c=colour, alpha=0.5, label=label)
    plt.plot(frame['True'], fit(frame['True']), "b")
    plt.legend(loc='upper right')
    plt.title(title)
    plt.xlabel('True')
    plt.ylabel('Predicted')
    plt.savefig(path)
    plt.close()


# Placeholder for a segment that does not exist; NaN keeps the result
# columns numeric and is skipped by the means printed at the end.
_NO_SEGMENT = dict.fromkeys(['rsquared', 'rms', 'true_mean', 'pred_mean',
                             'true_std', 'pred_std'], np.nan)

pred_frames, pred_in_frames, pred_out_frames = [], [], []
rsquared_rows, rms_rows, stats_rows = [], [], []

for i in range(5, 37):
    randomSeed = i
    if i in (2, 3, 4, 19): #[12, 19, 23, 16]:
        continue
    #eIF
    name = 'GOTOV%02d' % i

    # select data for train, val, test sets (the validation set is not used here)
    df_train, df_val, df_test = select_Data(name, randomSeed, numberForVal, data)
    if df_test.empty:
        print('no data for', name, '- skipped')
        continue
    #eIF

    details = EEm_details[EEm_details['participant']==name]
    split_time = details['timeToSplit'].values[0] #time to split in and outdoors
    has_outdoors = not pd.isna(details['outEEm'].values[0])

    cols = ['EEm', 'ID']
    X_train = df_train.drop(cols, axis=1)
    y_train = df_train.EEm

    X_test = df_test.drop(cols, axis=1)
    y_test = df_test.EEm

    print('train model....')
    lm = linear_model.LinearRegression()
    model = lm.fit(X_train,y_train)

    predictions = model.predict(X_test)

    df_test = pd.DataFrame(data=y_test.values, index=y_test.index, columns=['True'])
    df_test['Predicted'] = predictions
    df_test['participant'] = name
    pred_frames.append(df_test)

    total = _segment_summary(df_test)
    print('rsquared...', total['rsquared'])
    print('rms...', total['rms'])

    print('Create figures ....')
    _plot_series(df_test, name, figures_folder+name+'.pdf')

    if has_outdoors:
        # indoors
        indoors = df_test[df_test.index <= split_time]
        pred_in_frames.append(indoors)
        inside = _segment_summary(indoors)
        print('in :', inside['rsquared'])
        _plot_series(indoors, name, figures_folder+name+'_in.pdf')

        # outdoors
        outdoors = df_test[df_test.index > split_time]
        pred_out_frames.append(outdoors)
        outside = _segment_summary(outdoors)
        print('out:', outside['rsquared'])
        _plot_series(outdoors, name, figures_folder+name+'_out.pdf')

        scatter_groups = [(indoors, 'green', 'Indoors'),
                          (outdoors, 'orange', 'Outdoors')]
    else:
        # the whole recording counts as indoors
        inside = outside = _NO_SEGMENT
        pred_in_frames.append(df_test)
        scatter_groups = [(df_test, 'green', 'Indoors')]
    #fi

    # scatter plot
    _plot_scatter(scatter_groups, df_test, name, figures_folder+name+'_scatter.pdf')

    rsquared_rows.append({'participant': name,
                          'rsquared'   : total['rsquared'],
                          'inRsquared' : inside['rsquared'],
                          'outRsquared': outside['rsquared']})

    rms_rows.append({'participant': name,
                     'rms'        : total['rms'],
                     'inRms'      : inside['rms'],
                     'outRms'     : outside['rms']})

    stats_rows.append({'participant'  : name,
                       'true_tot_mean': total['true_mean'],
                       'true_in_mean' : inside['true_mean'],
                       'true_out_mean': outside['true_mean'],
                       'true_tot_std' : total['true_std'],
                       'true_in_std'  : inside['true_std'],
                       'true_out_std' : outside['true_std'],
                       'pred_tot_mean': total['pred_mean'],
                       'pred_in_mean' : inside['pred_mean'],
                       'pred_out_mean': outside['pred_mean'],
                       'pred_tot_std' : total['pred_std'],
                       'pred_in_std'  : inside['pred_std'],
                       'pred_out_std' : outside['pred_std']})
#efor

df_allPred = _stack_frames(pred_frames)
df_allPredin = _stack_frames(pred_in_frames)
df_allPredout = _stack_frames(pred_out_frames)

get_all_patients_rsquared = pd.DataFrame(rsquared_rows,
                                         columns=['participant', 'rsquared', 'inRsquared', 'outRsquared'])
get_all_patients_rms = pd.DataFrame(rms_rows,
                                    columns=['participant', 'rms', 'inRms', 'outRms'])
stats_EEm_results = pd.DataFrame(stats_rows,
                                 columns=['participant',
                                          'true_tot_mean', 'true_in_mean', 'true_out_mean',
                                          'pred_tot_mean', 'pred_in_mean', 'pred_out_mean',
                                          'true_tot_std', 'true_in_std', 'true_out_std',
                                          'pred_tot_std', 'pred_in_std', 'pred_out_std'])

get_all_patients_rsquared.to_csv(csv_folder+'rsquared.csv',index=False)
get_all_patients_rms.to_csv(csv_folder+'rms.csv',index=False)
stats_EEm_results.to_csv(csv_folder+'predStats.csv',index=False)

# scatter of the per-participant averages
true = stats_EEm_results['true_tot_mean']
pred = stats_EEm_results['pred_tot_mean']
z = np.polyfit(true, pred, 1)
p = np.poly1d(z)
x_max = int(np.max(true)+1)
y_max = int(np.max(pred)+1)
plt.figure(figsize=(15,8))
plt.xlim([1,x_max])
plt.ylim([1,y_max])
plt.scatter(true, pred, c ='blue', alpha=0.5)
plt.plot(true,p(true),"b")
plt.title('True Vs Predicted Average EEm per participant')
plt.xlabel('True')
plt.ylabel('Predicted')
plt.savefig(figures_folder+'all_scatter.pdf')

print('Rsquared: '+str(get_all_patients_rsquared.rsquared.mean())+' /rms:'+ str(get_all_patients_rms.rms.mean()))
print('inRsquared: '+str(get_all_patients_rsquared.inRsquared.mean())+' /inRms:'+ str(get_all_patients_rms.inRms.mean()))
print('outRsquared: '+str(get_all_patients_rsquared.outRsquared.mean())+' /outRms:'+ str(get_all_patients_rms.outRms.mean()))

In [None]:
# Mean R2 and RMSE over all test participants: whole recording, indoor part
# and outdoor part.
for r2_label, r2_col, rms_col in (('Rsquared', 'rsquared', 'rms'),
                                  ('inRsquared', 'inRsquared', 'inRms'),
                                  ('outRsquared', 'outRsquared', 'outRms')):
    mean_r2 = get_all_patients_rsquared[r2_col].mean()
    mean_rms = get_all_patients_rms[rms_col].mean()
    print(''.join([r2_label, ': ', str(mean_r2), ' /', rms_col, ':', str(mean_rms)]))

## Linear Mixed Model

In [None]:
# Reload the combined feature table and index it by the window timestamp.
w = 30
bind_all_csv = saveDir+'features_bindAll_'+str(w)+'sec.csv'
print(bind_all_csv)

features_bindAll = pd.read_csv(bind_all_csv).set_index('time')
features_bindAll.index = pd.to_datetime(features_bindAll.index)
features_bindAll

In [None]:
# Column subsets per device: every column whose name contains the device
# name, followed by the target 'EEm' and the participant 'ID'.
ankle_cols = [col for col in features_bindAll.columns if 'ankle' in col] + ['EEm', 'ID']
wrist_cols = [col for col in features_bindAll.columns if 'wrist' in col] + ['EEm', 'ID']



In [None]:
# Restrict the feature table to one device; 'aw' keeps the table unchanged.
devices = 'wrist' # aw or ankle or wrist

device_columns = {'ankle': ankle_cols, 'wrist': wrist_cols}
if devices in device_columns:
    features_bindAll = features_bindAll[device_columns[devices]]
features_bindAll

In [None]:
# Per-participant details; the evaluation loop reads the columns
# 'participant', 'timeToSplit' and 'outEEm' from it.
EEm_details = pd.read_csv('EEm_details_per_partic.csv')


# the number of participants you want per group as validation set
# (the random seed is the number of the test participant's id)
numberForVal = 1
# randomSeed = i

data = features_bindAll

model_name = 'LMM_'+devices+'_'+str(w)+'sec'
figures_folder = 'images/' + model_name + '/predPlot/'
csv_folder = figures_folder

os.makedirs(figures_folder, exist_ok=True)

# Result tables filled by the evaluation loop. The *_std columns are declared
# here as well, so the schema matches the rows the loop writes.
get_all_patients_rsquared = pd.DataFrame(columns=['participant', 'rsquared', 'inRsquared', 'outRsquared'])
get_all_patients_rms = pd.DataFrame(columns=['participant', 'rms', 'inRms', 'outRms'])
stats_EEm_results = pd.DataFrame(columns=['participant',
                                          'true_tot_mean', 'true_in_mean', 'true_out_mean',
                                          'pred_tot_mean', 'pred_in_mean', 'pred_out_mean',
                                          'true_tot_std', 'true_in_std', 'true_out_std',
                                          'pred_tot_std', 'pred_in_std', 'pred_out_std'])



In [None]:
# build formula: 'EEm ~ <feature> + <feature> + ...' using every feature
# column of the selected device(s). Columns are picked by name rather than by
# position, so the formula does not depend on the exact number or order of
# columns and never repeats a term.
device_keywords = {'ankle': ('ankle',), 'wrist': ('wrist',), 'aw': ('ankle', 'wrist')}
if devices not in device_keywords:
    raise ValueError("devices must be 'ankle', 'wrist' or 'aw', got %r" % (devices,))

predictors = [str(vname) for vname in data.columns
              if any(keyword in str(vname) for keyword in device_keywords[devices])]
if not predictors:
    raise ValueError('no feature columns found for devices=%r' % (devices,))

fmla = 'EEm ~ ' + ' + '.join(predictors)

fmla

In [None]:
df_allPred = pd.DataFrame()
df_allPredin = pd.DataFrame()
df_allPredout = pd.DataFrame()

for i in range(5, 37):
    randomSeed = i
    if i in [2,3,4,19]: #[12, 19, 23, 16]: 
        continue
#         print('Skip ID')
    else: 
        if len(str(i)) == 1:
            name = 'GOTOV0'+str(i)
        else: 
            name = 'GOTOV'+str(i)
        #eIF
    #eIF
    
    # select data for train, val, test sets           
    df_train, df_val, df_test = select_Data(name, randomSeed, numberForVal, data)        

    time = EEm_details['timeToSplit'][EEm_details['participant']==name].values[0] #time to split in and outdoors
    
    cols = ['EEm', 'ID']
    X_train = df_train.drop(cols, axis=1)
    y_train = df_train.EEm

    X_test = df_test.drop(cols, axis=1)
    y_test = df_test.EEm
    
    print('train model....')
    llm = smf.mixedlm(str(fmla), df_train, groups=df_train["ID"])
    model = llm.fit()
#     print(model.summary())

#     lm = linear_model.LinearRegression()
#     model = lm.fit(X_train,y_train)

    predictions = model.predict(X_test)
    
    df_test = pd.DataFrame(data=y_test.values, index=y_test.index, columns=['True'])
    df_test['Predicted'] = predictions
    df_test['participant'] = name        
    df_allPred = df_allPred.append(df_test)

    rsquared = r2_score(y_test, predictions)
    print('rsquared...', rsquared)

    rms = sqrt(mean_squared_error(y_test, predictions))
    print('rms...', rms)
    
    print('Create figures ....')

    true_tot_mean = np.mean(df_test['True'])
    pred_tot_mean = np.mean(df_test['Predicted'])        
    true_tot_std  = np.std(df_test['True'])
    pred_tot_std  = np.std(df_test['Predicted'])        

    z = np.polyfit(df_test['True'], df_test['Predicted'], 1)
    p = np.poly1d(z)
    x_max = int(np.max(df_test['True'])+2)
    y_max = int(np.max(df_test['Predicted'])+2)

    plt.figure(figsize=(15,8))
    plt.plot(df_test['True'], label='True_EE')
    plt.plot(df_test['Predicted'], label='Predicted_EE')
    plt.legend(loc='upper left')
    plt.title(name)
    plt.savefig(figures_folder+name+'.pdf')
    # plt.show()
    plt.close()
    
    if np.isnan(EEm_details['outEEm'][EEm_details['participant']==name].values) == False:
        # indoors
        indoors = df_test[df_test.index <= time]#'2016-02-19 11:12:25.004000']
        indoors['participant'] = name        
        df_allPredin = df_allPredin.append(indoors)

        in_rsquared = r2_score(indoors['True'], indoors['Predicted'])
        in_rms = sqrt(mean_squared_error(indoors['True'], indoors['Predicted']))
        print('in :', in_rsquared)
        true_in_mean = np.mean(indoors['True'])
        pred_in_mean = np.mean(indoors['Predicted'])
        true_in_std  = np.std(indoors['True'])
        pred_in_std  = np.std(indoors['Predicted'])        

        # plot
        plt.figure(figsize=(15,8))
        plt.plot(indoors['True'], label='True_EE')
        plt.plot(indoors['Predicted'], label='Predicted_EE')
        plt.title(name)
        plt.legend(loc='upper left')
        plt.savefig(figures_folder+name+'_in.pdf')
    #             plt.show()
        plt.close()

        # outdoors
        outdoors = df_test[df_test.index > time]#'2016-02-19 11:12:25.004000']
        outdoors['participant'] = name        
        df_allPredout = df_allPredout.append(outdoors)

        out_rsquared = r2_score(outdoors['True'], outdoors['Predicted'])
        out_rms = sqrt(mean_squared_error(outdoors['True'], outdoors['Predicted']))            
        print('out:', out_rsquared)
        true_out_mean = np.mean(outdoors['True'])
        pred_out_mean = np.mean(outdoors['Predicted'])            
        true_out_std  = np.std(outdoors['True'])
        pred_out_std  = np.std(outdoors['Predicted'])        

        # plot
        plt.figure(figsize=(15,8))
        plt.title(name)
        plt.plot(outdoors['True'], label='True_EE')
        plt.plot(outdoors['Predicted'], label='Predicted_EE')
        plt.legend(loc='upper left')
        plt.savefig(figures_folder+name+'_out.pdf')
    #             plt.show()
        plt.close()

        # scatter plot
        plt.figure(figsize=(15,8))
        plt.xlim([-1,x_max])
        plt.ylim([-1,y_max])            
        plt.scatter(indoors['True'], indoors['Predicted'], c ='green', alpha=0.5, label='Indoors')
        plt.scatter(outdoors['True'], outdoors['Predicted'], c ='orange', alpha=0.5, label='Outdoors')
        plt.plot(df_test['True'],p(df_test['True']),"b")
        plt.legend(loc='upper right')
        plt.title(name)
        plt.xlabel('True')
        plt.ylabel('Predicted')
        plt.savefig(figures_folder+name+'_scatter.pdf')            
    #             plt.show()
        plt.close()

    else:
        in_rsquared   = None
        in_rms        = None
        out_rsquared  = None
        out_rms       = None
        true_in_mean  = None
        true_out_mean = None
        true_in_std   = None
        true_out_std  = None
        pred_in_mean  = None
        pred_out_mean = None
        pred_in_std   = None
        pred_out_std  = None

        # scatter plot
        plt.figure(figsize=(15,8))
        plt.xlim([-1,x_max])
        plt.ylim([-1,y_max])
        plt.scatter(df_test['True'], df_test['Predicted'], c ='green', alpha=0.5, label='Indoors')
        plt.plot(df_test['True'],p(df_test['True']),"b")            
        plt.legend(loc='upper right')
        plt.title(name)
        plt.xlabel('True')
        plt.ylabel('Predicted')
        plt.savefig(figures_folder+name+'_scatter.pdf')
    #             plt.show()
        plt.close()

        df_allPredin = df_allPredin.append(df_test)
    #fi

    get_all_patients_rsquared = get_all_patients_rsquared.append({'participant': name,
                                                                  'rsquared'   : rsquared,
                                                                  'inRsquared' : in_rsquared,
                                                                  'outRsquared': out_rsquared},
                                                         ignore_index=True) 

    get_all_patients_rms = get_all_patients_rms.append({'participant': name,
                                                        'rms'        : rms,
                                                        'inRms'      : in_rms,
                                                        'outRms'     : out_rms},
                                                         ignore_index=True) 

    stats_EEm_results = stats_EEm_results.append({'participant'  : name,
                                                  'true_tot_mean': true_tot_mean,
                                                  'true_in_mean' : true_in_mean,
                                                  'true_out_mean': true_out_mean,
                                                  'true_tot_std' : true_tot_std,
                                                  'true_in_std'  : true_in_std,
                                                  'true_out_std' : true_out_std,
                                                  'pred_tot_mean': pred_tot_mean,
                                                  'pred_in_mean' : pred_in_mean,

                                                  'pred_out_mean': pred_out_mean,
                                                  'pred_tot_std' : pred_tot_std,
                                                  'pred_in_std'  : pred_in_std,
                                                  'pred_out_std' : pred_out_std},
                                                 ignore_index=True)
#efor

# Write the three per-participant result tables as CSV files into csv_folder.
for results_table, csv_name in ((get_all_patients_rsquared, 'rsquared.csv'),
                                (get_all_patients_rms, 'rms.csv'),
                                (stats_EEm_results, 'predStats.csv')):
    results_table.to_csv(csv_folder+csv_name, index=False)

# One point per participant: mean true EEm against mean predicted EEm,
# with a first-degree least-squares fit drawn through the points.
true = stats_EEm_results['true_tot_mean']
pred = stats_EEm_results['pred_tot_mean']
z = np.polyfit(true, pred, 1)
p = np.poly1d(z)
x_max = int(np.max(true)+1)
y_max = int(np.max(pred)+1)

plt.figure(figsize=(15,8))
plt.xlim([1,x_max])
plt.ylim([1,y_max])
plt.scatter(true, pred, c='blue', alpha=0.5)
plt.plot(true, p(true), "b")
plt.title('True Vs Predicted Average EEm per participant')
plt.xlabel('True')
plt.ylabel('Predicted')
plt.savefig(figures_folder+'all_scatter.pdf')

# Mean R^2 and RMSE over participants: whole recording, indoor part, outdoor part.
for label, r2_column, rms_column in (('Rsquared', 'rsquared', 'rms'),
                                     ('inRsquared', 'inRsquared', 'inRms'),
                                     ('outRsquared', 'outRsquared', 'outRms')):
    print('{}: {} /{}:{}'.format(label,
                                 get_all_patients_rsquared[r2_column].mean(),
                                 rms_column,
                                 get_all_patients_rms[rms_column].mean()))

In [None]:
# Mean R^2 and RMSE over participants: whole recording, indoor part, outdoor part.
for label, r2_column, rms_column in (('Rsquared', 'rsquared', 'rms'),
                                     ('inRsquared', 'inRsquared', 'inRms'),
                                     ('outRsquared', 'outRsquared', 'outRms')):
    print('{}: {} /{}:{}'.format(label,
                                 get_all_patients_rsquared[r2_column].mean(),
                                 rms_column,
                                 get_all_patients_rms[rms_column].mean()))

# RandomForest regressor

In [None]:
# w is turned into the '<w>sec' part of the model and figure names further down.
w = 30

# Read the pre-computed feature table and index it by its parsed 'time' column.
features_path = 'sequences/features_MontoyePaper_replication/features_bindAll_30sec.csv'
features_bindAll = pd.read_csv(features_path).set_index('time')
features_bindAll.index = pd.to_datetime(features_bindAll.index)
features_bindAll

In [None]:
# Column subsets per sensor location: every feature column whose name contains
# the location, followed by the 'EEm' and 'ID' columns.
target_and_id = ['EEm', 'ID']
ankle_cols = [col for col in features_bindAll.columns if 'ankle' in col] + target_and_id
wrist_cols = [col for col in features_bindAll.columns if 'wrist' in col] + target_and_id


In [None]:
devices = 'aw' # aw or ankle or wrist

# Restrict the feature table to one sensor location; any other value of
# `devices` (e.g. 'aw') leaves the table untouched.
columns_per_device = {'ankle': ankle_cols, 'wrist': wrist_cols}
if devices in columns_per_device:
    features_bindAll = features_bindAll[columns_per_device[devices]]
features_bindAll

In [None]:
# Set-up for the random forest regression experiment.
from sklearn.ensemble import RandomForestRegressor

# Per-participant details; the evaluation loop reads 'participant',
# 'timeToSplit' and 'outEEm' from this table.
EEm_details = pd.read_csv('EEm_details_per_partic.csv')

# Number of participants per group to use as validation set; handed to
# select_Data together with the participant number as random seed.
numberForVal = 1

data = features_bindAll

# Output locations: figures and CSV files share one folder named after the model.
model_name = 'RFregr_1000trees_{}_{}sec'.format(devices, w)
figures_folder = 'images/{}/predPlot/'.format(model_name)
csv_folder = figures_folder

if not os.path.exists(figures_folder):
    os.makedirs(figures_folder)

# Empty result tables that the evaluation loop fills with one row per participant.
scopes = ['tot', 'in', 'out']
get_all_patients_rsquared = pd.DataFrame(columns=['participant', 'rsquared', 'inRsquared', 'outRsquared'])
get_all_patients_rms = pd.DataFrame(columns=['participant', 'rms', 'inRms', 'outRms'])
stats_EEm_results = pd.DataFrame(columns=['participant']
                                         + ['true_%s_mean' % scope for scope in scopes]
                                         + ['pred_%s_mean' % scope for scope in scopes])

print(model_name)
print(figures_folder)

In [None]:
# Per-participant evaluation of the random forest regressor.
#
# For every participant number in 5..36 (except the skipped ones) the loop
#   1. asks select_Data (defined elsewhere in the notebook) for the train /
#      validation / test split of that participant,
#   2. fits a 1000-tree RandomForestRegressor on the training split and pickles it,
#   3. predicts the test split and computes R^2, RMSE, mean and std for the whole
#      recording and, when EEm_details has an 'outEEm' value for the participant,
#      separately for the parts up to and after 'timeToSplit',
#   4. saves the time-series and scatter figures into figures_folder.
# After the loop the collected predictions and metric rows are stored in
# df_allPred / df_allPredin / df_allPredout and appended to the three result tables.


def _save_series_plot(frame, title, path):
    """Save a line plot of the 'True' and 'Predicted' columns of `frame` to `path`."""
    plt.figure(figsize=(15,8))
    plt.plot(frame['True'], label='True_EE')
    plt.plot(frame['Predicted'], label='Predicted_EE')
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(path)
    plt.close()


def _save_scatter_plot(parts, whole, fit, x_max, y_max, title, path):
    """Save a true-vs-predicted scatter plot to `path`.

    `parts` is a list of (frame, colour, label) tuples drawn as separate point
    clouds; `fit` is the fitted line, evaluated on the 'True' column of `whole`.
    """
    plt.figure(figsize=(15,8))
    plt.xlim([-1,x_max])
    plt.ylim([-1,y_max])
    for frame, colour, label in parts:
        plt.scatter(frame['True'], frame['Predicted'], c=colour, alpha=0.5, label=label)
    plt.plot(whole['True'], fit(whole['True']), "b")
    plt.legend(loc='upper right')
    plt.title(title)
    plt.xlabel('True')
    plt.ylabel('Predicted')
    plt.savefig(path)
    plt.close()


def _describe(frame):
    """Return R^2, RMSE and the mean/std of the 'True' and 'Predicted' columns."""
    true, pred = frame['True'], frame['Predicted']
    return {'rsquared': r2_score(true, pred),
            'rms': sqrt(mean_squared_error(true, pred)),
            'true_mean': np.mean(true), 'pred_mean': np.mean(pred),
            'true_std': np.std(true), 'pred_std': np.std(pred)}


def _stack(frames):
    """Concatenate the collected frames; an empty list gives an empty DataFrame."""
    return pd.concat(frames) if frames else pd.DataFrame()


# Indoor/outdoor statistics for participants without an outdoor part. NaN (not
# None) keeps the metric columns numeric; both are written as empty CSV cells.
_NO_SPLIT = dict.fromkeys(['rsquared', 'rms', 'true_mean', 'pred_mean', 'true_std', 'pred_std'], np.nan)

# Store the fitted models in a folder named after the current model, so that a
# run with another `devices` / `w` setting does not overwrite them. For
# devices='aw', w=30 this is the folder that used to be hard-coded here.
models_folder = 'models/2nd_round_of_models_paper_rebuttal/' + model_name + '/'
os.makedirs(models_folder, exist_ok=True)

skip_ids = [2, 3, 4, 19] #[12, 19, 23, 16]
all_frames, in_frames, out_frames = [], [], []
rsquared_rows, rms_rows, stats_rows = [], [], []

for i in range(5, 37):
    if i in skip_ids:
        continue
    randomSeed = i
    name = 'GOTOV' + str(i).zfill(2)   # GOTOV05 ... GOTOV36

    # select data for train, val, test sets
    df_train, df_val, df_test = select_Data(name, randomSeed, numberForVal, data)

    details = EEm_details[EEm_details['participant'] == name]
    time = details['timeToSplit'].values[0]   # time to split in and outdoors
    has_outdoors = not np.isnan(details['outEEm'].values[0])

    cols = ['EEm', 'ID']
    X_train, y_train = df_train.drop(cols, axis=1), df_train.EEm
    X_test, y_test = df_test.drop(cols, axis=1), df_test.EEm

    print('train model....')
    regressor = RandomForestRegressor(n_estimators = 1000, random_state = 0)
    model = regressor.fit(X_train, y_train)
    # `with` closes the file even if pickling fails
    with open(models_folder + str(name), 'wb') as model_file:
        pickle.dump(model, model_file)

    predictions = model.predict(X_test)

    # From here on df_test holds the true and predicted values of the test split.
    df_test = pd.DataFrame({'True': y_test.values,
                            'Predicted': predictions,
                            'participant': name},
                           index=y_test.index)
    all_frames.append(df_test)

    total = _describe(df_test)
    print('rsquared...', total['rsquared'])
    print('rms...', total['rms'])

    print('Create figures ....')
    fit = np.poly1d(np.polyfit(df_test['True'], df_test['Predicted'], 1))
    x_max = int(np.max(df_test['True'])+2)
    y_max = int(np.max(df_test['Predicted'])+2)
    _save_series_plot(df_test, name, figures_folder+name+'.pdf')

    if has_outdoors:
        # The slices already carry the 'participant' column of df_test.
        indoors = df_test[df_test.index <= time]
        outdoors = df_test[df_test.index > time]
        in_frames.append(indoors)
        out_frames.append(outdoors)

        inside = _describe(indoors)
        print('in :', inside['rsquared'])
        _save_series_plot(indoors, name, figures_folder+name+'_in.pdf')

        outside = _describe(outdoors)
        print('out:', outside['rsquared'])
        _save_series_plot(outdoors, name, figures_folder+name+'_out.pdf')

        _save_scatter_plot([(indoors, 'green', 'Indoors'), (outdoors, 'orange', 'Outdoors')],
                           df_test, fit, x_max, y_max, name, figures_folder+name+'_scatter.pdf')
    else:
        # No outdoor part: the whole recording is collected as indoor data.
        inside = outside = _NO_SPLIT
        in_frames.append(df_test)
        _save_scatter_plot([(df_test, 'green', 'Indoors')],
                           df_test, fit, x_max, y_max, name, figures_folder+name+'_scatter.pdf')
    #fi

    rsquared_rows.append({'participant': name,
                          'rsquared'   : total['rsquared'],
                          'inRsquared' : inside['rsquared'],
                          'outRsquared': outside['rsquared']})

    rms_rows.append({'participant': name,
                     'rms'        : total['rms'],
                     'inRms'      : inside['rms'],
                     'outRms'     : outside['rms']})

    stats_rows.append({'participant'  : name,
                       'true_tot_mean': total['true_mean'],
                       'true_in_mean' : inside['true_mean'],
                       'true_out_mean': outside['true_mean'],
                       'true_tot_std' : total['true_std'],
                       'true_in_std'  : inside['true_std'],
                       'true_out_std' : outside['true_std'],
                       'pred_tot_mean': total['pred_mean'],
                       'pred_in_mean' : inside['pred_mean'],
                       'pred_out_mean': outside['pred_mean'],
                       'pred_tot_std' : total['pred_std'],
                       'pred_in_std'  : inside['pred_std'],
                       'pred_out_std' : outside['pred_std']})
#efor

df_allPred = _stack(all_frames)
df_allPredin = _stack(in_frames)
df_allPredout = _stack(out_frames)

# DataFrame.append was removed in pandas 2.0; add all new rows with one concat per table.
get_all_patients_rsquared = pd.concat([get_all_patients_rsquared, pd.DataFrame(rsquared_rows)],
                                      ignore_index=True)
get_all_patients_rms = pd.concat([get_all_patients_rms, pd.DataFrame(rms_rows)],
                                 ignore_index=True)
stats_EEm_results = pd.concat([stats_EEm_results, pd.DataFrame(stats_rows)],
                              ignore_index=True)

# Write the three per-participant result tables as CSV files into csv_folder.
for results_table, csv_name in ((get_all_patients_rsquared, 'rsquared.csv'),
                                (get_all_patients_rms, 'rms.csv'),
                                (stats_EEm_results, 'predStats.csv')):
    results_table.to_csv(csv_folder+csv_name, index=False)

# One point per participant: mean true EEm against mean predicted EEm,
# with a first-degree least-squares fit drawn through the points.
true = stats_EEm_results['true_tot_mean']
pred = stats_EEm_results['pred_tot_mean']
z = np.polyfit(true, pred, 1)
p = np.poly1d(z)
x_max = int(np.max(true)+1)
y_max = int(np.max(pred)+1)

plt.figure(figsize=(15,8))
plt.xlim([1,x_max])
plt.ylim([1,y_max])
plt.scatter(true, pred, c='blue', alpha=0.5)
plt.plot(true, p(true), "b")
plt.title('True Vs Predicted Average EEm per participant')
plt.xlabel('True')
plt.ylabel('Predicted')
plt.savefig(figures_folder+'all_scatter.pdf')

# Mean R^2 and RMSE over participants: whole recording, indoor part, outdoor part.
for label, r2_column, rms_column in (('Rsquared', 'rsquared', 'rms'),
                                     ('inRsquared', 'inRsquared', 'inRms'),
                                     ('outRsquared', 'outRsquared', 'outRms')):
    print('{}: {} /{}:{}'.format(label,
                                 get_all_patients_rsquared[r2_column].mean(),
                                 rms_column,
                                 get_all_patients_rms[rms_column].mean()))

In [None]:
# Median R^2 and RMSE over participants: whole recording, indoor part, outdoor part.
for label, r2_column, rms_column in (('Rsquared', 'rsquared', 'rms'),
                                     ('inRsquared', 'inRsquared', 'inRms'),
                                     ('outRsquared', 'outRsquared', 'outRms')):
    print('{}: {} /{}:{}'.format(label,
                                 get_all_patients_rsquared[r2_column].median(),
                                 rms_column,
                                 get_all_patients_rms[rms_column].median()))

# nnet model

In [None]:
# Set-up for evaluating the stored nnet predictions.
# Per-participant details; the evaluation loop reads 'participant',
# 'timeToSplit' and 'outEEm' from this table.
EEm_details = pd.read_csv('EEm_details_per_partic.csv')

w = 30
devices = 'w' # aw or a or w

# dataToUse is the sub-folder of the prediction files and part of the output folder name.
dataToUse = '{}/{}sec'.format(devices, w)
model_name = 'nnet_' + dataToUse
figures_folder = 'images/{}/predPlot/'.format(model_name)
csv_folder = figures_folder

if not os.path.exists(figures_folder):
    os.makedirs(figures_folder)

# Empty result tables that the evaluation loop fills with one row per participant.
scopes = ['tot', 'in', 'out']
get_all_patients_rsquared = pd.DataFrame(columns=['participant', 'rsquared', 'inRsquared', 'outRsquared'])
get_all_patients_rms = pd.DataFrame(columns=['participant', 'rms', 'inRms', 'outRms'])
stats_EEm_results = pd.DataFrame(columns=['participant']
                                         + ['true_%s_mean' % scope for scope in scopes]
                                         + ['pred_%s_mean' % scope for scope in scopes])
print(model_name)
print(figures_folder)

In [None]:
# Per-participant evaluation of the stored nnet predictions.
#
# For every participant number in 5..36 (except the skipped ones) the loop
#   1. reads '<name>_preds.csv' from ANNs_predictions_Montoye_repl/nnet/<dataToUse>/
#      (predictions of R models trained in the Shark cluster with another
#      R-Jupyter notebook),
#   2. computes R^2, RMSE, mean and std for the whole recording and, when
#      EEm_details has an 'outEEm' value for the participant, separately for the
#      parts up to and after 'timeToSplit',
#   3. saves the time-series and scatter figures into figures_folder.
# After the loop the collected predictions and metric rows are stored in
# df_allPred / df_allPredin / df_allPredout and appended to the three result tables.


def _save_series_plot(frame, title, path):
    """Save a line plot of the 'True' and 'Predicted' columns of `frame` to `path`."""
    plt.figure(figsize=(15,8))
    plt.plot(frame['True'], label='True_EE')
    plt.plot(frame['Predicted'], label='Predicted_EE')
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(path)
    plt.close()


def _save_scatter_plot(parts, whole, fit, x_max, y_max, title, path):
    """Save a true-vs-predicted scatter plot to `path`.

    `parts` is a list of (frame, colour, label) tuples drawn as separate point
    clouds; `fit` is the fitted line, evaluated on the 'True' column of `whole`.
    """
    plt.figure(figsize=(15,8))
    plt.xlim([-1,x_max])
    plt.ylim([-1,y_max])
    for frame, colour, label in parts:
        plt.scatter(frame['True'], frame['Predicted'], c=colour, alpha=0.5, label=label)
    plt.plot(whole['True'], fit(whole['True']), "b")
    plt.legend(loc='upper right')
    plt.title(title)
    plt.xlabel('True')
    plt.ylabel('Predicted')
    plt.savefig(path)
    plt.close()


def _describe(frame):
    """Return R^2, RMSE and the mean/std of the 'True' and 'Predicted' columns."""
    true, pred = frame['True'], frame['Predicted']
    return {'rsquared': r2_score(true, pred),
            'rms': sqrt(mean_squared_error(true, pred)),
            'true_mean': np.mean(true), 'pred_mean': np.mean(pred),
            'true_std': np.std(true), 'pred_std': np.std(pred)}


def _stack(frames):
    """Concatenate the collected frames; an empty list gives an empty DataFrame."""
    return pd.concat(frames) if frames else pd.DataFrame()


# Indoor/outdoor statistics for participants without an outdoor part. NaN (not
# None) keeps the metric columns numeric; both are written as empty CSV cells.
_NO_SPLIT = dict.fromkeys(['rsquared', 'rms', 'true_mean', 'pred_mean', 'true_std', 'pred_std'], np.nan)

skip_ids = [2, 3, 4, 19] #[12, 19, 23, 16]
all_frames, in_frames, out_frames = [], [], []
rsquared_rows, rms_rows, stats_rows = [], [], []

for i in range(5, 37):
    if i in skip_ids:
        continue
    name = 'GOTOV' + str(i).zfill(2)   # GOTOV05 ... GOTOV36

    details = EEm_details[EEm_details['participant'] == name]
    time = details['timeToSplit'].values[0]   # time to split in and outdoors
    has_outdoors = not np.isnan(details['outEEm'].values[0])

    print('read predictions:', name)
    df_test = pd.read_csv('ANNs_predictions_Montoye_repl/nnet/'+dataToUse+'/'+name+"_preds.csv",
                          header = 0, index_col = None, low_memory=False)
    df_test.columns = ['time', 'True', 'Predicted']
    df_test = df_test.set_index('time')
    df_test.index = pd.to_datetime(df_test.index)
    df_test['participant'] = name
    all_frames.append(df_test)

    total = _describe(df_test)
    print('rsquared...', total['rsquared'])
    print('rms...', total['rms'])

    print('Create figures ....')
    fit = np.poly1d(np.polyfit(df_test['True'], df_test['Predicted'], 1))
    x_max = int(np.max(df_test['True'])+2)
    y_max = int(np.max(df_test['Predicted'])+2)
    _save_series_plot(df_test, name, figures_folder+name+'.pdf')

    if has_outdoors:
        # The slices already carry the 'participant' column of df_test.
        indoors = df_test[df_test.index <= time]
        outdoors = df_test[df_test.index > time]
        in_frames.append(indoors)
        out_frames.append(outdoors)

        inside = _describe(indoors)
        print('in :', inside['rsquared'])
        _save_series_plot(indoors, name, figures_folder+name+'_in.pdf')

        outside = _describe(outdoors)
        print('out:', outside['rsquared'])
        _save_series_plot(outdoors, name, figures_folder+name+'_out.pdf')

        _save_scatter_plot([(indoors, 'green', 'Indoors'), (outdoors, 'orange', 'Outdoors')],
                           df_test, fit, x_max, y_max, name, figures_folder+name+'_scatter.pdf')
    else:
        # No outdoor part: the whole recording is collected as indoor data.
        inside = outside = _NO_SPLIT
        in_frames.append(df_test)
        _save_scatter_plot([(df_test, 'green', 'Indoors')],
                           df_test, fit, x_max, y_max, name, figures_folder+name+'_scatter.pdf')
    #fi

    rsquared_rows.append({'participant': name,
                          'rsquared'   : total['rsquared'],
                          'inRsquared' : inside['rsquared'],
                          'outRsquared': outside['rsquared']})

    rms_rows.append({'participant': name,
                     'rms'        : total['rms'],
                     'inRms'      : inside['rms'],
                     'outRms'     : outside['rms']})

    stats_rows.append({'participant'  : name,
                       'true_tot_mean': total['true_mean'],
                       'true_in_mean' : inside['true_mean'],
                       'true_out_mean': outside['true_mean'],
                       'true_tot_std' : total['true_std'],
                       'true_in_std'  : inside['true_std'],
                       'true_out_std' : outside['true_std'],
                       'pred_tot_mean': total['pred_mean'],
                       'pred_in_mean' : inside['pred_mean'],
                       'pred_out_mean': outside['pred_mean'],
                       'pred_tot_std' : total['pred_std'],
                       'pred_in_std'  : inside['pred_std'],
                       'pred_out_std' : outside['pred_std']})
#efor

df_allPred = _stack(all_frames)
df_allPredin = _stack(in_frames)
df_allPredout = _stack(out_frames)

# DataFrame.append was removed in pandas 2.0; add all new rows with one concat per table.
get_all_patients_rsquared = pd.concat([get_all_patients_rsquared, pd.DataFrame(rsquared_rows)],
                                      ignore_index=True)
get_all_patients_rms = pd.concat([get_all_patients_rms, pd.DataFrame(rms_rows)],
                                 ignore_index=True)
stats_EEm_results = pd.concat([stats_EEm_results, pd.DataFrame(stats_rows)],
                              ignore_index=True)

# Write the three per-participant result tables as CSV files into csv_folder.
for results_table, csv_name in ((get_all_patients_rsquared, 'rsquared.csv'),
                                (get_all_patients_rms, 'rms.csv'),
                                (stats_EEm_results, 'predStats.csv')):
    results_table.to_csv(csv_folder+csv_name, index=False)

# One point per participant: mean true EEm against mean predicted EEm,
# with a first-degree least-squares fit drawn through the points.
true = stats_EEm_results['true_tot_mean']
pred = stats_EEm_results['pred_tot_mean']
z = np.polyfit(true, pred, 1)
p = np.poly1d(z)
x_max = int(np.max(true)+1)
y_max = int(np.max(pred)+1)

plt.figure(figsize=(15,8))
plt.xlim([1,x_max])
plt.ylim([1,y_max])
plt.scatter(true, pred, c='blue', alpha=0.5)
plt.plot(true, p(true), "b")
plt.title('True Vs Predicted Average EEm per participant')
plt.xlabel('True')
plt.ylabel('Predicted')
plt.savefig(figures_folder+'all_scatter.pdf')

# Mean R^2 and RMSE over participants: whole recording, indoor part, outdoor part.
for label, r2_column, rms_column in (('Rsquared', 'rsquared', 'rms'),
                                     ('inRsquared', 'inRsquared', 'inRms'),
                                     ('outRsquared', 'outRsquared', 'outRms')):
    print('{}: {} /{}:{}'.format(label,
                                 get_all_patients_rsquared[r2_column].mean(),
                                 rms_column,
                                 get_all_patients_rms[rms_column].mean()))

In [None]:
# Mean R^2 and RMSE over participants: whole recording, indoor part, outdoor part.
for label, r2_column, rms_column in (('Rsquared', 'rsquared', 'rms'),
                                     ('inRsquared', 'inRsquared', 'inRms'),
                                     ('outRsquared', 'outRsquared', 'outRms')):
    print('{}: {} /{}:{}'.format(label,
                                 get_all_patients_rsquared[r2_column].mean(),
                                 rms_column,
                                 get_all_patients_rms[rms_column].mean()))

In [None]:
# Display the per-participant R^2 table (columns: participant, rsquared, inRsquared, outRsquared).
get_all_patients_rsquared

In [None]:
# Display the per-participant RMSE table (columns: participant, rms, inRms, outRms).
get_all_patients_rms

# neuralnet model

In [None]:
# Set-up for evaluating the stored neuralnet predictions.
# Per-participant details; the evaluation loop reads 'participant',
# 'timeToSplit' and 'outEEm' from this table.
EEm_details = pd.read_csv('EEm_details_per_partic.csv')

w = 30

# dataToUse is the sub-folder of the prediction files and part of the output folder name.
dataToUse = 'aw_{}sec'.format(w)
model_name = 'neuralnet_' + dataToUse
figures_folder = 'images/{}/predPlot/'.format(model_name)
csv_folder = figures_folder

if not os.path.exists(figures_folder):
    os.makedirs(figures_folder)

# Empty result tables that the evaluation loop fills with one row per participant.
scopes = ['tot', 'in', 'out']
get_all_patients_rsquared = pd.DataFrame(columns=['participant', 'rsquared', 'inRsquared', 'outRsquared'])
get_all_patients_rms = pd.DataFrame(columns=['participant', 'rms', 'inRms', 'outRms'])
stats_EEm_results = pd.DataFrame(columns=['participant']
                                         + ['true_%s_mean' % scope for scope in scopes]
                                         + ['pred_%s_mean' % scope for scope in scopes])


In [None]:
# Per-participant evaluation of the stored neuralnet predictions.
#
# For every participant number in 5..36 (except the skipped ones) the loop
#   1. reads '<name>_preds.csv' from ANNs_predictions_Montoye_repl/neuralnet/<dataToUse>/
#      (predictions of R models trained in the Shark cluster with another
#      R-Jupyter notebook),
#   2. computes R^2, RMSE, mean and std for the whole recording and, when
#      EEm_details has an 'outEEm' value for the participant, separately for the
#      parts up to and after 'timeToSplit',
#   3. saves the time-series and scatter figures into figures_folder.
# After the loop the collected predictions and metric rows are stored in
# df_allPred / df_allPredin / df_allPredout and appended to the three result tables.


def _save_series_plot(frame, title, path):
    """Save a line plot of the 'True' and 'Predicted' columns of `frame` to `path`."""
    plt.figure(figsize=(15,8))
    plt.plot(frame['True'], label='True_EE')
    plt.plot(frame['Predicted'], label='Predicted_EE')
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(path)
    plt.close()


def _save_scatter_plot(parts, whole, fit, x_max, y_max, title, path):
    """Save a true-vs-predicted scatter plot to `path`.

    `parts` is a list of (frame, colour, label) tuples drawn as separate point
    clouds; `fit` is the fitted line, evaluated on the 'True' column of `whole`.
    """
    plt.figure(figsize=(15,8))
    plt.xlim([-1,x_max])
    plt.ylim([-1,y_max])
    for frame, colour, label in parts:
        plt.scatter(frame['True'], frame['Predicted'], c=colour, alpha=0.5, label=label)
    plt.plot(whole['True'], fit(whole['True']), "b")
    plt.legend(loc='upper right')
    plt.title(title)
    plt.xlabel('True')
    plt.ylabel('Predicted')
    plt.savefig(path)
    plt.close()


def _describe(frame):
    """Return R^2, RMSE and the mean/std of the 'True' and 'Predicted' columns."""
    true, pred = frame['True'], frame['Predicted']
    return {'rsquared': r2_score(true, pred),
            'rms': sqrt(mean_squared_error(true, pred)),
            'true_mean': np.mean(true), 'pred_mean': np.mean(pred),
            'true_std': np.std(true), 'pred_std': np.std(pred)}


def _stack(frames):
    """Concatenate the collected frames; an empty list gives an empty DataFrame."""
    return pd.concat(frames) if frames else pd.DataFrame()


# Indoor/outdoor statistics for participants without an outdoor part. NaN (not
# None) keeps the metric columns numeric; both are written as empty CSV cells.
_NO_SPLIT = dict.fromkeys(['rsquared', 'rms', 'true_mean', 'pred_mean', 'true_std', 'pred_std'], np.nan)

skip_ids = [2, 3, 4, 19, 9, 26, 27, 28, 29] #[12, 19, 23, 16]
all_frames, in_frames, out_frames = [], [], []
rsquared_rows, rms_rows, stats_rows = [], [], []

for i in range(5, 37):
    if i in skip_ids:
        continue
    name = 'GOTOV' + str(i).zfill(2)   # GOTOV05 ... GOTOV36

    details = EEm_details[EEm_details['participant'] == name]
    time = details['timeToSplit'].values[0]   # time to split in and outdoors
    has_outdoors = not np.isnan(details['outEEm'].values[0])

    print('read predictions:', name)
    df_test = pd.read_csv('ANNs_predictions_Montoye_repl/neuralnet/'+dataToUse+'/'+name+"_preds.csv",
                          header = 0, index_col = None, low_memory=False)
    df_test.columns = ['time', 'True', 'Predicted']
    df_test = df_test.set_index('time')
    df_test.index = pd.to_datetime(df_test.index)
    df_test['participant'] = name
    all_frames.append(df_test)

    total = _describe(df_test)
    print('rsquared...', total['rsquared'])
    print('rms...', total['rms'])

    print('Create figures ....')
    fit = np.poly1d(np.polyfit(df_test['True'], df_test['Predicted'], 1))
    x_max = int(np.max(df_test['True'])+2)
    y_max = int(np.max(df_test['Predicted'])+2)
    _save_series_plot(df_test, name, figures_folder+name+'.pdf')

    if has_outdoors:
        # The slices already carry the 'participant' column of df_test.
        indoors = df_test[df_test.index <= time]
        outdoors = df_test[df_test.index > time]
        in_frames.append(indoors)
        out_frames.append(outdoors)

        inside = _describe(indoors)
        print('in :', inside['rsquared'])
        _save_series_plot(indoors, name, figures_folder+name+'_in.pdf')

        outside = _describe(outdoors)
        print('out:', outside['rsquared'])
        _save_series_plot(outdoors, name, figures_folder+name+'_out.pdf')

        _save_scatter_plot([(indoors, 'green', 'Indoors'), (outdoors, 'orange', 'Outdoors')],
                           df_test, fit, x_max, y_max, name, figures_folder+name+'_scatter.pdf')
    else:
        # No outdoor part: the whole recording is collected as indoor data.
        inside = outside = _NO_SPLIT
        in_frames.append(df_test)
        _save_scatter_plot([(df_test, 'green', 'Indoors')],
                           df_test, fit, x_max, y_max, name, figures_folder+name+'_scatter.pdf')
    #fi

    rsquared_rows.append({'participant': name,
                          'rsquared'   : total['rsquared'],
                          'inRsquared' : inside['rsquared'],
                          'outRsquared': outside['rsquared']})

    rms_rows.append({'participant': name,
                     'rms'        : total['rms'],
                     'inRms'      : inside['rms'],
                     'outRms'     : outside['rms']})

    stats_rows.append({'participant'  : name,
                       'true_tot_mean': total['true_mean'],
                       'true_in_mean' : inside['true_mean'],
                       'true_out_mean': outside['true_mean'],
                       'true_tot_std' : total['true_std'],
                       'true_in_std'  : inside['true_std'],
                       'true_out_std' : outside['true_std'],
                       'pred_tot_mean': total['pred_mean'],
                       'pred_in_mean' : inside['pred_mean'],
                       'pred_out_mean': outside['pred_mean'],
                       'pred_tot_std' : total['pred_std'],
                       'pred_in_std'  : inside['pred_std'],
                       'pred_out_std' : outside['pred_std']})
#efor

df_allPred = _stack(all_frames)
df_allPredin = _stack(in_frames)
df_allPredout = _stack(out_frames)

# DataFrame.append was removed in pandas 2.0; add all new rows with one concat per table.
get_all_patients_rsquared = pd.concat([get_all_patients_rsquared, pd.DataFrame(rsquared_rows)],
                                      ignore_index=True)
get_all_patients_rms = pd.concat([get_all_patients_rms, pd.DataFrame(rms_rows)],
                                 ignore_index=True)
stats_EEm_results = pd.concat([stats_EEm_results, pd.DataFrame(stats_rows)],
                              ignore_index=True)

# Write the three per-participant result tables as CSV files into csv_folder.
for results_table, csv_name in ((get_all_patients_rsquared, 'rsquared.csv'),
                                (get_all_patients_rms, 'rms.csv'),
                                (stats_EEm_results, 'predStats.csv')):
    results_table.to_csv(csv_folder+csv_name, index=False)

# One point per participant: mean true EEm against mean predicted EEm,
# with a first-degree least-squares fit drawn through the points.
true = stats_EEm_results['true_tot_mean']
pred = stats_EEm_results['pred_tot_mean']
z = np.polyfit(true, pred, 1)
p = np.poly1d(z)
x_max = int(np.max(true)+1)
y_max = int(np.max(pred)+1)

plt.figure(figsize=(15,8))
plt.xlim([1,x_max])
plt.ylim([1,y_max])
plt.scatter(true, pred, c='blue', alpha=0.5)
plt.plot(true, p(true), "b")
plt.title('True Vs Predicted Average EEm per participant')
plt.xlabel('True')
plt.ylabel('Predicted')
plt.savefig(figures_folder+'all_scatter.pdf')

# Mean R^2 and RMSE over participants: whole recording, indoor part, outdoor part.
for label, r2_column, rms_column in (('Rsquared', 'rsquared', 'rms'),
                                     ('inRsquared', 'inRsquared', 'inRms'),
                                     ('outRsquared', 'outRsquared', 'outRms')):
    print('{}: {} /{}:{}'.format(label,
                                 get_all_patients_rsquared[r2_column].mean(),
                                 rms_column,
                                 get_all_patients_rms[rms_column].mean()))

In [None]:
# Mean R^2 and RMSE over participants: whole recording, indoor part, outdoor part.
for label, r2_column, rms_column in (('Rsquared', 'rsquared', 'rms'),
                                     ('inRsquared', 'inRsquared', 'inRms'),
                                     ('outRsquared', 'outRsquared', 'outRms')):
    print('{}: {} /{}:{}'.format(label,
                                 get_all_patients_rsquared[r2_column].mean(),
                                 rms_column,
                                 get_all_patients_rms[rms_column].mean()))

# CNN model

In [None]:
import os
# Restrict CUDA to GPU "6", with devices enumerated by PCI bus id
# (check with nvidia-smi that the GPU is free first).
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "6",
})
os.getcwd()

In [None]:
import datetime
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.layers import Conv1D, AveragePooling1D, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, concatenate
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.utils import plot_model
from livelossplot.keras import PlotLossesCallback


In [None]:
# Ask TensorFlow for a GPU device name; as the last expression of the cell
# its value is shown as the cell output.
tf.test.gpu_device_name()

In [None]:
def CNN_modeling(name, X_train, y_train, bmi_train, X_val, y_val, bmi_val, n_batch, n_epochs, moldel_dir, image_dir, BMI=True):
    """Build, compile and fit the CNN regressor for one test participant.

    Architecture (Keras functional API):
        - Conv1D, 8 filters, kernel size 5, tanh activation
        - AveragePooling1D, pool size 2
        - Conv1D, 4 filters, kernel size 5, tanh activation
        - AveragePooling1D, pool size 2
        - Flatten
        - (only when ``BMI == True``) concatenation with the
          participant-level input
        - Dense layer of size 400 (no activation argument is given)
        - Dense layer of size 1 with linear activation

    The model is compiled with mean-squared-error loss and the 'sgd'
    optimizer. During fitting, TensorBoard logs go to ``moldel_dir/name``
    and the model with the best ``val_loss`` is written to
    ``moldel_dir/name.hdf5``.

    INPUTS:
        - name: the id-name of the test participant (e.g. GOTOV00)
        - X_train/X_val: training/validation predictors (accel); axes 1
          and 2 of X_train define the input shape of the first branch
        - y_train/y_val: training/validation target (EE)
        - bmi_train/bmi_val: training/validation participant-level data;
          axis 1 of bmi_train defines the width of the second input
        - n_batch: batch size
        - n_epochs: number of epochs
        - moldel_dir: directory in which the models/logs are saved
        - image_dir: directory for images; currently unused because the
          plot_model call is disabled
        - BMI: when equal to True the participant-level input is merged
          into the dense layer. Otherwise it is still declared as a
          model input (so it must still be fed) but is not connected.

    OUTPUTS: the object returned by ``model.fit`` (the training history).
    """
    print('Creating functional api model')

    # Branch 1: accelerometer sensor measurements.
    window_len, n_channels = X_train.shape[1], X_train.shape[2]
    accel_in = Input(shape=(window_len, n_channels))

    # Two conv + average-pooling stages (8 filters, then 4 filters).
    features = accel_in
    for n_filters in (8, 4):
        features = Conv1D(filters=n_filters, kernel_size=5, activation='tanh')(features)
        features = AveragePooling1D(pool_size=2)(features)

    # Flatten to a vector before connecting to the dense layer.
    features = Flatten()(features)

    # Branch 2: participant-level data (BMI).
    subject_in = Input(shape=(bmi_train.shape[1],))

    # Kept as an explicit equality test so only values equal to True merge.
    if BMI == True:
        dense_in = concatenate([subject_in, features])
    else:
        dense_in = features

    hidden = Dense(400)(dense_in)
    prediction = Dense(1, activation='linear')(hidden)

    net = Model(inputs=[accel_in, subject_in], outputs=prediction)
    # plot_model(net, to_file=image_dir)

    net.compile(loss='mean_squared_error', optimizer='sgd', metrics=['mse'])
    net.summary()

    run_prefix = moldel_dir + '/' + name
    tensorboard_cb = TensorBoard(log_dir=run_prefix, histogram_freq=0,
                                 write_graph=True, write_images=True)
    # Keep only the weights of the epoch with the lowest validation loss.
    checkpoint_cb = ModelCheckpoint(filepath=run_prefix + '.hdf5', monitor='val_loss',
                                    mode='auto', save_best_only=True, verbose=1)

    return net.fit(x=[X_train, bmi_train], y=y_train,
                   validation_data=([X_val, bmi_val], y_val),
                   batch_size=n_batch, epochs=n_epochs,
                   shuffle=True, verbose=1,
                   callbacks=[checkpoint_cb, tensorboard_cb])
#eDEF

## Train

In [None]:
# ------------------------------ FIXED VARs ------------------------------
n_batch, epochs = 512, 50
EEm_rate = 10
window = '5.12sec'

BMI = True        # merge the participant-level input into the dense layer
device = 'wrist'

# Downsampled (10ds) sequences are used for training; the 0ds ("original")
# sequences supply the validation/test data.
seq_path = 'sequences/5.12secs_seqs_1a1val_EEm_10ds_mean_256/'
orig_seq_path = 'sequences/5.12secs_seqs_1a1val_EEm_0ds_mean_256/'

# The model name encodes the settings above plus a BMI suffix.
model_name = f'CNNrepl_EEm_{EEm_rate}ds_1Seg_{window}_{epochs}epochs_{n_batch}batch_1val_{device}'
model_name += '_noBMI' if BMI == False else '_withBMI'

# Output locations derived from the model name.
folder = f'models/{model_name}'
image = f'images/{model_name}/Accur_Loss_figures/'
predPlot = f'images/{model_name}/predPlot/'

print('Seqs path :', seq_path)
print('Model name:', model_name)

In [None]:
# Train one CNN per participant and save its learning curves.

# Create every output directory on its own (exist_ok): a partially existing
# tree must neither leave `predPlot` missing nor make makedirs raise.
for out_dir in (folder, image, predPlot):
    os.makedirs(out_dir, exist_ok=True)

for i in range(5, 37):
    if i in [2, 3, 4, 19]:  # participant numbers that are skipped
        continue
    name = 'GOTOV' + str(i).zfill(2)  # zero-padded id, e.g. GOTOV05

    seq_file = seq_path + name + '.pkl'
    if not os.path.exists(seq_file):
        print('filepath doesnt exist')
        continue

    part = model_name + '_' + name
    print('Load sequences of participant', name)
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # sequence files from a trusted source.
    with open(seq_file, 'rb') as f:
        (X_train, y_train, ytrain_time, bmi_train,
         X_val, y_val, yval_time, bmi_val,
         X_test, y_test, ytest_time, bmi_test, scaler) = pickle.load(f)

    # Validation/test data and the scaler are taken from the original
    # sequence set; its training split is not needed.
    with open(orig_seq_path + name + '.pkl', 'rb') as f:
        (X_train1, y_train1, ytrain_time1, bmi_train1,
         X_val, y_val, yval_time, bmi_val,
         X_test, y_test, ytest_time, bmi_test, scaler) = pickle.load(f)
    # Drop the references to the unused training split.
    X_train1, y_train1, ytrain_time1 = [], [], []

    # Keep only one device: columns 0-2 for the ankle, 3-5 for the wrist;
    # any other device value keeps every column.
    if device == 'ankle':
        channels = slice(0, 3)
    elif device == 'wrist':
        channels = slice(3, 6)
    else:
        channels = slice(None)
    X_train = X_train[:, :, channels]
    X_val = X_val[:, :, channels]
    X_test = X_test[:, :, channels]

    print('Training model... ')
    # CNN_modeling returns the fit history, stored under the name `model`.
    model = CNN_modeling(name, X_train, y_train, bmi_train,
                         X_val, y_val, bmi_val, n_batch, epochs, folder, image,
                         BMI=BMI)

    # One figure per curve: MSE metric first, then the loss.
    for metric, title, ylabel in (('mse', 'model Mean Square Error', 'Mean Square Error'),
                                  ('loss', 'model loss', 'loss')):
        plt.figure()  # fresh figure so curves never pile up on old axes
        plt.plot(model.history[metric])
        plt.plot(model.history['val_' + metric])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper left')
        plt.savefig(image + name + '_' + metric + '.pdf')
        plt.show()
        plt.close()

## Predict

In [None]:
from tensorflow.keras.models import load_model
from sklearn.metrics import r2_score
import numpy as np
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import csv
import glob
import pandas as pd
import matplotlib.pyplot as plt
# Display figures inline in Jupyter notebook
import seaborn as sns
# Use seaborn style defaults and set the default figure size
sns.set(rc={'figure.figsize':(11, 4)})
# confs for plotly
def configure_plotly_browser_state():
  """Display an HTML snippet that configures require.js for plotly.

  The snippet loads require.js from the notebook's static files and
  registers the ``base`` and ``plotly`` module paths through
  ``requirejs.config``. It is shown with the notebook's ``display``.
  """
  import IPython
  requirejs_snippet = '''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
        '''
  html_block = IPython.core.display.HTML(requirejs_snippet)
  display(html_block)

In [None]:
# Training settings, kept for reference:
# n_batch = 512
# epochs = 50
# EEm_rate = 10
# windowInMinutes = 4

EEm_details = pd.read_csv('EEm_details_per_partic.csv')

# Test against the original target data.
seq_path = patient_folder = orig_seq_path
trained_models_location = folder + '/'
# Figures and CSV results share one output folder.
figures_folder = csv_folder = 'images/' + model_name + '/predPlot/'

# Empty result tables, filled with one row per participant during prediction.
get_all_patients_rsquared = pd.DataFrame(
    columns=['participant', 'rsquared', 'inRsquared', 'outRsquared'])
get_all_patients_rms = pd.DataFrame(
    columns=['participant', 'rms', 'inRms', 'outRms'])
stats_EEm_results = pd.DataFrame(
    columns=['participant',
             'true_tot_mean', 'true_in_mean', 'true_out_mean',
             'pred_tot_mean', 'pred_in_mean', 'pred_out_mean'])

for _label, _location in (('Model:', trained_models_location),
                          ('Seqs:', patient_folder),
                          ('Results:', figures_folder)):
    print(_label, _location)

# Sorted list of the trained model files that are available.
import glob
dire = sorted(glob.glob(trained_models_location + '*.hdf5'))
print('N =', len(dire))

In [None]:
# Predict the test set of every participant with a trained model, undo the
# scaling, and collect metrics, figures and pooled predictions.


def _inverse_scale_target(scaler, features_2d, target):
    """Return `target` mapped back through `scaler.inverse_transform`.

    `target` is NaN-padded to the row count of `features_2d` and appended
    as an extra column so the matrix has the layout the scaler is applied
    to; the padded rows are dropped again from the result.
    """
    padded = np.empty((features_2d.shape[0], 1))
    padded.fill(np.nan)
    padded[:target.shape[0]] = target
    stacked = np.concatenate((features_2d, padded), axis=1)
    stacked = np.ma.array(stacked, mask=np.isnan(stacked))
    # Column 6 is read back as the target; assumes six feature columns so
    # that the appended column sits at index 6 -- TODO confirm.
    restored = scaler.inverse_transform(stacked)[:, 6]
    return restored[~np.isnan(restored)]


def _append_row(table, row):
    """Return `table` with the dict `row` added as one new row.

    Replaces DataFrame.append, which was removed in pandas 2.0. Keys that
    are not yet columns are added after the existing columns in sorted
    order, the layout DataFrame.append produced.
    """
    extra = sorted(key for key in row if key not in table.columns)
    row_frame = pd.DataFrame([row], columns=list(table.columns) + extra)
    return pd.concat([table, row_frame], ignore_index=True)


def _segment_metrics(frame):
    """Return (r2, rms, true mean, pred mean, true std, pred std) of `frame`."""
    r2 = r2_score(frame['True'], frame['Predicted'])
    rmse = sqrt(mean_squared_error(frame['True'], frame['Predicted']))
    return (r2, rmse,
            np.mean(frame['True']), np.mean(frame['Predicted']),
            np.std(frame['True']), np.std(frame['Predicted']))


def _save_series_plot(frame, title, out_file):
    """Save a line plot of the True and Predicted columns of `frame`."""
    plt.figure(figsize=(15, 8))
    plt.plot(frame['True'], label='True_EE')
    plt.plot(frame['Predicted'], label='Predicted_EE')
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(out_file)
    plt.close()


# Pooled predictions over all participants (whole test set / indoors / outdoors).
df_allPred = pd.DataFrame()
df_allPredin = pd.DataFrame()
df_allPredout = pd.DataFrame()

for i in range(5, 37):
    if i in [2, 3, 4, 19]:  # participant numbers that are skipped
        continue
    name = 'GOTOV' + str(i).zfill(2)  # zero-padded id, e.g. GOTOV05

    model_file = trained_models_location + name + '.hdf5'
    if not os.path.exists(model_file):
        print("Model not found")
        continue

    # Looked up only once a model exists, so a participant without a model
    # and without a details row is skipped instead of raising IndexError.
    details = EEm_details[EEm_details['participant'] == name]
    time = details['timeToSplit'].values[0]  # indoors/outdoors split time
    # A NaN outEEm entry means there is no outdoors segment to evaluate.
    has_outdoors = not np.isnan(details['outEEm'].values[0])

    print('Predicting test for patient', name)
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # sequence files from a trusted source.
    with open(patient_folder + name + '.pkl', 'rb') as f:
        (X_train, y_train, ytrain_time, bmi_train,
         X_val, y_val, yval_time, bmi_val,
         X_test, y_test, ytest_time, bmi_test, scaler) = pickle.load(f)

    # Keep only one device for the model input: columns 0-2 for the ankle,
    # 3-5 for the wrist, everything otherwise. X_test itself stays complete
    # because the scaler is applied to all feature columns below.
    if device == 'ankle':
        channels = slice(0, 3)
    elif device == 'wrist':
        channels = slice(3, 6)
    else:
        channels = slice(None)
    X_train = X_train[:, :, channels]
    X_val = X_val[:, :, channels]
    X_test1 = X_test[:, :, channels]

    print('loading model....')
    model = load_model(model_file)
    print('predicting....')
    yhat = model.predict([X_test1, bmi_test])

    # Reverse the standardization so the errors are in the original units.
    X_test = X_test.reshape(-1, X_test.shape[2])
    inv_yhat = _inverse_scale_target(scaler, X_test, yhat)
    y_test = y_test.reshape(len(y_test), 1)
    inv_ytest = _inverse_scale_target(scaler, X_test, y_test)

    df_test = pd.DataFrame(data=inv_ytest, index=ytest_time, columns=['True'])
    df_test['Predicted'] = inv_yhat
    df_test['participant'] = name
    df_allPred = pd.concat([df_allPred, df_test])

    # Metrics over the whole test set.
    (rsquared, rms,
     true_tot_mean, pred_tot_mean,
     true_tot_std, pred_tot_std) = _segment_metrics(df_test)
    print('rsquared...', rsquared)
    print('rms...', rms)

    print('Create figures ....')

    # First-degree fit of predicted on true values, drawn on the scatter plot.
    z = np.polyfit(df_test['True'], df_test['Predicted'], 1)
    p = np.poly1d(z)
    x_max = int(np.max(df_test['True']) + 2)
    y_max = int(np.max(df_test['Predicted']) + 2)

    _save_series_plot(df_test, name, figures_folder + name + '.pdf')

    if has_outdoors:
        # Rows up to the split time are indoors, later rows outdoors. The
        # slices are copied so they are independent of df_test; they already
        # carry the participant column.
        indoors = df_test[df_test.index <= time].copy()
        df_allPredin = pd.concat([df_allPredin, indoors])
        (in_rsquared, in_rms,
         true_in_mean, pred_in_mean,
         true_in_std, pred_in_std) = _segment_metrics(indoors)
        print('in :', in_rsquared)
        _save_series_plot(indoors, name, figures_folder + name + '_in.pdf')

        outdoors = df_test[df_test.index > time].copy()
        df_allPredout = pd.concat([df_allPredout, outdoors])
        (out_rsquared, out_rms,
         true_out_mean, pred_out_mean,
         true_out_std, pred_out_std) = _segment_metrics(outdoors)
        print('out:', out_rsquared)
        _save_series_plot(outdoors, name, figures_folder + name + '_out.pdf')

        scatter_groups = [(indoors, 'green', 'Indoors'),
                          (outdoors, 'orange', 'Outdoors')]
    else:
        # No outdoors segment: the per-segment metrics stay empty and the
        # whole test set is pooled with the indoors predictions.
        in_rsquared = in_rms = out_rsquared = out_rms = None
        true_in_mean = true_out_mean = true_in_std = true_out_std = None
        pred_in_mean = pred_out_mean = pred_in_std = pred_out_std = None
        df_allPredin = pd.concat([df_allPredin, df_test])

        scatter_groups = [(df_test, 'green', 'Indoors')]
    #fi

    # Scatter plot of true vs predicted values with the fitted line.
    plt.figure(figsize=(15, 8))
    plt.xlim([-1, x_max])
    plt.ylim([-1, y_max])
    for group, colour, label in scatter_groups:
        plt.scatter(group['True'], group['Predicted'], c=colour, alpha=0.5, label=label)
    plt.plot(df_test['True'], p(df_test['True']), "b")
    plt.legend(loc='upper right')
    plt.title(name)
    plt.xlabel('True')
    plt.ylabel('Predicted')
    plt.savefig(figures_folder + name + '_scatter.pdf')
    plt.close()

    # One row per participant in each result table.
    get_all_patients_rsquared = _append_row(get_all_patients_rsquared,
                                            {'participant': name,
                                             'rsquared'   : rsquared,
                                             'inRsquared' : in_rsquared,
                                             'outRsquared': out_rsquared})

    get_all_patients_rms = _append_row(get_all_patients_rms,
                                       {'participant': name,
                                        'rms'        : rms,
                                        'inRms'      : in_rms,
                                        'outRms'     : out_rms})

    stats_EEm_results = _append_row(stats_EEm_results,
                                    {'participant'  : name,
                                     'true_tot_mean': true_tot_mean,
                                     'true_in_mean' : true_in_mean,
                                     'true_out_mean': true_out_mean,
                                     'true_tot_std' : true_tot_std,
                                     'true_in_std'  : true_in_std,
                                     'true_out_std' : true_out_std,
                                     'pred_tot_mean': pred_tot_mean,
                                     'pred_in_mean' : pred_in_mean,
                                     'pred_out_mean': pred_out_mean,
                                     'pred_tot_std' : pred_tot_std,
                                     'pred_in_std'  : pred_in_std,
                                     'pred_out_std' : pred_out_std})
#efor

# Write the three per-participant result tables next to the figures.
for _table, _csv_name in ((get_all_patients_rsquared, 'rsquared.csv'),
                          (get_all_patients_rms, 'rms.csv'),
                          (stats_EEm_results, 'predStats.csv')):
    _table.to_csv(csv_folder + _csv_name, index=False)

# Scatter of the per-participant mean EEm (true vs predicted) with a
# first-degree least-squares fit drawn on top.
true, pred = (stats_EEm_results[_col] for _col in ('true_tot_mean', 'pred_tot_mean'))
z = np.polyfit(true, pred, 1)
p = np.poly1d(z)
x_max, y_max = (int(np.max(_series) + 1) for _series in (true, pred))

plt.figure(figsize=(15, 8))
plt.xlim(1, x_max)
plt.ylim(1, y_max)
plt.scatter(true, pred, c='blue', alpha=0.5)
plt.plot(true, p(true), "b")
plt.title('True Vs Predicted Average EEm per participant')
plt.xlabel('True')
plt.ylabel('Predicted')
plt.savefig(figures_folder + 'all_scatter.pdf')


In [None]:
# Median / standard deviation of R-squared, then the mean R-squared and RMS
# over participants for the whole test set, indoors and outdoors.
print(f'Rsquared MD: {get_all_patients_rsquared.rsquared.median()!s}/Rsquared SD: {get_all_patients_rsquared.rsquared.std()!s}')
print(f'Rsquared MN: {get_all_patients_rsquared.rsquared.mean()!s} /rms MN:{get_all_patients_rms.rms.mean()!s}')
print(f'inRsquared: {get_all_patients_rsquared.inRsquared.mean()!s} /inRms:{get_all_patients_rms.inRms.mean()!s}')
print(f'outRsquared: {get_all_patients_rsquared.outRsquared.mean()!s} /outRms:{get_all_patients_rms.outRms.mean()!s}')