In [1]:
import pandas as pd
import os
import numpy as np
import random
import joblib

DATA_DIR = '/mnt/sda/hong01-data/MART_DATA/OUTPUT_MERGED'
CSV_DIR = f"{DATA_DIR}/PANDAS"
IMG_DIR = f"{DATA_DIR}/AUTOGRAPHER"

In [2]:
print('===== DATA PREPARATION =====')
dfA = pd.read_csv(f'{CSV_DIR}/trainA.csv', index_col=0)
dfB = pd.read_csv(f'{CSV_DIR}/trainB.csv', index_col=0)
dfC = pd.read_csv(f'{CSV_DIR}/test.csv', index_col=0)
df = pd.concat([dfA, dfB])
df = df.sort_values(by=['event_id', 'sub_id'])

===== DATA PREPARATION =====


In [3]:
dfC

Unnamed: 0,sub_id,event_id,source,data_HR_activity_median,data_HR_activity_min,data_HR_activity_max,data_HR_activity_average,data_HR_activity_std,data_HR_activity_len,data_LEFT_ACC_MAG_median,...,"data_AUTOGRAPHER_RESNET_max_buckeye, horse chestnut, conker",data_AUTOGRAPHER_RESNET_max_coral fungus,data_AUTOGRAPHER_RESNET_max_agaric,data_AUTOGRAPHER_RESNET_max_gyromitra,"data_AUTOGRAPHER_RESNET_max_stinkhorn, carrion fungus",data_AUTOGRAPHER_RESNET_max_earthstar,"data_AUTOGRAPHER_RESNET_max_hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa",data_AUTOGRAPHER_RESNET_max_bolete,"data_AUTOGRAPHER_RESNET_max_ear, spike, capitulum","data_AUTOGRAPHER_RESNET_max_toilet tissue, toilet paper, bathroom tissue"
0,1001,pred13,test,88.235294,72.289157,98.360656,88.219283,3.874933,89,0.001699,...,0.000145,0.000012,0.000380,0.000015,0.000025,0.000171,0.000093,0.000117,0.001396,0.013950
1,1001,pred5,test,85.714286,81.081081,95.238095,86.359184,2.492820,90,0.005783,...,0.000718,0.000329,0.004896,0.000148,0.000369,0.001058,0.000820,0.001022,0.003179,0.011084
2,1001,pred1,test,93.750000,76.923077,111.111111,93.337212,5.955160,90,0.019117,...,0.029949,0.000820,0.009032,0.000122,0.000838,0.000528,0.006853,0.002272,0.097297,2.994558
3,1001,pred0,test,80.000000,74.074074,98.360656,81.438823,4.955610,90,0.009851,...,0.001687,0.000190,0.003335,0.000153,0.000265,0.000568,0.000725,0.001179,0.006935,0.015692
4,1001,pred14,test,83.333333,76.923077,95.238095,83.504971,2.883154,90,0.007025,...,0.001243,0.000071,0.000332,0.000039,0.000157,0.000367,0.000356,0.000378,0.002017,0.005454
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,1007,pred10,test,84.507042,77.922078,92.307692,84.215004,2.816631,89,0.026158,...,0.000541,0.000020,0.000208,0.000004,0.000017,0.000052,0.000068,0.000210,0.000475,0.106788
16,1007,pred8,test,89.552239,83.333333,96.774194,89.349588,2.709860,89,0.003145,...,0.000169,0.000109,0.000464,0.000008,0.000105,0.000144,0.000457,0.000351,0.001817,0.182118
17,1007,pred9,test,75.949367,65.934066,90.909091,76.247028,4.796392,90,0.029223,...,0.000227,0.000056,0.000778,0.000062,0.000098,0.000659,0.000997,0.000643,0.001736,0.034770
18,1007,pred15,test,90.909091,80.000000,98.360656,91.301371,3.193509,90,0.025524,...,0.001957,0.001838,0.000810,0.000298,0.001192,0.000611,0.008139,0.005682,0.035892,0.107097


## Processing DF

In [4]:
def preprocess_df(input_df, standardize_paras=None):
    if standardize_paras is None:
        data_columns = input_df.columns.str.startswith("data_")
        data_autographer = [not item for item in input_df.columns.str.startswith("data_AUTOGRAPHER")]
        cont_names = list(input_df.loc[:, data_columns & data_autographer].columns)
        cont_names_2 = []
        for col in cont_names:
            nunique = len(np.unique(input_df[col]))
            if nunique > 1:
                cont_names_2.append(col)
        target_df = input_df[cont_names_2]
        for col in cont_names_2:
            temp = np.where(np.isnan(target_df[f'{col}'].values), 1, 0)
            if np.sum(temp) > 0: # this column has NA values
                target_df[f'{col}_isNA'] = np.where(np.isnan(target_df[f'{col}'].values), 1, -1) 
        # Fill NA
        target_df = target_df.fillna(target_df.median())
    
        # Standardize 
        standardize_paras = dict()
        for col in cont_names_2:
            standardize_paras[col] = [target_df[col].mean(), target_df[col].std()]

        std_df = target_df.copy()
        for col in cont_names_2:
            std_df[col] = (target_df[col] - standardize_paras[col][0])/standardize_paras[col][1]
    else:
        cont_names_2 = list(standardize_paras.keys())
        target_df = input_df[cont_names_2]
        for col in cont_names_2:
            temp = np.where(np.isnan(target_df[f'{col}'].values), 1, 0)
            if np.sum(temp) > 0: # this column has NA values
                target_df[f'{col}_isNA'] = np.where(np.isnan(target_df[f'{col}'].values), 1, -1) 
        # Fill NA
        target_df = target_df.fillna(target_df.median())
        
        # Standardize 
        std_df = target_df.copy()
        for col in cont_names_2:
            std_df[col] = (target_df[col] - standardize_paras[col][0])/standardize_paras[col][1]
            
    return std_df, standardize_paras

    

In [5]:
paras = joblib.load('scaling_paras.joblib')
target_df, paras = preprocess_df(dfC, paras)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [4]:
target_df, paras = preprocess_df(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [6]:
target_df

Unnamed: 0,data_HR_activity_median,data_HR_activity_min,data_HR_activity_max,data_HR_activity_average,data_HR_activity_std,data_HR_activity_len,data_LEFT_ACC_MAG_median,data_LEFT_ACC_MAG_min,data_LEFT_ACC_MAG_max,data_LEFT_ACC_MAG_average,...,data_MOUSE_TIMEDIFFS_median_isNA,data_MOUSE_TIMEDIFFS_min_isNA,data_MOUSE_TIMEDIFFS_max_isNA,data_MOUSE_TIMEDIFFS_average_isNA,data_MOUSE_TIMEDIFFS_std_isNA,data_MOUSE_VELOCITY_median_isNA,data_MOUSE_VELOCITY_min_isNA,data_MOUSE_VELOCITY_max_isNA,data_MOUSE_VELOCITY_average_isNA,data_MOUSE_VELOCITY_std_isNA
0,0.536697,-0.187334,0.533283,0.543295,-0.065119,-1.592236,-0.851820,1.324178,-0.651650,-0.850786,...,1,1,1,1,1,1,1,1,1,1
1,0.287748,0.775374,0.228078,0.353007,-0.618545,0.625804,-0.620615,-0.168253,-0.998053,-0.650123,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,1.081273,0.320076,1.779533,1.066858,0.767845,0.625804,0.134121,-2.009457,2.367226,1.203804,...,1,1,1,1,1,1,1,1,1,1
3,-0.276536,0.008113,0.533283,-0.150346,0.367606,0.625804,-0.390345,-0.378369,0.560495,-0.500823,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,0.052630,0.320076,0.228078,0.061021,-0.462247,0.625804,-0.550312,1.284008,0.273049,-0.578717,...,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,0.168533,0.429466,-0.058344,0.133657,-0.488884,-1.592236,0.532637,0.676025,0.259863,0.181820,...,1,1,1,1,1,1,1,1,1,1
16,0.666745,1.021994,0.378219,0.658925,-0.531637,-1.592236,-0.769945,1.458411,-0.220111,-0.513913,...,1,1,1,1,1,1,1,1,1,1
17,-0.676535,-0.883212,-0.195046,-0.681466,0.303852,0.625804,0.706131,-0.290489,-0.833858,0.245515,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
18,0.800734,0.656997,0.533283,0.858592,-0.337975,0.625804,0.496772,-1.072312,1.207910,0.235803,...,1,1,1,1,1,1,1,1,1,1


In [6]:
paras

{'data_HR_activity_median': [82.80037262393816, 10.126603727472052],
 'data_HR_activity_min': [73.999984301409, 9.132487141642178],
 'data_HR_activity_max': [92.90461419618875, 10.231050877238824],
 'data_HR_activity_average': [82.9084797430595, 9.775180460212962],
 'data_HR_activity_std': [4.0375595638755275, 2.4973784839603863],
 'data_HR_activity_len': [89.71785714285714, 0.4508483214001913],
 'data_LEFT_ACC_MAG_median': [0.016747733146517077, 0.017667087700605412],
 'data_LEFT_ACC_MAG_min': [-0.5303886881543718, 0.22508111178559065],
 'data_LEFT_ACC_MAG_max': [1.3914852391193278, 0.39924991634548523],
 'data_LEFT_ACC_MAG_average': [0.023199174178477663, 0.025681894154484193],
 'data_LEFT_ACC_MAG_std': [0.07674702999991916, 0.07780738511878107],
 'data_LEFT_ACC_MAG_len': [7518.146428571428, 118.37303584194893],
 'data_LEFT_ACC_X_median': [-0.15716071428571426, 0.47061146744401644],
 'data_LEFT_ACC_X_min': [-1.0929642857142858, 0.6802061664138638],
 'data_LEFT_ACC_X_max': [0.98282142

## MERGE WITH IMAGES

In [8]:
target_df['event_id'] = dfC['event_id']
target_df['sub_id'] = dfC['sub_id']
target_df['source'] = dfC['source']
# target_df['label'] = target_df.apply(lambda row: int(row['event_id'][-2:]), axis=1) # run this after merge below

In [9]:
target_df

Unnamed: 0,data_HR_activity_median,data_HR_activity_min,data_HR_activity_max,data_HR_activity_average,data_HR_activity_std,data_HR_activity_len,data_LEFT_ACC_MAG_median,data_LEFT_ACC_MAG_min,data_LEFT_ACC_MAG_max,data_LEFT_ACC_MAG_average,...,data_MOUSE_TIMEDIFFS_average_isNA,data_MOUSE_TIMEDIFFS_std_isNA,data_MOUSE_VELOCITY_median_isNA,data_MOUSE_VELOCITY_min_isNA,data_MOUSE_VELOCITY_max_isNA,data_MOUSE_VELOCITY_average_isNA,data_MOUSE_VELOCITY_std_isNA,event_id,sub_id,source
0,0.536697,-0.187334,0.533283,0.543295,-0.065119,-1.592236,-0.851820,1.324178,-0.651650,-0.850786,...,1,1,1,1,1,1,1,pred13,1001,test
1,0.287748,0.775374,0.228078,0.353007,-0.618545,0.625804,-0.620615,-0.168253,-0.998053,-0.650123,...,-1,-1,-1,-1,-1,-1,-1,pred5,1001,test
2,1.081273,0.320076,1.779533,1.066858,0.767845,0.625804,0.134121,-2.009457,2.367226,1.203804,...,1,1,1,1,1,1,1,pred1,1001,test
3,-0.276536,0.008113,0.533283,-0.150346,0.367606,0.625804,-0.390345,-0.378369,0.560495,-0.500823,...,-1,-1,-1,-1,-1,-1,-1,pred0,1001,test
4,0.052630,0.320076,0.228078,0.061021,-0.462247,0.625804,-0.550312,1.284008,0.273049,-0.578717,...,1,1,1,1,1,1,1,pred14,1001,test
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,0.168533,0.429466,-0.058344,0.133657,-0.488884,-1.592236,0.532637,0.676025,0.259863,0.181820,...,1,1,1,1,1,1,1,pred10,1007,test
16,0.666745,1.021994,0.378219,0.658925,-0.531637,-1.592236,-0.769945,1.458411,-0.220111,-0.513913,...,1,1,1,1,1,1,1,pred8,1007,test
17,-0.676535,-0.883212,-0.195046,-0.681466,0.303852,0.625804,0.706131,-0.290489,-0.833858,0.245515,...,-1,-1,-1,-1,-1,-1,-1,pred9,1007,test
18,0.800734,0.656997,0.533283,0.858592,-0.337975,0.625804,0.496772,-1.072312,1.207910,0.235803,...,1,1,1,1,1,1,1,pred15,1007,test


In [11]:
imgs = sorted(os.listdir(IMG_DIR))
imgs = [item[:5]+'test_'+item[5:] if 'pred' in item else item for item in imgs]

df_image = pd.DataFrame([item.split('_')+[item] for item in imgs], columns=['sub_id', 'source', 'event_id', 'img_order', 'image_path'])
df_image = df_image.astype({'sub_id': 'int64'})

df_train = pd.merge(target_df, df_image, how='inner', on=['sub_id', 'event_id', 'source'])
#df_train['label'] = df_train.apply(lambda row: int(row['event_id'][-2:]), axis=1)

In [12]:
df_train

Unnamed: 0,data_HR_activity_median,data_HR_activity_min,data_HR_activity_max,data_HR_activity_average,data_HR_activity_std,data_HR_activity_len,data_LEFT_ACC_MAG_median,data_LEFT_ACC_MAG_min,data_LEFT_ACC_MAG_max,data_LEFT_ACC_MAG_average,...,data_MOUSE_VELOCITY_median_isNA,data_MOUSE_VELOCITY_min_isNA,data_MOUSE_VELOCITY_max_isNA,data_MOUSE_VELOCITY_average_isNA,data_MOUSE_VELOCITY_std_isNA,event_id,sub_id,source,img_order,image_path
0,0.536697,-0.187334,0.533283,0.543295,-0.065119,-1.592236,-0.851820,1.324178,-0.651650,-0.850786,...,1,1,1,1,1,pred13,1001,test,0.jpg,1001_test_pred13_0.jpg
1,0.536697,-0.187334,0.533283,0.543295,-0.065119,-1.592236,-0.851820,1.324178,-0.651650,-0.850786,...,1,1,1,1,1,pred13,1001,test,1.jpg,1001_test_pred13_1.jpg
2,0.536697,-0.187334,0.533283,0.543295,-0.065119,-1.592236,-0.851820,1.324178,-0.651650,-0.850786,...,1,1,1,1,1,pred13,1001,test,2.jpg,1001_test_pred13_2.jpg
3,0.536697,-0.187334,0.533283,0.543295,-0.065119,-1.592236,-0.851820,1.324178,-0.651650,-0.850786,...,1,1,1,1,1,pred13,1001,test,3.jpg,1001_test_pred13_3.jpg
4,0.536697,-0.187334,0.533283,0.543295,-0.065119,-1.592236,-0.851820,1.324178,-0.651650,-0.850786,...,1,1,1,1,1,pred13,1001,test,4.jpg,1001_test_pred13_4.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917,-0.169780,-0.090802,-0.327667,-0.159418,-0.127334,0.625804,0.123012,-0.743965,1.099807,-0.077033,...,1,1,1,1,1,pred13,1007,test,0.jpg,1007_test_pred13_0.jpg
918,-0.169780,-0.090802,-0.327667,-0.159418,-0.127334,0.625804,0.123012,-0.743965,1.099807,-0.077033,...,1,1,1,1,1,pred13,1007,test,1.jpg,1007_test_pred13_1.jpg
919,-0.169780,-0.090802,-0.327667,-0.159418,-0.127334,0.625804,0.123012,-0.743965,1.099807,-0.077033,...,1,1,1,1,1,pred13,1007,test,2.jpg,1007_test_pred13_2.jpg
920,-0.169780,-0.090802,-0.327667,-0.159418,-0.127334,0.625804,0.123012,-0.743965,1.099807,-0.077033,...,1,1,1,1,1,pred13,1007,test,3.jpg,1007_test_pred13_3.jpg


In [11]:
joblib.dump(paras, 'scaling_paras.joblib')

['scaling_paras.joblib']

In [10]:
joblib.dump(target_df, 'tabular_test.joblib')

['tabular_test.joblib']

In [13]:
joblib.dump(df_train, 'tabular_with_images_train.joblib')

['tabular_with_images_test.joblib']

In [23]:
target_df.drop(columns=['event_id', 'source', 'sub_id'])

Unnamed: 0,data_HR_activity_median,data_HR_activity_min,data_HR_activity_max,data_HR_activity_average,data_HR_activity_std,data_HR_activity_len,data_LEFT_ACC_MAG_median,data_LEFT_ACC_MAG_min,data_LEFT_ACC_MAG_max,data_LEFT_ACC_MAG_average,...,data_MOUSE_TIMEDIFFS_min_isNA,data_MOUSE_TIMEDIFFS_max_isNA,data_MOUSE_TIMEDIFFS_average_isNA,data_MOUSE_TIMEDIFFS_std_isNA,data_MOUSE_VELOCITY_median_isNA,data_MOUSE_VELOCITY_min_isNA,data_MOUSE_VELOCITY_max_isNA,data_MOUSE_VELOCITY_average_isNA,data_MOUSE_VELOCITY_std_isNA,label
0,0.168533,0.320076,0.693515,0.170740,-0.041823,0.625804,-0.919668,0.254847,0.190162,-0.852501,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
0,0.287748,0.775374,-0.058344,0.370003,-0.531763,0.625804,-0.899871,-0.290489,-0.337649,-0.810834,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
0,-1.443579,-1.466624,-1.750027,-1.533298,-0.313644,0.625804,-1.100996,-1.026626,-1.234384,-1.036088,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
0,-1.122963,-0.720971,-1.464418,-1.153714,-0.719503,0.625804,-1.146419,-1.086029,-0.931140,-1.038276,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
0,-0.861720,-0.802992,-1.364205,-0.924028,-0.749108,0.625804,0.242324,0.025694,-0.841651,-0.034196,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19,1.536575,1.150514,1.207945,1.600793,-0.258795,-1.592236,-0.560149,0.344601,0.677396,0.216965,...,1,1,1,1,1,1,1,1,1,20
19,0.938846,1.282707,0.693515,0.899897,-0.590087,0.625804,-0.750199,0.645126,0.846592,0.019635,...,1,1,1,1,1,1,1,1,1,20
19,1.081273,1.150514,1.030555,1.150357,-0.009890,-1.592236,-0.658615,0.386064,1.203071,0.330224,...,1,1,1,1,1,1,1,1,1,20
19,0.666745,0.008113,0.378219,0.507914,0.363736,-1.592236,0.267265,-0.590945,1.001614,-0.029348,...,1,1,1,1,1,1,1,1,1,20


## DEBUG

In [10]:
import joblib
import numpy as np
a = joblib.load('tabular.joblib')
a = a.drop(columns=['event_id', 'source', 'sub_id'])

In [11]:
a

Unnamed: 0,data_HR_activity_median,data_HR_activity_min,data_HR_activity_max,data_HR_activity_average,data_HR_activity_std,data_HR_activity_len,data_LEFT_ACC_MAG_median,data_LEFT_ACC_MAG_min,data_LEFT_ACC_MAG_max,data_LEFT_ACC_MAG_average,...,data_MOUSE_TIMEDIFFS_min_isNA,data_MOUSE_TIMEDIFFS_max_isNA,data_MOUSE_TIMEDIFFS_average_isNA,data_MOUSE_TIMEDIFFS_std_isNA,data_MOUSE_VELOCITY_median_isNA,data_MOUSE_VELOCITY_min_isNA,data_MOUSE_VELOCITY_max_isNA,data_MOUSE_VELOCITY_average_isNA,data_MOUSE_VELOCITY_std_isNA,label
0,0.168533,0.320076,0.693515,0.170740,-0.041823,0.625804,-0.919668,0.254847,0.190162,-0.852501,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
0,0.287748,0.775374,-0.058344,0.370003,-0.531763,0.625804,-0.899871,-0.290489,-0.337649,-0.810834,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
0,-1.443579,-1.466624,-1.750027,-1.533298,-0.313644,0.625804,-1.100996,-1.026626,-1.234384,-1.036088,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
0,-1.122963,-0.720971,-1.464418,-1.153714,-0.719503,0.625804,-1.146419,-1.086029,-0.931140,-1.038276,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
0,-0.861720,-0.802992,-1.364205,-0.924028,-0.749108,0.625804,0.242324,0.025694,-0.841651,-0.034196,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19,1.536575,1.150514,1.207945,1.600793,-0.258795,-1.592236,-0.560149,0.344601,0.677396,0.216965,...,1,1,1,1,1,1,1,1,1,20
19,0.938846,1.282707,0.693515,0.899897,-0.590087,0.625804,-0.750199,0.645126,0.846592,0.019635,...,1,1,1,1,1,1,1,1,1,20
19,1.081273,1.150514,1.030555,1.150357,-0.009890,-1.592236,-0.658615,0.386064,1.203071,0.330224,...,1,1,1,1,1,1,1,1,1,20
19,0.666745,0.008113,0.378219,0.507914,0.363736,-1.592236,0.267265,-0.590945,1.001614,-0.029348,...,1,1,1,1,1,1,1,1,1,20


In [12]:
b = a.iloc[0,:]

In [15]:
data = np.asarray(b)

In [16]:
data

array([ 0.16853327,  0.32007629,  0.69351486,  0.17074007, -0.041823  ,
        0.62580439, -0.91966816,  0.25484668,  0.19016236, -0.85250052,
       -0.39179123,  0.4718437 ,  0.35519898, -1.18645751, -0.70572596,
        0.3401742 , -0.58935173,  0.4718437 , -0.16899115, -0.14083437,
       -0.34969629, -0.19117356, -0.35872936,  0.4718437 , -1.04511642,
        0.18182996, -1.90434149, -1.09201487, -0.59543255,  0.4718437 ,
        0.21187722, -0.56164711, -0.82925952, -0.04345959, -0.46513549,
        0.71278739,  0.55584517, -0.61119853, -1.07321282,  0.46748933,
       -0.28738431,  0.71278739, -0.08131036,  0.86341766, -1.20430222,
       -0.11520852, -0.62679419,  0.71278739, -0.76697276,  1.8438729 ,
        0.93876658, -0.85277049, -0.49910351,  0.71278739, -1.15550832,
        0.28700091, -0.7641885 , -1.1232565 , -0.54744961, -0.99216574,
       -0.14320447, -0.7352177 , -0.9311763 , -0.07111406,  0.79173947,
        0.7297896 , -0.33971331,  0.76335936, -0.49569828, -0.23

In [18]:
list(a.columns)

['data_HR_activity_median',
 'data_HR_activity_min',
 'data_HR_activity_max',
 'data_HR_activity_average',
 'data_HR_activity_std',
 'data_HR_activity_len',
 'data_LEFT_ACC_MAG_median',
 'data_LEFT_ACC_MAG_min',
 'data_LEFT_ACC_MAG_max',
 'data_LEFT_ACC_MAG_average',
 'data_LEFT_ACC_MAG_std',
 'data_LEFT_ACC_MAG_len',
 'data_LEFT_ACC_X_median',
 'data_LEFT_ACC_X_min',
 'data_LEFT_ACC_X_max',
 'data_LEFT_ACC_X_average',
 'data_LEFT_ACC_X_std',
 'data_LEFT_ACC_X_len',
 'data_LEFT_ACC_Y_median',
 'data_LEFT_ACC_Y_min',
 'data_LEFT_ACC_Y_max',
 'data_LEFT_ACC_Y_average',
 'data_LEFT_ACC_Y_std',
 'data_LEFT_ACC_Y_len',
 'data_LEFT_ACC_Z_median',
 'data_LEFT_ACC_Z_min',
 'data_LEFT_ACC_Z_max',
 'data_LEFT_ACC_Z_average',
 'data_LEFT_ACC_Z_std',
 'data_LEFT_ACC_Z_len',
 'data_RIGHT_ACC_MAG_median',
 'data_RIGHT_ACC_MAG_min',
 'data_RIGHT_ACC_MAG_max',
 'data_RIGHT_ACC_MAG_average',
 'data_RIGHT_ACC_MAG_std',
 'data_RIGHT_ACC_MAG_len',
 'data_RIGHT_ACC_X_median',
 'data_RIGHT_ACC_X_min',
 'dat