# Feature Extraction of the 2016-ANSAMO Dataset
**Directory:**
    > `Subject_<nr>_ADL_<activity>.csv`
    > ...

**Types of Executed ADLs:**   
1) normal walking, 2) light jogging, 3) body bending, 4) hopping, 5) climbing stairs (up), 6) climbing stairs (down), 7) lying down and getting up from a bed, 8) sitting down (and up) on (from) a chair.

**Columns Units:**  
    After the header, every line in the files corresponds to a measurement captured by a particular mobility sensor of a determined node (mote or SensorTag).  
    The format of the lines, which is also explained in the file header, includes 7 numerical values separated by a semicolon:  
        -The time (in ms) since the experiment began.  
        -The number of the sample (for the same sensor and node).  
        -The three real numbers describing the measurements of the triaxial sensor (x-axis, y-axis and z-axis). The units are g, °/s or μT depending on whether the measurement was performed by an accelerometer, a gyroscope or a magnetometer, respectively.  
        -An integer (0, 1 or 2) describing the type of the sensor that originated the measurement (Accelerometer = 0 , Gyroscope = 1, Magnetometer = 2)  
        -An integer (from 0 to 4) informing about the sensing node (the correspondence between this numerical code and the Bluetooth MAC address and position of the motes is described in the file header).

In [1]:
## Readme File ##
from IPython.display import IFrame
IFrame("./2016-ANSAMO-Readme.pdf", width=800, height=800)

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [3]:
###
### Read CSV File
###
all_values = pd.read_csv("all_values.csv");
all_values.dtypes

Unnamed: 0      int64
accX          float64
accY          float64
accZ          float64
experiment     object
filename       object
gyrX          float64
gyrY          float64
gyrZ          float64
label          object
magX          float64
magY          float64
magZ          float64
position       object
ts(ms)          int64
userAge         int64
userGender     object
userID         object
dtype: object

In [4]:
###
###
###
print("nr of instances:", len(all_values))
list_of_users = list(set(all_values['userID']))
print("nr of users: ", len(list_of_users))
ageDistribution = list(set(all_values['userAge']))
print("age distribution", ageDistribution)
labels = list(set(all_values['label']))
print("labels:", labels)

nr of instances: 2843028
nr of users:  17
age distribution [14, 19, 51, 55, 22, 23, 24, 21, 26, 27, 28]
labels: ['lateralFall', 'Hopping', 'Sitting', 'GoDownstairs', 'GoUpstairs', 'LyingDown', 'forwardFall', 'Jogging', 'backwardFall', 'Walking', 'Bending']


In [5]:
list(set(all_values['userID']))

['Subject 09',
 'Subject 03',
 'Subject 01',
 'Subject 06',
 'Subject 07',
 'Subject 10',
 'Subject 11',
 'Subject 12',
 'Subject 14',
 'Subject 16',
 'Subject 08',
 'Subject 13',
 'Subject 04',
 'Subject 15',
 'Subject 02',
 'Subject 05',
 'Subject 17']

In [54]:
###
### Each User With A Separate File
###
users = list(set(all_values['userID']));

for line in users:
    subject_index = all_values[all_values['userID'].isin([line])]
    print(line, " " , len(subject_index))
    subject_index.to_csv("2016-ANSAMO-valid-dataset\\" + line + ".csv");

Subject 09   221925
Subject 03   227716
Subject 01   236210
Subject 06   61969
Subject 07   93653
Subject 10   90067
Subject 11   88683
Subject 12   167738
Subject 14   67276
Subject 16   555730
Subject 08   85714
Subject 13   117225
Subject 04   235954
Subject 15   128784
Subject 02   183943
Subject 05   129682
Subject 17   150759


In [116]:
from math import fabs
def sliding_window(values, time_interval_ms, time_interval_unc):
    ''' ( values, time_interval_ms, time_interval_unc)
    '''
    old_ts = 0;
    old_acc = [-1,-1,-1];
    old_gyr = [-1,-1,-1];
    old_mag = [-1,-1,-1];
    header = [ 'Its(ms)', 'Fts(ms)',
                  'accX', 'accY', 'accZ',
                  'magX', 'magY', 'magZ',
                  'gyrX', 'gyrY', 'gyrZ',
                  'userGender', 'userAge', 'userID',
                  'position', 
                  'label',
                  'filename',
                  'experiment' ];
    final_values = pd.DataFrame(columns=header);
    values = values.sort_values(['ts(ms)']);

    for index, line in values.iterrows():
        ts = line['ts(ms)']
        acc = [ line['accX'], line['accY'],  line['accZ']]
        gyr = [ line['gyrX'], line['gyrY'],  line['gyrZ']]
        mag = [ line['magX'], line['magY'],  line['magZ']]
        if acc[0] != -1:
            old_acc = acc;
        if gyr[0] != -1:
            old_gyr = gyr;
        if mag[0] != -1:
            old_mag = mag;    
        diff = ts - old_ts;
        
        if diff < 0:
            raise ValueError('The List Is Not Sorted By Time!!!') 

        if diff == time_interval_ms or fabs(diff - time_interval_ms) <= time_interval_unc:
            final_values = final_values.append(pd.DataFrame([{
                 'Its(ms)': old_ts, 'Fts(ms)': ts,
                  'accX': old_acc[0], 'accY': old_acc[1], 'accZ': old_acc[2],
                  'magX': old_mag[0], 'magY': old_mag[1], 'magZ': old_mag[2],
                  'gyrX': old_gyr[0], 'gyrY': old_gyr[1], 'gyrZ': old_gyr[2],
                  'userGender': line['userGender'], 'userAge': line['userAge'], 'userID': line['userID'],
                  'position': line['position'], 
                  'label': line['label'],
                  'filename': line['filename'],
                  'experiment': line['experiment']}]));
            old_ts = ts;
        elif diff > time_interval_ms:
            old_ts = ts;
    return final_values;

In [112]:
from math import fabs
def sliding_window_v2(values, time_interval_ms, time_interval_unc):
    ''' ( values, time_interval_ms, time_interval_unc)
    '''
    old_ts = 0;
    old_acc = [-1,-1,-1];
    old_gyr = [-1,-1,-1];
    old_mag = [-1,-1,-1];
    header = [ 'Its(ms)', 'Fts(ms)',
                  'accX', 'accY', 'accZ',
                  'magX', 'magY', 'magZ',
                  'gyrX', 'gyrY', 'gyrZ',
                  'userGender', 'userAge', 'userID',
                  'position', 
                  'label',
                  'filename',
                  'experiment' ];
    final_values = pd.DataFrame(columns=header);    
    for index, line in values.iterrows():
        ts = line['ts(ms)']
        acc = [ line['accX'], line['accY'],  line['accZ']]
        gyr = [ line['gyrX'], line['gyrY'],  line['gyrZ']]
        mag = [ line['magX'], line['magY'],  line['magZ']]
        if acc[0] != -1:
            old_acc = acc;
        if gyr[0] != -1:
            old_gyr = gyr;
        if mag[0] != -1:
            old_mag = mag;    
        diff = ts - old_ts;

        if diff == time_interval_ms or fabs(diff - time_interval_ms) <= time_interval_unc:
            final_values = final_values.append(pd.DataFrame([{
                 'Its(ms)': old_ts, 'Fts(ms)': ts,
                  'accX': old_acc[0], 'accY': old_acc[1], 'accZ': old_acc[2],
                  'magX': old_mag[0], 'magY': old_mag[1], 'magZ': old_mag[2],
                  'gyrX': old_gyr[0], 'gyrY': old_gyr[1], 'gyrZ': old_gyr[2],
                  'userGender': line['userGender'], 'userAge': line['userAge'], 'userID': line['userID'],
                  'position': line['position'], 
                  'label': line['label'],
                  'filename': line['filename'],
                  'experiment': line['experiment']}]));
            old_ts = ts;
        elif diff > time_interval_ms:
            old_ts = ts;
            
    return final_values;

In [122]:
subject_01_data = all_values[all_values['userID'].isin(['Subject 01'])]
subject_01_data_bending_1 = subject_01_data[subject_01_data['experiment'].isin(['Bending_1'])]
subject_01_data_bending_1_waist = subject_01_data_bending_1[subject_01_data_bending_1['position'].isin(['ankle'])]
#subject_01_data_bending_1_waist_acc = subject_01_data_bending_1[~subject_01_data_bending_1['accX'].isin([-1])]
#subject_01_data_bending_1_waist_gyr = subject_01_data_bending_1[~subject_01_data_bending_1['gyrX'].isin([-1])]
#subject_01_data_bending_1_waist_gyr = subject_01_data_bending_1_waist_gyr.sort_values['ts(ms)']

i = 0
values = sliding_window(subject_01_data_bending_1_waist, 50, 5); 
for index, line in values.iterrows():
    if line['accX'] != -1 and line['gyrX'] != -1 and line['magX'] != -1:
        i+=1

#print(subject_01_data_bending_1_waist['accX'].describe())
#delta_ts(subject_01_data_bending_1_waist_acc).describe()
#delta_ts(subject_01_data_bending_1_waist_gyr).describe()
#print(subject_01_data_bending_1_waist['gyrX'].describe())
#print(subject_01_data_bending_1_waist['magX'].describe())
print(len(subject_01_data_bending_1_waist))
print(len(values))
print("i:", i)

868
232
i: 232


In [123]:
time_interval_ms = 50;
time_interval_unc = 10;
final_array = pd.DataFrame()
# users
for line in users:
    subject = all_values[all_values['userID'].isin([line])]
    print(line, ", ", len(subject))
    #experiments
    experiments = list(set(subject['experiment']))
    for lineExp in experiments:
        subject_exp = subject[subject['experiment'].isin([lineExp])]        
        #positions
        positions = list(set(subject_exp['position']))
        for linePos in positions:
            subject_exp_pos = subject_exp[subject_exp['position'].isin([linePos])]
            final_array = final_array.append(sliding_window(subject_exp_pos, time_interval_ms, time_interval_unc));      
    print(len(final_array));
    
final_array.to_csv("all_users_50ms.csv");

Subject 09 ,  221925
46072
Subject 03 ,  227716
90721
Subject 01 ,  236210
136412
Subject 06 ,  61969
149240
Subject 07 ,  93653


KeyboardInterrupt: 

In [80]:
#print(final_array.dtypes)
waist = final_array[final_array['position'].isin(['waist'])]
#waist = waist.sort_values(['Its(ms)'])
i = 0
for index, line in waist.iterrows():
    if line['accX'] != -1 and line['gyrX'] != -1 and line['magX'] != -1:
        i+=1;
print(len(waist))
print("i:", i)

24177
i: 7


In [43]:
subject_01_data = all_values[all_values['userID'].isin(['Subject 01'])]
### Sort In Experiments
list(set(subject_01_data['experiment']))

subject_01_data_bending_1 = subject_01_data[subject_01_data['experiment'].isin(['Bending_1'])]
len(subject_01_data_bending_1)

position = list(set(subject_01_data_bending_1['position']))
subject_01_data_bending_1_waist = subject_01_data_bending_1[subject_01_data_bending_1['position'].isin(['waist'])]
len(subject_01_data_bending_1_waist)

867

In [44]:
subject_01_data_bending_1_waist = subject_01_data_bending_1_waist.sort_values(['ts(ms)']);
subject_01_data_bending_1_waist.head(5)

Unnamed: 0.1,Unnamed: 0,accX,accY,accZ,experiment,filename,gyrX,gyrY,gyrZ,label,magX,magY,magZ,position,ts(ms),userAge,userGender,userID
4664,0,0.887695,-0.050781,0.464111,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,-1.0,-1.0,-1.0,Bending,-1.0,-1.0,-1.0,waist,158,22,female,Subject 01
4959,0,-1.0,-1.0,-1.0,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,12.945312,2.28125,4.65625,Bending,-1.0,-1.0,-1.0,waist,159,22,female,Subject 01
5240,0,-1.0,-1.0,-1.0,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,-1.0,-1.0,-1.0,Bending,49.333332,-138.166672,-43.666668,waist,161,22,female,Subject 01
4669,0,0.906738,-0.02002,0.425049,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,-1.0,-1.0,-1.0,Bending,-1.0,-1.0,-1.0,waist,177,22,female,Subject 01
4964,0,-1.0,-1.0,-1.0,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,16.742188,-43.296875,13.25,Bending,-1.0,-1.0,-1.0,waist,178,22,female,Subject 01


In [46]:
subject_01_bending1_waist_final_values.head(5)

Unnamed: 0,Fts(ms),Its(ms),accX,accY,accZ,experiment,filename,gyrX,gyrY,gyrZ,label,magX,magY,magZ,position,userAge,userGender,userID
0,209,158,0.906738,-0.02002,0.425049,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,16.742188,-43.296875,13.25,Bending,51.333332,-136.833328,-43.333332,waist,22,female,Subject 01
0,323,273,0.897461,-0.047363,0.345947,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,16.382812,20.695312,44.039062,Bending,50.166668,-136.166672,-44.5,waist,22,female,Subject 01
0,517,468,0.926514,0.455811,0.347656,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,3.132812,23.703125,-10.3125,Bending,47.333332,-139.333328,-47.5,waist,22,female,Subject 01
0,567,517,1.031982,0.143799,0.399902,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,78.929688,-0.429688,-17.15625,Bending,49.333332,-139.333328,-47.5,waist,22,female,Subject 01
0,713,664,0.983887,0.144775,0.468262,Bending_1,UMAFall_Subject_01_ADL_Bending_1_2016-06-13_20...,36.3125,-42.21875,18.367188,Bending,50.166668,-138.166672,-49.833332,waist,22,female,Subject 01


In [47]:
len(subject_01_bending1_waist_final_values)

269

In [18]:
acc = [-1,-1,-1];
acc[1] =0;
from math import fabs
fabs(10.5 - 10)


0.5

In [104]:
###
### Analysis Only Of Ts Column
###
def delta_ts(array):
    old_ts = 0;
    array = array.sort_values(['ts(ms)'])
    deltas_ts = pd.DataFrame({'delta_ts':[]})
    for index, line in array.iterrows():
        ts = line['ts(ms)']
        if old_ts > ts:
            print("fuck current ts:", ts, " and old ts:", old_ts)
            break
        delta_ts = ts - old_ts;
        deltas_ts = deltas_ts.append(pd.DataFrame({'delta_ts':[delta_ts]}))
        old_ts = ts;
    return deltas_ts;

In [3]:
subject_01_data = all_values[all_values['userID'].isin(['Subject 01'])]
### Sort In Experiments
list(set(subject_01_data['experiment']))
### Sort In Position

print(len(subject_01_data))
###
### Analysis Only Of Ts Column
###
old_ts = 0;
subject_01_data = subject_01_data.sort_values(['ts(ms)'])
deltas_ts = pd.DataFrame({'delta_ts':[]})
for index, line in subject_01_data.iterrows():
    ts = line['ts(ms)']
    if old_ts > ts:
        print("fuck current ts:", ts, " and old ts:", old_ts)
        break
    delta_ts = ts - old_ts;
    deltas_ts = deltas_ts.append(pd.DataFrame({'delta_ts':[delta_ts]}))
    old_ts = ts;

deltas_ts.describe()

236210


Unnamed: 0,delta_ts
count,236210.0
mean,0.063532
std,0.280685
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,67.0
