# BioBot_FDS_YB_07_DATA_PREP_4_LSTM
## Deliverable_07: Preparing a balanced dataset with moving windows for LSTM RNN Models
Author/code developer: Yan Bello. 01/12/2018. As part of the Master in Artificial Intelligence (UNIR). 
This file/code is part of the development and exploration/experimentation on a Fall Detection System (FDS). 

---


In the following sections, we used this dataset: 
SisFall: A Fall and Movement Dataset. 
Created by: A. Sucerquia, J.D. López, J.F. Vargas-Bonilla
SISTEMIC, Faculty of Engineering, Universidad de Antioquia (UDEA).
Detailed information about this dataset can be found in this website: http://sistemic.udea.edu.co/en/investigacion/proyectos/english-falls/.
Reference paper: Sucerquia A, López JD, Vargas-Bonilla JF. SisFall: A Fall and Movement Dataset. Sensors (Basel). 2017;17(1):198. Published 2017 Jan 20. doi:10.3390/s17010198

---



In [0]:
# Preliminary step 0. Select the working folders and make sure the SisFall dataset files are reachable.
# Two configurations are supported: a Google Drive mounted in Colab (active) or a local drive (commented out).
# Select the appropriate configuration for your environment by commenting/un-commenting the following lines:

# To work with Google Colab and Google Drive:
from google.colab import drive

# The data paths are built from the mount point so that they stay valid
# regardless of the notebook's current working directory.
GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)
FILE_DIRECTORY = GDRIVE_MOUNT_POINT + "/My Drive/Colab Notebooks/"
SisFall_ALL_DIRECTORY = FILE_DIRECTORY + "SisFall_dataset_ALL/"

# To work with a local drive, uncomment these lines:
# import os
# FILE_DIRECTORY =  os.getcwd() + "\\"
# SisFall_ALL_DIRECTORY =  FILE_DIRECTORY + "SisFall_dataset_ALL\\"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## 7.1 Read the file names

In [0]:
import os
from os import walk

import numpy as np
import pandas as pd

# Collect the name of every file found under the dataset directory
# (os.walk also descends into sub-folders).
file_names = []
dir_names = []  # never filled in this cell; kept in case other cells reference it

for (dirpath, dirnames, filenames) in walk(SisFall_ALL_DIRECTORY):
    file_names.extend(filenames)

# One row per trial file. The metadata columns are sliced straight out of the
# file name (e.g. "F03_SA23_R04.txt" -> Act_Type "F03", Age_Cat "SA").
# Vectorised .str slicing replaces the row-wise apply() and also works when
# the directory listing is empty.
df_Files_Trials = pd.DataFrame({"File": file_names})
df_Files_Trials = df_Files_Trials.assign(
    Act_Type=df_Files_Trials["File"].str[0:3],  # characters 0-2 of the file name
    Age_Cat=df_Files_Trials["File"].str[4:6],   # characters 4-5 of the file name
    S1_Max_N_XYZ=0,  # placeholder column 'S1_Max_N_XYZ', initialised to 0
    S1_Max_N_HOR=0,  # placeholder column 'S1_Max_N_HOR', initialised to 0
    S3_Max_N_XYZ=0,  # placeholder column 'S3_Max_N_XYZ', initialised to 0
    S3_Max_N_HOR=0,  # placeholder column 'S3_Max_N_HOR', initialised to 0
)

# Remove the "desktop.ini" entry if present; the remaining rows keep their
# original index labels.
df_Files_Trials = df_Files_Trials.drop(
    index=df_Files_Trials.index[df_Files_Trials["File"] == "desktop.ini"]
)

print(df_Files_Trials.head())
print(df_Files_Trials.tail())


               File Act_Type Age_Cat  S1_Max_N_XYZ  S1_Max_N_HOR  \
0  F03_SA23_R04.txt      F03      SA             0             0   
1  F03_SA23_R01.txt      F03      SA             0             0   
2  F02_SA23_R05.txt      F02      SA             0             0   
3  F02_SA23_R04.txt      F02      SA             0             0   
4  F03_SA23_R02.txt      F03      SA             0             0   

   S3_Max_N_XYZ  S3_Max_N_HOR  
0             0             0  
1             0             0  
2             0             0  
3             0             0  
4             0             0  
                  File Act_Type Age_Cat  S1_Max_N_XYZ  S1_Max_N_HOR  \
4495  D07_SA08_R03.txt      D07      SA             0             0   
4496  D07_SA08_R04.txt      D07      SA             0             0   
4497  D07_SA09_R02.txt      D07      SA             0             0   
4498  D07_SA09_R03.txt      D07      SA             0             0   
4499  D07_SA08_R05.txt      D07      SA     

## 7.2 Balance the samples in the dataset

In [0]:
# Balance the dataset: keep every fall trial and draw an equally sized,
# reproducible (random_state=1) random sample of ADL trials.
separator_line = "____________________________"

# Masks on the activity code: 'D' marks ADL trials, 'F' marks fall trials.
is_adl_trial = df_Files_Trials.Act_Type.str.contains('D')
is_fall_trial = df_Files_Trials.Act_Type.str.contains('F')

all_adl_trials = df_Files_Trials[is_adl_trial]
df_Files_Falls = df_Files_Trials[is_fall_trial]
number_of_FALLs = len(df_Files_Falls)

# Report the class sizes before balancing.
for class_title, class_size in (
    ("Activities of Daily Living (ADL):", len(all_adl_trials)),
    ("FALLS:", number_of_FALLs),
):
    print(separator_line)
    print(class_title)
    print(class_size)

# ADL count before and after down-sampling to the number of falls.
print(len(all_adl_trials))
df_Files_ADL = all_adl_trials.sample(n=number_of_FALLs, random_state=1)
print(len(df_Files_ADL))

print("Contents of df_Files_Trials (Before pd.concat():")
print(len(df_Files_Trials))

# The balanced trial list: sampled ADL rows first, then all fall rows.
df_Files_Trials = pd.concat([df_Files_ADL, df_Files_Falls], axis=0)

print("Contents of df_Files_Trials:")
print(len(df_Files_Trials))
print(df_Files_Trials.head())
df_Files_Trials.tail()


____________________________
Activities of Daily Living (ADL):
1798
____________________________
FALLS:
1798
1798
1798
CONTENIDOS DE df_Files_Trials (ANTES DEL pd.concat():
3596
CONTENIDOS DE df_Files_Trials:
3596
                  File Act_Type Age_Cat  S1_Max_N_XYZ  S1_Max_N_HOR  \
2472  D15_SA17_R03.txt      D15      SA             0             0   
3499  D09_SE11_R02.txt      D09      SE             0             0   
3660  D10_SE05_R05.txt      D10      SE             0             0   
3066  D07_SA23_R04.txt      D07      SA             0             0   
3948  D12_SA11_R01.txt      D12      SA             0             0   

      S3_Max_N_XYZ  S3_Max_N_HOR  
2472             0             0  
3499             0             0  
3660             0             0  
3066             0             0  
3948             0             0  


Unnamed: 0,File,Act_Type,Age_Cat,S1_Max_N_XYZ,S1_Max_N_HOR,S3_Max_N_XYZ,S3_Max_N_HOR
1995,F04_SA23_R01.txt,F04,SA,0,0,0,0
1996,F04_SA23_R02.txt,F04,SA,0,0,0,0
1997,F01_SA01_R02.txt,F01,SA,0,0,0,0
1998,F03_SA23_R05.txt,F03,SA,0,0,0,0
1999,F03_SA23_R03.txt,F03,SA,0,0,0,0


## 7.3 Compute the moving windows for LSTM and save them in a file...

In [0]:
"""In this cell, we process the files to create the moving windows."""

def _scaled_norm(frame, columns, scale):
    """Return the Euclidean norm of the raw `columns` of `frame`, each first multiplied by `scale`."""
    return np.sqrt(sum((scale * frame[name]) ** 2 for name in columns))


def prepare_trial_file(trial_file_name, header_mode = False, output_path = None):
    """Extract the window around the acceleration peak of one trial file and store it.

    The trial file is read as nine raw integer columns (three sensors, X/Y/Z
    each). Raw values are converted with ``(2*Range)/(2**Resolution)``:
    sensors 1 and 3 to acceleration in g, sensor 2 to angular velocity in
    degrees per second. The sample with the largest sensor-1 acceleration norm
    is located and a window of up to 601 samples is cut out, normally starting
    400 samples before the peak and ending 200 samples after it. The window is
    shifted so that it never runs past either end of the recording.

    Args:
        trial_file_name: Path of the trial file. Its base name supplies the
            metadata columns (characters 0-2: activity code, character 0:
            fall/ADL flag, characters 4-5: age category).
        header_mode: When True, overwrite the output file with a single line
            listing the column names instead of appending the window.
        output_path: Destination file. Defaults to
            ``FILE_DIRECTORY + 'DS_ADL_Falls_NEW_181202.txt'``.

    Returns:
        Tuple ``(max_N, max_N_index, min_N, min_N_index)``: the largest and
        smallest sensor-1 acceleration norm in the whole file and the row
        index of the first occurrence of each.

    Raises:
        ValueError: If the trial file contains no measurement rows.
    """
    import os

    if output_path is None:
        output_path = FILE_DIRECTORY + 'DS_ADL_Falls_NEW_181202.txt'

    # Each line ends with ';', which is dropped by treating it as a comment marker.
    df_Mediciones_WIP = pd.read_csv(trial_file_name, header = None, comment=';', sep = ',',
                                    names = ["S1_X", "S1_Y", "S1_Z",
                                             "S2_X", "S2_Y", "S2_Z",
                                             "S3_X", "S3_Y", "S3_Z"],
                                    skiprows= 0)
    if df_Mediciones_WIP.empty:
        raise ValueError("Trial file contains no measurements: " + str(trial_file_name))

    # Conversion factors, [(2*Range)/(2^Resolution)], one per sensor.
    Sensor1_Resolution = 13
    Sensor1_Range = 16
    g_S1 = (2*Sensor1_Range/2**Sensor1_Resolution)

    Sensor2_Resolution = 16
    Sensor2_Range = 2000
    av_S2 = (2*Sensor2_Range/2**Sensor2_Resolution)

    Sensor3_Resolution = 14
    Sensor3_Range = 8
    g_S3 = (2*Sensor3_Range/2**Sensor3_Resolution)

    # Sensor-1 acceleration norm over the whole recording; used to find the peak.
    df_Mediciones_WIP = df_Mediciones_WIP.assign(
        S1_N_XYZ=_scaled_norm(df_Mediciones_WIP, ["S1_X", "S1_Y", "S1_Z"], g_S1))

    s1_norm = df_Mediciones_WIP.S1_N_XYZ
    S1_max_N_XYZ = s1_norm.max()
    print("S1_max_N_XYZ = " + str(S1_max_N_XYZ))

    # idxmax/idxmin return the first occurrence, like the original boolean lookup.
    max_N = S1_max_N_XYZ
    max_N_index = s1_norm.idxmax()
    min_N = s1_norm.min()
    min_N_index = s1_norm.idxmin()
    len_df_Mediciones = len(df_Mediciones_WIP)

    # Window layout at 200 measurements per second: 400 samples (2 s) before
    # the peak, the peak itself, and 200 samples (1 s) after it.
    samples_before_peak = 400
    samples_after_peak = 200
    window_length = samples_before_peak + 1 + samples_after_peak

    if max_N_index - samples_before_peak < 0:
        # Peak too close to the start: anchor the window at the first sample.
        index_min_window = 0
        print("LOW WINDOW index:" + str(index_min_window))
    elif max_N_index + samples_after_peak + 1 > len_df_Mediciones:
        # Peak too close to the end: anchor the window at the last sample so
        # that the peak is always included, and never start before row 0.
        index_min_window = max(0, len_df_Mediciones - window_length)
        print("HIGH WINDOW index:" + str(index_min_window))
    else:
        # extract the central window
        index_min_window = max_N_index - samples_before_peak
        print("NORMAL WINDOW index:" + str(index_min_window))

    print("max_N = " + str(max_N))
    print("max_N_index = " + str(max_N_index))
    print("min_N = " + str(min_N))
    print("min_N_index = " + str(min_N_index))

    # Positional slice; .copy() so the columns added below do not touch the full frame.
    df_Mediciones = df_Mediciones_WIP.iloc[index_min_window : index_min_window + window_length].copy()

    # Derived features, added in the column order expected by the output file.
    df_Mediciones["S1_N_HOR"] = _scaled_norm(df_Mediciones, ["S1_Y", "S1_Z"], g_S1)
    df_Mediciones["S1_N_VER"] = _scaled_norm(df_Mediciones, ["S1_X", "S1_Z"], g_S1)
    df_Mediciones["S2_AV_X"] = av_S2 * df_Mediciones["S2_X"]
    df_Mediciones["S2_AV_Y"] = av_S2 * df_Mediciones["S2_Y"]
    df_Mediciones["S2_AV_Z"] = av_S2 * df_Mediciones["S2_Z"]
    df_Mediciones["S3_N_HOR"] = _scaled_norm(df_Mediciones, ["S3_Y", "S3_Z"], g_S3)
    df_Mediciones["S3_N_VER"] = _scaled_norm(df_Mediciones, ["S3_X", "S3_Z"], g_S3)
    df_Mediciones["S3_N_XYZ"] = _scaled_norm(df_Mediciones, ["S3_X", "S3_Y", "S3_Z"], g_S3)

    # Metadata comes from the file's own base name, not from a global loop variable.
    base_file_name = os.path.basename(str(trial_file_name).replace("\\", "/"))
    df_Mediciones['Age_Cat'] = base_file_name[4:6]
    df_Mediciones['Fall_ADL'] = base_file_name[0:1]
    df_Mediciones['Act_Type'] = base_file_name[0:3]
    df_Mediciones['File'] = base_file_name

    print(df_Mediciones.head())

    if header_mode:
        # Create/overwrite the output file with one line holding the column names.
        df_field_list = pd.DataFrame(list(df_Mediciones.columns)).T
        print(df_field_list)
        df_field_list.to_csv(output_path, mode='w', header=False)
    else:
        # Append the window rows; the context manager closes the file even on error.
        with open(output_path, 'a') as outfile:
            df_Mediciones.to_csv(outfile, header=False)

    return max_N, max_N_index, min_N, min_N_index


"""
______________________________________________________________ 
"""

import time
from datetime import timedelta

# Iterations numbered below this value are only logged and skipped.
# NOTE(review): presumably a resume point after an interrupted run (the output
# file is opened in append mode); set to 1 to process every trial - confirm.
FIRST_ITERATION_TO_PROCESS = 10

start_time = time.time()

# to get a specific type of activity use this line with the corresponding activity code
# file_list = df_Files_Trials[df_Files_Trials.Act_Type == "D01"][["File"]]

file_list = df_Files_Trials[["File"]]
total_num_iter = len(file_list)
processed_count = 0  # trials actually processed, used for the time estimate

# The loop variable must stay named `row`: the original prepare_trial_file
# reads it as a global to obtain the file name.
for iter_no, (index, row) in enumerate(file_list.iterrows(), start=1):
    print("_________ ITERATION NO: " + str(iter_no) + "/" + str(total_num_iter) + " (TOTAL)___________________________")
    if iter_no < FIRST_ITERATION_TO_PROCESS:
        print("_________ SKIPPING TRIAL FILE: " + row['File'] + " ___________________________")
        continue

    iter_start_time = time.time()
    my_data_file_name = SisFall_ALL_DIRECTORY + row['File']

    print("_________ PROCESSING TRIAL FILE: " + row['File'] + " ___________________________")
    max_N, max_N_index, min_N, min_N_index = prepare_trial_file(my_data_file_name, False)
    processed_count += 1
    print("________________________________________________________________________________________")
    elapsed_time_secs = time.time() - iter_start_time
    msg = "Iteration took: %s secs (Wall clock time)" % timedelta(seconds=round(elapsed_time_secs))
    print(msg)

    # Average time per processed trial times the trials still to go; this is
    # exactly 0 after the last trial instead of going negative.
    seconds_per_trial = (time.time() - start_time) / processed_count
    remaining_time = round(seconds_per_trial * (total_num_iter - iter_no))
    msg = "REMAINING TIME: %s secs (Wall clock time)" % timedelta(seconds = remaining_time)
    print(msg)

    print("________________________________________________________________________________________")


elapsed_time_secs = time.time() - start_time

msg = "Execution took: %s secs (Wall clock time)" % timedelta(seconds=round(elapsed_time_secs))

print(msg)


_________ ITERATION NO: 1/3596 (TOTAL)___________________________
_________ SKIPPING TRIAL FILE: D09_SA09_R01.txt ___________________________
_________ ITERATION NO: 2/3596 (TOTAL)___________________________
_________ SKIPPING TRIAL FILE: D09_SA22_R01.txt ___________________________
_________ ITERATION NO: 3/3596 (TOTAL)___________________________
_________ SKIPPING TRIAL FILE: D17_SE13_R05.txt ___________________________
_________ ITERATION NO: 4/3596 (TOTAL)___________________________
_________ SKIPPING TRIAL FILE: D16_SA02_R05.txt ___________________________
_________ ITERATION NO: 5/3596 (TOTAL)___________________________
_________ SKIPPING TRIAL FILE: D08_SA09_R01.txt ___________________________
_________ ITERATION NO: 6/3596 (TOTAL)___________________________
_________ SKIPPING TRIAL FILE: D07_SA03_R04.txt ___________________________
_________ ITERATION NO: 7/3596 (TOTAL)___________________________
_________ SKIPPING TRIAL FILE: D06_SA13_R05.txt ___________________________
______