# Convolutional neural network in the human activities recognition

- using raw data from motion sensors in smartphone (Smartphone-Based Recognition of Human Activities and Postural Transitions Data Set - UCI)

- an experiment carried out on 30 people (19-49 years)

- six activities recorded: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING, plus postural transitions

- capturing 3-axial linear acceleration and 3-axial angular velocity at a constant rate of 50Hz

- 128 readings per window for one signal sample (50% overlap)

- dataset including raw 3-axial data from sensors and labels for each activity:

1. WALKING
2. WALKING_UPSTAIRS
3. WALKING_DOWNSTAIRS
4. SITTING
5. STANDING
6. LAYING

# Uploading the data from database files
- the signals of each experiment are saved in a separate file; accelerometer and gyroscope signals are also stored in separate files

In [65]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
%matplotlib inline

In [46]:
# Build a dictionary holding one raw DataFrame per experiment
raw_dic = {}
# Loop over the 61 experiments.
# NOTE(review): `file_names` is not defined in this cell; the indexing below assumes
# entries 0-60 are accelerometer files and entries 61-121 are the matching gyroscope
# files in the same order — confirm against the cell that builds `file_names`.
for n in range(61):
    
    # Key 'expXX_userYY' (XX: experiment number, YY: user ID), cut from positions 12-23
    # of the accelerometer file name entry
    key = file_names[n][12:24]
    
    # Space-separated files without a header row: three accelerometer axes and three gyroscope axes
    raw_acc = pd.read_csv(file_names[n], sep = ' ', header = None, names = ['total_acc_x','total_acc_y','total_acc_z'])
    raw_gyr = pd.read_csv(file_names[n+61], sep = ' ', header = None, names = ['gyro_x','gyro_y','gyro_z'])
    
    # Store both signal sets side by side (six columns) under the experiment key
    raw_dic[key] = pd.concat([raw_acc,raw_gyr],axis = 1)

In [47]:
# Preview the first three rows of the raw DataFrame for experiment 01 / user 01
raw_dic['exp01_user01'].head(3)

Unnamed: 0,total_acc_x,total_acc_y,total_acc_z,gyro_x,gyro_y,gyro_z
0,0.918056,-0.1125,0.509722,-0.054978,-0.069639,-0.030849
1,0.911111,-0.093056,0.5375,-0.012523,0.019242,-0.038485
2,0.881944,-0.086111,0.513889,-0.023518,0.276417,0.006414


In [25]:
# Load the activity labels: a space-separated file without a header row,
# so the five column names are supplied explicitly
labels = pd.read_csv(
    'RawData/labels.txt',
    names=['Exp_ID', 'Us_ID', 'Act_ID', 'Start_Point', 'End_Point'],
    header=None,
    sep=' ',
)

In [26]:
# Summary of the labels DataFrame: number of rows, column dtypes, non-null counts and memory usage
labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1214 entries, 0 to 1213
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Exp_ID       1214 non-null   int64
 1   Us_ID        1214 non-null   int64
 2   Act_ID       1214 non-null   int64
 3   Start_Point  1214 non-null   int64
 4   End_Point    1214 non-null   int64
dtypes: int64(5)
memory usage: 47.5 KB


In [27]:
# Preview the first three label rows
labels.head(3)

Unnamed: 0,Exp_ID,Us_ID,Act_ID,Start_Point,End_Point
0,1,1,5,250,1232
1,1,1,7,1233,1392
2,1,1,4,1393,2194


# Signal filtering
- noise reduction
- separating the body component (i.e. without the gravity contribution) from the total accelerometer signals

In [28]:
# Import functions from scipy.signal
from scipy.signal import medfilt, butter, filtfilt

In [31]:
# Noise reduction with a median filter
def median_filter(raw_signal):
    """Smooth a signal with SciPy's median filter.

    raw_signal: array-like of samples; it is converted to a NumPy array first.
    Returns the filtered signal as a NumPy array. No kernel size is passed,
    so SciPy's default kernel size applies.
    """
    return medfilt(np.array(raw_signal))

In [34]:
# Second noise-reduction step: 3rd-order low-pass Butterworth filter with a 20 Hz cut-off
fs = 50                  # sampling frequency [Hz]
cutoff = 20              # cut-off frequency of the filter [Hz]
W = cutoff / (fs / 2)    # cut-off normalised by the Nyquist frequency (fs / 2)


def butterworth3rd(m_singal):
    """Low-pass filter a signal with a 3rd-order Butterworth filter.

    m_singal: array-like signal (in this notebook, the median-filtered signal).
    Returns the filtered signal. The filter is applied with filtfilt, i.e.
    forwards and backwards, using the module-level normalised cut-off W.
    """
    numerator, denominator = butter(3, W, btype='low')
    return filtfilt(numerator, denominator, m_singal)

In [66]:
# Import functions for FFT and iFFT
from scipy.fftpack import fft, fftfreq, ifft
import math

# Split a signal into its body and gravity components with an FFT-based frequency split
def butterworth(signal, cutoff_hz=0.3, sampling_freq=None):
    """Separate a signal into body (fast) and gravity (slow) components.

    Despite its name, this function does not design a Butterworth filter: it
    transforms the signal with the FFT, zeroes the unwanted frequency bins and
    transforms back.

    Parameters
    ----------
    signal : array-like
        Time-domain samples.
    cutoff_hz : float, optional
        Frequencies with |f| <= cutoff_hz go to the gravity component, all
        others to the body component. Defaults to 0.3 Hz.
    sampling_freq : float, optional
        Sampling frequency in Hz. When omitted, the module-level `fs` is used.

    Returns
    -------
    (t_body_signal, t_grav_signal) : tuple of numpy arrays
        Real parts of the inverse transforms, each as long as the input.
    """
    if sampling_freq is None:
        # Fall back to the notebook-wide sampling frequency
        sampling_freq = fs

    arr_signal = np.array(signal)
    # Time domain -> frequency domain
    f_signal = fft(arr_signal)
    # Frequency (in Hz) of every FFT bin; negative frequencies are included
    freq = fftfreq(len(arr_signal), d=1 / sampling_freq)
    # True for the slow bins that make up the gravity component
    is_gravity_bin = abs(freq) <= cutoff_hz

    # Each component starts as a full copy of the spectrum and loses the other's bins
    f_grav_signal = f_signal.copy()
    f_body_signal = f_signal.copy()
    f_grav_signal[~is_gravity_bin] = 0
    f_body_signal[is_gravity_bin] = 0

    # Back to the time domain; only the real part is kept
    t_grav_signal = ifft(f_grav_signal).real
    t_body_signal = ifft(f_body_signal).real
    return t_body_signal, t_grav_signal

In [54]:
# Run the filter chain on every accelerometer and gyroscope axis of one experiment
def signal_processing(data):
    """Filter the six raw sensor columns and return nine processed columns.

    data: DataFrame whose first three columns are accelerometer axes and whose
    next three columns are gyroscope axes; the last character of each column
    name is used as the axis suffix of the output columns.

    Returns a DataFrame with the columns total_acc_*, body_acc_* and gyro_*
    (in that order). Every column is median-filtered and then low-pass
    filtered; the body_acc_* columns additionally have the gravity component
    removed.
    """
    processed = {}

    # Accelerometer axes: keep the denoised total signal and its body component
    for acc_column in data.columns[0:3]:
        axis = acc_column[-1]
        denoised = butterworth3rd(median_filter(data[acc_column]))
        body_part, _ = butterworth(denoised)
        processed['total_acc_' + axis] = denoised
        processed['body_acc_' + axis] = body_part

    # Gyroscope axes: same denoising, but no body/gravity split
    for gyro_column in data.columns[3:6]:
        axis = gyro_column[-1]
        processed['gyro_' + axis] = butterworth3rd(median_filter(data[gyro_column]))

    # Fixed output column order
    ordered_columns = [
        'total_acc_x', 'total_acc_y', 'total_acc_z',
        'body_acc_x', 'body_acc_y', 'body_acc_z',
        'gyro_x', 'gyro_y', 'gyro_z',
    ]
    return pd.DataFrame(processed)[ordered_columns]

In [55]:
# Dictionary of filtered-signal DataFrames, one per experiment, with the same keys as raw_dic.
# Built in a single pass; no empty dictionary needs to be created first.
filtered_dic = {key: signal_processing(raw_frame) for key, raw_frame in raw_dic.items()}

In [64]:
# Preview the first three rows of the filtered DataFrame for experiment 01 / user 01
filtered_dic['exp01_user01'].head(3)

Unnamed: 0,total_acc_x,total_acc_y,total_acc_z,body_acc_x,body_acc_y,body_acc_z,gyro_x,gyro_y,gyro_z
0,0.911095,-0.093009,0.509775,0.479343,-0.268985,-0.211078,-0.012521,0.000269,-0.030992
1,0.909651,-0.093543,0.513887,0.466571,-0.26282,-0.201204,-0.018848,0.030431,-0.029185
2,0.884484,-0.08523,0.513767,0.430114,-0.247785,-0.195519,-0.031853,0.257246,-0.001601


This yields 9 features for each experiment file: the total and body accelerometer signals and the gyroscope signals (three axes each).

# Using fixed-width sliding windows on signals
- windowing process
- each sample: 128 time steps (2.56 s) with 50% overlap
- analyzing only first six activities without postural transitions

In [67]:
# Mapping from activity index (1-6) to activity name
act_numbers = dict(enumerate(
    (
        'WALKING',
        'WALKING_UPSTAIRS',
        'WALKING_DOWNSTAIRS',
        'SITTING',
        'STANDING',
        'LAYING',
    ),
    start=1,
))

In [68]:
# Windowing signals with a span of 2.56 s (2.56 s × 50 Hz = 128 samples) and an overlap of 50 %
def signal_windowing(data_dic, labels, window_size=128, step=64, max_act_id=6):
    """Cut every labelled activity segment into fixed-width, overlapping windows.

    Parameters
    ----------
    data_dic : dict
        Maps 'expAA_userBB' keys to the signal data of that experiment; each
        value is sliced by row position.
    labels : pandas.DataFrame
        Must have an 'Act_ID' column. The first five columns are read by
        position as experiment id, user id, activity id, start point and end
        point.
    window_size : int, optional
        Number of rows per window (default 128).
    step : int, optional
        Distance between the starts of consecutive windows (default 64, i.e.
        50 % overlap for the default window size).
    max_act_id : int, optional
        Highest activity id to keep (default 6, which excludes the postural
        transitions).

    Returns
    -------
    dict
        Maps 'expAA_userBB_actC_D' to one window, where D is a counter that
        runs across all windows of all segments, starting at 0.

    Raises
    ------
    ValueError
        If window_size or step is smaller than 1.
    KeyError
        If a segment that yields at least one window refers to an experiment
        that is missing from data_dic.
    """
    if window_size < 1 or step < 1:
        raise ValueError('window_size and step must be positive integers')

    win_dic = {}
    # Keep only the basic activities; rows become plain arrays read by position
    kept_rows = np.array(labels[labels['Act_ID'] <= max_act_id])
    win_id = 0

    for row in kept_rows:
        exp_id, user_id, act_id, start, end = (int(value) for value in row[:5])
        # Ids below 10 are zero-padded to two digits to match the data_dic keys
        key = 'exp' + str(exp_id).zfill(2) + '_user' + str(user_id).zfill(2)

        # NOTE(review): End_Point looks inclusive (consecutive label rows run
        # 1232 -> 1233), so a window ending exactly on End_Point is never
        # produced by this bound. Left unchanged so window ids stay stable —
        # confirm the intended behaviour.
        for start_point in range(start, end - (window_size - 1), step):
            key_win = key + '_act' + str(act_id) + '_' + str(win_id)
            win_dic[key_win] = data_dic[key][start_point:start_point + window_size]
            win_id += 1

    return win_dic

In [69]:
# Cut the filtered signals of every experiment into windows, using the labelled activity segments
win_dic = signal_windowing(filtered_dic,labels)

In [80]:
# Shape (rows, columns) of one window; the trailing number in the key is the window id
win_dic['exp01_user01_act1_100'].shape

(128, 9)

Each window has 9 features. 