## Real-time activity monitoring simulation

This notebook simulates the real-time monitoring of human activities in the following steps:
- Read in the time series data from the sensor
- Convert the data into the format that can be used by the model
- Load the model
- Classify the activities for the data
- Convert the results back into time series data format
- Display the data and classified activities dynamically
- Summarize the classified activities

In [201]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
import tensorflow as tf
from scipy import stats
from collections import deque

In [202]:
# Define a function to read the datasets I collected
def read_in_iPhone_data(path = './01_rawdata/collection/', file = 'z_walk1.csv', merge_wd = True):
    """Load one iPhone motion recording from a CSV file.

    For every axis the total acceleration is computed as
    ``userAcceleration.<axis> + gravity.<axis>`` and stored in the
    ``x_axis`` / ``y_axis`` / ``z_axis`` columns.

    path: folder where the collected data was saved. It is concatenated
          directly with ``file``, so it must end with a path separator.
    file: name of the file which contains the collected data.
    merge_wd: True (default) returns only the 3 total-acceleration columns
              (for use with the WISDM related model); False returns the 12
              raw attitude / gravity / rotationRate / userAcceleration
              columns (for use with the MS datasets).

    Returns the selected columns as a pandas DataFrame.
    """
    raw = pd.read_csv(path + file)

    # Total acceleration per axis = user acceleration + gravity component.
    for axis in ('x', 'y', 'z'):
        raw[axis + '_axis'] = raw['userAcceleration.' + axis] + raw['gravity.' + axis]

    if not merge_wd:
        # All the raw sensor features, in the order the MS datasets use them.
        raw_features = [
            'attitude.roll', 'attitude.pitch', 'attitude.yaw',
            'gravity.x', 'gravity.y', 'gravity.z',
            'rotationRate.x', 'rotationRate.y', 'rotationRate.z',
            'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z',
        ]
        return raw[raw_features]

    # Only the 3 combined acceleration features are needed.
    return raw[['x_axis', 'y_axis', 'z_axis']]

In [203]:
# Define a function to scale the features; the scaler needs to match the one used for building the models
def robustscale_data(df,cols,scaler=None):
    """Scale the selected columns of ``df`` in place and return ``df``.

    df: DataFrame holding the features to scale (modified in place).
    cols: list of column names to scale.
    scaler: object exposing ``fit_transform(ndarray) -> ndarray``. When
            omitted, a fresh ``RobustScaler()`` is created for this call
            (a default instance in the signature would be shared by every
            call of the function).

    NOTE: the scaler is (re)fitted on the data passed in via
    ``fit_transform``; it does not reuse statistics learned elsewhere.

    Returns the same DataFrame object that was passed in.
    """
    if scaler is None:
        scaler = RobustScaler()
    scale_columns = cols
    df.loc[:, scale_columns] = scaler.fit_transform(df[scale_columns].to_numpy())
    return df

In [204]:
def create_dataset_X(X, time_steps=1, step=1):
    '''
    Cut a time series into fixed-length, possibly overlapping windows.

    X: DataFrame (anything supporting ``.iloc`` and ``.shape``) with one row
       per sample and one column per feature.
    time_steps: The window length (number of samples) of each window.
    step: The moving distance of the window between two consecutive windows.
          (time_steps - step) is the number of samples shared by two
          neighbouring windows.

    Returns an array of shape (n_windows, time_steps, n_features). Every
    window is complete; the last one is included when it ends exactly on the
    final sample. If X is shorter than ``time_steps`` an empty array of shape
    (0, time_steps, n_features) is returned.
    '''
    # "+ 1" so that the window starting at len(X) - time_steps (the last one
    # that still fits entirely inside X) is not dropped.
    starts = range(0, len(X) - time_steps + 1, step)
    windows = [X.iloc[i:(i + time_steps)].values for i in starts]
    if not windows:
        # Keep the window/feature axes so callers can still index .shape.
        return np.empty((0, time_steps) + tuple(X.shape[1:]))
    return np.array(windows)

In [205]:
# Define a function to map the model's predictions to activities
def map_preds(pred):
    """Translate model outputs into activity names.

    pred: array with one row of class scores per classified window.

    Returns a list with, for every row, the name of the activity whose
    score is the largest in that row.
    """
    act_dict = dict(enumerate(
        ['Downstairs', 'Jogging', 'Sitting', 'Standing', 'Upstairs', 'Walking']
    ))
    n_rows = pred.shape[0]
    # The index of the highest score in a row is the class ID of that row.
    return [act_dict[np.argmax(pred[row])] for row in range(n_rows)]

In [206]:
# Define a function to convert the model-format data and the corresponding model predictions back into time series data
def convert_pred_2_ts(X,preds,step=40):
    """Stitch overlapping windows and their predictions back into one series.

    Input: X -> windowed data, shape (n_windows, window_len, 3)
           preds -> model output, one row of class scores per window
           step -> window shift (in samples) that was used to create X
    Output: df -> DataFrame with columns 'x_axis', 'y_axis', 'z_axis' (the
            acceleration samples) and 'act' (predicted class ID, stored as
            float because it shares an array with the samples).

    Every window except the last contributes its first ``step`` samples (the
    part not covered by the following window); the last window is emitted in
    full. Each sample is labelled with the arg-max class of the window it was
    taken from. The window length is read from X and the shift from ``step``
    rather than assuming 200 / 40. An empty X gives an empty DataFrame.
    """
    columns = ['x_axis', 'y_axis', 'z_axis', 'act']
    X = np.asarray(X)
    preds = np.asarray(preds)

    n_windows = X.shape[0]
    if n_windows == 0:
        return pd.DataFrame(np.empty((0, len(columns))), columns=columns)
    if len(preds) < n_windows:
        raise ValueError('preds must contain one prediction per window of X')

    window_len = X.shape[1]
    # A window can never contribute more samples than it holds.
    head = min(step, window_len)

    # Collect the pieces first and concatenate once (avoids re-copying the
    # growing result on every iteration).
    pieces = [window[:head] for window in X[:-1]]
    pieces.append(X[-1])
    samples = np.concatenate(pieces, axis=0)

    # One class ID per window, repeated for every sample that window supplies.
    labels = np.array([np.argmax(p) for p in preds[:n_windows]])
    counts = np.full(n_windows, head)
    counts[-1] = window_len
    acts = np.repeat(labels, counts)

    df = pd.DataFrame(np.concatenate((samples, acts.reshape(-1, 1)), axis=1), columns=columns)
    return df

In [207]:
# Load the model after transfer learning.
# The .h5 path is relative to the directory the notebook is run from.
model = tf.keras.models.load_model('./02_models/1005cb_cnn_md_3lay_epo20_tl.h5')

In [208]:
# Read in the data collected by the iPhone, scale it, and cut it into the
# windowed format that can be used by the model
TIME_STEPS = 200   # samples per window
STEP = 40          # shift between two consecutive windows

df = robustscale_data(read_in_iPhone_data(file='z_walk2.csv'), ['x_axis', 'y_axis', 'z_axis'])

X_add = create_dataset_X(df[df.columns[-3:]], TIME_STEPS, STEP)

# Append a trailing axis of size 1:
# (windows, TIME_STEPS, features) -> (windows, TIME_STEPS, features, 1)
X_add_rs = np.array(X_add).reshape(X_add.shape + (1,))

In [209]:
# Predict the activities for every window in X_add_rs
# (the result is consumed below as one vector of class scores per window)
preds = model.predict(X_add_rs)

In [210]:
# Convert the windowed data and the corresponding predictions back into time series format.
# X_add (not the reshaped X_add_rs) is passed; step is left at its default of 40, the same value as STEP.
df_rt = convert_pred_2_ts(X_add,preds)

In [211]:
# Display the reconstructed time series; the 'act' column holds the predicted activity ID
df_rt

Unnamed: 0,x_axis,y_axis,z_axis,act
0,1.088865,-6.149610,-4.971766,5.0
1,1.083402,-6.231863,-5.124143,5.0
2,1.050211,-6.191359,-5.050452,5.0
3,0.944698,-6.136960,-4.969643,5.0
4,0.874740,-5.971427,-5.293442,5.0
...,...,...,...,...
15035,0.469597,-6.164550,-5.020157,1.0
15036,0.455729,-6.222970,-5.043720,1.0
15037,0.395543,-6.330672,-4.810946,1.0
15038,0.418126,-6.347121,-4.902057,1.0


In [217]:
# Plot the real-time monitoring results in another window.
# NOTE: plot_realtime is defined in a later cell (In [213]); that cell must be run before this one.
plot_realtime(df_rt,visible = 1000,shift=50)

In [213]:
# Define a function that can display time-series data dynamically (Credit to this kaggle post https://www.kaggle.com/c/m5-forecasting-accuracy/discussion/141469)
# The monitoring system will display the time series data and the predicted activities in a separate window

def plot_realtime(df1,visible = 200, shift = 5):
    """Animate a time series as a window that slides over the data.

    Input: df1 -> DataFrame with 4 columns: the first 3 are the acceleration
                  on the x, y, z axes, the 4th is the activity ID. The subplot
                  grid, colors and labels below are all sized for exactly 4
                  columns.
           visible -> number of samples shown in the window at any time
           shift -> number of samples the window advances per redraw

    Nothing is returned; the figure is drawn through the Qt5 backend.
    """
    # IPython magic: only valid when this cell is run by IPython/Jupyter.
    %matplotlib qt5
    plt.ion()

    col = df1.shape[1]
    # initializing deques
    dy = [None for _ in range(col)]   # y-data buffer per column
    ah = [None for _ in range(col)]   # axes handle per column
    l = [None for _ in range(col)]    # line artist per column

    colors = ['rosybrown','slategray','green','navy']
   
    # Fixed-length buffers: extending a full deque drops its oldest entries.
    for i in range(col):
        dy[i] = deque(np.zeros(visible), visible)
         
    dx=deque(np.zeros(visible), visible)
    # Sample indices 0..len(df1) inclusive (len(df1)+1 values), used as x coordinates.
    data_length = np.linspace(0,df1.shape[0],num=df1.shape[0]+1)

    # plotting
    fig=plt.figure(figsize=(8,4))
    
    label_dict = {0:'x',1:'y',2:'z',3:'activity'}   
    
    # One stacked subplot per column, each with a single line that is updated in the loop below.
    for i in range(col):
        ah[i] = fig.add_subplot(4,1,i+1)
        plt.margins(x=0.001)
        if i<3:
            ah[i].set_ylabel("Acceleration", fontsize=12)

        else:
            ah[i].set_ylabel("Activity ID", fontsize=12)

         
        l[i], = ah[i].plot(dx, dy[i], color=colors[i], label=label_dict[i])
        ah[i].legend(loc="upper right", fontsize=10, fancybox=True, framealpha=0.5)
        
        # NOTE(review): the start index is (re)set inside this loop, so it is
        # undefined if df1 has no columns.
        x_data=0
        
    # Slide the window until it would run past the end of the data.
    while x_data+visible <= df1.shape[0]: 
        # Each extend pushes `visible` new values, i.e. replaces the whole buffer.
        dx.extend(data_length[x_data:x_data+visible])
        for i in range(col):
            dy[i].extend(df1.iloc[:,i].iloc[x_data:x_data+visible])
            l[i].set_ydata(dy[i])
            l[i].set_xdata(dx)
            # y-limits follow the visible data with a margin of 2 on each side.
            ah[i].set_ylim(-2+np.min(dy[i]), 2+np.max(dy[i]))
            # x_data+visible <= len(df1) here, so the index is within data_length.
            ah[i].set_xlim(data_length[x_data], data_length[x_data+visible])

        # control speed of moving time-series: advance the window by `shift` samples
        x_data += shift
        
        # Redraw and let the GUI event loop process the update.
        fig.canvas.draw()
        fig.canvas.flush_events()

In [214]:
# Define a function to output the summary of activities
def act_summary(df,t=1,sample_rate=50):
    """Summarize the predicted activity per fixed-length time interval.

    Input: df -> DataFrame with an 'act' column holding the predicted
                 activity ID of every sample (time series format)
           t -> length of each summary interval, in seconds
           sample_rate -> samples per second of the recording (default 50)
    Output: act -> dictionary keyed by '<start>-<end> second'; each value is
                   the name of the most frequent activity in that interval
                   (the smallest ID wins a tie).

    Only complete intervals are summarized; trailing samples that do not fill
    a whole interval are ignored.

    Raises ValueError if the interval is shorter than one sample.
    """
    step = int(round(sample_rate * t))
    if step <= 0:
        raise ValueError('t * sample_rate must be at least one sample')

    act = {}
    act_dict = {0:'Downstairs', 1:'Jogging', 2:'Sitting', 3:'Standing', 4:'Upstairs',
        5:'Walking'}
    labels = df['act'].to_numpy()

    # "+ 1" keeps the final interval when the data ends exactly on its boundary.
    for i in range(0, len(labels) - step + 1, step):
        key = str(i/sample_rate)+'-'+str((i+step)/sample_rate)+' second'
        # Mode of the interval: np.unique returns sorted values, and argmax
        # returns the first maximum, so ties resolve to the smallest ID. This
        # avoids indexing scipy.stats.mode's result, whose shape differs
        # between SciPy versions.
        values, counts = np.unique(labels[i:i+step], return_counts=True)
        act[key] = act_dict[int(values[np.argmax(counts)])]

    return act

In [215]:
# Summarize the classified activities in 10-second intervals and display the result
act = act_summary(df_rt,t=10)
act

{'0.0-10.0 second': 'Walking',
 '10.0-20.0 second': 'Walking',
 '20.0-30.0 second': 'Walking',
 '30.0-40.0 second': 'Walking',
 '40.0-50.0 second': 'Walking',
 '50.0-60.0 second': 'Walking',
 '60.0-70.0 second': 'Walking',
 '70.0-80.0 second': 'Walking',
 '80.0-90.0 second': 'Walking',
 '90.0-100.0 second': 'Walking',
 '100.0-110.0 second': 'Walking',
 '110.0-120.0 second': 'Walking',
 '120.0-130.0 second': 'Walking',
 '130.0-140.0 second': 'Walking',
 '140.0-150.0 second': 'Walking',
 '150.0-160.0 second': 'Walking',
 '160.0-170.0 second': 'Walking',
 '170.0-180.0 second': 'Walking',
 '180.0-190.0 second': 'Walking',
 '190.0-200.0 second': 'Walking',
 '200.0-210.0 second': 'Walking',
 '210.0-220.0 second': 'Walking',
 '220.0-230.0 second': 'Walking',
 '230.0-240.0 second': 'Walking',
 '240.0-250.0 second': 'Walking',
 '250.0-260.0 second': 'Walking',
 '260.0-270.0 second': 'Walking',
 '270.0-280.0 second': 'Walking',
 '280.0-290.0 second': 'Walking',
 '290.0-300.0 second': 'Walking'}

In [229]:
# Show the summary as a single-row table: one column per time interval, one 'Activity' row
pd.DataFrame(act.values(),index=act.keys(),columns=['Activity']).T

Unnamed: 0,0.0-10.0 second,10.0-20.0 second,20.0-30.0 second,30.0-40.0 second,40.0-50.0 second,50.0-60.0 second,60.0-70.0 second,70.0-80.0 second,80.0-90.0 second,90.0-100.0 second,...,200.0-210.0 second,210.0-220.0 second,220.0-230.0 second,230.0-240.0 second,240.0-250.0 second,250.0-260.0 second,260.0-270.0 second,270.0-280.0 second,280.0-290.0 second,290.0-300.0 second
Activity,Walking,Walking,Walking,Walking,Walking,Walking,Walking,Walking,Walking,Walking,...,Walking,Walking,Walking,Walking,Walking,Walking,Walking,Walking,Walking,Walking
