In [71]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
from scipy import stats
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from sklearn.linear_model import LogisticRegression

In [72]:
# Define a function to read the datasets I collected
def read_in_iPhone_data(path = './01_rawdata/collection/', file = 'z_walk1.csv', merge_wd = True):
    """Read one recorded CSV and add combined acceleration columns.

    For each axis, ``<axis>_axis`` is computed as
    ``userAcceleration.<axis> + gravity.<axis>``.

    Parameters
    ----------
    path : str
        Directory holding the CSV. A trailing separator is optional.
    file : str
        CSV file name inside ``path``.
    merge_wd : bool
        If True, return only ``x_axis``, ``y_axis``, ``z_axis`` (the three
        features needed when working with the WISDM datasets). If False,
        return the attitude, gravity, rotationRate and userAcceleration
        columns followed by the three combined columns (for the MS datasets).

    Returns
    -------
    pandas.DataFrame
        The selected columns, in the order described above.

    Raises
    ------
    KeyError
        If the CSV lacks one of the required columns.
    """
    import os  # local import: only needed to join the directory and file name

    # os.path.join works with or without a trailing separator on `path`;
    # plain string concatenation silently built a wrong file name without one.
    df = pd.read_csv(os.path.join(path, file))

    for axis in ('x', 'y', 'z'):
        df[axis + '_axis'] = df['userAcceleration.' + axis] + df['gravity.' + axis]

    if merge_wd:      # only 3 features are needed when working with WISDM datasets
        return df[['x_axis', 'y_axis', 'z_axis']]

    # all the features can be used when working with MS datasets
    col = ['attitude.roll', 'attitude.pitch', 'attitude.yaw',
           'gravity.x', 'gravity.y', 'gravity.z',
           'rotationRate.x', 'rotationRate.y', 'rotationRate.z',
           'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z',
           'x_axis', 'y_axis', 'z_axis']
    return df[col]

In [73]:
# Define a function to scale the features; the scaler needs to match the one used when building the models
def robustscale_data(df,cols,scaler=None):
    """Return a copy of ``df`` with the columns in ``cols`` scaled.

    The scaler is fit on the values passed in (``fit_transform``), so each
    call derives its scaling statistics from ``df`` itself.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to scale. It is not modified.
    cols : list of str
        Names of the columns to scale.
    scaler : object with ``fit_transform``, optional
        Scaler to use. Defaults to a fresh ``RobustScaler()`` per call.
        The previous ``scaler=RobustScaler()`` default was a single instance
        created at definition time and shared (and refit) by every call.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``cols`` replaced by their scaled values.
    """
    if scaler is None:
        scaler = RobustScaler()
    # Work on a copy so the caller's frame (possibly a slice of another frame)
    # is never mutated and re-running a cell stays idempotent.
    scaled = df.copy()
    scaled.loc[:, cols] = scaler.fit_transform(scaled[cols].to_numpy())
    return scaled

In [4]:
# Load the default recording ('z_walk1.csv') with the default merge_wd=True,
# i.e. only the x_axis / y_axis / z_axis columns.
df = read_in_iPhone_data()

In [5]:
# Preview the first rows of the loaded (unscaled) frame.
df.head()

Unnamed: 0,x_axis,y_axis,z_axis
0,0.121612,-0.494126,-0.931396
1,0.125077,-0.517579,-0.988495
2,0.105133,-0.492829,-1.029984
3,0.053482,-0.464905,-0.957855
4,0.02919,-0.445298,-0.990738


In [6]:
# Scale the three axis columns; the scaler is fit on this recording itself.
df = robustscale_data(df,['x_axis', 'y_axis', 'z_axis'])

In [7]:
# Inspect the last rows after scaling.
df.tail()

Unnamed: 0,x_axis,y_axis,z_axis
13976,1.727623,-3.970266,-2.409552
13977,1.720784,-3.962965,-2.390196
13978,1.598563,-3.953,-2.368218
13979,1.608809,-4.01872,-2.419286
13980,1.569607,-4.032458,-2.359924


In [74]:
def create_dataset_X(X, time_steps=1, step=1):
    '''
    Cut a feature frame into fixed-length sliding windows (no labels needed).

    Use this to build unlabeled datasets for prediction/validation.

    Parameters
    ----------
    X : pandas.DataFrame
        Rows are consecutive samples, columns are features.
    time_steps : int
        Number of rows in each window.
    step : int
        Row offset between the starts of consecutive windows.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_windows, time_steps, n_features). When X has fewer
        than time_steps rows, n_windows is 0 but the array stays 3-D.
    '''
    values = X.values
    # "+ 1" so the last full window (the one ending on the final row) is kept;
    # without it an input of exactly `time_steps` rows produced no window.
    starts = range(0, len(values) - time_steps + 1, step)
    windows = [values[i:i + time_steps] for i in starts]
    if not windows:
        # Keep the rank stable so callers can still read .shape[1] and .shape[2].
        return np.empty((0, time_steps) + values.shape[1:], dtype=values.dtype)
    return np.array(windows)

In [9]:
# Window length and stride, both counted in rows of the recording.
TIME_STEPS = 200
STEP = 40

# Cut the last three columns of df into sliding windows.
X_test = create_dataset_X(
    df[df.columns[-3:]],
    TIME_STEPS,
    STEP
)

In [10]:
# Shape check: (n_windows, TIME_STEPS, n_features).
X_test.shape

(345, 200, 3)

In [18]:
# Append a trailing singleton axis, giving the 4-D array passed to the CNN models' predict().
X_test_rs = np.array(X_test).reshape(X_test.shape[0],X_test.shape[1],X_test.shape[2],1)

In [19]:
# Shape check after adding the trailing axis.
X_test_rs.shape

(345, 200, 3, 1)

In [12]:
# Load a saved Keras model from HDF5.
# NOTE(review): the file name suggests an LSTM trained on the MS data — confirm.
model = tf.keras.models.load_model('./02_models/0926ms_lstm_md.h5')

In [80]:
# Load the saved CNN that is later reused as `base_model`.
cnn_model3_ep20 = tf.keras.models.load_model('./02_models/0930cb_cnn_resample200_ep20_3lay_ms2_colab.h5')

In [34]:
# Load another saved model; it is only referenced from commented-out evaluation lines.
# NOTE(review): the file name suggests an LSTM trained for 20 epochs — confirm.
model_ep20 = tf.keras.models.load_model('./02_models/0930cb_lstm_resample200_ep20_colab.h5')

In [13]:
# Predict on the 3-D windows (no trailing channel axis) with the model loaded as `model`.
pred = model.predict(X_test)

In [45]:
# One row per window, one column per class handled by map_preds.
pred.shape

(345, 6)

In [78]:
def map_preds(pred):
    """Translate per-window class scores into activity names.

    Each row ``pred[k]`` is reduced to the index of its largest score, and
    that index is looked up in the activity table below.

    Parameters
    ----------
    pred : array-like with ``.shape``
        One row of class scores per window.

    Returns
    -------
    list of str
        One activity name per row of ``pred``.
    """
    names = ['Downstairs', 'Jogging', 'Sitting', 'Standing', 'Upstairs', 'Walking']
    act_dict = dict(enumerate(names))
    n_rows = pred.shape[0]
    return [act_dict[np.argmax(pred[k])] for k in range(n_rows)]

In [16]:
# map_preds(pred)

In [21]:
# map_preds(cnn_model3_ep20.predict(X_test_rs))

In [40]:
# Scratch check: enumerate only the first entry of the file list to confirm the (index, name) pairs.
collection_list = ['b_walk1.csv','d_walk1.csv','d_walk2.csv','l_walk1.csv','z_walk1.csv','z_walk1_left.csv','d_jogging1.csv','z_jogging1.csv']
for i, act in enumerate(collection_list[0:1]):
    print(i,act)

0 b_walk1.csv


In [112]:
# Evaluate the loaded CNN and the fine-tuned CNN on every self-collected recording.
# For each file: load -> scale -> window -> predict, then print the share of
# windows whose predicted activity equals the file's true activity.
#
# NOTE(review): `cnn_model_new` is built and fitted in cells further down this
# notebook, so this cell fails on Restart & Run All unless it is moved below them.
# NOTE(review): z_walk1.csv and z_jogging1.csv are also used to fine-tune
# `cnn_model_new`, so its scores on those two files are not held-out scores.
collection_list = ['b_walk1.csv','d_walk1.csv','d_walk2.csv','l_walk1.csv','z_walk1.csv','z_walk1_left.csv','d_jogging1.csv','z_jogging1.csv']
TIME_STEPS = 200
STEP = 40
for act in collection_list:
    # Take the true label from the file name rather than the list position, so
    # reordering or extending collection_list cannot mislabel a recording.
    act_name = 'Jogging' if 'jogging' in act else 'Walking'

    df = read_in_iPhone_data(file=act)
    df = robustscale_data(df,['x_axis', 'y_axis', 'z_axis'])

    X_test = create_dataset_X(
        df[df.columns[-3:]],
        TIME_STEPS,
        STEP
    )
    # The CNN models take a trailing channel axis.
    X_test_rs = np.array(X_test).reshape(X_test.shape[0],X_test.shape[1],X_test.shape[2],1)
    print(act)
#     print("MS lstm model acc:")
#     print(np.mean(np.array(map_preds(model.predict(X_test)))==act_name))
#     print("Combine lstm model acc:")
#     print(np.mean(np.array(map_preds(model_ep20.predict(X_test)))==act_name))
    print("Combine CNN model")
    print(np.mean(np.array(map_preds(cnn_model3_ep20.predict(X_test_rs)))==act_name))
    print("Combine CNN model new")
    print(np.mean(np.array(map_preds(cnn_model_new.predict(X_test_rs)))==act_name))
    print("\n")

b_walk1.csv
Combine CNN model
0.9352941176470588
Combine CNN model new
0.9764705882352941


d_walk1.csv
Combine CNN model
0.7932816537467701
Combine CNN model new
0.937984496124031


d_walk2.csv
Combine CNN model
0.9827160493827161
Combine CNN model new
0.9876543209876543


l_walk1.csv
Combine CNN model
0.7682619647355163
Combine CNN model new
0.8438287153652393


z_walk1.csv
Combine CNN model
0.6463768115942029
Combine CNN model new
0.8695652173913043


z_walk1_left.csv
Combine CNN model
0.7365591397849462
Combine CNN model new
0.9381720430107527


d_jogging1.csv
Combine CNN model
0.0
Combine CNN model new
0.9983136593591906


z_jogging1.csv
Combine CNN model
0.0
Combine CNN model new
0.9858044164037855




In [90]:
# Define a function to plot the sensor data for a user-provided activity
def plot_activity(df,activity = None,sensor = 0, start = 1800, stop = 2000):
    """Plot the three channels of one sensor over a row window.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the sensor columns. It must also have an ``activity``
        column when ``activity`` is given.
    activity : str or None
        If given, only rows where ``df['activity'] == activity`` are plotted
        and the value is used as the plot title. If None, all rows are used
        and the plot has no title.
    sensor : int
        Sensor group to plot: 0 attitude (roll/pitch/yaw), 1 gravity,
        2 rotationRate, 3 userAcceleration, 4 the combined ``*_axis``
        columns (sum of gravity and userAcceleration).
    start, stop : int
        Row slice ``[start:stop]`` applied after the optional activity
        filter. The defaults reproduce the previously hard-coded 1800:2000.

    Raises
    ------
    KeyError
        If ``sensor`` is not one of the codes above, or a required column
        is missing from ``df``.
    """
    sensor_dict = {
        0:'attitude',
        1:'gravity',
        2:'rotationRate',
        3:'userAcceleration',
        4:'axis' # This is the sum of gravity and userAcceleration
    }
    prefix = sensor_dict[sensor]
    if sensor == 4:
        cols = ['x_' + prefix, 'y_' + prefix, 'z_' + prefix]
    elif sensor == 0:
        cols = [prefix + '.roll', prefix + '.pitch', prefix + '.yaw']
    else:
        cols = [prefix + '.x', prefix + '.y', prefix + '.z']

    # `is None` instead of `== None`: identity is the intended check.
    if activity is None:
        data = df[cols][start:stop]
    else:
        data = df[df['activity'] == activity][cols][start:stop]

    # `activity` doubles as the title (None means no title).
    axis = data.plot(figsize=(8, 6),
                     title=activity)
    # Anchor the legend just outside the right edge of the axes.
    axis.legend(loc='lower left',bbox_to_anchor=(1.0,0.5))

In [5]:
# Side experiment: count the weight variables of a Dense layer built for 4 input features.
layer = keras.layers.Dense(3)
layer.build((None, 4))  # Create the weights

print("weights:", len(layer.weights))
print("trainable_weights:", len(layer.trainable_weights))
print("non_trainable_weights:", len(layer.non_trainable_weights))

weights: 2
trainable_weights: 2
non_trainable_weights: 0


In [4]:
# Same experiment for BatchNormalization; the recorded output shows 2 trainable
# and 2 non-trainable weight variables.
layer = keras.layers.BatchNormalization()
layer.build((None, 4))  # Create the weights

print("weights:", len(layer.weights))
print("trainable_weights:", len(layer.trainable_weights))
print("non_trainable_weights:", len(layer.non_trainable_weights))


weights: 4
trainable_weights: 2
non_trainable_weights: 2


In [79]:
# List the layer objects of the loaded CNN.
cnn_model3_ep20.layers

[<tensorflow.python.keras.layers.convolutional.Conv2D at 0x177d419d7c8>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x177d441ad88>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x177d444a208>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x177d4456508>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x177d4456108>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x177d4467148>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x177d4472c48>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x177d4485f48>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x177d4485d88>,
 <tensorflow.python.keras.layers.core.Flatten at 0x177d449ae08>,
 <tensorflow.python.keras.layers.core.Dense at 0x177d449e048>,
 <tensorflow.python.keras.layers.core.Dropout at 0x177d44ade88>,
 <tensorflow.python.keras.layers.core.Dense at 0x177d44b2508>,
 <tensorflow.python.keras.layers.core.Dropout at 0x177d4495088>]

In [22]:
# cnn_model3_ep20.get_weights()

In [54]:
# Bind a second name to the SAME model object (no copy is made), so any change
# made through `base_model` is also visible through `cnn_model3_ep20`.
base_model = cnn_model3_ep20

In [56]:
# Print the layer-by-layer summary of the base model.
base_model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 200, 3, 128)       1280      
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 200, 3, 64)        73792     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 100, 3, 64)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 100, 3, 64)        36928     
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 100, 3, 32)        18464     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 50, 3, 32)         0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 50, 3, 32)        

In [67]:
# Check whether the base model is frozen.
# NOTE(review): the recorded output is False, but no visible cell sets
# `trainable = False`; presumably that cell was deleted — confirm and restore it,
# otherwise Restart & Run All will fine-tune the whole base model.
base_model.trainable

False

In [82]:
# Print the base model summary again (repeat of the earlier summary cell).
base_model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 200, 3, 128)       1280      
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 200, 3, 64)        73792     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 100, 3, 64)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 100, 3, 64)        36928     
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 100, 3, 32)        18464     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 50, 3, 32)         0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 50, 3, 32)        

In [68]:
# Same flag read through the original name; `base_model` is an alias of this object.
cnn_model3_ep20.trainable

False

In [84]:
# collection_list = ['b_walk1.csv','d_walk1.csv','d_walk2.csv','l_walk1.csv','z_walk1.csv','z_walk1_left.csv','d_jogging1.csv','z_jogging1.csv']
# for i, act in enumerate(collection_list):
#     if i>=6:
#         act_name = 'Jogging'
#     else:
#         act_name = 'Walking'
        
# Build windows from the walking recording that will be used for fine-tuning.
df = read_in_iPhone_data(file='z_walk1.csv')
df = robustscale_data(df,['x_axis', 'y_axis', 'z_axis'])
TIME_STEPS = 200
STEP = 40

X_add = create_dataset_X(
        df[df.columns[-3:]],
        TIME_STEPS,
        STEP
    )
# Add the trailing channel axis used by the CNN input.
X_add_rs = np.array(X_add).reshape(X_add.shape[0],X_add.shape[1],X_add.shape[2],1)

In [87]:
# Keep the first 280 walking windows for fine-tuning.
# NOTE(review): the remaining windows are presumably meant as held-out data — confirm.
X_add_rs_train = X_add_rs[0:280]
X_add_rs_train.shape

(280, 200, 3, 1)

In [None]:
# Scratch example: np.tile with reps (repetitions, 1) stacks the 1-D array as
# `repetitions` identical rows.
an_array = np.array([1,2,3])
repetitions = 3

repeats_array = np.tile(an_array, (repetitions, 1))

In [None]:
# One-hot labels for the 280 walking windows: index 5 is 'Walking' in map_preds' table.
y_add_train = np.tile(np.array([0,0,0,0,0,1]),(280,1))

In [91]:
# Display the label matrix to verify the tiling.
y_add_train

array([[0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1],
       ...,
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1]])

In [100]:
# Same preparation as for the walking recording, applied to the jogging recording.
df = read_in_iPhone_data(file='z_jogging1.csv')
df = robustscale_data(df,['x_axis', 'y_axis', 'z_axis'])
TIME_STEPS = 200
STEP = 40

X_add2 = create_dataset_X(
        df[df.columns[-3:]],
        TIME_STEPS,
        STEP
    )
# Add the trailing channel axis used by the CNN input.
X_add2_rs = np.array(X_add2).reshape(X_add2.shape[0],X_add2.shape[1],X_add2.shape[2],1)

In [103]:
# Shape check for the jogging windows.
X_add2_rs.shape

(634, 200, 3, 1)

In [105]:
# Keep the first 480 jogging windows for fine-tuning.
X_add2_rs_train = X_add2_rs[0:480]
X_add2_rs_train.shape

(480, 200, 3, 1)

In [106]:
# One-hot labels for the 480 jogging windows: index 1 is 'Jogging' in map_preds' table.
y_add2_train = np.tile(np.array([0,1,0,0,0,0]),(480,1))

In [107]:
# Stack walking windows first, then jogging windows, with labels in the same
# order, so the rows are ordered by class.
X_add_cb = np.concatenate((X_add_rs_train,X_add2_rs_train),axis = 0 )
y_add_cb = np.concatenate((y_add_train,y_add2_train),axis = 0 )
print(X_add_cb.shape,y_add_cb.shape)

(760, 200, 3, 1) (760, 6)


In [110]:
# Build the fine-tuning model: the loaded CNN as a feature extractor plus a new head.
cnn_model_new = Sequential()
cnn_model_new.add(base_model)
# Add a densely-connected layer with 16 units.
cnn_model_new.add(Dense(16, activation='relu'))

# Let's try to avoid overfitting!
cnn_model_new.add(Dropout(0.4))

# Add a final softmax layer with 6 units, matching the 6-column one-hot labels.
cnn_model_new.add(Dense(6, activation='softmax'))

# Compile model
cnn_model_new.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

In [111]:
# Fit model on training data.
# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. X_add_cb is all walking windows followed by all jogging
# windows, so without this permutation the validation set held only jogging.
shuffle_idx = np.random.RandomState(42).permutation(len(X_add_cb))
history = cnn_model_new.fit(X_add_cb[shuffle_idx],
                          y_add_cb[shuffle_idx],
                          batch_size=32,
                          validation_split=0.2,
                          epochs=10,
                          verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [113]:
# Persist the fine-tuned model to HDF5.
cnn_model_new.save('./02_models/0930_cnn_add_collect.h5')

In [120]:
# Summary of the loaded CNN, for comparison with the fine-tuned model's summary.
cnn_model3_ep20.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 200, 3, 128)       1280      
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 200, 3, 64)        73792     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 100, 3, 64)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 100, 3, 64)        36928     
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 100, 3, 32)        18464     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 50, 3, 32)         0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 50, 3, 32)        

In [96]:
# Summary of the fine-tuned model; the recorded output lists the base model as a
# single non-trainable block and only the new head's parameters as trainable.
cnn_model_new.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_5 (Sequential)    (None, 32)                223280    
_________________________________________________________________
dense_4 (Dense)              (None, 16)                528       
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 6)                 102       
Total params: 223,910
Trainable params: 630
Non-trainable params: 223,280
_________________________________________________________________


In [119]:
# cnn_model_new.weights