This notebook contains essentially the same code as model_training.py, rewritten as a Jupyter notebook so that it can run on Google Colab.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
import tensorflow as tf
from tensorflow import keras
from keras import layers

from sklearn.preprocessing import MinMaxScaler


In [5]:
# Fixing the seed makes runs exactly repeatable.
SEED = 1337
for seed_fn in (np.random.seed, tf.random.set_seed):
    # Seed numpy first, then tensorflow, with the same value.
    seed_fn(SEED)

In [6]:
#Change file names in the following two
cook_df = pd.read_csv('./samples/cook.csv')
flick_df = pd.read_csv('./samples/flickup.csv')

#getting rid of the last 10 rows of flick_df
flick_df = flick_df.iloc[:len(flick_df) - 10]

#since one second is about 118 rows, drop the trailing rows so each length is an exact multiple of 118
# NOTE(review): the windowing cell further down uses window = 119, not 118 - confirm which value is intended.
ROWS_PER_SECOND = 118
# .copy() makes each trimmed frame independent of the frame it was sliced from,
# so the column assignments in the following cells do not write into a slice
# (which pandas may flag with SettingWithCopyWarning).
cook_df = cook_df.iloc[:len(cook_df) - len(cook_df) % ROWS_PER_SECOND].copy()
flick_df = flick_df.iloc[:len(flick_df) - len(flick_df) % ROWS_PER_SECOND].copy()

print(cook_df.info())
print(flick_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5782 entries, 0 to 5781
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    5782 non-null   int64  
 1   Ax      5782 non-null   float64
 2   Ay      5782 non-null   float64
 3   Az      5782 non-null   float64
 4   Gx      5782 non-null   float64
 5   Gy      5782 non-null   float64
 6   Gz      5782 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 316.3 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5782 entries, 0 to 5781
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    5782 non-null   int64  
 1   Ax      5782 non-null   float64
 2   Ay      5782 non-null   float64
 3   Az      5782 non-null   float64
 4   Gx      5782 non-null   float64
 5   Gy      5782 non-null   float64
 6   Gz      5782 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 316.3 KB
None


In [7]:
#scaling: maps accelerometer values from [-4, 4] and gyroscope values from [-2000, 2000] onto [0, 1]
# NOTE(review): 4 and 2000 look like the sensor full-scale ranges - confirm against the hardware configuration.
ACCEL_COLS = ['Ax', 'Ay', 'Az']
GYRO_COLS = ['Gx', 'Gy', 'Gz']
ACCEL_RANGE = 4
GYRO_RANGE = 2000

# Apply the same transformation to both recordings instead of repeating it per frame.
for frame, label in ((cook_df, 'cook'), (flick_df, 'flick')):
    frame[ACCEL_COLS] = (frame[ACCEL_COLS] + ACCEL_RANGE) / (2 * ACCEL_RANGE)
    frame[GYRO_COLS] = (frame[GYRO_COLS] + GYRO_RANGE) / (2 * GYRO_RANGE)
    # String label; the next cell overwrites this column with an integer code.
    frame['Class'] = label

In [8]:
# Integer label encoding (a single numeric column, not a one-hot vector):
# cook -> 0, flick -> 1
cook_df['Class'], flick_df['Class'] = 0, 1

# Inspect the labelled flick frame.
print(flick_df.head())
print(flick_df.info())

   Time        Ax        Ay        Az        Gx        Gy        Gz  Class
0     2  0.486405  0.478210  0.671585  0.497223  0.510376  0.499649      1
1    10  0.493500  0.474228  0.670349  0.496780  0.515778  0.498718      1
2    18  0.492096  0.476258  0.679122  0.496170  0.519775  0.497681      1
3    26  0.488556  0.478942  0.683990  0.496490  0.525742  0.496445      1
4    34  0.481857  0.476761  0.675095  0.497879  0.531738  0.495636      1
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5782 entries, 0 to 5781
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    5782 non-null   int64  
 1   Ax      5782 non-null   float64
 2   Ay      5782 non-null   float64
 3   Az      5782 non-null   float64
 4   Gx      5782 non-null   float64
 5   Gy      5782 non-null   float64
 6   Gz      5782 non-null   float64
 7   Class   5782 non-null   int64  
dtypes: float64(6), int64(2)
memory usage: 361.5 KB
None


In [9]:
def formatData(X, Y, window):
    """Cut X into consecutive, non-overlapping windows and pair each with a label.

    Parameters
    ----------
    X : pandas DataFrame or Series
        Samples, one row per reading.
    Y : pandas DataFrame or Series
        Labels, row-aligned with X (must have at least as many rows as the
        start index of the last full window).
    window : int
        Number of rows per window. Trailing rows that do not fill a complete
        window are dropped.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The first array stacks ``X.iloc[i:i+window].values`` for every full
        window; the second stacks the label row found at the start of each
        window (``Y.iloc[i]``).
    """
    Xarr = []
    Yarr = []

    # Only iterate over complete windows; the remainder is ignored.
    n_full_rows = window * (len(X) // window)
    for i in range(0, n_full_rows, window):
        Xarr.append(X.iloc[i:i + window].values)
        # Take the label belonging to this window. The previous code always
        # used Y.iloc[0], which mislabels every window after the first
        # whenever Y is not constant.
        Yarr.append(Y.iloc[i])

    return np.array(Xarr), np.array(Yarr)

In [10]:
# Number of rows per example window.
window = 119
# Feature columns fed to the model, shared by both gestures.
feature_cols = ['Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz']

cook_x, cook_y = formatData(cook_df[feature_cols], cook_df[['Class']], window)
flick_x, flick_y = formatData(flick_df[feature_cols], flick_df[['Class']], window)


# Peek at the first few flick windows and cook labels.
print(flick_x[:4])
print(cook_y[:4])

[[[0.486405   0.47821    0.671585   0.4972229  0.51037598 0.49964905]
  [0.4935     0.4742275  0.67034875 0.49678039 0.51577759 0.49871826]
  [0.49209625 0.4762575  0.6791225  0.49617005 0.51977539 0.49768066]
  ...
  [0.59964    0.4995875  0.44567875 0.50679016 0.4851532  0.50369263]
  [0.59732    0.49827625 0.4426425  0.50509644 0.4851532  0.50369263]
  [0.59516875 0.49466    0.44278    0.5040741  0.47891235 0.50323486]]

 [[0.59375    0.48991375 0.4458925  0.50413513 0.47233582 0.50358581]
  [0.58781375 0.48468    0.44551125 0.50570679 0.46484375 0.50468445]
  [0.5791625  0.47847    0.44493125 0.50782776 0.45605469 0.50683594]
  ...
  [0.5022425  0.4968875  0.64238    0.50941467 0.45953369 0.50071716]
  [0.49321    0.49180625 0.6428075  0.50769043 0.4621582  0.49813843]
  [0.48400875 0.48756375 0.6454925  0.50474548 0.46562195 0.49653625]]

 [[0.476395   0.48381    0.65332    0.50108337 0.46910095 0.4961853 ]
  [0.47062625 0.4798275  0.6634375  0.49812317 0.47268677 0.49678039]
  [0

In [18]:
# Stack the cook examples followed by the flick examples along the first
# (example) axis; shuffling happens in a later cell.
inputs = np.concatenate([cook_x, flick_x], axis=0)
outputs = np.concatenate([cook_y, flick_y], axis=0)

# Shape of one class vs. the combined set.
print(cook_x.shape)
print(inputs.shape)

(48, 119, 6)
(96, 119, 6)


In [19]:
# ndarray.reshape returns a NEW array and never modifies `inputs` in place.
# The result is discarded here, so `inputs` stays 3-D (as the printed shape
# shows); this line only checks that each example can be flattened.
# Dimensions are derived from the array instead of being hard-coded, so the
# cell keeps working if the number of examples or the window size changes.
inputs.reshape(len(inputs), -1)
print(inputs.shape)

(96, 119, 6)


In [20]:
num_inputs = len(inputs)

# Build a shuffled ordering of the example indexes (0, 1, 2, ...) and apply
# the same ordering to inputs and outputs so each example keeps its label.
randomize = np.arange(num_inputs)
np.random.shuffle(randomize)
inputs, outputs = inputs[randomize], outputs[randomize]

# Boundaries of the three sets: the first 60% is training, the next 20% is
# testing and whatever remains is validation.
TRAIN_SPLIT = int(0.6 * num_inputs)
TEST_SPLIT = int(0.2 * num_inputs + TRAIN_SPLIT)

inputs_train = inputs[:TRAIN_SPLIT]
inputs_test = inputs[TRAIN_SPLIT:TEST_SPLIT]
inputs_validate = inputs[TEST_SPLIT:]

outputs_train = outputs[:TRAIN_SPLIT]
outputs_test = outputs[TRAIN_SPLIT:TEST_SPLIT]
outputs_validate = outputs[TEST_SPLIT:]

print("Data set randomization and splitting complete.")

Data set randomization and splitting complete.


In [21]:
# Shapes of the three input sets on one line, then the training labels.
split_shapes = [part.shape for part in (inputs_train, inputs_test, inputs_validate)]
print(*split_shapes)
print(outputs_train.shape)

(57, 119, 6) (19, 119, 6) (20, 119, 6)
(57, 1)


In [23]:
#shape needs to be formatted such that the inputs to each example is in a 1D representation
# The row count comes from the array itself and -1 lets numpy infer the
# flattened length (window * features), so this no longer breaks when the
# split size or window length changes.
inputs_train = inputs_train.reshape(len(inputs_train), -1)
print(inputs_train.shape)


(57, 714)
