In [1]:
import pandas as pd
import os
import csv
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential, layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
ROOT_DATA_DIR = '../raw_data/Out_Feature_CSVs'
DATA_SUB_DIRs = ["Train", "Test"]
CLS_LIST = ["Bad", "Good"]

def get_y(data_path, class_list):
    
    data_dict = {"csv_paths":[], "csv_files":[], "labels":[]}
    for i, clss in enumerate(class_list):
        dir_path = os.path.join(data_path, clss)
        list_csvs = [l for l in os.listdir(dir_path) if l.split(".")[-1]=="csv"]
        print(f"Found {len(list_csvs)} CSVs in {dir_path} Directory")
        data_dict["csv_files"].extend(list_csvs)
        data_dict["labels"].extend([i for k in range(len(list_csvs))])
        data_dict["csv_paths"].extend([dir_path for _ in range(len(list_csvs))])
    total_csvs = len(data_dict["csv_files"])
    total_labels = len(data_dict["labels"])
    print(f"Number of csvs : {total_csvs}")
    print(f"Number of Labels : {total_labels}")
    return data_dict

train_data = pd.DataFrame(get_y(data_path=os.path.join(ROOT_DATA_DIR, DATA_SUB_DIRs[0]),
                               class_list=CLS_LIST))


test_data = pd.DataFrame(get_y(data_path=os.path.join(ROOT_DATA_DIR, DATA_SUB_DIRs[1]),
                               class_list=CLS_LIST))  

Found 65 CSVs in /content/drive/MyDrive/Squat_Pro/Out_Feature_CSVs/Train/Bad Directory
Found 65 CSVs in /content/drive/MyDrive/Squat_Pro/Out_Feature_CSVs/Train/Good Directory
Number of csvs : 130
Number of Labels : 130
Found 17 CSVs in /content/drive/MyDrive/Squat_Pro/Out_Feature_CSVs/Test/Bad Directory
Found 17 CSVs in /content/drive/MyDrive/Squat_Pro/Out_Feature_CSVs/Test/Good Directory
Number of csvs : 34
Number of Labels : 34


In [3]:
train_data.head()

Unnamed: 0,csv_paths,csv_files,labels
0,/content/drive/MyDrive/Squat_Pro/Out_Feature_C...,IMG_9364_03.csv,0
1,/content/drive/MyDrive/Squat_Pro/Out_Feature_C...,IMG_9444_01.csv,0
2,/content/drive/MyDrive/Squat_Pro/Out_Feature_C...,74.csv,0
3,/content/drive/MyDrive/Squat_Pro/Out_Feature_C...,IMG_9347_02.csv,0
4,/content/drive/MyDrive/Squat_Pro/Out_Feature_C...,IMG_9336_01.csv,0


In [4]:
#create y_train and y_test as float64 type
y_train = train_data['labels']/1.0
y_test = test_data['labels']/1.0

In [5]:
def csv_to_list_of_lists(csv_path):
    df = pd.read_csv(csv_path)
    df.fillna(0, inplace=True)
    list_of_lists = df.iloc[:,1:].values.tolist()
    return list_of_lists

def get_x(dataframe):
  list_of_csv_files = [os.path.join(row['csv_paths'], row["csv_files"]) for index, row in dataframe.iterrows()]
  X_list = [csv_to_list_of_lists(f) for f in list_of_csv_files]
  return X_list

In [6]:
#create X_train and X_test
X_train = get_x(train_data)
X_test = get_x(test_data)

In [7]:
#pad X_train so all arrays are of the same shape
X_train_pad = pad_sequences(X_train, dtype='float32', padding='post', value=-1000)
print(X_train_pad.shape)

#pad X_test so all arrays are of the same shape
X_test_pad = pad_sequences(X_test, dtype='float32', padding='post', value=-1000, maxlen=X_train_pad.shape[1])
print(X_test_pad.shape)

(130, 175, 2048)
(34, 175, 2048)


In [8]:
def build_model(input_shape):

  model_LSTM = Sequential()

  model_LSTM.add(layers.Masking(mask_value=-1000, input_shape=input_shape))
  model_LSTM.add(layers.LSTM(64, activation='tanh', return_sequences=True))
  model_LSTM.add(layers.Dropout(0.2))
  model_LSTM.add(layers.LSTM(32, activation='tanh'))
  model_LSTM.add(layers.Dense(8, activation='relu'))
  model_LSTM.add(layers.Dense(4, activation='relu'))
  model_LSTM.add(layers.Dense(1, activation='sigmoid'))

  model_LSTM.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
  return model_LSTM

  print(model_LSTM.summary())

model = build_model(input_shape=(X_train_pad.shape[1], X_train_pad.shape[2]))

In [9]:
#Fitting the model to the train set
es = EarlyStopping(patience=100)

model.fit(X_train_pad, y_train, 
          epochs=1750, 
          batch_size=8, 
          verbose=1, 
          callbacks = [es],
          validation_split=0.2,
          shuffle=True)

Epoch 1/1750
Epoch 2/1750
Epoch 3/1750
Epoch 4/1750
Epoch 5/1750
Epoch 6/1750
Epoch 7/1750
Epoch 8/1750
Epoch 9/1750
Epoch 10/1750
Epoch 11/1750
Epoch 12/1750
Epoch 13/1750
Epoch 14/1750
Epoch 15/1750
Epoch 16/1750
Epoch 17/1750
Epoch 18/1750
Epoch 19/1750
Epoch 20/1750
Epoch 21/1750
Epoch 22/1750
Epoch 23/1750
Epoch 24/1750
Epoch 25/1750
Epoch 26/1750
Epoch 27/1750
Epoch 28/1750
Epoch 29/1750
Epoch 30/1750
Epoch 31/1750
Epoch 32/1750
Epoch 33/1750
Epoch 34/1750
Epoch 35/1750
Epoch 36/1750
Epoch 37/1750
Epoch 38/1750
Epoch 39/1750
Epoch 40/1750
Epoch 41/1750
Epoch 42/1750
Epoch 43/1750
Epoch 44/1750
Epoch 45/1750
Epoch 46/1750
Epoch 47/1750
Epoch 48/1750
Epoch 49/1750
Epoch 50/1750
Epoch 51/1750
Epoch 52/1750
Epoch 53/1750
Epoch 54/1750
Epoch 55/1750
Epoch 56/1750
Epoch 57/1750
Epoch 58/1750
Epoch 59/1750
Epoch 60/1750
Epoch 61/1750
Epoch 62/1750
Epoch 63/1750
Epoch 64/1750
Epoch 65/1750
Epoch 66/1750
Epoch 67/1750
Epoch 68/1750
Epoch 69/1750
Epoch 70/1750
Epoch 71/1750
Epoch 72/1750
E

<tensorflow.python.keras.callbacks.History at 0x7f00dc3cd850>

model.save("my_model")
model.save_weights("weights.h5")

In [10]:
#Evaluate the model on the train set
model.evaluate(X_train_pad, y_train)



[1.106017827987671, 0.9461538195610046, 1.0, 0.892307698726654]

In [11]:
#Evaluate the model on the test set
model.evaluate(X_test_pad, y_test)



[4.900697231292725, 0.6470588445663452, 0.692307710647583, 0.529411792755127]

In [12]:
#predictions of the test set compared to the actuals
model.predict(X_test_pad)

array([[3.7534251e-10],
       [4.9826770e-10],
       [4.0444950e-10],
       [8.8692296e-01],
       [9.1654164e-01],
       [3.9266743e-10],
       [9.1654164e-01],
       [3.7250539e-10],
       [9.5891953e-04],
       [7.3855527e-10],
       [4.1249737e-10],
       [4.0279127e-10],
       [9.1654164e-01],
       [6.0222355e-10],
       [6.7143308e-10],
       [1.5096875e-09],
       [5.3026766e-10],
       [9.1654164e-01],
       [5.0497091e-09],
       [3.0936214e-09],
       [9.1654164e-01],
       [1.8534090e-05],
       [9.1654164e-01],
       [9.1654164e-01],
       [9.1654164e-01],
       [4.0378123e-10],
       [4.2068449e-10],
       [3.9142761e-10],
       [9.1654164e-01],
       [1.5284333e-09],
       [9.1654164e-01],
       [9.1654164e-01],
       [4.9069437e-10],
       [9.1654164e-01]], dtype=float32)

In [13]:
y_test

0     0.0
1     0.0
2     0.0
3     0.0
4     0.0
5     0.0
6     0.0
7     0.0
8     0.0
9     0.0
10    0.0
11    0.0
12    0.0
13    0.0
14    0.0
15    0.0
16    0.0
17    1.0
18    1.0
19    1.0
20    1.0
21    1.0
22    1.0
23    1.0
24    1.0
25    1.0
26    1.0
27    1.0
28    1.0
29    1.0
30    1.0
31    1.0
32    1.0
33    1.0
Name: labels, dtype: float64