In [1]:
import pandas as pd
import os
import csv
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential, layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
ROOT_DATA_DIR = '../raw_data/Out_Feature_CSVs'
#ROOT_DATA_DIR = '../raw_data/Out_Feature_CSVs'
DATA_SUB_DIRs = ["Train", "Test"]
CLS_LIST = ["Bad", "Good"]

def get_y(data_path, class_list):
    
    data_dict = {"csv_paths":[], "csv_files":[], "labels":[]}
    for i, clss in enumerate(class_list):
        dir_path = os.path.join(data_path, clss)
        list_csvs = [l for l in os.listdir(dir_path) if l.split(".")[-1]=="csv"]
        print(f"Found {len(list_csvs)} CSVs in {dir_path} Directory")
        data_dict["csv_files"].extend(list_csvs)
        data_dict["labels"].extend([i for k in range(len(list_csvs))])
        data_dict["csv_paths"].extend([dir_path for _ in range(len(list_csvs))])
    total_csvs = len(data_dict["csv_files"])
    total_labels = len(data_dict["labels"])
    print(f"Number of csvs : {total_csvs}")
    print(f"Number of Labels : {total_labels}")
    return data_dict

train_data = pd.DataFrame(get_y(data_path=os.path.join(ROOT_DATA_DIR, DATA_SUB_DIRs[0]),
                               class_list=CLS_LIST))


test_data = pd.DataFrame(get_y(data_path=os.path.join(ROOT_DATA_DIR, DATA_SUB_DIRs[1]),
                               class_list=CLS_LIST))  

Found 65 CSVs in ../raw_data/Out_Feature_CSVs/Train/Bad Directory
Found 65 CSVs in ../raw_data/Out_Feature_CSVs/Train/Good Directory
Number of csvs : 130
Number of Labels : 130
Found 17 CSVs in ../raw_data/Out_Feature_CSVs/Test/Bad Directory
Found 17 CSVs in ../raw_data/Out_Feature_CSVs/Test/Good Directory
Number of csvs : 34
Number of Labels : 34


In [3]:
train_data.head()

Unnamed: 0,csv_paths,csv_files,labels
0,../raw_data/Out_Feature_CSVs/Train/Bad,IMG_9347_03.csv,0
1,../raw_data/Out_Feature_CSVs/Train/Bad,IMG_9336_03.csv,0
2,../raw_data/Out_Feature_CSVs/Train/Bad,54.csv,0
3,../raw_data/Out_Feature_CSVs/Train/Bad,IMG_9451_02.csv,0
4,../raw_data/Out_Feature_CSVs/Train/Bad,IMG_9451_03.csv,0


In [4]:
#create y_train and y_test as float64 type
y_train = train_data['labels']/1.0
y_test = test_data['labels']/1.0

In [5]:
def csv_to_list_of_lists(csv_path):
    df = pd.read_csv(csv_path)
    df.fillna(0, inplace=True)
    list_of_lists = df.iloc[:,1:].values.tolist()
    return list_of_lists

def get_x(dataframe):
  list_of_csv_files = [os.path.join(row['csv_paths'], row["csv_files"]) for index, row in dataframe.iterrows()]
  X_list = [csv_to_list_of_lists(f) for f in list_of_csv_files]
  return X_list

In [6]:
#create X_train and X_test
X_train = get_x(train_data)
X_test = get_x(test_data)

In [7]:
#pad X_train so all arrays are of the same shape
X_train_pad = pad_sequences(X_train, dtype='float32', padding='post', value=-1000)
print(X_train_pad.shape)

#pad X_test so all arrays are of the same shape
X_test_pad = pad_sequences(X_test, dtype='float32', padding='post', value=-1000, maxlen=X_train_pad.shape[1])
print(X_test_pad.shape)

(130, 175, 2048)
(34, 175, 2048)


In [8]:
from keras import optimizers
import itertools

In [9]:
lstm_neurons=[256,128,64]
neurons=[64,32,16]
hyperparams = itertools.product(lstm_neurons,neurons)

In [10]:
def build_model(input_shape):
  rmsprop = optimizers.RMSprop(lr=0.0001)
  model_LSTM = Sequential()
  model_LSTM.add(layers.Masking(mask_value=-1000, input_shape=input_shape))
  model_LSTM.add(layers.LSTM(128, activation='tanh', return_sequences=True))
  model_LSTM.add(layers.Dropout(0.2))
  model_LSTM.add(layers.LSTM(64, activation='tanh'))
  model_LSTM.add(layers.Dense(64, activation='relu'))
  model_LSTM.add(layers.Dense(32, activation='relu'))
  model_LSTM.add(layers.Dense(16, activation='relu'))
  model_LSTM.add(layers.Dense(8, activation='relu'))
  model_LSTM.add(layers.Dense(4, activation='relu'))
  model_LSTM.add(layers.Dense(2, activation='relu'))
  model_LSTM.add(layers.Dense(1, activation='sigmoid'))
  model_LSTM.compile(loss='binary_crossentropy', optimizer=rmsprop,metrics='accuracy')
  return model_LSTM

In [11]:
#Fitting the model to the train set
for i in range(5):
    model = build_model(input_shape=(X_train_pad.shape[1], X_train_pad.shape[2]))
    es = EarlyStopping(patience=20)

    model.fit(X_train_pad, y_train, 
              epochs=500, 
              batch_size=32, 
              verbose=0, 
              callbacks = [es],
              validation_split=0.2,
              shuffle=True)
    print(f'train={model.evaluate(X_train_pad, y_train)[1]}, test= {model.evaluate(X_test_pad, y_test)[1]}')

train=0.9076923131942749, test= 0.6470588445663452
train=0.892307698726654, test= 0.7058823704719543
train=0.5, test= 0.5
train=0.9384615421295166, test= 0.7647058963775635
train=0.9076923131942749, test= 0.6764705777168274


In [12]:
#Evaluate the model on the train set
model.evaluate(X_train_pad, y_train)



[0.42553606629371643, 0.9076923131942749]

In [13]:
#Evaluate the model on the test set
model.evaluate(X_test_pad, y_test)



[0.5786683559417725, 0.6764705777168274]

In [14]:
#predictions of the test set compared to the actuals
model.predict(X_test_pad)

array([[0.4800888 ],
       [0.4800888 ],
       [0.5174011 ],
       [0.5022851 ],
       [0.4800888 ],
       [0.4800888 ],
       [0.4800888 ],
       [0.95042264],
       [0.4800888 ],
       [0.4800888 ],
       [0.4800888 ],
       [0.4800888 ],
       [0.4800888 ],
       [0.6670113 ],
       [0.5424812 ],
       [0.5192611 ],
       [0.5536526 ],
       [0.4800888 ],
       [0.9501583 ],
       [0.7663051 ],
       [0.48369762],
       [0.9516977 ],
       [0.8494618 ],
       [0.91676307],
       [0.5069054 ],
       [0.9512907 ],
       [0.4800888 ],
       [0.4800888 ],
       [0.84109205],
       [0.9304563 ],
       [0.55009866],
       [0.95159733],
       [0.9516989 ],
       [0.935415  ]], dtype=float32)

In [15]:
y_test

0     0.0
1     0.0
2     0.0
3     0.0
4     0.0
5     0.0
6     0.0
7     0.0
8     0.0
9     0.0
10    0.0
11    0.0
12    0.0
13    0.0
14    0.0
15    0.0
16    0.0
17    1.0
18    1.0
19    1.0
20    1.0
21    1.0
22    1.0
23    1.0
24    1.0
25    1.0
26    1.0
27    1.0
28    1.0
29    1.0
30    1.0
31    1.0
32    1.0
33    1.0
Name: labels, dtype: float64

In [16]:
#model.save("my_model")
#model.save_weights("weights.h5")