In [None]:
# Libraries used across the notebook: TensorFlow/Keras for the model, NumPy and
# pandas for data handling, os/glob for locating files on disk.
import tensorflow as tf
import numpy as np
import os
import glob
import pandas as pd
# Print the installed TensorFlow version so the run environment is recorded in the output.
print("TensorFlow version:", tf.__version__)

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; the dataset lives under MyDrive.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Root folder holding the training/validation CSV files (note the trailing slash).
data_dir = '/content/drive/MyDrive/training-validation/'

In [None]:
# list the contents of the dataset directory
!ls "$data_dir"
# count the CSV files found anywhere below the dataset directory (find recurses)
!find "$data_dir" -name '*.csv' -type f | wc -l

In [51]:
# Collect every CSV below data_dir.
# `**` only descends into nested folders when recursive=True; without it the
# pattern behaves like a single `*` and matches exactly one directory level,
# which can disagree with the recursive `find` count.
# os.path.join avoids the doubled slash produced by concatenating onto a
# data_dir that already ends in "/", and sorting gives a stable file order
# (glob returns files in arbitrary filesystem order).
csv_files = sorted(glob.glob(os.path.join(data_dir, "**", "*.csv"), recursive=True))

In [52]:
# Load each CSV as a list of rows (first column dropped) and keep only the files
# in which no row carries one of the unsupported markers.
data_list = []
for csv_path in csv_files:
  frame = pd.read_csv(csv_path, header=None)
  rows = frame.drop(columns=frame.columns[0]).values.tolist()
  # A file is rejected as soon as one row has "#" in position 3 or "--1" in
  # position 2 (positions counted after the first column was removed).
  has_unsupported_marker = any(row[3] == "#" or row[2] == "--1" for row in rows)
  if not has_unsupported_marker:
    data_list.append(rows)

In [53]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/validation partition is the same on every run.
SPLIT_SEED = 123


def split_features_labels(sequences):
  """Separate per-row features from the per-file label.

  Each element of `sequences` is one file's rows (a list of lists) whose last
  column holds the label. The label of a file is read from its first row.

  The input is not modified: new row lists are built instead of popping values
  off the originals, so re-running this cell cannot strip an additional column
  from the rows stored in `data_list`.

  Args:
    sequences: list of files, each a non-empty list of rows.

  Returns:
    (features, labels) where `features[i]` is file i's rows without their last
    column and `labels[i]` is the last value of file i's first row.

  Raises:
    IndexError: if a file has no rows or its first row is empty.
  """
  features = [[row[:-1] for row in rows] for rows in sequences]
  labels = [rows[0][-1] for rows in sequences]
  return features, labels


train, valid = train_test_split(data_list, test_size=0.1, random_state=SPLIT_SEED)
train_x, train_y = split_features_labels(train)
valid_x, valid_y = split_features_labels(valid)

In [54]:
# Convert the four nested Python lists into float32 tensors in one pass.
train_x, train_y, valid_x, valid_y = (
    tf.convert_to_tensor(values, dtype=tf.float32)
    for values in (train_x, train_y, valid_x, valid_y)
)

In [55]:
# --- MODEL ---

In [57]:
# GRU NETWORK
# Architecture: two (GRU -> LayerNormalization -> Dropout) blocks that return
# full sequences, a final GRU that returns only its last output, dropout, and a
# 3-unit dense layer producing logits.
GRU_UNITS = 60
DROPOUT_RATE = 0.5
NUM_CLASSES = 3

# A single seeded Glorot-uniform initializer object is shared by all GRU kernels.
# Alternatives that were tried: RandomUniform(seed=123), GlorotNormal(seed=123).
# Regularizers that were considered: tf.keras.regularizers.L1(), L1L2().
initializer = tf.keras.initializers.GlorotUniform(seed=123)

model = tf.keras.models.Sequential()
for block_index in range(2):
    model.add(tf.keras.layers.GRU(units=GRU_UNITS,
                                  return_sequences=True,
                                  kernel_initializer=initializer))
    model.add(tf.keras.layers.LayerNormalization())
    model.add(tf.keras.layers.Dropout(DROPOUT_RATE))
model.add(tf.keras.layers.GRU(units=GRU_UNITS, kernel_initializer=initializer))
model.add(tf.keras.layers.Dropout(DROPOUT_RATE))
model.add(tf.keras.layers.Dense(NUM_CLASSES))

# parameters
# The dense head emits raw logits, hence from_logits=True.
# Loss alternative tried: MeanSquaredError().
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Optimizer alternatives tried: SGD(momentum=0.9), RMSprop(), Adamax(), Adagrad().
opt = tf.keras.optimizers.Adam()
# Batch sizes tried: 5, 10, 15, 20.
batch_size_val = 15

In [None]:
# save model
checkpoint_path = "/content/drive/MyDrive/savedmodel/training_model.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Make sure the target folder exists before the first checkpoint is written.
os.makedirs(checkpoint_dir, exist_ok=True)

# Metric used to decide which epoch is "best". Validation accuracy is used so
# the kept weights are the ones that generalise best; set this to "accuracy"
# to select on training accuracy instead.
CHECKPOINT_MONITOR = "val_accuracy"

# Create a callback that saves the model's weights only when the monitored
# metric improves. The keyword is `save_best_only`; `save_best_model` is not a
# ModelCheckpoint argument, so depending on the Keras version it is either
# ignored (a checkpoint is written every epoch and the file ends up holding the
# last epoch, not the best one) or rejected with a TypeError.
# NOTE(review): newer Keras releases may require a ".weights.h5" suffix for
# weights-only checkpoints - confirm against the installed version before
# changing the path, since the test cell loads from the same location.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor=CHECKPOINT_MONITOR,
    mode="max",
    verbose=1,
    save_weights_only=True,
    save_best_only=True)

# compile model
model.compile(optimizer=opt,
              loss=loss_fn,
              metrics=['accuracy'])

# fit model
epochs = 100
history = model.fit(train_x, train_y,
                    batch_size=batch_size_val,
                    epochs=epochs,
                    callbacks=[cp_callback],
                    validation_data=(valid_x, valid_y))

In [None]:
import matplotlib.pyplot as plt

# summarize history for accuracy
# Labels are attached to each line so the legend cannot drift out of sync with
# the plotting order. The second curve comes from `validation_data`, so it is
# labelled "validation" rather than "test".
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='train')
ax.plot(history.history['val_accuracy'], label='validation')
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [60]:
# TEST

In [None]:
# --- TEST SCRIPT CELL ---
# Rebuilds the network, restores the trained weights, predicts one class per
# CSV file and writes {file name: predicted class} to prediction.json.
import json

# model
# The layer stack has to match the one used for training so that the
# checkpointed weights map onto the same layers.
initializer = tf.keras.initializers.GlorotUniform(seed=123)
model = tf.keras.models.Sequential([
    tf.keras.layers.GRU(units=60, return_sequences=True, kernel_initializer=initializer),
    tf.keras.layers.LayerNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.GRU(units=60, return_sequences=True, kernel_initializer=initializer),
    tf.keras.layers.LayerNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.GRU(units=60, kernel_initializer=initializer),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(3)
])
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = tf.keras.optimizers.Adam()
model.compile(optimizer=opt,
              loss=loss_fn,
              metrics=['accuracy'])

# load model weights
checkpoint_path = "/content/drive/MyDrive/savedmodel/training_model.ckpt"
model.load_weights(checkpoint_path)

# testing
# NOTE(review): this points at the training/validation folder, so the
# predictions below are for files the model may have been trained on - replace
# with the held-out test folder when one is available.
test_directory = "/content/drive/MyDrive/training-validation"
# recursive=True is required for `**` to descend into nested folders; without
# it the pattern only matches one directory level. A separate name is used so
# the notebook-level `csv_files` list is not overwritten.
test_files = sorted(
    glob.glob(os.path.join(test_directory, "**", "*.csv"), recursive=True))
class_names = [0, 1, 2]

# Maps CSV base name -> predicted class.
# NOTE(review): files in different sub-folders that share a base name would
# overwrite each other's entry - confirm that names are unique.
dictionary = {}
for file in test_files:
  file_name = os.path.basename(file)
  frame = pd.read_csv(file, header=None)
  rows = frame.drop(columns=frame.columns[0]).values.tolist()

  # Same filter as for the training data: reject files holding a "#" in
  # position 3 or "--1" in position 2 of any row.
  if any(row[3] == "#" or row[2] == "--1" for row in rows):
    print("Skipping file as contains unsupported character")
    continue

  # The test files carry the label as last column, like the training files;
  # drop it so only the feature columns are fed to the network.
  features = [row[:-1] for row in rows]
  batch = tf.expand_dims(tf.convert_to_tensor(features, dtype=tf.float32), 0)

  logits = model.predict(batch)
  # Softmax is monotonic, so the arg-max of the logits is the predicted class.
  dictionary[file_name] = class_names[int(np.argmax(logits[0]))]

# Writing to prediction.json file
with open("/content/drive/MyDrive/prediction.json", "w") as outfile:
  json.dump(dictionary, outfile, indent=2)