In [9]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [15]:
#generate the dataset
def generate_data(numb_samples, seq_length):
  targets = [tf.one_hot(np.random.randint(low=0, high=10, size = 2),10) for i in range(numb_samples)]
  sequences = [tf.one_hot(np.random.randint(low=0,high=10, size= seq_length),10) for i in range(numb_samples)]
  
  ground_truth = []
  for target,seq in zip(targets,sequences):
    ground_truth.append(calculate_target(targets, sequences))

  sequences = tf.data.Dataset.from_tensor_slices(sequences)
  targets = tf.data.Dataset.from_tensor_slices(targets)
  ground_truth = tf.data_Dataset.from_tensor_slices(ground_truth)

  dataset = tf.data.Dataset.zip((sequences, targets, ground_truth))

  dataset = dataset.batch(16)
  dataset = dataset.shuffle(16)
  dataset = dataset.prefetch(8)

  print(dataset)
  return dataset

In [None]:
def calculate_target(target_options, sequence):
  first_target = np.sum(sequence == target_options[0])
  second_target = np.sum(sequence == target_options[1])

  #if the first target number appears more often or equally often as the second we will return 0
  larger_sum = 0
  if first_target < second_target:
    larger_sum = 1

  return larger_sum

In [13]:
#LSTM Cell
class LSTMCell(tf.keras.layers.Layer):
  def __init__(self, units):
    super(LSTMCell, self).__init__()

    self.cell_state = tf.zeros(shape= (BATCHSIZE, units=500))
    self.hidden_state = tf.zeros(shape = (BATCHSIZE, units=500))

    #gates as dense layers with respective activation function
    self.forget_gate = tf.keras.layers.Dense(units = 2 , activation = tf.keras.activations.sigmoid, bias_initializer='ones')
    self.input_gate = tf.keras.layers.Dense(units = 2, activation = tf.keras.activations.sigmoid)
    self.output_gate = tf.keras.layers.Dense(units = 2, activation = tf.keras.activations.sigmoid)
    
    #cell state candidates with tanh
    self.candidates = tf.keras.layers.Dense(units = 2, activation = tf.keras.activations.tanh)

    self.tanh = tf.keras.layers.tanh()

  def call(self, input,hidden_state, cell_state):
    #concatenate input and hidden state
    concat_input = tf.concat(hidden_state,input)

    #forget part of cell state
    cell_state = cell_state * self.forget_gate(concat_input)

    #compute update for cell state
    update = self.input_gate(concat_input) * self.candidates(concat_input)
    #use input gate and candidates to update cell state
    cell_state = cell_state + update

    #compute new hidden state
    hidden_state = ouput_gate(concat_input) * self.tanh(cell_State)

    #ouput the hidden state
    return hidden_state

In [None]:
class LSTM(tf.keras.layers.Layer):
  def __init__(self):
    super(LSTM, self).__init__()
    self.read_in = 
    self.cell = LSTMCell()
   
  def call(self, x):
    #inititialize cell_state
    cell_state = tf.zeros()
    hidden_state = tf.zeros()

    #sequence and query concatenation
    hidden_state, cell_state = self.cell(x, hidden_state, cell_state)

    return x

In [None]:
class Model(tf.keras.Model):
  def __init__(self):
    super(Model,self).__init__()
    self.input_layer = #?
    self.lstm = LSTM()
    self.flatten = tf.keras.layers.Flatten()
    #output which of the target numbers is more likely
    self.read_out = tf.keras.layers.Dense(units=1, activation=tf.keras.activations.softmax)

  def call(self, x):
    x = self.input_layer(x)
    x = self.flatten(x)
    x = self.lstm(x)
    x = self.read_out(x)

    return x

In [None]:
def train_step(model, data, queries, target, loss_function, optimizer):
  
  with tf.GradientTape() as tape:
    prediction = model(data,queries)

    #calculate the loss as a sum of the SGD to minimize prediction error and l2 regularization (penalize large weights)
    loss = loss_function(prediction, target)
    #calculate the accuracy by moving along the vector of targets (per input)
    # comparing the correct target value to the class with the highest prediction
    accuracy = np.sum(np.argmax(target, axis=1) == np.argmax(prediction, axis=1)) / target.shape[0]
    #calculate the gradients for the weights with respect to the loss
    gradients = tape.gradient(loss, model.trainable_variables)
  
  #update the weights
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  return loss, accuracy

In [None]:
#testing a batch
def test(model, data, loss_function):
  test_losses = []
  test_accuracies = []

  #iterate over image, label tuples in the batch
  for (input,queries, target) in data:
    #compute the prediction for the model (forward pass)
    prediction = model(input,queries)
    #compute the loss of the model with the loss function 
    loss = loss_function(prediction,target)
    
    #compare the real to the predicted label 
    #the predicted label is the category with the highest probability
    accuracy = (np.argmax(prediction, axis=1) == np.argmax(target,axis=1))
  
    #add the computed values to the aggregation lists
    test_losses.append(loss.numpy())
    #before adding the accuracy we take it's mean to 
    test_accuracies.append(np.mean(accuracy))

  t_loss = np.mean(test_losses)
  t_accuracy = np.mean(test_accuracies)

  return t_loss, t_accuracy

In [16]:
#train
tf.keras.backend.clear_session()

#create data
train_data = generate_data(60000,30)
test_data = generate_data(1000,30)

#model
model = Model()

#Define hyperparameters
#How many training epochs do we perform
epochs = 30
#define the learning rate which influences the magnitude with which we update the models parameters
learning_rate = 0.001
loss_function = tf.keras.losses.BinaryCrossEntropy()
optimizer = tf.keras.optimizers.Adam(learning_rate)

running_average_factor = 0.95

steps = []
train_losses = []
train_accuracy = []
test_losses = []
test_accuracy = []

for epoch in range(epochs):
  print(epoch, ". epoch --------------------------------------------------------------------------------")
  steps.append(epoch)


  start = time.time()
  original_loss = 0
  for data,queries,target in train_data:
    train_loss,train_accuracy = train_step(model, data, queries, target, loss_function, optimizer)

    original_loss = running_average_factor * original_loss + (1-running_average_factor) * train_loss
    original_acc = running_average_factor * original_acc + (1-running_average_factor) * train_accuracy

  train_losses.append(original_loss)
  train_accuracy.append(original_acc)

  test_loss,test_accuracy = test(model,test_data, loss_function)
  test_losses.append(test_loss)
  test_accuracies.append(test_accuracy)

  print(f"the training step and test evaluation took {timing(start)} seconds")
  #some sort of plotting
  print("train_loss", np.mean(train_losses))
  print("test_loss", test_loss.numpy())

<PrefetchDataset shapes: ((None, 50, 10), (None, 2, 10)), types: (tf.float32, tf.float32)>


NameError: ignored

In [None]:
import matplotlib.pyplot as plt

#do the visualization
#test loss and training loss
plt.figure()
line1, = plt.plot(train_losses)
line2, = plt.plot(test_losses)
plt.xlabel("Training steps")
plt.ylabel("Loss")
plt.legend((line1,line2),("training","test"))
plt.show()

#test accuracy and training accuracy
plt.figure()
line1, = plt.plot(train_accuracies)
line2, = plt.plot(test_accuracies)
plt.xlabel("Training steps")
plt.ylabel("Test/Training accuracy")
plt.legend((line1,line2),("training","test"))
plt.show()