In [9]:
import tensorflow as tf
import numpy as np 
import math as m

In [10]:
class gru_model(tf.keras.Model):
    """Stacked-GRU binary classifier.

    The input sequence is passed through `num_layers` GRU layers; the final
    hidden state of the last GRU is fed through Dense(10) -> ReLU -> Dense(1)
    -> sigmoid, giving one probability per sequence.

    Args:
        rnn_units: sequence of ints, the hidden size of each GRU layer.
        num_layers: number of GRU layers; must equal `len(rnn_units)` and be
            at least 1.

    Raises:
        ValueError: if `num_layers` is smaller than 1 or does not match
            `len(rnn_units)`.
    """

    def __init__(self, rnn_units, num_layers):
        super(gru_model, self).__init__()
        # Validate at construction time instead of with an `assert` inside
        # `call`: asserts are stripped under `python -O`, and with zero layers
        # `call` would otherwise fail on an unbound final state.
        if num_layers < 1:
            raise ValueError(
                'num_layers must be at least 1, got {}'.format(num_layers))
        if num_layers != len(rnn_units):
            raise ValueError(
                'num_layers ({}) must equal len(rnn_units) ({})'.format(
                    num_layers, len(rnn_units)))

        self.num_layers = num_layers
        self.rnn_units = rnn_units
        # Every GRU returns the whole sequence (input to the next GRU) and its
        # final state (used by the classification head after the last GRU).
        self.grus = [tf.keras.layers.GRU(rnn_units[i],
                                         return_sequences=True,
                                         return_state=True)
                     for i in range(num_layers)]

        self.w1 = tf.keras.layers.Dense(10)  # hidden projection of the head
        self.w2 = tf.keras.layers.Dense(1)   # single unit feeding the sigmoid

    def call(self, inputs, training=False):
        """Return one probability per input sequence.

        Args:
            inputs: tensor of shape (batch_size, seq_length, features).
            training: forwarded to the GRU layers.

        Returns:
            Tensor of shape (batch_size, 1) with values in [0, 1].

        Raises:
            ValueError: if `inputs` is not rank 3.
        """
        x = inputs
        if len(x.shape) != 3:
            # Fail loudly here rather than printing a warning and crashing
            # further down with an unrelated error.
            raise ValueError(
                'expected input of shape (batch_size, seq_length, features), '
                'got shape {}'.format(x.shape))
        # Debug trace of the input shape; inside a tf.function this prints
        # only when the function is traced, not on every step.
        print('input dim ({}, {}, {})'.format(x.shape[0], x.shape[1], x.shape[2]))
        whole_seq = x

        ## RNN stack: each layer consumes the previous layer's full sequence
        for gru in self.grus:
            whole_seq, final_s = gru(whole_seq, training=training)

        ## classification head on the last layer's final hidden state
        target = self.w1(final_s)
        target = tf.nn.relu(target)  # non-linearity between the two Dense layers
        target = self.w2(target)

        ### The Dense layer right before the sigmoid must have dimension 1.
        ### The sigmoid keeps the output a probability in [0, 1].
        target = tf.nn.sigmoid(target)

        return target

In [25]:
## if the inputs are [x1, x2], a Dense layer with one output unit computes
## Dense([x1, x2]) == w1*x1 + w2*x2 + b   (b is the bias term, used by default)

In [11]:
def nll(y, y_pred):
    """Binary cross-entropy (negative log-likelihood) of labels `y`.

    `y_pred` is a probability value between 0 and 1. The result is the value
    returned by a freshly built `tf.keras.losses.BinaryCrossentropy` applied
    to `(y, y_pred)`.
    """
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    loss = loss_fn(y, y_pred)
    return loss


In [12]:
def build_graph():
    """Build and return the `tf.function`-compiled training step.

    Calling this also sets the Keras default float type to float32 as a
    global side effect.

    Returns:
        train_step(gru, optimizer, x, y, training=True), which runs one
        forward/backward pass on a batch, applies the gradients with
        `optimizer`, and returns `(predictions, loss)`.
    """

    # NOTE(review): newer TensorFlow releases appear to expose this option as
    # `reduce_retracing` -- confirm against the installed version before renaming.
    @tf.function(experimental_relax_shapes=True)
    def train_step(gru, optimizer, x,y, training=True):
        # A non-persistent tape is enough: gradients are taken exactly once,
        # and the tape's resources are released right after that call.
        with tf.GradientTape() as tape:
            μ = gru(x, training=training)  # predicted probabilities
            xent = nll(y, μ)               # binary cross-entropy loss

        gradients = tape.gradient(xent, gru.trainable_variables)
        optimizer.apply_gradients(zip(gradients, gru.trainable_variables))
        return μ, xent

    tf.keras.backend.set_floatx('float32')
    return train_step

In [13]:
# Synthetic batch for a smoke test: normally distributed values with shape (32, 20, 1).
array = np.random.normal(size = (32, 20, 1))
# Random binary labels, one per sequence in the batch.
y = np.random.choice([0, 1], size = 32)
# Display the sampled labels.
y

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 1])

In [14]:
array.shape # batch_size x seq_len (time steps in the time series) x features

(32, 20, 1)

In [15]:
g_model = gru_model([32], 1) # the list gives the hidden size of each GRU layer; here: one layer with 32 units

In [16]:
g_model(array)
# Output shape: batch_size x 1.
# Each output is a probability between 0 and 1.
# Interpretation: how likely each sequence is to be a coral (vs. not a coral).

input dim (32, 20, 1)


<tf.Tensor: shape=(32, 1), dtype=float32, numpy=
array([[0.52951884],
       [0.51612014],
       [0.52604854],
       [0.5359983 ],
       [0.5121278 ],
       [0.51458424],
       [0.52550614],
       [0.5353785 ],
       [0.5046424 ],
       [0.54036444],
       [0.5136277 ],
       [0.50333095],
       [0.5187199 ],
       [0.5060967 ],
       [0.5012707 ],
       [0.516319  ],
       [0.51963377],
       [0.5215365 ],
       [0.5102375 ],
       [0.51256204],
       [0.5110576 ],
       [0.5048156 ],
       [0.51682305],
       [0.5292926 ],
       [0.53894913],
       [0.5182917 ],
       [0.5086914 ],
       [0.51529205],
       [0.51711893],
       [0.525114  ],
       [0.5144692 ],
       [0.51757175]], dtype=float32)>

In [14]:
# Loss of the untrained model on the random batch (sanity check before training).
nll(y, g_model(array))

input dim (32, 20, 1)


<tf.Tensor: shape=(), dtype=float32, numpy=0.68439996>

In [15]:
# Adam optimizer used by the training step below; 3e-4 is the learning rate.
optimizer = tf.keras.optimizers.Adam(3e-4)

In [17]:
# Smoke-test the training step on freshly sampled random data, pinned to the CPU.
# NOTE: the loop rebinds the notebook-level `array` and `y` on every batch.
with tf.device("/CPU:0"):
    num_epochs = 1
    num_batches = 100
    tr_step = build_graph()  # compiled train step
    for epoch in range(num_epochs):
        for batch in range(num_batches):
            array = np.random.normal(size = (32, 20, 1))# fake data
            y = np.random.choice([0, 1], size = 32)  # fake labels, shape (32,)
            ### Real data would come from something like:
            ### array, y  = batcher_fun(data, training = True) shapes: (batch_s, seq_l, features), (batch_s, 1)
            # The returned predictions and loss are currently not logged or inspected.
            y_pred, xent = tr_step(g_model, optimizer, array, y, training=True)

        ## TODO: evaluate on the validation set
        ## TODO: evaluate on the test set

input dim (32, 20, 1)
