In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../")

In [8]:
from pathlib import Path
import xarray as xa
import numpy as np
import math as m
import tensorflow as tf

#issues with numpy deprecation in pytorch_env
from coralshift.processing import data

## Load in data

In [4]:
# load in xa_array
array_path = Path("/Volumes/MRes Drive/reef_baseline/Allen_atlas/Great-Barrier-Reef-and-Torres-Strait-20230310013521/Processed/coral_climate_1_12.nc")
xa_coral_climate_1_12 = xa.open_dataset(array_path)

In [34]:
# return only parameter values (exclude target and spatial reference)
parameter_names = list(set(list(xa_coral_climate_1_12.data_vars))-set(['spatial_ref', 'coral_algae_1-12_degree']))
xa_coral_climate_1_12_features = xa_coral_climate_1_12[parameter_names]

In [268]:
# create 3D array from xarray dataset variables. Shape: (num_samples, num_parameters, sequence_len)
X = data.xa_ds_to_3d_numpy(xa_coral_climate_1_12_features)
print(f'X shape (num_samples: {X.shape[0]}, num_parameters: {X.shape[1]}, sequence_len: {X.shape[2]})')

100%|██████████| 11/11 [00:00<00:00, 12024.33it/s]


X shape (num_samples: 25619, num_parameters: 11, sequence_len: 336)


#### Remove observations for which there are nan values

These are likely all those containing land. Would be a good thing to investigate, however.

In [269]:
# filter out columns that contain entirely NaN values
col_mask = ~np.all(np.isnan(X), axis=(0,2)) # boolean mask indicating which columns to keep
masked_cols = X[:, col_mask, :] # keep only the columns that don't contain entirely NaN values
masked_cols.shape

(25619, 9, 336)

In [270]:
# filter out all rows which contain any NaN values
row_mask = ~np.any(np.isnan(masked_cols), axis=2) # boolean mask indicating which rows to keep
masked_cols_rows = masked_cols[row_mask[:,0], :, :] # keep only the rows that don't contain any NaN values
masked_cols_rows.shape

(14766, 9, 336)

In [276]:
# filter out all depths which contain any NaN values
depth_mask = ~np.any(np.isnan(masked_cols_rows), axis=(0,1)) # boolean mask indicating which depths to keep
X = masked_cols_rows[:, :, depth_mask] # keep only the depths that don't contain any NaN values
X.shape

(14766, 9, 276)

In [272]:
# create target from coral ground truth. Shape: (num_samples, 1)
# TODO: not sure if this is shuffling the values when reshaping
y_with_nans = np.array(xa_coral_climate_1_12["coral_algae_1-12_degree"].sel(
    time=xa_coral_climate_1_12.time[-1])).reshape(-1, 1)
# remove ys with nan values in other variables
y = y_with_nans[row_mask[:,0]]

print(f'y shape: {y.shape}')

y shape: (14766, 1)


In [252]:
y_with_nans.shape

(25619, 1)

## GRU function definitions 

In [19]:
# Define Gated Recurrent Unit model class in TensorFlow
class gru_model(tf.keras.Model):
    # initialise class instance to define layers of the model
    def __init__(self, rnn_units: list[int], num_layers: int):
        """Sets up a GRU model architecture with multiple layers and dense layers for mapping the outputs of the GRU 
        layers to a desired output shape

        Parameters
        ----------
        rnn_units (list[int]): list containing the number of neurons to use in each layer
        num_layers (int): number of layers in GRU model
        """
        super(gru_model, self).__init__()   # initialise GRU model as subclass of tf.keras.Model
        # store values for later use
        self.num_layers = num_layers    # number of layers in GRU model
        self.rnn_units = rnn_units
        # define model layers: reating new `tf.keras.layers.GRU` layer for each iteration
        self.grus = [tf.keras.layers.GRU(rnn_units[i],  # number (integer) of rnn units/neurons to use in each model layer
                                   return_sequences=True,   # return full sequence of outputs for each timestep
                                   return_state=True) for i in range(num_layers)] # return last hidden state of RNN at end of sequence
        
        # dense layers are linear mappings of RNN layer outputs to desired output shape
        self.w1 = tf.keras.layers.Dense(10) # 10 units
        self.w2 = tf.keras.layers.Dense(1)  # 1 unit (dimension 1 required before final sigmoid function)


    def call(self, inputs: np.ndarray, training: bool=False):
        """Processes an input sequence of data through several layers of GRU cells, followed by a couple of
        fully-connected dense layers, and outputs the probability of an event happening.
        
        Parameters
        ----------
        inputs (np.ndarray): input tensor of shape (batch_size, seq_length, features)
            batch_size - defines the size of the sample drawn from datapoints
            seq_length - number of timesteps in sequence
            features - number of features associated with each datapoint
        training (bool, defaults to False): True if model is in training, False if in inference mode

        Returns
        -------
        target: probability of an event occuring, with shape (batch_size, 1)
        """
        # input shape: (batch_size, seq_length, features)
       
        assert self.num_layers == len(self.rnn_units)

        # check that input tensor has correct shape
        if (len(inputs.shape) != 3):
            print(f"Incorrect shape of input tensor. Expected 3D array. Recieved {len(inputs.shape)}D array.")

        # print('input dim ({}, {}, {})'.format(inputs.shape[0], inputs.shape[1], inputs.shape[2]))
        whole_seq = inputs

        # iteratively passes input tensor to GRU layers, overwritting preceding sequence 'whole_seq'
        for layer_num in range(self.num_layers):
            whole_seq, final_s = self.grus[layer_num](whole_seq, training=training)

        # adding extra layers
        target = self.w1(final_s)   # final hidden state of last layer used as input to fully connected dense layers...
        target = tf.nn.relu(target) # via ReLU activation function
        target = self.w2(target)    # final hidden layer must have dimension 1 
        
        # obtain a probability value between 0 and 1
        target = tf.nn.sigmoid(target)
        
        return target

    
def negative_log_likelihood(y: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute binary cross-entropy loss between ground-truth binary labels and predicted probabilities.
    N.B. Binary cross-entropy loss defined as the negative log likelihood of the binary labels given the predicted
    probabilities
    
    Parameters
    ----------
    y (np.ndarray): true binary labels, where 0 represents the negative class
    y_pred (np.ndarray): predicted labels (as probability value between 0 and 1)

    Returns
    -------
    float: negative log likelihood loss computed using binary cross-entropy loss between 'y' and 'y_pred'
    """
    bce = tf.keras.losses.BinaryCrossentropy()    
    return bce(y, y_pred)


def build_graph():
    
    # compile function as graph using tf's autograph feature: leads to faster execution times, at expense of limitations
    # to Python objects/certain control flow structures (somewhat relaxed by experimental_relax_shapes)
    @tf.function(experimental_relax_shapes=True)
    def train_step(gru: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, X: np.ndarray, y: np.ndarray, training: bool=True) -> tuple[np.ndarray, float]:
        """Train model using input `X` and target data `y` by computing gradients of the loss (via 
        negative_log_likelihood)
        
        Parameters
        ----------
        y (np.ndarray): true binary labels, where 0 represents the negative class
        y_pred (np.ndarray): predicted labels (as probability value between 0 and 1)

        Returns
        -------
        float: negative log likelihood loss computed using binary cross-entropy loss between 'y' and 'y_pred'
        """
        with tf.GradientTape(persistent=True) as tape:
            y_pred = gru(X, training) 
            xent = negative_log_likelihood(y, y_pred)
        
        gradients = tape.gradient(xent, gru.trainable_variables)
        optimizer.apply_gradients(zip(gradients, gru.trainable_variables))
        # return predicted output values and loss value
        return y_pred, xent

    # set default float type
    tf.keras.backend.set_floatx('float32')
    return train_step

### Test GRU functions

In [279]:
# initialise GRU model with 32 hidden layers, one GRU unit per layer 
g_model = gru_model([32], 1) # N.B. [x] is number of hidden layers in GRU network

In [280]:
# check that untrained model runs (should output array of non-nan values)
g_model(X[:32])

<tf.Tensor: shape=(32, 1), dtype=float32, numpy=
array([[0.8274484 ],
       [0.8210475 ],
       [0.81479585],
       [0.81394243],
       [0.8120978 ],
       [0.8118105 ],
       [0.8142073 ],
       [0.81293863],
       [0.80910945],
       [0.809272  ],
       [0.81085116],
       [0.80866545],
       [0.8067812 ],
       [0.8091517 ],
       [0.80911815],
       [0.8099028 ],
       [0.812065  ],
       [0.81272423],
       [0.8104135 ],
       [0.8155005 ],
       [0.8172883 ],
       [0.8165895 ],
       [0.830492  ],
       [0.8475363 ],
       [0.85871553],
       [0.86065084],
       [0.8597975 ],
       [0.8632064 ],
       [0.8536532 ],
       [0.8515766 ],
       [0.85629034],
       [0.8505629 ]], dtype=float32)>

In [283]:
# check log likelihood is computable
negative_log_likelihood(y[:32], g_model(X[:32]))

<tf.Tensor: shape=(), dtype=float32, numpy=1.7530972>

In [284]:
# initialize optimiser: will need hyperparameter scan for learning rate and others
# https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam
optimizer = tf.keras.optimizers.Adam(3e-4)

## Train and test GRU

In [None]:
# define batcher function (by space and time)

In [287]:
with tf.device("/CPU:0"):
    num_epochs = 1
    num_batches = 100
    tr_step = build_graph()
    for epoch in range(num_epochs):
        for batch in range(num_batches):
            ### array, y  = batcher_fun(data, training = True) shapes: (batch_s, seq_l, features), (batch_s, 1)
            y_pred, xent = tr_step(g_model, optimizer, X[:32], y, training=True)
            
         ## validation set 
         ## test_set 

In [288]:
y_pred

<tf.Tensor: shape=(32, 1), dtype=float32, numpy=
array([[0.49866262],
       [0.49958506],
       [0.50092477],
       [0.5012568 ],
       [0.5009623 ],
       [0.5004142 ],
       [0.49979192],
       [0.49927443],
       [0.49924508],
       [0.49662614],
       [0.49602753],
       [0.49631017],
       [0.496643  ],
       [0.49772102],
       [0.4969528 ],
       [0.49577612],
       [0.4951186 ],
       [0.49473235],
       [0.49461022],
       [0.4947742 ],
       [0.4947489 ],
       [0.49511108],
       [0.49604115],
       [0.49770057],
       [0.4991103 ],
       [0.50082684],
       [0.5017571 ],
       [0.50299186],
       [0.504737  ],
       [0.5050446 ],
       [0.5037324 ],
       [0.5030605 ]], dtype=float32)>