In [32]:
import os
import glob
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf
import time

In [33]:
# Turn off the jedi-based completer for this IPython session.
%config Completer.use_jedi = False

In [34]:


class Timer():
    """
    Minimal stopwatch used to time the training and evaluation phases.
    """
    def __init__(self):
        # None means "not running"; otherwise the perf_counter reading taken by start().
        self._start_time = None

    def start(self):
        """
        Begin (or restart) timing from the current moment.
        """
        self._start_time = time.perf_counter()

    def stop(self):
        """
        Halt the timer and return the seconds elapsed since start().

        When the timer was never started, print a hint and return 0 instead.
        """
        began_at = self._start_time
        if began_at is not None:
            duration = time.perf_counter() - began_at
            self._start_time = None
            return duration

        print("Timer is not running. Use .start() to start it")
        return 0

In [35]:
# NOTE(review): batch_size is not referenced anywhere in the visible cells
# (the dataset is batched with BATCH_SIZE) -- confirm whether it is still needed.
batch_size = 32
day_len = 300  # for gen_data_card()
models = ['GFDL-ESM4','IPSL-CM6A-LR','MPI-ESM1-2-HR']  # models for temp, prec, LAI

# One climate model is drawn at random; its daily temperature and precipitation
# fields are loaded into memory once so the generator can slice them cheaply.
MODEL = np.random.choice(models)
temp_ds = xr.open_mfdataset(
    'data/near_surface_air_temperature/historical/{}/*.nc'.format(MODEL)
).tas.values
prec_ds = xr.open_mfdataset(
    'data/precipitation_flux/historical/{}/*.nc'.format(MODEL)
).pr.values

def gen_data_card():
    """
    Endlessly yield random ``(inputs, outputs)`` samples for training.

    Each sample pairs the ``day_len`` daily steps of temperature and
    precipitation that precede a randomly chosen month with the LAI and NPP
    fields of that month.

    Yields:
        tuple: ``(inputs, outputs)`` where ``inputs`` stacks ``temp`` and
        ``prec`` on a new last axis and ``outputs`` stacks ``lai`` and ``npp``
        on a new last axis.
    """
    first_day = pd.Timestamp('1850-01-01T12')
    n_months = (2014-1850+1)*12

    # Everything below is identical for every sample, so it is done once per
    # generator instead of once per yielded element.
    npp_files = np.array(
        glob.glob('data/net_primary_production_on_land/historical/**/*.nc', recursive=True)
    )

    # The context manager closes the LAI files when the generator is finalised.
    with xr.open_mfdataset('data/leaf_area_index/historical/{}/*.nc'.format(MODEL)) as lai_ds:
        time_index = lai_ds.indexes['time']
        try:
            time_index = time_index.to_datetimeindex()  # cftimeindex to datetime
        except Exception:
            # Best effort: keep the index as-is when it cannot be converted.
            # `Exception` (not a bare except) so Ctrl-C still interrupts.
            pass

        while True:
            # y_pred timepoint in int; leave room for day_len days of history
            output_month_i = np.random.randint(0+day_len//30, n_months)

            # compute day index: output is i-th day in int
            endstamp = time_index[output_month_i]
            output_day_i = (endstamp - first_day).days

            # month-based metrics: index lazily so only the requested month is
            # loaded rather than the whole array
            lai = np.array(lai_ds.lai[output_month_i])

            # NOTE(review): the NPP file is drawn from all models, not only
            # MODEL -- confirm that mixing models is intended.
            with xr.open_mfdataset(np.random.choice(npp_files)) as npp_ds:
                npp = np.array(npp_ds.npp[output_month_i])

            # day-based metrics
            temp = temp_ds[output_day_i-day_len:output_day_i]
            prec = prec_ds[output_day_i-day_len:output_day_i]

            inputs = np.stack((temp,prec), axis=-1)  # two features
            outputs = np.stack((lai,npp), axis=-1)

            yield (inputs, outputs)

In [36]:
BATCH_SIZE = 16

# Wrap the sample generator in a tf.data pipeline and batch it once; both
# splits are the first 100 batches of that batched stream.
ds = tf.data.Dataset.from_generator(
    gen_data_card,
    output_types=(tf.float32, tf.float32),
)
batched_ds = ds.batch(BATCH_SIZE)

train_ds = batched_ds.take(100)

# NOTE(review): val_ds is built from the same generator as train_ds, so it is
# not a held-out split -- confirm this is intended.
val_ds = batched_ds.take(100)


In [37]:
# Sanity check: print the first input sample of the first training batch.
for batch_inputs, _batch_targets in train_ds.take(1):
    print(batch_inputs[0])

tf.Tensor(
[[[[2.3867644e+02 2.3649663e-06]
   [2.3943927e+02 2.2958307e-06]
   [2.4007401e+02 1.3001983e-06]
   ...
   [2.3632162e+02 0.0000000e+00]
   [2.3684244e+02 2.0750745e-06]
   [2.3722217e+02 2.4921312e-06]]

  [[2.4997867e+02 0.0000000e+00]
   [2.4988678e+02 0.0000000e+00]
   [2.5026993e+02 0.0000000e+00]
   ...
   [2.4791400e+02 0.0000000e+00]
   [2.4865665e+02 0.0000000e+00]
   [2.4922894e+02 0.0000000e+00]]

  [[2.6843695e+02 4.7932292e-05]
   [2.7041687e+02 5.4295335e-05]
   [2.6982816e+02 9.3458366e-05]
   ...
   [2.5985541e+02 3.9080294e-07]
   [2.5722461e+02 6.8657118e-07]
   [2.6226096e+02 7.1367213e-06]]

  ...

  [[2.4889444e+02 0.0000000e+00]
   [2.4881516e+02 0.0000000e+00]
   [2.4889247e+02 0.0000000e+00]
   ...
   [2.4671407e+02 0.0000000e+00]
   [2.4815601e+02 0.0000000e+00]
   [2.4851987e+02 0.0000000e+00]]

  [[2.4597534e+02 0.0000000e+00]
   [2.4625250e+02 0.0000000e+00]
   [2.4643192e+02 1.3536102e-06]
   ...
   [2.4510458e+02 0.0000000e+00]
   [2.4520667e+

In [38]:
class ConvLSTM(tf.keras.Model):
    """
    Four ConvLSTM2D + BatchNormalization stages followed by a Conv3D head.

    Every recurrent layer is created with ``return_sequences=True``, and the
    final Conv3D layer has two filters and a ReLU activation.

    NOTE(review): gen_data_card yields targets for a single month, while this
    model is built from sequence-returning layers -- confirm that prediction
    and target shapes are compatible with the loss.
    """
    def __init__(self, num_filters):
        super().__init__()

        def make_recurrent_layer():
            # All four recurrent layers share the same hyper-parameters.
            return tf.keras.layers.ConvLSTM2D(
                filters=num_filters,
                kernel_size=(3, 3),
                padding="same",
                return_sequences=True,
            )

        # Layers are created in the order they are applied in call().
        self.convlstm2D_1 = make_recurrent_layer()
        self.bn_1 = tf.keras.layers.BatchNormalization()

        self.convlstm2D_2 = make_recurrent_layer()
        self.bn_2 = tf.keras.layers.BatchNormalization()

        self.convlstm2D_3 = make_recurrent_layer()
        self.bn_3 = tf.keras.layers.BatchNormalization()

        self.convlstm2D_4 = make_recurrent_layer()
        self.bn_4 = tf.keras.layers.BatchNormalization()

        self.conv3d = tf.keras.layers.Conv3D(
            filters=2,
            kernel_size=(3, 3, 3),
            activation="relu",
            padding="same",
        )

    def call(self,x,training):
        """
        Run ``x`` through the four recurrent stages and the Conv3D head.

        ``training`` is forwarded to every ConvLSTM2D and BatchNormalization
        layer; the Conv3D head is called without it.
        """
        stages = (
            (self.convlstm2D_1, self.bn_1),
            (self.convlstm2D_2, self.bn_2),
            (self.convlstm2D_3, self.bn_3),
            (self.convlstm2D_4, self.bn_4),
        )
        for recurrent, norm in stages:
            x = recurrent(x, training=training)
            x = norm(x, training=training)

        return self.conv3d(x)

In [39]:
#@tf.function
def train_step(model, data, loss_function, optimizer, train_loss_metric, train_acc_metric):
    '''
    Training for one epoch.

    Args:
        model: callable model; invoked as ``model(img, training=True)``.
        data: iterable of ``(img, target)`` batches to train on.
        loss_function: callable ``loss_function(target, prediction)``.
        optimizer: optimizer whose ``apply_gradients`` updates the model.
        train_loss_metric: metric updated with each batch loss.
        train_acc_metric: metric updated with ``(target, prediction)``.
    '''
    # Iterate the dataset that was passed in, not the notebook-global train_ds,
    # so the function trains on whatever the caller supplies.
    for img, target in data:
        # forward pass with GradientTape
        with tf.GradientTape() as tape:
            prediction = model(img, training=True)
            # model.losses holds the layers' extra (e.g. regularisation) losses
            loss = loss_function(target, prediction) + tf.reduce_sum(model.losses)

        # backward pass via GradientTape (auto-gradient calc)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # update metrics
        train_loss_metric.update_state(loss)
        train_acc_metric.update_state(target, prediction)
        
        
def eval_step(model, ds, loss_function, loss_metric, acc_metric):
    '''
    Run the model over every batch of ``ds`` in inference mode and feed the
    resulting loss and predictions into the given metrics.
    '''
    for batch_inputs, batch_targets in ds:
        # forward pass without training-time behaviour
        batch_pred = model(batch_inputs, training=False)

        # record loss first, then the accuracy-style metric
        loss_metric.update_state(loss_function(batch_targets, batch_pred))
        acc_metric.update_state(batch_targets, batch_pred)

In [41]:
months = 10
# Daily time steps per sample, using the same 30-day-month convention as
# gen_data_card (day_len // 30). Kept as an int: a float such as months*30.5
# is not a valid tensor dimension.
timesteps = months * 30

# Shape: batch, timesteps (in days), 72, 36, 2 (temperature & precipitation)
# NOTE(review): the original comment labels 72/36 as latitudes/longitudes --
# confirm the axis order and sizes against the loaded data.
input_shape = (16, timesteps, 72, 36, 2)

model = ConvLSTM(num_filters=5)

# Build with the declared input shape instead of a separate hard-coded one.
model.build(input_shape)
model.summary() # shows number of parameters

Model: "conv_lstm_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d_4 (ConvLSTM2D)  multiple                  1280      
_________________________________________________________________
batch_normalization_4 (Batch multiple                  20        
_________________________________________________________________
conv_lst_m2d_5 (ConvLSTM2D)  multiple                  1820      
_________________________________________________________________
batch_normalization_5 (Batch multiple                  20        
_________________________________________________________________
conv_lst_m2d_6 (ConvLSTM2D)  multiple                  1820      
_________________________________________________________________
batch_normalization_6 (Batch multiple                  20        
_________________________________________________________________
conv_lst_m2d_7 (ConvLSTM2D)  multiple                  

In [42]:
import datetime

In [43]:
# --- hyper-parameters ---------------------------------------------------------
epochs = 30
learning_rate = 0.0003

# --- model, loss and optimizer --------------------------------------------------
model = ConvLSTM(num_filters = 10)
loss_function = tf.keras.losses.MSE
optimizer = tf.keras.optimizers.Adam(learning_rate)

timer = Timer()

# --- metrics --------------------------------------------------------------------
# NOTE(review): CategoricalAccuracy is paired with an MSE loss here -- confirm
# that an accuracy metric is meaningful for these targets.
train_acc_metric = tf.keras.metrics.CategoricalAccuracy('train_accuracy')
val_acc_metric = tf.keras.metrics.CategoricalAccuracy('val_accuracy')

train_loss_metric = tf.keras.metrics.Mean('train_loss')
val_loss_metric = tf.keras.metrics.Mean('val_loss')

# --- TensorBoard writers --------------------------------------------------------
# One timestamped run directory with separate train/val sub-directories.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_root = 'logs/gradient_tape/' + current_time
train_log_dir = log_root + '/train_ConvLSTM'
val_log_dir = log_root + '/val_ConvLSTM'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
val_summary_writer = tf.summary.create_file_writer(val_log_dir)

# per-epoch durations, filled in by the training loop
times = []

In [44]:
# Main loop: one training pass and one evaluation pass per epoch, with both
# sets of metrics logged for TensorBoard.
for epoch in range(epochs):
    print(f'\n[EPOCH] ____________________{epoch}____________________')
    
    # training step with metrics update--------------------------------------------------------
    timer.start()

    train_step(model, train_ds, loss_function, optimizer, train_loss_metric, train_acc_metric)

    # Evaluating training metrics
    train_loss = train_loss_metric.result()
    train_acc = train_acc_metric.result()
    
    with train_summary_writer.as_default():     # logging our metrics to a file which is used by tensorboard
        tf.summary.scalar('loss', train_loss, step=epoch)
        tf.summary.scalar('accuracy', train_acc, step=epoch)

    train_time = timer.stop()
    
    print(f'[{epoch}] - Finished Epoch in {train_time:0.2f} seconds - train_loss: {train_loss:0.4f}, train_acc: {train_acc:0.4f}')
    
    # evaluation step with metrics update--------------------------------------------------------
    timer.start()

    eval_step(model, val_ds, loss_function, 
              loss_metric=val_loss_metric, 
              acc_metric=val_acc_metric)

    # Evaluating validation metrics
    val_loss = val_loss_metric.result()
    val_acc = val_acc_metric.result()
    
    with val_summary_writer.as_default():       # logging our metrics to a file which is used by tensorboard
        tf.summary.scalar('loss', val_loss, step=epoch)
        tf.summary.scalar('accuracy', val_acc, step=epoch)
    
    #print(f'\n[{epoch}] - Finished evaluation - val_loss: {val_loss:0.4f}, val_accuracy: {val_acc:0.4f}')
    
    # Resetting train and validation metrics-----------------------------------------------------
    train_acc_metric.reset_states()
    val_acc_metric.reset_states()
    train_loss_metric.reset_states()
    val_loss_metric.reset_states()
    
    eval_time = timer.stop()

    # Record the full epoch duration (training + evaluation). Appending only
    # the evaluation time would make the totals below ignore training time.
    times.append(train_time + eval_time)
  
    if epoch%3 == 0:
        print(f'\n[INFO] - Total time elapsed: {np.sum(times)/60:0.4f} min. Total time remaining: {(np.sum(times)/(epoch+1))*(epochs-epoch-1)/60:0.4f} min.')

print(f'[INFO] - Total run time: {np.sum(times)/60:0.4f} min.')


[EPOCH] ____________________0____________________


ResourceExhaustedError: OOM when allocating tensor with shape[16,36,72,10] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Minimum]