In [16]:
import os
import glob
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf

In [9]:
%config Completer.use_jedi = False

In [18]:
batch_size = 32
time_len = 10  # how long each training sample should be, in months
models = ['GFDL-ESM4','IPSL-CM6A-LR','MPI-ESM1-2-HR']  # models for temp, prec, LAI

def gen_data_card():  
    model = np.random.choice(np.array(models))  # which of 3 models to choose from
    
    # MONTHLY PICK
    start_year = np.random.randint(1850,2014+1) # randomly select a start year of a time slice
    start_month = np.random.randint(1,12+1)
    
    end_year = start_year + ((start_month+time_len-1) // 12)
    end_month = (start_month+time_len) % 12
    
    if end_month == 0:
        end_month = 12
        
    month_index_start = (start_year-1850)*12 + start_month  # convert date into index with 01-1850 as 0
    month_index_end = month_index_start + time_len
    
    # select appropriate time slices
    temp = xr.open_mfdataset('near_surface_air_temperature/historical/{}/*.nc'.format(model))
    temp = temp.tas.loc["{}-{}-16".format(start_year, start_month):"{}-{}-16".format(end_year, end_month)]  
    
    prec = xr.open_mfdataset('precipitation_flux/historical/{}/*.nc'.format(model))
    prec = prec.pr.loc["{}-{}-16".format(start_year, start_month):"{}-{}-16".format(end_year, end_month)]  
    
    lai = xr.open_mfdataset('leaf_area_index/historical/{}/*.nc'.format(model))
    lai = np.array(lai.lai)[month_index_start:month_index_end]
          
    # TODO: currently select randomly, but averaging or using only one is also an option
    npp_files = glob.glob('net_primary_production_on_land/historical/**/*.nc', recursive=True) 
    npp = xr.open_mfdataset(np.random.choice(np.array(npp_files)))
    npp = np.array(npp.npp)[month_index_start:month_index_end]
                
    # concatanate data
    inputs = np.array(xr.concat((temp,prec), dim='lat'))  # two maps next to each other
    outputs = np.concatenate((lai,npp), axis=1)
    
    yield(inputs, outputs)

In [19]:
train_ds = tf.data.Dataset.from_generator(gen_data_card,output_types = (tf.float32,tf.float32))

In [20]:
for i in train_ds.take(1):
    print(i)

index: 1853 1863
MPI-ESM1-2-HR 2004 5 2005 3
(<tf.Tensor: shape=(305, 72, 72), dtype=float32, numpy=
array([[[2.29741730e+02, 2.30782852e+02, 2.32843491e+02, ...,
         2.26571136e+02, 2.27270660e+02, 2.28691040e+02],
        [2.51899918e+02, 2.49397278e+02, 2.46977905e+02, ...,
         2.52794281e+02, 2.55649048e+02, 2.54411560e+02],
        [2.47973114e+02, 2.45883698e+02, 2.44359314e+02, ...,
         2.51481918e+02, 2.47000549e+02, 2.47164154e+02],
        ...,
        [0.00000000e+00, 2.43030081e-06, 2.77330400e-06, ...,
         1.46517868e-06, 1.73286833e-06, 1.56242186e-06],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [1.73843864e-06, 2.09214227e-06, 1.88455954e-06, ...,
         8.90273441e-07, 0.00000000e+00, 1.59804267e-06]],

       [[2.29186951e+02, 2.29513275e+02, 2.30194580e+02, ...,
         2.26919373e+02, 2.27798615e+02, 2.28777176e+02],
        [2.47875809e+02, 2.47096481e+02, 2.4

In [6]:
class ConvLSTM(tf.keras.Model):
    def __init__(self, num_filters,months):
        super(ConvLSTM, self).__init__()
        
        
        self.convlstm2D_1 = tf.keras.layers.ConvLSTM2D(filters = num_filters, kernel_size=(3,3),
                                                     padding="same",return_sequences=True)
        
        
        self.bn_1 = tf.keras.layers.BatchNormalization()
        
        self.convlstm2D_2 = tf.keras.layers.ConvLSTM2D(filters = num_filters, kernel_size=(3,3),
                                                     padding="same",return_sequences=True)

        self.bn_2 = tf.keras.layers.BatchNormalization()


        self.convlstm2D_3 = tf.keras.layers.ConvLSTM2D(filters = num_filters, kernel_size=(3,3),
                                                     padding="same",return_sequences=True)

        self.bn_3 = tf.keras.layers.BatchNormalization()
        
        self.convlstm2D_4 = tf.keras.layers.ConvLSTM2D(filters = num_filters, kernel_size=(3,3),
                                                     padding="same",return_sequences=True)

        self.bn_4 = tf.keras.layers.BatchNormalization()
        
        self.conv3d = tf.keras.layers.Conv3D(filters = 2, kernel_size = (3,3,3), 
                                             activation= "relu", padding="same")
        
        #self.bottleneck = tf.keras.layers.Conv2D(filters = months, kernel_size=1,
        #                                        strides = 1, activation = "relu",
        #                                        padding ="same")
        

    def call(self,x,training):
        
        x = self.convlstm2D_1(x)
        x = self.bn_1(x,training)
        x = self.convlstm2D_2(x)
        x = self.bn_2(x,training)
        x = self.convlstm2D_3(x)
        x = self.bn_3(x,training)
        x = self.convlstm2D_4(x)
        x = self.bn_4(x, training)
        x = self.conv3d(x)
        
        # bottleneck (change time_step dim to be channel dimension so we can use the bottleneck)
        #x = tf.transpose(x, [0,4,2,3,1])
        #x = self.bottleneck(x)
        # change back to desired dimensions
        #x = tf.transpose(x, [0,4,2,3,1])
        
        return x

In [7]:
@tf.function
def train_step(model, data, loss_function, optimizer, train_loss_metric, train_acc_metric):
    '''
    Training for one epoch.
    '''
    for img, target in train_ds:
        # forward pass with GradientTape
        with tf.GradientTape() as tape:
            prediction = model(img, training=True)
            tf.keras.layers.Flatten()
            loss = loss_function(target, prediction) + tf.reduce_sum(model.losses)

        # backward pass via GradienTape (auto-gradient calc)
        gradients = tape.gradient(loss_reg, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # update metrics
        train_loss_metric.update_state(loss)
        train_acc_metric.update_state(target, prediction)

In [8]:
CONV_FILTERS = 40
months = 10

#Shape: None(unspecified) batches, timesteps(in days), 72 (latitudes), 36 (longitudes), 2(temperature&precipitation)
input_shape = (16, months*30.5, 72, 36, 2)

model = ConvLSTM(num_filters = 40, months = 10)

model.build((16,120,72,36,2))
model.summary() # shows number of parameters

Model: "conv_lstm_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d_4 (ConvLSTM2D)  multiple                  60640     
_________________________________________________________________
batch_normalization_4 (Batch multiple                  160       
_________________________________________________________________
conv_lst_m2d_5 (ConvLSTM2D)  multiple                  115360    
_________________________________________________________________
batch_normalization_5 (Batch multiple                  160       
_________________________________________________________________
conv_lst_m2d_6 (ConvLSTM2D)  multiple                  115360    
_________________________________________________________________
batch_normalization_6 (Batch multiple                  160       
_________________________________________________________________
conv_lst_m2d_7 (ConvLSTM2D)  multiple                  

In [1]:
epochs = 25
learning_rate = 0.0003

loss_function = tf.keras.losses.MSE
optimizer = tf.keras.optimizers.Adam(learning_rate) 

train_acc_metric = tf.keras.metrics.CategoricalAccuracy('train_accuracy')
test_acc_metric = tf.keras.metrics.CategoricalAccuracy('test_accuracy')

train_loss_metric = tf.keras.metrics.Mean('train_loss')
test_loss_metric = tf.keras.metrics.Mean('test_loss')

# initialize the logger for Tensorboard visualization
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/gradient_tape/' + current_time + '/train_ResNet'      # defining the log dir
test_log_dir = 'logs/gradient_tape/' + current_time + '/test_ResNet'        # defining the log dir
train_summary_writer = tf.summary.create_file_writer(train_log_dir)  # training logger
test_summary_writer = tf.summary.create_file_writer(test_log_dir)    # test logger

NameError: name 'tf' is not defined

In [None]:
print(f'train_loss: {train_loss:0.4f}, train_acc: {train_acc:0.4f}, test_loss: {test_loss:0.4f}, test_acc: {test_acc:0.4f}')

for epoch in range(epochs):
    print(f'\n[EPOCH] ____________________{epoch}____________________')
    
    # training step with metrics update--------------------------------------------------------
    timer.start()

    train_step(model, train_ds, loss_function, optimizer, train_loss_metric, train_acc_metric)

    # Evaluating training metrics
    train_loss = train_loss_metric.result()
    train_acc = train_acc_metric.result()
    
    with train_summary_writer.as_default():     # logging our metrics to a file which is used by tensorboard
        tf.summary.scalar('loss', train_loss, step=epoch)
        tf.summary.scalar('accuracy', train_acc, step=epoch)

    
    elapsed_time = timer.stop()
    
    print(f'[{epoch}] - Finished Epoch in {elapsed_time:0.2f} seconds - train_loss: {train_loss:0.4f}, train_acc: {train_acc:0.4f}')
    
    # evaluation step with metrics update--------------------------------------------------------
    timer.start()

    eval_step(model, val_ds, loss_function, 
              loss_metric=val_loss_metric, 
              acc_metric=val_acc_metric)

    # Evaluating validation metrics
    val_loss = val_loss_metric.result()
    val_acc = val_acc_metric.result()
    
    with test_summary_writer.as_default():       # logging our metrics to a file which is used by tensorboard
        tf.summary.scalar('loss', val_loss, step=epoch)
        tf.summary.scalar('accuracy', val_acc, step=epoch)
    
    print(f'\n[{epoch}] - Finished evaluation - val_loss: {val_loss:0.4f}, test_accuracy: {test_acc:0.4f}')
    
    # Resetting train and validation metrics-----------------------------------------------------
    train_acc_metric.reset_states()
    val_acc_metric.reset_states()
    train_loss_metric.reset_states()
    val_loss_metric.reset_states()
    
    elapsed_time = timer.stop()
    times.append(elapsed_time)
  
    if epoch%3 == 0:
        print(f'\n[INFO] - Total time elapsed: {np.sum(times)/60:0.4f} min. Total time remaining: {(np.sum(times)/(epoch+1))*(epochs-epoch-1)/60:0.4f} min.')

print(f'[INFO] - Total run time: {np.sum(times)/60:0.4f} min.')