In [6]:
import glob
import pandas as pd
from tensorflow import keras
import numpy as np
import os 
import matplotlib.pylab as plt
import tensorflow as tf
from tensorflow.keras.layers import TimeDistributed, Conv2D, Conv2DTranspose, MaxPooling2D, AveragePooling2D, BatchNormalization, concatenate, Input, ConvLSTM2D, Reshape, Conv3D, Flatten, LSTM, GRU, Dense,Dropout, Add
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.models import Sequential, load_model
from sklearn.utils import shuffle

In [7]:
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, batch_size=32, dim=(120,120), n_channels=1, n_timesteps = 4, shuffle=True, augment_data = True,
                standardize = False):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_timesteps = n_timesteps 
        self.shuffle = shuffle
        self.augment_data = augment_data  
        self.standardize = standardize 
        self.on_epoch_end() 

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' 
        
        if self.augment_data == True:  # only augment data when training 
            # Initialization
            X = np.empty((self.batch_size*6, 120, 120, 4))
            y = np.empty((self.batch_size*6, 120, 120, 1)) 

            # Generate data
            for i, ID in enumerate(list_IDs_temp):
                data = np.load('./storage/precipitation/train/' + ID)
                # Store sample
                x_data = data[:,:,:4] 
                y_data = data[:,:,-1].reshape((120,120,1)) 
                
                X[i,] = x_data
                y[i] = y_data 
                
                # add 90 degrees rotation 
                X[i+self.batch_size,] = np.rot90(x_data)
                y[i+self.batch_size] = np.rot90(y_data)  
                
                # add 180 degrees rotation 
                X[i+self.batch_size*2,] = np.rot90(np.rot90(x_data)) 
                y[i+self.batch_size*2] = np.rot90(np.rot90(y_data)) 
                
                # add 270 degrees rotation 
                X[i+self.batch_size*3,] = np.rot90(np.rot90(np.rot90(x_data)))
                y[i+self.batch_size*3] = np.rot90(np.rot90(np.rot90(y_data)))  
                
                # add horizontal flip 
                X[i+self.batch_size*4,] = np.fliplr(x_data)
                y[i+self.batch_size*4] = np.fliplr(y_data) 
                
                # add vertical filp 
                X[i+self.batch_size*5,] = np.flipud(x_data) 
                y[i+self.batch_size*5] = np.flipud(y_data)
            
            # shuffle once more to make training harder 
            X,y = shuffle(X,y) 
            return (X, y)
        
        else: 
            # Initialization
            x1 = [] 
            x2 = [] 
            y = [] 

            # Generate data
            for i, ID in enumerate(list_IDs_temp):
                data = np.load('./storage/precipitation/train/' + ID).astype(np.float32) 
                data = np.log(data + 0.01) 
                
                
                if self.standardize:  
                    data = (data - mu)/std
                # for point(58,66)
                x1.append(data[53:63,61:71,:4].reshape((4,10,10,1)))
                x2.append(data[48:68,56:76,:4].reshape((4,20,20,1)))
                y.append(data[53:63,61:71,-1].reshape((10,10,1))) 
            
            x1 = np.asarray(x1)
            x2 = np.asarray(x2)
            y = np.asarray(y)
            return [x1,x2],y


In [8]:
def build_model(): 
    inputs1 = Input((4,10,10,1))
    inputs2 = Input((4,20,20,1)) 
    
    convlstm1 = ConvLSTM2D(32,3,padding='same',return_sequences=True)(inputs1) 
    bn1 = BatchNormalization()(convlstm1) 
    convlstm1 = ConvLSTM2D(32,3,padding='same',return_sequences=False)(bn1)
    bn1 = BatchNormalization()(convlstm1)
    
    convlstm2 = ConvLSTM2D(32,3,padding='same',return_sequences=True)(inputs2) 
    bn2 = BatchNormalization()(convlstm2) 
    maxpool = TimeDistributed(MaxPooling2D((2,2)))(bn2) 
    convlstm2 = ConvLSTM2D(32,3,padding='same',return_sequences=False)(maxpool) 
    bn2 = BatchNormalization()(convlstm2) 
    
    concat = concatenate([bn1,bn2])  
    outputs = Conv2D(1,1,padding='same',activation='relu')(concat) 
    model = Model(inputs=[inputs1,inputs2],outputs=outputs) 
    model.compile(loss='mae',optimizer='adam') 
    
    return model 


In [None]:
train_files = [x for x in os.listdir('./storage/precipitation/train/')] 

def k_fold(k,files):  
    folds = [] 
    fold_size = len(files) // k 
    for i in range(k): 
        if i == k-1:  
            l = files[i*fold_size:] 
        else: 
            l = files[i*fold_size:(i+1)*fold_size]  
        folds.append(l)   
    return folds  

train_files = shuffle(train_files, random_state = 888) # shuffle train files before splitting them into a fold 
train_folds = k_fold(5, train_files)

for i in range(5): 
    print("........ Fold {} Training ........".format(i+1)) 
        
    # split data in train and validations et 
    td = train_folds[:i] + train_folds[i+1:] 
    train_data = [] 
    for j in td: 
        for name in j: 
            train_data.append(name)
    val_data = train_folds[i] 
    
    # create partition dictionary and parameter dictionary 
    partition = {'train':[], 'validation':[]} 
    params_train_gen = {'dim': (120,120),
                    'batch_size': 32,
                    'n_channels': 4,
                    'n_timesteps': 4,
                    'shuffle': True,
                    'augment_data': False,
                    'standardize': False} 

    params_val_gen = {'dim': (120,120), 
                  'batch_size': 32, 
                  'n_channels': 4, 
                  'n_timesteps': 4,
                  'shuffle': True,
                  'augment_data': False,
                  'standardize': False}


    for filename in train_data: 
        partition['train'].append(filename) 
    for filename in val_data: 
        partition['validation'].append(filename)  
        
    
    # Generators
    training_generator = DataGenerator(partition['train'], **params_train_gen)
    validation_generator = DataGenerator(partition['validation'], **params_val_gen) 
    
    # prepare model 
    model = build_model() 

    # conduct training 
    model_path = './storage/convlstm_g3/kfold' + str(i+1) + '/epoch_{epoch:03d}_val_loss_{val_loss:.3f}.h5'
    learning_rate_reduction = ReduceLROnPlateau(monitor = 'val_loss', patience = 2, verbose = 1, factor = 0.8)
    annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)
    checkpoint = ModelCheckpoint(filepath = model_path, monitor = 'val_loss', verbose = 1, save_best_only = True)
    early_stopping = EarlyStopping(monitor = 'val_loss', patience = 10) 
    history = model.fit_generator(generator = training_generator, validation_data = validation_generator, epochs = 150, callbacks = [checkpoint, early_stopping, learning_rate_reduction])


........ Fold 1 Training ........
Epoch 1/150
Epoch 00001: val_loss improved from inf to 3.87132, saving model to ./storage/convlstm_g3/kfold1/epoch_001_val_loss_3.871.h5
Epoch 2/150
Epoch 00002: val_loss improved from 3.87132 to 3.85091, saving model to ./storage/convlstm_g3/kfold1/epoch_002_val_loss_3.851.h5
Epoch 3/150