In [9]:
import glob
import pandas as pd
from tensorflow import keras
import numpy as np
import os 
import matplotlib.pylab as plt
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.layers import TimeDistributed, Conv2D, Conv2DTranspose, MaxPooling2D, AveragePooling2D, BatchNormalization, concatenate, Input, ConvLSTM2D, Reshape, Conv3D, Flatten, LSTM, GRU, Dense,Dropout, Add
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.models import Sequential, load_model
from sklearn.utils import shuffle

In [10]:
# Mean and standard deviation of the training data.
# Presumably meant for DataGenerator's `standardize` option — TODO confirm.
# NOTE(review): DataGenerator divides each array by 255 before standardizing;
# confirm these statistics were computed on that same scale.
mu = 13.262550318358528
std = 36.12859290913875

In [18]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves 24x24 patches cut from saved ``.npy`` arrays.

    Every file named in ``list_IDs`` is loaded from ``data_dir``, cast to
    float32 and divided by 255. The array is then cut into a 5x5 grid of
    24x24 tiles; for each tile the first four slices of the last axis form the
    model input and the final slice forms the target. A batch built from
    ``batch_size`` files therefore contains ``25 * batch_size`` patches.

    Args:
        list_IDs: File names (relative to ``data_dir``) to draw from.
        batch_size: Number of files read per batch.
        dim, n_channels, n_timesteps, augment_data: Stored on the instance;
            the generator does not use them when building batches.
        shuffle: If true, the file order is reshuffled on construction and in
            every ``on_epoch_end`` call.
        standardize: If true, apply ``(data - mu) / std`` after the 1/255 scaling.
        data_dir: Directory that holds the ``.npy`` files.
        mean, stddev: Statistics used when ``standardize`` is true. When left
            as ``None`` the notebook-level ``mu`` / ``std`` constants are used.

    NOTE(review): each loaded array is assumed to be at least 120x120 with at
    least five slices on its last axis — confirm against the data files.
    NOTE(review): standardization is applied to values already divided by 255;
    confirm ``mu`` / ``std`` were computed on that scale.
    """

    PATCH_SIZE = 24      # side length of one square tile
    GRID_SIZE = 5        # tiles per row and per column
    N_INPUT_SLICES = 4   # leading last-axis slices used as model input

    def __init__(self, list_IDs, batch_size=32, dim=(120,120), n_channels=1, n_timesteps = 4, shuffle=True, augment_data = True,
                standardize = False, data_dir='./storage/precipitation/train/', mean=None, stddev=None):
        """Store the configuration and build the initial sample order."""
        # Harmless on the TF2 Sequence base class; expected by newer Keras versions.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_timesteps = n_timesteps
        self.shuffle = shuffle
        self.augment_data = augment_data
        self.standardize = standardize
        self.data_dir = data_dir
        if standardize:
            # __data_generation reads self.mu / self.std, so they must exist
            # before the first batch; fall back to the notebook-level training
            # statistics when no explicit values are supplied.
            self.mu = mu if mean is None else mean
            self.std = std if stddev is None else stddev
        else:
            self.mu, self.std = mean, stddev
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]
        batch_ids = [self.list_IDs[k] for k in batch_indexes]
        return self.__data_generation(batch_ids)

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given files and cut each one into input/target patches.

        Returns:
            ``X`` with shape ``(-1, 24, 24, 4)`` and ``y`` with shape
            ``(-1, 24, 24, 1)``, where the leading axis enumerates the 25
            tiles of every file in ``list_IDs_temp``.
        """
        size = self.PATCH_SIZE
        X = []
        y = []

        for ID in list_IDs_temp:
            data = np.load(os.path.join(self.data_dir, ID)).astype(np.float32)
            data /= 255.0
            if self.standardize:
                data = (data - self.mu) / self.std
            for j in range(self.GRID_SIZE):
                rows = slice(size * j, size * (j + 1))
                for k in range(self.GRID_SIZE):
                    cols = slice(size * k, size * (k + 1))
                    X.append(data[rows, cols, :self.N_INPUT_SLICES])
                    y.append(data[rows, cols, -1])

        X = np.asarray(X).reshape((-1, size, size, self.N_INPUT_SLICES))
        y = np.asarray(y).reshape((-1, size, size, 1))

        return X, y


In [19]:
def ssim_loss(y_true, y_pred):
    """Loss equal to one minus the mean SSIM between ``y_true`` and ``y_pred``.

    SSIM is computed with ``tf.image.ssim`` using a dynamic range
    (``max_val``) of 1.
    """
    ssim_values = tf.image.ssim(y_true, y_pred, max_val=1)
    mean_ssim = tf.reduce_mean(ssim_values)
    return 1 - mean_ssim


In [20]:
def build_unet(start_neurons):
    """Build and compile a two-level U-Net-style model for (24, 24, 4) inputs.

    Layout:
      * two encoder stages: 3x3 conv -> batch norm -> 2x2 max pool, with
        ``start_neurons`` and ``2 * start_neurons`` filters;
      * a bottleneck 3x3 conv with ``4 * start_neurons`` filters;
      * two decoder stages: stride-2 transposed conv, concatenation with the
        matching encoder conv output (taken before batch norm), 3x3 conv,
        batch norm;
      * a 1x1 conv with a single filter and ReLU activation as the output.

    Args:
        start_neurons: Number of filters in the first convolution.

    Returns:
        The model, compiled with ``ssim_loss`` and the Adam optimizer.
    """
    kernel = (3, 3)
    conv_opts = {"activation": "relu", "padding": "same"}
    upsample_opts = {"strides": (2, 2), "padding": "same"}

    inputs = Input((24,24,4))

    # Encoder, level 1.
    enc1 = Conv2D(start_neurons * 1, kernel, **conv_opts)(inputs)
    down1 = BatchNormalization()(enc1)
    down1 = MaxPooling2D((2, 2))(down1)

    # Encoder, level 2.
    enc2 = Conv2D(start_neurons * 2, kernel, **conv_opts)(down1)
    down2 = BatchNormalization()(enc2)
    down2 = MaxPooling2D((2, 2))(down2)

    # Bottleneck.
    bottleneck = Conv2D(start_neurons * 4, kernel, **conv_opts)(down2)

    # Decoder, level 2: upsample and merge with the level-2 encoder features.
    up2 = Conv2DTranspose(start_neurons * 2, kernel, **upsample_opts)(bottleneck)
    dec2 = concatenate([up2, enc2])
    dec2 = Conv2D(start_neurons * 2, kernel, **conv_opts)(dec2)
    dec2 = BatchNormalization()(dec2)

    # Decoder, level 1: upsample and merge with the level-1 encoder features.
    up1 = Conv2DTranspose(start_neurons * 1, kernel, **upsample_opts)(dec2)
    dec1 = concatenate([up1, enc1])
    dec1 = Conv2D(start_neurons * 1, kernel, **conv_opts)(dec1)
    dec1 = BatchNormalization()(dec1)

    # Single-channel prediction map.
    outputs = Conv2D(1, (1,1), padding="same", activation="relu")(dec1)

    unet = Model(inputs=inputs, outputs=outputs)
    unet.compile(loss=ssim_loss, optimizer='adam')
    return unet


In [21]:
# Build the U-Net with 64 filters in the first convolution and print its layer summary.
model = build_unet(64)
model.summary()

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 24, 24, 4)]  0                                            
__________________________________________________________________________________________________
conv2d_30 (Conv2D)              (None, 24, 24, 64)   2368        input_6[0][0]                    
__________________________________________________________________________________________________
batch_normalization_20 (BatchNo (None, 24, 24, 64)   256         conv2d_30[0][0]                  
__________________________________________________________________________________________________
max_pooling2d_10 (MaxPooling2D) (None, 12, 12, 64)   0           batch_normalization_20[0][0]     
____________________________________________________________________________________________

In [24]:
# Due to time limitations we skip the k-fold ensemble and train on a single,
# fixed 80/20 train/validation split.
TRAIN_DIR = './storage/precipitation/train/'
SPLIT_SEED = 42

# os.listdir returns entries in arbitrary order and also lists non-data entries
# (e.g. a .ipynb_checkpoints folder), so keep only .npy files and sort them
# before shuffling; with the fixed seed the split is then reproducible.
train_files = sorted(x for x in os.listdir(TRAIN_DIR) if x.endswith('.npy'))
train_files = shuffle(train_files, random_state=SPLIT_SEED)
k = int(0.8 * len(train_files))
train_data = train_files[:k]
val_data = train_files[k:]

partition = {'train': list(train_data), 'validation': list(val_data)}

# Presumably a leftover fold counter from a k-fold version of this cell; it is
# kept in case later cells read it.
cnt = 1

params_train_gen = {'dim': (120,120),
                    'batch_size': 256,
                    'n_channels': 4,
                    'n_timesteps': 4,
                    'shuffle': True,
                    'augment_data': True}

# The validation generator differs from the training one only in `augment_data`.
params_val_gen = {**params_train_gen, 'augment_data': False}

training_generator = DataGenerator(partition['train'], **params_train_gen)
validation_generator = DataGenerator(partition['validation'], **params_val_gen)
model = build_unet(64)

model_path = './storage/precip_unet_test/epoch_{epoch:03d}_val_loss_{val_loss:.3f}.h5'
# ModelCheckpoint does not create the target directory, so make sure it exists.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

learning_rate_reduction = ReduceLROnPlateau(monitor = 'val_loss', patience = 2, verbose = 1, factor = 0.8)
checkpoint = ModelCheckpoint(filepath = model_path, monitor = 'val_loss', verbose = 1, save_best_only = True)
early_stopping = EarlyStopping(monitor = 'val_loss', patience = 10)

# Model.fit accepts keras.utils.Sequence objects directly; fit_generator is
# deprecated in TF 2.x and removed in newer releases.
history = model.fit(
    training_generator,
    validation_data = validation_generator,
    epochs = 200,
    callbacks = [checkpoint, early_stopping, learning_rate_reduction],
)
cnt += 1


Epoch 1/200
Epoch 00001: val_loss improved from inf to 0.24590, saving model to ./storage/precip_unet_test/epoch_001_val_loss_0.246.h5
Epoch 2/200
Epoch 00002: val_loss improved from 0.24590 to 0.16305, saving model to ./storage/precip_unet_test/epoch_002_val_loss_0.163.h5
Epoch 3/200
Epoch 00003: val_loss improved from 0.16305 to 0.12710, saving model to ./storage/precip_unet_test/epoch_003_val_loss_0.127.h5
Epoch 4/200
Epoch 00004: val_loss improved from 0.12710 to 0.12416, saving model to ./storage/precip_unet_test/epoch_004_val_loss_0.124.h5
Epoch 5/200
Epoch 00005: val_loss improved from 0.12416 to 0.12250, saving model to ./storage/precip_unet_test/epoch_005_val_loss_0.123.h5
Epoch 6/200
Epoch 00006: val_loss improved from 0.12250 to 0.12096, saving model to ./storage/precip_unet_test/epoch_006_val_loss_0.121.h5
Epoch 7/200
Epoch 00007: val_loss improved from 0.12096 to 0.11829, saving model to ./storage/precip_unet_test/epoch_007_val_loss_0.118.h5
Epoch 8/200
Epoch 00008: val_lo

In [None]:
# Prediction inputs: every .npy file in the test directory, in sorted path order.
test_path = './storage/precipitation/test'
test_files = sorted(glob.glob(test_path + '/*.npy'))

# Load each file, stack the arrays into a single float32 array, and apply the
# same 1/255 scaling that the training generator uses.
X_test = np.array(
    [np.load(npy_path) for npy_path in tqdm(test_files, desc = 'test')]
).astype(np.float32)
X_test /= 255.0