In [1]:
import glob
import pandas as pd
import numpy as np
import os 
from tensorflow import keras
import matplotlib.pylab as plt
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import Input, Model 
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, concatenate, Conv2D, Conv2DTranspose, Dropout, AlphaDropout, MaxPooling2D, AveragePooling2D, BatchNormalization, Concatenate, Flatten, Reshape, Add, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.python.keras.utils.data_utils import Sequence
from tensorflow.keras.models import Sequential, load_model



In [2]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of ``.npy`` samples from disk.

    Every sample file is loaded with ``np.load``; its first ``n_channels``
    channels become the model input and its last channel becomes the target.

    Args:
        list_IDs: File names (relative to ``data_dir``) of the samples to serve.
        batch_size: Number of samples per batch.
        dim: Spatial size ``(height, width)`` of one sample.
        n_channels: Number of leading channels used as model input.
        shuffle: Whether to reshuffle the sample order after every epoch.
        data_dir: Directory that contains the files named in ``list_IDs``.
    """

    def __init__(self, list_IDs, batch_size=32, dim=(120,120), n_channels=4, shuffle=True,
                 data_dir='./storage/precipitation/train/'):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.data_dir = data_dir
        # Builds the initial (optionally shuffled) index order.
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        # Only full batches are served; a trailing partial batch is dropped.
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        'Generate one batch of data'
        # Positions (into list_IDs) that belong to batch number `index`
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into file names
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Load the files and split them into inputs and targets
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given files and return ``(X, y)``.

        ``X`` has shape ``(n_samples, *dim, n_channels)`` and ``y`` has shape
        ``(n_samples, *dim, 1)``, where ``n_samples == len(list_IDs_temp)``.
        """
        # Size the buffers from the IDs actually received so that no
        # uninitialised np.empty rows can ever leak into a batch.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty((n_samples, *self.dim, 1))

        for i, ID in enumerate(list_IDs_temp):
            data = np.load(os.path.join(self.data_dir, ID))

            # Inputs: the leading n_channels channels (was hard-coded to 4)
            X[i] = data[:, :, :self.n_channels]

            # Target: the last channel, reshaped to dim (was hard-coded to 120x120)
            y[i] = data[:, :, -1].reshape((*self.dim, 1))

        return (X, y)


In [3]:
# Settings shared by the training and validation generators.
params = {'dim': (120,120),
          'batch_size': 32,
          'n_channels': 4,
          'shuffle': True}

# os.listdir returns entries in arbitrary order, so sort the names to make the
# 80/20 train/validation split identical on every run and every machine.
train_files = sorted(os.listdir('./storage/precipitation/train'))
k = int(len(train_files) * 0.8)  # index of the first validation file

partition = {'train': train_files[:k],
             'validation': train_files[k:]}


In [4]:
# Build one batch generator per split (training first, then validation),
# both configured from the shared `params` dictionary.
training_generator, validation_generator = (
    DataGenerator(partition[split_name], **params)
    for split_name in ('train', 'validation')
)

In [5]:
def base_model(input_layer, start_neurons):
    """Wire up a two-level U-Net-style encoder/decoder on top of `input_layer`.

    The encoder applies two conv -> batch-norm -> 2x2 max-pool stages, a
    bottleneck convolution follows, and the decoder upsamples twice with
    stride-2 transposed convolutions, concatenating the matching encoder
    feature map before each decoder convolution.

    Args:
        input_layer: Keras tensor the network is built on.
        start_neurons: Filter count of the first level; the second level uses
            twice and the bottleneck four times this number.

    Returns:
        The output tensor of the final 1x1 ReLU convolution (one channel).
    """
    kernel = (3, 3)
    window = (2, 2)
    relu_same = {"activation": "relu", "padding": "same"}
    upsample = {"strides": (2, 2), "padding": "same"}

    normed_input = BatchNormalization()(input_layer)

    # Encoder, level 1
    enc1 = Conv2D(start_neurons * 1, kernel, **relu_same)(normed_input)
    down1 = BatchNormalization()(enc1)
    down1 = MaxPooling2D(window)(down1)

    # Encoder, level 2
    enc2 = Conv2D(start_neurons * 2, kernel, **relu_same)(down1)
    down2 = BatchNormalization()(enc2)
    down2 = MaxPooling2D(window)(down2)

    # Bottleneck
    bottleneck = Conv2D(start_neurons * 4, kernel, **relu_same)(down2)

    # Decoder, level 2: upsample, then merge with the un-normalised enc2 map
    up2 = Conv2DTranspose(start_neurons * 2, kernel, **upsample)(bottleneck)
    dec2 = concatenate([up2, enc2])
    dec2 = Conv2D(start_neurons * 2, kernel, **relu_same)(dec2)
    dec2 = BatchNormalization()(dec2)

    # Decoder, level 1: upsample, then merge with the un-normalised enc1 map
    up1 = Conv2DTranspose(start_neurons * 1, kernel, **upsample)(dec2)
    dec1 = concatenate([up1, enc1])
    dec1 = Conv2D(start_neurons * 1, kernel, **relu_same)(dec1)
    dec1 = BatchNormalization()(dec1)

    # Single-channel head; ReLU keeps the prediction non-negative
    return Conv2D(1, (1,1), padding="same", activation='relu')(dec1)

# Symbolic 120x120 input with 4 channels; the network starts at 64 filters.
input_layer = Input(shape=(120, 120, 4))
output_layer = base_model(input_layer, start_neurons=64)


In [6]:
# Wrap the input/output tensors in a trainable Model and print its layer table.
model = Model(inputs=input_layer, outputs=output_layer)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 120, 120, 4) 0                                            
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 120, 120, 4)  16          input_1[0][0]                    
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 120, 120, 64) 2368        batch_normalization[0][0]        
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 120, 120, 64) 256         conv2d[0][0]                     
______________________________________________________________________________________________

In [7]:
# Checkpoint file name template; Keras fills in the epoch number and val_loss.
model_path = './storage/precipitation_best/epoch_{epoch:03d}_val_{val_loss:.3f}.h5' 
# Make sure the checkpoint directory exists before the first save.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Multiply the learning rate by 0.8 after 3 epochs without val_loss improvement.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=0.8)
# Write a checkpoint only when val_loss improves.
checkpoint = ModelCheckpoint(filepath=model_path,monitor='val_loss',verbose=1,save_best_only=True)
# Stop training after 10 epochs without val_loss improvement.
early_stopping = EarlyStopping(monitor='val_loss',patience=10) 

# The keyword is `optimizer`; the previous spelling `optimize` never selected
# Adam (it is either rejected or ignored, depending on the TF version).
model.compile(optimizer = 'adam', loss = 'mae')

# Model.fit accepts keras.utils.Sequence objects directly; fit_generator is deprecated.
history = model.fit(training_generator, validation_data = validation_generator, epochs = 100, callbacks=[checkpoint, early_stopping, learning_rate_reduction])



Epoch 1/100
Epoch 00001: val_loss improved from inf to 3.31308, saving model to ./storage/precipitation_best/epoch_001_val_3.313.h5
Epoch 2/100
Epoch 00002: val_loss improved from 3.31308 to 2.97901, saving model to ./storage/precipitation_best/epoch_002_val_2.979.h5
Epoch 3/100
Epoch 00003: val_loss did not improve from 2.97901
Epoch 4/100
Epoch 00004: val_loss improved from 2.97901 to 2.93728, saving model to ./storage/precipitation_best/epoch_004_val_2.937.h5
Epoch 5/100
Epoch 00005: val_loss improved from 2.93728 to 2.91145, saving model to ./storage/precipitation_best/epoch_005_val_2.911.h5
Epoch 6/100
Epoch 00006: val_loss improved from 2.91145 to 2.87744, saving model to ./storage/precipitation_best/epoch_006_val_2.877.h5
Epoch 7/100
Epoch 00007: val_loss did not improve from 2.87744
Epoch 8/100
Epoch 00008: val_loss improved from 2.87744 to 2.86083, saving model to ./storage/precipitation_best/epoch_008_val_2.861.h5
Epoch 9/100
Epoch 00009: val_loss did not improve from 2.86083

In [8]:
# Collect the test files in sorted (deterministic) order.
test_path = './storage/precipitation/test'
test_files = sorted(glob.glob(test_path + '/*.npy'))

# Load every file (with a progress bar) and stack them into one float32 array.
X_test = np.array(
    [np.load(npy_path) for npy_path in tqdm(test_files, desc = 'test')]
).astype(np.float32)


test: 100%|██████████| 2674/2674 [00:03<00:00, 767.57it/s]


In [10]:
# Checkpoints are saved as 'epoch_{epoch:03d}_val_{val_loss:.3f}.h5'. A file
# name hard-coded from one training run does not exist after a re-run, so pick
# the checkpoint whose name carries the lowest validation loss instead.
checkpoint_paths = sorted(glob.glob('./storage/precipitation_best/epoch_*_val_*.h5'))
if not checkpoint_paths:
    raise FileNotFoundError(
        "No checkpoints found in './storage/precipitation_best'; run the training cell first."
    )

best_checkpoint = min(
    checkpoint_paths,
    # '.../epoch_025_val_2.826.h5' -> 'epoch_025_val_2.826' -> 2.826
    key=lambda path: float(
        os.path.splitext(os.path.basename(path))[0].rsplit('_val_', 1)[1]
    ),
)
best_model = load_model(best_checkpoint)

In [11]:
# Print the layer table of the loaded checkpoint to confirm its architecture.
best_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 120, 120, 4) 0                                            
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 120, 120, 4)  16          input_1[0][0]                    
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 120, 120, 64) 2368        batch_normalization[0][0]        
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 120, 120, 64) 256         conv2d[0][0]                     
______________________________________________________________________________________________

In [12]:
# Run inference on all test samples at once.
# NOTE(review): X_test keeps every channel stored in the test files, while the
# model input is (120, 120, 4) — presumably the test files hold exactly 4
# channels; confirm, or slice X_test[..., :4] before predicting.
predictions = best_model.predict(X_test) 

In [13]:
# Start from the sample submission: column 0 is kept, the rest is overwritten.
submission = pd.read_csv('./storage/precipitation/sample_submission.csv') 

# Flatten each predicted map to one row (one value per pixel).
flat_predictions = predictions.reshape(len(predictions), -1)
expected_shape = submission.iloc[:, 1:].shape
if flat_predictions.shape != expected_shape:
    raise ValueError(
        f"predictions have shape {flat_predictions.shape}, "
        f"but the submission template expects {expected_shape}"
    )

# Round to the nearest integer before casting: a bare astype(int) truncates
# toward zero and would turn a predicted 0.9 into 0.
submission.iloc[:,1:] = np.rint(flat_predictions).astype(int)
submission.to_csv('./storage/unet_baseline.csv', index = False)

In [14]:
# Preview the first rows of the submission table as a quick sanity check.
submission.head()

Unnamed: 0,file_name,0,1,2,3,4,5,6,7,8,...,14390,14391,14392,14393,14394,14395,14396,14397,14398,14399
0,test_00000.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,test_00001.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,test_00002.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,test_00003.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,test_00004.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
