In [1]:
# Installs keras through a shell escape. No version is pinned, so the release
# that gets installed depends on when this cell is run.
# NOTE(review): `!pip` runs whichever pip is first on PATH, which may not be the
# kernel's environment — confirm, and consider pinning a version.
!pip install keras

[33mYou are using pip version 9.0.1, however version 9.0.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import keras
import h5py 
import numpy as np

# HDF5 file opened read-only with h5py by the parameter/training cells below.
PATH_DATA = 'data/eighth.h5'
# NOTE(review): the three paths below are not referenced by any cell visible in
# this notebook excerpt; presumably they belong to a later prediction/submission
# step — confirm before relying on them.
PATH_PREDICT_WITHOUT_GT = 'data/pred_students/pred_eighties_from_half_1_without_gt.h5'
PATH_SUBMIT = 'data/submit/pred_eighties_from_half_1_AWESOMEGROUP.h5'
PATH_PREDICT_WITH_GT = 'data/pred_teachers/pred_eighties_from_half_1.h5'

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
from keras.utils import to_categorical

class DataGenerator(object):
    """Generates (X, Y) batches for Keras from an indexable dataset.

    ``data`` must support ``data['S2'][i]`` (one image) and
    ``data['TOP_LANDCOVER'][i]`` (the label of that image); an open
    ``h5py.File`` or a dict of arrays both work.

    The first ``1 - perc_test`` fraction of the indices forms the training
    split and the remainder forms the test split (no shuffling of samples
    across the two splits).
    """

    def __init__(self, data, database_size, img_height, img_width, num_classes, num_input_channels, batch_size = 32, perc_test = 0.3, shuffle = True):
        """Store the configuration and compute the train/test index split.

        Args:
            data: indexable container with 'S2' and 'TOP_LANDCOVER' entries.
            database_size: number of samples to use from ``data``.
            img_height, img_width: spatial size of one image.
            num_classes: number of label classes (width of the one-hot vector).
            num_input_channels: number of channels of one image.
            batch_size: number of samples per generated batch.
            perc_test: fraction of the samples assigned to the test split.
            shuffle: if True, the order of the *batches* is shuffled at every
                epoch (samples inside a batch stay contiguous).
        """
        self.data = data
        self.database_size = database_size
        self.img_height = img_height
        self.img_width = img_width
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_input_channels = num_input_channels
        self.perc_test = perc_test
        self.perc_train = 1.0 - perc_test

        # Dataset split: leading indices -> train, trailing indices -> test
        split_index = int(self.perc_train * self.database_size)
        list_IDs = np.arange(self.database_size)
        self.train_IDs = list_IDs[:split_index]
        self.test_IDs = list_IDs[split_index:]

    def frequency_balancing(self):
        """Median frequency balancing.

        Counts how often each class occurs over the first ``database_size``
        samples (train and test splits alike) and returns a dict
        ``{class_index: weight}`` with
        ``weight = median(frequencies) / frequency``; classes that never
        occur get weight 0.

        NOTE(review): the median is taken over all ``num_classes``
        frequencies, including the zeros of absent classes, so it becomes 0
        when more than half of the classes are absent — confirm this is the
        intended definition.
        """
        # Uses the instance configuration rather than notebook-level globals,
        # so the result no longer depends on which cells were run before.
        class_frequencies = np.zeros(self.num_classes,)
        labels = self.data['TOP_LANDCOVER']

        # Count the class of every sample. Reading the (first) label value
        # directly gives the same index as one-hot encoding it and taking
        # the argmax of the result.
        for i in range(self.database_size):
            class_index = int(np.asarray(labels[i]).ravel()[0])
            class_frequencies[class_index] += 1

        # Turn counts into relative frequencies
        class_frequencies /= self.database_size
        median_frequency = np.median(class_frequencies)

        class_weights = dict()
        for j in range(self.num_classes):
            if class_frequencies[j] != 0:
                class_weights[j] = median_frequency / class_frequencies[j]
            else:
                class_weights[j] = 0

        return class_weights

    def _iter_epoch(self, list_IDs):
        """Yield every full batch of ``list_IDs`` once (one epoch).

        Trailing samples that do not fill a complete batch are dropped.

        Raises:
            ValueError: if ``list_IDs`` holds fewer than ``batch_size``
                samples; otherwise the infinite generators would loop
                forever without ever yielding.
        """
        num_batches = len(list_IDs) // self.batch_size
        if num_batches == 0:
            raise ValueError(
                "Split of %d samples is smaller than batch_size=%d: no full batch can be generated"
                % (len(list_IDs), self.batch_size))

        # Order of exploration of the batches for this epoch
        batch_order = np.arange(num_batches)
        if self.shuffle:
            np.random.shuffle(batch_order)

        for i in batch_order:
            batch_IDs = list_IDs[i*self.batch_size:(i+1)*self.batch_size]
            yield self.__data_generation(batch_IDs)

    def generate_train(self):
        """Infinite generator of (X, Y) batches drawn from the training split."""
        while True:
            for batch in self._iter_epoch(self.train_IDs):
                yield batch

    def generate_test(self):
        """Infinite generator of (X, Y) batches drawn from the test split."""
        while True:
            for batch in self._iter_epoch(self.test_IDs):
                yield batch

    def __data_generation(self, list_IDs_temp):
        """Build one batch for the given sample indices.

        Returns:
            X: array of shape (batch_size, img_height, img_width, num_input_channels)
            Y: array of shape (batch_size, num_classes) with one-hot labels
        """
        # Initialization (sized from the instance, not from a notebook global)
        X = np.zeros((self.batch_size, self.img_height, self.img_width, self.num_input_channels))
        Y = np.zeros((self.batch_size, self.num_classes))

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store image
            X[i, :, :, :] = self.data['S2'][ID]
            # Store label vector
            Y[i, :] = to_categorical(self.data['TOP_LANDCOVER'][ID], num_classes=self.num_classes)

        return X, Y

In [5]:
from keras.models import Model
from keras.layers import Input, Activation, Flatten
from keras.layers import MaxPooling2D, Conv2D, BatchNormalization, UpSampling2D, Dense
from keras import losses

In [6]:
# Input geometry and label space
NB_CHANNELS_INPUTS = 4   # channels per input picture
NUM_CLASSES = 23         # size of the softmax output
IMG_HEIGHT = 16
IMG_WIDTH = 16


def _encoder_stage(tensor, filters):
    """Apply Conv2D(2x2, same) -> BatchNorm -> ReLU -> MaxPool(2x2, same).

    Returns the activated feature map and its pooled version.
    """
    features = Conv2D(filters=filters, kernel_size=(2, 2), padding="same")(tensor)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    pooled = MaxPooling2D(pool_size=(2, 2), padding="same")(features)
    return features, pooled


# Input layer, batch-normalised before the first convolution
inputs = Input(shape=(IMG_HEIGHT, IMG_WIDTH, NB_CHANNELS_INPUTS))
normalized_inputs = BatchNormalization()(inputs)

# Encoder: two conv stages with 16 then 32 filters
conv1, pool1 = _encoder_stage(normalized_inputs, 16)
conv2, pool2 = _encoder_stage(pool1, 32)

# Classification head: flatten the last pooled map and apply a softmax layer
flat = Flatten()(pool2)
predictions = Dense(NUM_CLASSES, activation = "softmax")(flat)

# Finally, assemble the model
model = Model(inputs=inputs, outputs=predictions)

In [None]:
# Parameters: the dataset dimensions are read from the 'S2' entry of the HDF5 file
training_data = h5py.File(PATH_DATA, "r")
database_size, img_height, img_width = training_data['S2'].shape[:3]
batch_size = 32
nb_epochs = 5
num_classes = NUM_CLASSES
num_input_channels = NB_CHANNELS_INPUTS
perc_test = 0.3
perc_train = 1.0 - perc_test

# Instantiate the generator
data_gen = DataGenerator(
    data = training_data,
    database_size = database_size,
    img_height = img_height,
    img_width = img_width,
    num_classes = num_classes,
    num_input_channels = num_input_channels,
    batch_size = batch_size,
    perc_test = perc_test,
)

# Class weights from median frequency balancing. frequency_balancing() loops
# over all `database_size` samples, i.e. the whole database, not only the
# training split.
class_weight = data_gen.frequency_balancing()
print("class_weight : ", class_weight)

# np.save appends ".npy" to a file name that does not already end with it, so
# the file written to disk is 'eighth_weigths.npz.npy'. The dict is stored as
# a pickled 0-d object array.
np.save('eighth_weigths.npz', class_weight)

In [None]:
# The cell that saves the weights calls np.save('eighth_weigths.npz', ...).
# np.save appends ".npy" to that name, so the file on disk is
# 'eighth_weigths.npz.npy'; loading 'eighth_weigths.npz' fails with a
# file-not-found error.
# The saved object is a dict, which numpy stores as a pickled 0-d object array:
# allow_pickle=True is needed to read it and .item() unwraps it back to the dict.
# Only load files produced by this notebook — unpickling untrusted data can
# execute arbitrary code.
class_weight = np.load('eighth_weigths.npz.npy', allow_pickle=True).item()

In [7]:
# Hard-coded class weights, keyed by class index (0..22). This assignment
# replaces whatever value `class_weight` received from earlier cells, and is the
# value passed as `class_weight` to model.fit_generator in the training cell.
# NOTE(review): presumably pasted from the printed output of
# frequency_balancing() — confirm. Several classes carry weight 0.
class_weight = {0: 0, 1: 5.452017700884136e-05, 2: 2.2951308798384227e-05, 3: 1.5047512520784376e-05, 4: 3.780170485688905e-05, 5: 9.29195316855603e-06, 6: 1.0, 7: 0, 8: 0, 9: 0, 10: 0.0008384572386808273, 11: 0.0084985835694051, 12: 1.1775534756472127e-05, 13: 0, 14: 0.008368200836820085, 15: 0, 16: 0, 17: 3.0, 18: 0, 19: 0.014527845036319612, 20: 2.4580593621335954e-05, 21: 0, 22: 0}


In [8]:
# Parameters: the dataset dimensions are read from the 'S2' entry of the HDF5 file
training_data = h5py.File(PATH_DATA, "r")
database_size, img_height, img_width = training_data['S2'].shape[:3]
batch_size = 128
nb_epochs = 5
num_classes = NUM_CLASSES
num_input_channels = NB_CHANNELS_INPUTS
perc_test = 0.2
perc_train = 1.0 - perc_test

# Instantiate the generator
data_gen = DataGenerator(
    data = training_data,
    database_size = database_size,
    img_height = img_height,
    img_width = img_width,
    num_classes = num_classes,
    num_input_channels = num_input_channels,
    batch_size = batch_size,
    perc_test = perc_test,
)

# Infinite batch generators over the training and test splits
train_generator = data_gen.generate_train()
validation_generator = data_gen.generate_test()

# Visualize model
model.summary()

# Compile model
model.compile(
    loss=losses.categorical_crossentropy,
    optimizer='adam',
    metrics = ['categorical_accuracy'],
)

# Train model on dataset.
# steps_per_epoch is the number of full batches in the training split.
# NOTE(review): validation_steps carries an extra hard-coded 0.2 factor, so only
# a fifth of the full test-split batches are evaluated per epoch — confirm this
# sub-sampling is intentional.
callback = model.fit_generator(
    generator=train_generator,
    class_weight = class_weight,
    steps_per_epoch=int(perc_train*database_size)//batch_size,
    epochs=nb_epochs,
    validation_data=validation_generator,
    validation_steps=int(0.2*perc_test*database_size//batch_size),
)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 16, 16, 4)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 16, 16, 4)         16        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 16)        272       
_________________________________________________________________
batch_normalization_2 (Batch (None, 16, 16, 16)        64        
_________________________________________________________________
activation_1 (Activation)    (None, 16, 16, 16)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 16)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 8, 8, 32)          2080      
__________

  % delta_t_median)


  537/14608 [>.............................] - ETA: 39:05 - loss: 7.6492e-04 - categorical_accuracy: 0.1261

  % delta_t_median)
  % delta_t_median)




  % delta_t_median)


Epoch 2/5
Epoch 3/5
 1659/14608 [==>...........................] - ETA: 29:49 - loss: 1.7020e-04 - categorical_accuracy: 0.3380

  % delta_t_median)




  % delta_t_median)


Epoch 4/5
    3/14608 [..............................] - ETA: 18:36 - loss: 3.9265e-05 - categorical_accuracy: 0.2266

  % delta_t_median)




  % delta_t_median)




  % delta_t_median)


Epoch 5/5
    3/14608 [..............................] - ETA: 23:55 - loss: 1.8949e-05 - categorical_accuracy: 0.4505

  % delta_t_median)




  % delta_t_median)




In [9]:
# NOTE(review): load_model is imported here but not called in any visible cell.
from keras.models import load_model
# Persist the model object built and trained above to 'second_model.h5'
# (path relative to the notebook's working directory).
model.save('second_model.h5')