## ResNet

In [1]:
from __future__ import division

import six
from keras.models import Model
from keras.layers import Dense, Flatten, Activation, Input
from keras.layers.convolutional import Conv2D, MaxPooling2D, AveragePooling2D
from keras import optimizers
from keras.layers.merge import add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras import backend as K
# Select 'th' dimension ordering. With this setting _handle_dim_ordering()
# assigns CHANNEL_AXIS = 1, matching the (3, 64, 64) input shape passed to the
# builder and the data_format='channels_first' used by the image generator.
K.set_image_dim_ordering('th')
# Make float32 the default float type used by the Keras backend.
K.set_floatx('float32')

import pandas as pd
import numpy as np
# Seed NumPy's global random generator before any data handling.
np.random.seed(17)

# Training images and their labels, stored as NumPy arrays on disk.
train_data = np.load('train.npy')
train_target = np.load('train_target.npy')

# Hold out 25% of the samples for validation; the fixed random_state keeps
# the split identical between runs.
x_train, x_val_train, y_train, y_val_train = train_test_split(
    train_data,
    train_target,
    test_size=0.25,
    random_state=32
)

Using TensorFlow backend.


Definindo helpers

In [2]:
def _bn_relu(input):
    """Constroi bloco BN -> ReLU"""
    norm = BatchNormalization(axis=CHANNEL_AXIS)(input)
    return Activation('relu')(norm)

def _conv_bn_relu(**conv_params):
    """Constrói bloco Conv -> BN -> ReLU"""
    filters = conv_params['filters']
    kernel_size = conv_params['kernel_size']
    strides = conv_params.setdefault('strides', (1,1))
    kernel_initializer = conv_params.setdefault('kernel_initializer', 'he_normal')
    padding = conv_params.setdefault('padding', 'same')
    kernel_regularizer = conv_params.setdefault('kernel_regularizer', l2(1.e-4))
    
    def f(input):
        conv = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, \
                     kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(input)
        return _bn_relu(conv)
    
    return f

def _bn_relu_conv(**conv_params):
    """Constrói bloco BN -> ReLU -> Conv
    Esta eh uma versao melhorada proposta em http://arxiv.org/pdf/1603.05027v2.pdf
    """
    filters = conv_params['filters']
    kernel_size = conv_params['kernel_size']
    strides = conv_params.setdefault('strides', (1,1))
    kernel_initializer = conv_params.setdefault('kernel_initializer', 'he_normal')
    padding = conv_params.setdefault('padding', 'same')
    kernel_regularizer = conv_params.setdefault('kernel_regularizer', l2(1.e-4))
    
    def f(input):
        activation = _bn_relu(input)
        return Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, \
                     kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(activation)
    
    return f

def _shortcut(input, residual):
    """Adiciona um atalho entre a entrada e o bloco residual e funde os dois com uma soma"""
    # Expandir os canais do atalho para se adequarem ao residual
    # Aplicar stride apropriadamente para se adequar ao residual (largura, altura)
    # Deve ser int se a arquitetura da rede estiver corretamente configurada
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]
    
    shortcut = input
    
    # 1 x 1 conv se o shape for diferente, senão identidade
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS], kernel_size=(1,1), strides=(stride_width, stride_height), \
                         padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(0.0001))(input)
        
    return add([shortcut, residual])

def _residual_block(block_function, filters, repetitions, is_first_layer=False):
    """Constrói um bloco residual repetindo blocos bottleneck ou blocos basicos"""
    def f(input):
        for i in range(repetitions):
            init_strides = (1, 1)
            if i == 0 and not is_first_layer:
                init_strides = (2, 2)
            input = block_function(filters=filters, init_strides=init_strides, \
                                   is_first_block_of_first_layer=(is_first_layer and i == 0))(input)
        return input
    
    return f


def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
    """Basic block of two 3x3 convolutions, for ResNets with fewer than 34 layers.

    Follows the improved scheme from http://arxiv.org/pdf/1603.05027v2.pdf

    Args:
        filters: Number of filters of both convolutions.
        init_strides: Strides of the first convolution.
        is_first_block_of_first_layer: When true, the first convolution is a
            plain ``Conv2D`` with no BN -> ReLU in front of it.

    Returns:
        A function that maps an input tensor to
        ``_shortcut(input, residual)``.
    """
    def apply(input):
        if not is_first_block_of_first_layer:
            first_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3),
                                       strides=init_strides)(input)
        else:
            # Do not repeat BN -> ReLU here: the network stem already did
            # BN -> ReLU -> max-pool right before this block.
            first_conv = Conv2D(filters=filters, kernel_size=(3, 3), strides=init_strides,
                                padding='same', kernel_initializer='he_normal',
                                kernel_regularizer=l2(1e-4))(input)

        second_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(first_conv)
        return _shortcut(input, second_conv)

    return apply

def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
    """Bottleneck block (1x1 -> 3x3 -> 1x1 convolutions) for the deeper ResNets.

    Follows the improved scheme from http://arxiv.org/pdf/1603.05027v2.pdf

    Args:
        filters: Number of filters of the first 1x1 and the 3x3 convolution.
            The closing 1x1 convolution uses ``filters * 4``.
        init_strides: Strides of the first 1x1 convolution.
        is_first_block_of_first_layer: When true, the first convolution is a
            plain ``Conv2D`` with no BN -> ReLU in front of it.

    Returns:
        A function that maps an input tensor to
        ``_shortcut(input, residual)``, where ``residual`` is the output of a
        convolution with ``filters * 4`` filters.
    """
    def f(input):
        if is_first_block_of_first_layer:
            # Do not repeat BN -> ReLU here: the network stem already did
            # BN -> ReLU -> max-pool right before this block.
            conv_1_1 = Conv2D(filters=filters, kernel_size=(1, 1), strides=init_strides,
                              padding='same', kernel_initializer='he_normal',
                              kernel_regularizer=l2(1e-4))(input)
        else:
            # The reducing convolution of a bottleneck is 1x1, the same kernel
            # size the first-block branch above uses.
            conv_1_1 = _bn_relu_conv(filters=filters, kernel_size=(1, 1),
                                     strides=init_strides)(input)

        conv_3_3 = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv_1_1)
        residual = _bn_relu_conv(filters=filters * 4, kernel_size=(1, 1))(conv_3_3)

        return _shortcut(input, residual)

    return f

In [3]:
def _handle_dim_ordering():
    global ROW_AXIS
    global COL_AXIS
    global CHANNEL_AXIS
    if K.image_dim_ordering() == 'tf':
        ROW_AXIS = 1
        COL_AXIS = 2
        CHANNEL_AXIS = 3
    else:
        CHANNEL_AXIS = 1
        ROW_AXIS = 2
        COL_AXIS = 3
        
def _get_block(identifier):
    if isinstance(identifier, six.string_types):
        res = globals().get(identifier)
        if not res:
            raise ValueError('Invalid {}'.format(identifier))
        return res
    return identifier



In [4]:
class ResnetBuilder(object):
    """Factory of ResNet Keras models with ready-made depth presets."""

    @staticmethod
    def build(input_shape, num_outputs, block_fn, repetitions):
        """Build a ResNet.

        Args:
            input_shape: Input shape as (nb_channels, nb_rows, nb_cols). It is
                reordered to (nb_rows, nb_cols, nb_channels) when the backend
                reports 'tf' dimension ordering.
            num_outputs: Number of outputs of the final softmax layer.
            block_fn: Block function to use, ``basic_block`` or ``bottleneck``,
                given as the callable or as its name. The original paper uses
                ``basic_block`` for networks with fewer than 50 layers.
            repetitions: Number of block repetitions for each stage. The
                filter count starts at 64 and doubles after every stage; each
                stage but the first begins with a (2, 2)-strided block.

        Returns:
            The (uncompiled) Keras model.

        Raises:
            Exception: If ``input_shape`` does not have exactly 3 entries.
        """
        _handle_dim_ordering()
        if len(input_shape) != 3:
            raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

        # Permute the input shape if the backend expects channels last.
        if K.image_dim_ordering() == 'tf':
            input_shape = (input_shape[1], input_shape[2], input_shape[0])

        # Resolve the block function from its name if a string was given.
        block_fn = _get_block(block_fn)

        # Stem: 7x7 strided convolution -> BN -> ReLU -> max-pool.
        inputs = Input(shape=input_shape)
        conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(inputs)
        pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(conv1)

        block = pool1
        filters = 64
        for i, r in enumerate(repetitions):
            block = _residual_block(block_fn, filters=filters, repetitions=r,
                                    is_first_layer=(i == 0))(block)
            filters *= 2

        # Final activation (the blocks end with a convolution, not BN -> ReLU).
        block = _bn_relu(block)

        # Classifier: average-pool over the whole feature map, then softmax.
        block_shape = K.int_shape(block)
        pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]),
                                 strides=(1, 1))(block)

        flatten1 = Flatten()(pool2)
        dense = Dense(units=num_outputs, kernel_initializer='he_normal',
                      activation='softmax')(flatten1)

        return Model(inputs=inputs, outputs=dense)

    @staticmethod
    def build_resnet_18(input_shape, num_outputs):
        """Build a ResNet-18: basic blocks, repetitions [2, 2, 2, 2]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2])

    @staticmethod
    def build_resnet_34(input_shape, num_outputs):
        """Build a ResNet-34: basic blocks, repetitions [3, 4, 6, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3])

    @staticmethod
    def build_resnet_50(input_shape, num_outputs):
        """Build a ResNet-50: bottleneck blocks, repetitions [3, 4, 6, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3])

    @staticmethod
    def build_resnet_101(input_shape, num_outputs):
        """Build a ResNet-101: bottleneck blocks, repetitions [3, 4, 23, 3]."""
        # Networks with 50 or more layers are built from bottleneck blocks.
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3])

    @staticmethod
    def build_resnet_152(input_shape, num_outputs):
        """Build a ResNet-152: bottleneck blocks, repetitions [3, 8, 36, 3]."""
        # Networks with 50 or more layers are built from bottleneck blocks.
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3])

In [5]:
# ResNet-34 for 3-channel 64x64 inputs and 3 output classes.
model = ResnetBuilder.build_resnet_34(input_shape=(3, 64, 64), num_outputs=3)

# The sparse categorical loss is paired with the softmax output of the builder.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'])

A perda está muito alta; ajustar o generator nos dá melhores resultados. Nos fóruns do Kaggle há um caso de uma Inception v3 com o mesmo problema (perda muito alta), e a sugestão foi ajustar o generator.

In [6]:
# Training hyper-parameters, named so the step count below stays in sync
# with the batch size.
BATCH_SIZE = 15
EPOCHS = 100

# Augmentation pipeline.
# zoom_range: a float z means zoom factors are drawn from [1 - z, 1 + z]. The
#   previous value of 10 produced the interval [-9, 11], i.e. negative and
#   extreme zooms; 0.1 keeps the zoom within +/-10%.
# rotation_range is expressed in degrees.
# NOTE(review): 0.9 degrees is a very small rotation - confirm it is intended.
datagen = ImageDataGenerator(rotation_range=0.9, zoom_range=0.1, shear_range=0.3,
                             data_format='channels_first', horizontal_flip=True,
                             vertical_flip=False)

# No datagen.fit(...) call: it only computes statistics for the feature-wise
# options (featurewise_center, featurewise_std_normalization, zca_whitening),
# none of which is enabled. Fitting on train_data would also have included
# the validation samples.

# One epoch = one full pass over the training split. Integer ceiling division
# keeps steps_per_epoch an int regardless of `from __future__ import division`.
steps_per_epoch = (len(x_train) + BATCH_SIZE - 1) // BATCH_SIZE

model.fit_generator(datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, shuffle=True),
                    steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1,
                    validation_data=(x_val_train, y_val_train))

Epoch 1/100

KeyboardInterrupt: 

In [8]:
# Test images and the identifier that belongs to each of them.
test_data = np.load('test.npy')
test_id = np.load('test_id.npy')

# One row of class scores per test image.
pred = model.predict(test_data)

# One column per model output, followed by the image identifier column.
df = pd.DataFrame(pred, columns=['Type_1', 'Type_2', 'Type_3']).assign(image_name=test_id)
df.to_csv('submission.csv', index=False)