# Imagenet 32x32


In [1]:
from __future__ import print_function
import keras
import keras.backend as K
from keras.datasets import cifar10, cifar100
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dense, Dropout, Flatten, Input, BatchNormalization
from keras.layers import MaxPooling2D #Conv2D, 
from keras.layers.convolutional import Conv2D
from keras.callbacks import ModelCheckpoint
from keras import Model, regularizers
import os

import numpy as np
os.environ["CUDA_VISIBLE_DEVICES"]="0"

Using TensorFlow backend.


## Specify the neural network

In [2]:
# set parameters for the model
batch_size = 128
num_classes = 1000
epochs = 100
data_augmentation = True
#num_predictions = 20

learning_rate = 0.01 #0.0001
decay = 1e-6 # learning rate decay
lr_drop = 20
l2_reg = 5e-4
momentum = 0.9

In [3]:
IMAGENET_PATH_TRAIN = "/home/nick/Datasets/downsampled_imagenet/32x32/train/box"
IMAGENET_PATH_VAL = "/home/nick/Datasets/downsampled_imagenet/32x32/val/box"
CHECKPOINT_FILENAME = 'imagenet32x32_ffd_dev_chk'

In [4]:
# The data, split between train and test sets:
#(x_train, y_train), (x_test, y_test) = cifar10.load_data()
#print('x_train shape:', x_train.shape)
#print(x_train.shape[0], 'train samples')
#print(x_test.shape[0], 'test samples')
#print('y_train shape:', y_train.shape)
#print(y_train.shape[0], 'train samples')
#print(y_test.shape[0], 'test samples')

In [5]:
# Convert class vectors to binary class matrices.
#y_train_oh = keras.utils.to_categorical(y_train, num_classes)
#y_test_oh = keras.utils.to_categorical(y_test, num_classes)

In [6]:
# confirm shape of images
input_shape_x = (32, 32, 3)

In [7]:
def model_fn_vgg(input_shape_x):
    """ function that builds a keras model and returns both a deep layer together with the output
    
    arguments:
    input_shape_x - the shape of the x
    returns: 
    model - the keras model, with two inputs (x and one-hot encoded y) 
            and two outputs (output softmax and an rdm)
    rdm - direct access to the rdm layer (for analysing rdm layer)
    """
    
    #VGGNET
    # define the main branch of the model
    
    #Conv1 32x32x64
    x = Input(shape=input_shape_x, name='x')
    m = Conv2D(64, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal', 
               name='conv1a')(x)
    m = BatchNormalization()(m)
    m = Conv2D(64, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv1b')(m)
    m = BatchNormalization()(m)
    m = MaxPooling2D(pool_size=(2,2), name='pool1')(m)

    #Conv2 16x16x128
    m = Conv2D(128, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv2a')(m)
    m = BatchNormalization()(m)
    m = Conv2D(128, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv2b')(m)
    m = BatchNormalization()(m)
    m = MaxPooling2D(pool_size=(2,2), name='pool2')(m)

    
    #Conv3 8x8x256
    m = Conv2D(256, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv3a')(m)
    m = BatchNormalization()(m)
    m = Conv2D(256, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv3b')(m)
    m = BatchNormalization()(m)
    m = Conv2D(256, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv3c')(m)
    m = BatchNormalization()(m)
#    m = Conv2D(256, (3,3), padding='same', activation='relu', 
#               kernel_regularizer=regularizers.l2(l2_reg),
#               kernel_initializer='glorot_normal',
#               name='conv3d')(m)
#    m = BatchNormalization()(m)
    m = MaxPooling2D(pool_size=(2,2), name='pool3')(m)
    

    #Conv4 4x4x512
    m = Conv2D(512, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv4a')(m)
    m = BatchNormalization()(m)
    m = Conv2D(512, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv4b')(m)
    m = BatchNormalization()(m)
    m = Conv2D(512, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv4c')(m)
    m = BatchNormalization()(m)
#    m = Conv2D(512, (3,3), padding='same', activation='relu', 
#               kernel_regularizer=regularizers.l2(l2_reg),
#               kernel_initializer='glorot_normal',
#               name='conv4d')(m)
#    m = BatchNormalization()(m)
    m = MaxPooling2D(pool_size=(2,2), name='pool4')(m)
    
    
    #Conv5 2x2x512
    m = Conv2D(512, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv5a')(m)
    m = BatchNormalization()(m)
    m = Conv2D(512, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv5b')(m)
    m = BatchNormalization()(m)
    m = Conv2D(512, (3,3), padding='same', activation='relu', 
               kernel_regularizer=regularizers.l2(l2_reg),
               kernel_initializer='glorot_normal',
               name='conv5c')(m)
#    m = BatchNormalization()(m)
#    m = Conv2D(512, (3,3), padding='same', activation='relu', 
#               kernel_regularizer=regularizers.l2(l2_reg),
#               kernel_initializer='glorot_normal',
#               name='conv5d')(m)
#    m = BatchNormalization()(m)
    m = MaxPooling2D(pool_size=(2,2), name='pool5')(m)
    
    
    m = Flatten(name='flatten')(m)
    dense1 = Dense(4096, activation='relu', name='dense1')(m)
    n = Dropout(0.5, name='dropout1')(dense1)
    n = Dense(4096, activation='relu', name='dense2')(dense1)
    n = Dropout(0.5, name='dropout2')(n)
    n = Dense(num_classes, name='dense3')(n)
    out = Activation('softmax', name='softmax')(n)
    
    
    model_softmax = Model(inputs=x, outputs=out)


    
    return model_softmax

In [8]:
train_sets = [(os.path.join(dp, f), dp.split('/')[-1]) for dp, dn, fn in os.walk(IMAGENET_PATH_TRAIN) for f in fn]
x_train, y_cls_train = zip(*train_sets)
y_classes = {y:i for i, y in enumerate(set(y_cls_train))}
y_train = [y_classes[y] for y in y_cls_train]
y_train_oh = keras.utils.to_categorical(np.copy(y_train))

val_sets = [(os.path.join(dp, f), dp.split('/')[-1]) for dp, dn, fn in os.walk(IMAGENET_PATH_VAL) for f in fn]
x_val, y_cls_val = zip(*val_sets)
y_val = [y_classes[y] for y in y_cls_val]
y_val_oh = keras.utils.to_categorical(np.copy(y_val))

## Sample from train set to get featurewise norm (mean, std) statistics

In [9]:
type(np.asarray([0, 1, 2]))

numpy.ndarray

In [10]:
#from skimage.io import imread
#from skimage.transform import resize
import imageio

In [11]:
import numpy as np
from random import sample

# Here, `x_set` is list of path to the images
# and `y_set` are the associated classes.

class ImagenetSequence(keras.utils.Sequence):
    
    def __init__(self, x_set, y_set, batch_size, 
                 featurewise_normalization=None, 
                 featurewise_normalization_sample_size=1000):
        """
        featurewise_normalization --- 
                a tuple (mean, std), "per_channel" or "global" -
                - if a tuple, center and standardize accordingly.
                per_channel calculates mean & std for a sample.
                "per_channel" processes RGB independently, "global" calculates single mean/std
        """
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.shuffle_order = np.arange(len(self.x))
        np.random.shuffle(self.shuffle_order)
        self.x_shuf = self.x[self.shuffle_order]
        self.y_shuf = self.y[self.shuffle_order]

        
        if type(featurewise_normalization) is tuple:
                self.mean, self.std = featurewise_normalization
                if type(self.mean) is np.ndarray:
                    self.featurewise_normalization = "per_channel"
                else:
                    self.featurewise_normalization = "global"
                
        elif featurewise_normalization is not None:
            
            
            
            
            print (featurewise_normalization)
            self.featurewise_normalization=featurewise_normalization
            
            print (self.featurewise_normalization)
            self.featurewise_normalization_sample_size=featurewise_normalization_sample_size

            self.mean, self.std = self.compute_stats()
        else:
            self.featurewise_normalization=None
            self.mean = 0.
            self.std = 1.

    def compute_stats(self):
        """
        train_x_files --- a list of files, one per training example
        sample_size --- number of files to sample from train_x_files to calculate stats
        std_per_channel --- standardize and center independently for each RGB
        """
    
        if self.featurewise_normalization == "per_channel":
            ax = (0,1,2) # standardize & center for each channel independently
        
        elif self.featurewise_normalization == "global": 
            ax = (0,1,2,3) # overall standardize & center
            
        else:
            raise Exception("featurewise normalization should be 'per_channel' or 'global'")
            
            
        sample_files = sample(list(self.x), self.featurewise_normalization_sample_size)
        sample_matrix = np.asarray([imageio.imread(im) * 1./255 for im in sample_files], dtype='float32')
        mean = np.mean(sample_matrix, axis=ax)
        std = np.std(sample_matrix, axis=ax)
        return mean, std
    
            
    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x_shuf[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y_shuf[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        def read_image(self, file_name):
            image = imageio.imread(file_name)
            image = image * 1. / 255
            
            if self.featurewise_normalization is not None:
                image = (image - self.mean) / self.std
            
            
            return image
        
        return np.array([read_image(self, file_name) * 1. / 255 for file_name in batch_x]), np.array(batch_y, dtype='int')
        
    def on_epoch_end(self):
        np.random.shuffle(self.shuffle_order)
        


In [12]:
train_seq = ImagenetSequence(np.asarray(x_train),
                             np.asarray(y_train_oh), 
                             batch_size=128,
                             featurewise_normalization="global")

val_seq = ImagenetSequence(np.asarray(x_val),
                           np.asarray(y_val_oh), 
                           batch_size=128,
                           featurewise_normalization=(train_seq.mean, train_seq.std))

global
global


In [13]:
val_seq.__getitem__(0)

(array([[[[-1.38288987e-03,  4.22551686e-04, -1.59848816e-04],
          [-1.26640977e-03,  5.97271837e-04, -5.67529168e-04],
          [-2.37297073e-03, -7.42249319e-04, -2.66417098e-03],
          ...,
          [-9.75209520e-04, -1.01608766e-04, -1.32464982e-03],
          [-1.38288987e-03,  3.06071586e-04, -1.03344957e-03],
          [-2.25649063e-03, -1.59848816e-04, -1.96529037e-03]],
 
         [[-8.58729419e-04,  9.46712138e-04,  1.48713344e-05],
          [-9.16969470e-04,  1.06319224e-03, -1.01608766e-04],
          [ 1.89591485e-04,  1.99503304e-03, -1.59848816e-04],
          ...,
          [ 2.05327309e-03,  1.29615244e-03,  4.80791736e-04],
          [-1.84881027e-03, -2.14001053e-03, -2.72241103e-03],
          [-1.73233017e-03, -2.18088867e-04, -1.73233017e-03]],
 
         [[-9.75209520e-04,  9.46712138e-04, -4.51049068e-04],
          [-1.67409012e-03,  3.64311636e-04, -1.09168962e-03],
          [ 7.71991988e-04,  2.34447334e-03,  2.47831535e-04],
          ...,
    

## let's cheat to get the featurewise normalization parameters


In [14]:
# define LR scheduler callback to reduce lr every 25 epochs

def lr_scheduler(epoch):
    lr = learning_rate * (0.5 ** (epoch // lr_drop))
    print ("using learning rate: ", lr)
    return lr
reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)

checkpoint = ModelCheckpoint(CHECKPOINT_FILENAME, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

# Cross-entropy Loss

In [15]:
#Simple model

#model_softmax = model_fn(input_shape_x=x_train.shape[1:])

K.clear_session()

#VGGNET Architecture
model  = model_fn_vgg(input_shape_x=input_shape_x)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 32, 32, 3)         0         
_________________________________________________________________
conv1a (Conv2D)              (None, 32, 32, 64)        1792      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
conv1b (Conv2D)              (None, 32, 32, 64)        36928     
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 16, 16, 64)        0         
_________________________________________________________________
conv2a (Conv2D)              (None, 16, 16, 128)       73856     
__________

In [16]:
opt = keras.optimizers.SGD(lr=learning_rate, 
                           momentum=momentum,
                           decay=decay,
                           nesterov=True)


model.compile(optimizer=opt, loss="categorical_crossentropy", 
              metrics=["acc", "top_k_categorical_accuracy"])

# don't use the fit generator for now

step_size_train = len(train_sets) // batch_size
step_size_val = len(val_sets) // batch_size
print ("train step size:", step_size_train)
print ("train step val:", step_size_val)

hist = model.fit_generator(train_seq,
                        steps_per_epoch=step_size_train,
                        epochs=epochs,
                        validation_data=val_seq,
                        validation_steps=step_size_val,
                        callbacks=[reduce_lr,
                                   checkpoint],
                        max_queue_size=16,
                        workers=8,
                        use_multiprocessing=True,
                        verbose=1,
                        )

train step size: 10009
train step val: 390
Epoch 1/100
using learning rate:  0.01
   65/10009 [..............................] - ETA: 26:06 - loss: 8.8887 - acc: 0.0012 - top_k_categorical_accuracy: 0.0058

ValueError: Decompressed Data Too Large

In [None]:
hist.params.items()