In [0]:
# Sends SIGKILL to every process this user is allowed to signal (pid -1),
# which takes down the notebook kernel itself. Run it by hand to reset the
# VM; under "Run All" it will interrupt the run at this cell.
!kill -9 -1

# **Access Colaboratory**

In [0]:
# Install google-drive-ocamlfuse and fuse from the alessandro-strada PPA.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# Redirect order matters on the next two lines: `2>&1 > /dev/null` points
# stderr at the current stdout first and only then discards stdout, so error
# messages stay visible while normal output is dropped.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

In [2]:
# Authenticate the notebook user, then authorise google-drive-ocamlfuse with
# the application-default OAuth client id/secret.
from google.colab import auth
auth.authenticate_user()

from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()

import getpass
# First pass: stdin is /dev/null, so the tool cannot read a code; only the
# output line containing the authorisation URL is kept by grep.
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# getpass reads the verification code without echoing it into the notebook.
vcode = getpass.getpass()
# Second pass: feed the code on stdin to finish the OAuth exchange.
# NOTE(review): vcode and the client secret are interpolated unquoted into a
# shell command line — confirm this is acceptable for the environment.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
··········
Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
Please enter the verification code: Access token retrieved correctly.


In [0]:
# Create the mount point (no error if it already exists) and mount Google
# Drive on it through the FUSE client installed above.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [4]:
# Sanity check: list the working directory and the contents of the mount point.
!ls
!ls drive

datalab  drive
Colab Notebooks  Colaboratory  iMaterialist  Project Design.odt


# **Libraries**

In [5]:
import h5py, json, time, os
import cv2, numpy as np

from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD

from matplotlib import pyplot as plt

from keras.utils import np_utils

%matplotlib inline

from keras import backend as K

# 'tf' selects the TensorFlow-style channels-last layout (height, width, channels),
# which is what the (224,224,3) input_shape used by the model below expects.
K.set_image_dim_ordering('tf')

Using TensorFlow backend.


## Load Classes

In [0]:
# All dataset and checkpoint paths below are relative, so they resolve
# against this directory from here on.
# NOTE(review): presumably this is the `drive` mount point created earlier — confirm.
os.chdir('/content/drive')

In [7]:
# Build the list of distinct class labels found in the validation label file.
test_path="iMaterialist/validation_dataset/"
test_dataset_name = 'validation_last'
with h5py.File(test_path+'{}_labels.h5'.format(test_dataset_name), 'r') as hf:
    test_set_y_orig = np.array(hf['{}_labels'.format(test_dataset_name)][:])

# np.unique(..., return_index=True) yields the position of the first occurrence
# of every distinct label; sorting those positions keeps the labels in
# first-appearance order. Unlike the previous hand-written loop, which started
# at index 1, this also considers the very first label, and it avoids the
# repeated linear membership scan over a Python list.
first_seen = np.unique(test_set_y_orig, return_index=True)[1]
classes = test_set_y_orig.ravel()[np.sort(first_seen)] # the list of classes

print(classes.shape)

# Drop the reference so the full label array can be garbage-collected.
test_set_y_orig = None

(128,)


## Time

In [0]:
def elapsed(start):
    """
    Format the time passed since ``start`` as an ``hh:mm:ss.ss`` string.

    ``start`` is a timestamp as produced by ``time.time()``. Hours and
    minutes are zero-padded to at least two characters; seconds keep two
    decimal places.
    """
    total_seconds = time.time() - start
    whole_hours, leftover = divmod(total_seconds, 3600)
    whole_minutes, secs = divmod(leftover, 60)
    fields = (int(whole_hours), int(whole_minutes), secs)
    return "{:0>2}:{:0>2}:{:05.2f}".format(*fields)

# **Load DataSets**

In [0]:
def load_trainDS(train_path, train_dataset_name, batch_size=1000, chunk_size=1000):
    """
    Load one contiguous window of images and labels from the training HDF5 files.

    Images are read from ``<train_path><name>_images.h5`` (dataset
    ``<name>_images``) and labels from ``<train_path><name>_labels.h5``
    (dataset ``<name>_labels``).

    Arguments:
    train_path -- directory prefix; it is concatenated as-is, so it must end
                  with a path separator (e.g. "iMaterialist/train_dataset/")
    train_dataset_name -- base name of the dataset (e.g. 'train_1')
    batch_size -- despite the name, the exclusive END row of the window
    chunk_size -- number of rows in the window; the default of 1000 is the
                  width that used to be hard-coded

    Returns:
    train_set_x_orig -- rows [batch_size - chunk_size : batch_size] of the images
    train_set_y_orig -- the matching labels reshaped to (1, number_of_rows)
    """
    # Clamp at zero: a negative start would not mean "from the first row".
    first_row = max(batch_size - chunk_size, 0)
    window = slice(first_row, batch_size)

    # Train dataset
    with h5py.File(train_path+'{}_images.h5'.format(train_dataset_name), 'r') as hf:
        train_set_x_orig = np.array(hf['{}_images'.format(train_dataset_name)][window])
    with h5py.File(train_path+'{}_labels.h5'.format(train_dataset_name), 'r') as hf:
        train_set_y_orig = np.array(hf['{}_labels'.format(train_dataset_name)][window])

    # Labels are handed back as a single row vector.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig

In [0]:
def load_testDS(test_path, test_dataset_name, batch_size=1000, chunk_size=1000):
    """
    Load one contiguous window of images and labels from the validation HDF5 files.

    Images are read from ``<test_path><name>_images.h5`` (dataset
    ``<name>_images``) and labels from ``<test_path><name>_labels.h5``
    (dataset ``<name>_labels``).

    Arguments:
    test_path -- directory prefix; it is concatenated as-is, so it must end
                 with a path separator (e.g. "iMaterialist/validation_dataset/")
    test_dataset_name -- base name of the dataset (e.g. 'validation_last')
    batch_size -- despite the name, the exclusive END row of the window
    chunk_size -- number of rows in the window; the default of 1000 is the
                  width that used to be hard-coded

    Returns:
    test_set_x_orig -- rows [batch_size - chunk_size : batch_size] of the images
    test_set_y_orig -- the matching labels reshaped to (1, number_of_rows)
    """
    # Clamp at zero: a negative start would not mean "from the first row".
    first_row = max(batch_size - chunk_size, 0)
    window = slice(first_row, batch_size)

    # Test dataset (validation)
    with h5py.File(test_path+'{}_images.h5'.format(test_dataset_name), 'r') as hf:
        test_set_x_orig = np.array(hf['{}_images'.format(test_dataset_name)][window])
    with h5py.File(test_path+'{}_labels.h5'.format(test_dataset_name), 'r') as hf:
        test_set_y_orig = np.array(hf['{}_labels'.format(test_dataset_name)][window])

    # Labels are handed back as a single row vector.
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return test_set_x_orig, test_set_y_orig

In [12]:
# Report memory usage in mebibytes before the model and data batches are created.
!free -m

              total        used        free      shared  buff/cache   available
Mem:          13029         479        4718         220        7830       12081
Swap:             0           0           0


In [0]:
#https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3
def VGG_16(weights_path=None, input_shape=(224,224,3), num_classes=129):
  """
  Build a VGG-16 style Sequential model (13 conv layers + 3 dense layers).

  Layers are added in exactly this order: for every conv layer a
  ZeroPadding2D((1,1)) followed by a 3x3 ReLU convolution named
  ``conv<block>_<index>``; a 2x2/stride-2 max-pool closes each block; then
  Flatten, two Dense(4096)+Dropout(0.5) pairs and a softmax layer named
  ``predict``.

  Arguments:
  weights_path -- optional path handed to ``model.load_weights`` once the
                  layers are in place
  input_shape -- shape of one input image; the default (224,224,3) is the
                 value that used to be hard-coded
  num_classes -- width of the final softmax layer; the default 129 is the
                 value that used to be hard-coded

  Returns:
  model -- the (uncompiled) Sequential model

  NOTE(review): the model.summary() output saved in this notebook shows
  300x300 feature maps after conv1_1, so that run apparently used a
  different input size than the 224 default — pass ``input_shape`` to match
  the stored images.
  """
  # (block number, filters per conv layer, conv layers in the block)
  conv_blocks = ((1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3), (5, 512, 3))

  model = Sequential()

  is_first_layer = True
  for block_id, filters, n_convs in conv_blocks:
    for conv_id in range(1, n_convs + 1):
      if is_first_layer:
        # Only the very first layer declares the input shape.
        model.add(ZeroPadding2D((1,1),input_shape=input_shape))
        is_first_layer = False
      else:
        model.add(ZeroPadding2D((1,1)))
      model.add(Convolution2D(filters, (3, 3), activation='relu',
                              name='conv{}_{}'.format(block_id, conv_id)))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

  # Classifier head.
  model.add(Flatten())
  model.add(Dense(4096, activation='relu'))
  model.add(Dropout(0.5))
  model.add(Dense(4096, activation='relu'))
  model.add(Dropout(0.5))
  model.add(Dense(num_classes, activation='softmax', name='predict'))

  if weights_path:
    model.load_weights(weights_path)

  return model

In [19]:
# Build the network from scratch (no pretrained weights are loaded).
#weights_path = 'Colaboratory/iMaterialist/vgg16_weights.h5'
model = VGG_16()

# NOTE(review): lr=0.5 is a very large step for momentum SGD on a network of
# this size — confirm the loss actually decreases; the value is left unchanged.
sgd = SGD(lr=0.5, decay=1e-6, momentum=0.9, nesterov=True)
# Track accuracy as well as the loss so model.fit reports something
# interpretable for a classification task.
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d_27 (ZeroPaddi (None, 302, 302, 3)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 300, 300, 64)      1792      
_________________________________________________________________
zero_padding2d_28 (ZeroPaddi (None, 302, 302, 64)      0         
_________________________________________________________________
conv1_2 (Conv2D)             (None, 300, 300, 64)      36928     
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 150, 150, 64)      0         
_________________________________________________________________
zero_padding2d_29 (ZeroPaddi (None, 152, 152, 64)      0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 150, 150, 128)     73856     
__________

In [0]:
# Directory prefixes and dataset base names used by the loaders. The prefixes
# are concatenated directly with file names, so the trailing "/" is required.
train_path="iMaterialist/train_dataset/"
test_path="iMaterialist/validation_dataset/"
# The training dataset name is set per iteration inside the training loop.
#train_dataset_name = 'train_1'
test_dataset_name = 'validation_last'

In [0]:
# Train on every training file, one window of rows at a time, saving a
# checkpoint after each window.
start = time.time()
# Rows per load. The loader's window is 1000 rows wide by default, so this
# value must stay at 1000 for consecutive windows to tile the file.
load_batch_size = 1000
# NOTE(review): assumes each train_N file holds 5000 rows — confirm.
samples_per_dataset = 5000

for dataset_number in range (1,10):
  train_dataset_name = "train_{}".format(dataset_number)
  for batch in range(load_batch_size, samples_per_dataset + 1, load_batch_size):

    # `batch` is the end row of the window. It has to be passed on: without
    # it the loader falls back to its default and every iteration would
    # retrain on rows 0..999 of the file.
    x_train, y_train = load_trainDS(train_path, train_dataset_name, batch_size=batch)

    # Scale pixels to [0, 1]; casting first avoids a float64 copy of the batch.
    x_train = x_train.astype('float32')/255

    # One-hot encode with the Keras helper imported at the top of the
    # notebook (no convert_to_one_hot helper is defined before this cell).
    # Labels are flattened first so the result is (n_samples, 129).
    y_train = np_utils.to_categorical(y_train.reshape(-1), 129)

    print('\nTraining dataset {}'.format(dataset_number))
    print("\n*****Training batch# {}".format(batch)+"*****\n")
    model.fit(x_train, y_train, epochs = 20, batch_size = 10)
    print('\n-------------------------- Elapsed time: {} --------------------------'.format(elapsed(start)))
    # File name spelling is kept as-is so existing checkpoints keep matching.
    model.save('iMaterlialist-Keras-VGGNet-{}.h5'.format(dataset_number))
    print('\nCheckpoint saved. Elapsed time: {}'.format(elapsed(start)))