In [4]:
import os
# Pin CUDA device enumeration to PCI bus order and make only device "0"
# visible to this process. Must run before TensorFlow initialises CUDA.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "0",
})

In [5]:
from __future__ import division
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Input, Lambda, Reshape
from keras import backend as K
from keras.optimizers import SGD, Nadam
from keras import layers

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline

In [7]:
from keras.backend.tensorflow_backend import set_session
from utils import limited_gpu_memory_session
# Register the session returned by the project's limited_gpu_memory_session()
# helper as the session used by the Keras TensorFlow backend.
# NOTE(review): the helper is defined in utils.py (not shown here); presumably
# it caps how much GPU memory TensorFlow may allocate -- confirm.
set_session(limited_gpu_memory_session())

In [16]:
# Every artefact path below is resolved against the directory the notebook
# is started from.
DATA_DIR = os.path.abspath('./')


def _in_data_dir(filename):
    """Return the path of `filename` inside DATA_DIR."""
    return os.path.join(DATA_DIR, filename)


# Target of the ModelCheckpoint callback.
CHECKPOINTED_WEIGHTS = _in_data_dir('checkpointed_weights.hdf5')
# Weights of the untrained siamese network, saved right after it is built.
INIT_WEIGHTS = _in_data_dir('init_weights_base.hdf5')
# Pickled experience buffer handed to DataGenerator.
EXPERIENCE_BUFFER_FILE = _in_data_dir('experience_buffer.p')
# NOTE(review): not referenced in the visible cells; the "vgg16" file name
# does not match the small dense network built here -- confirm it is still needed.
MODEL_IMAGE = _in_data_dir('siamese_vgg16.png')

In [49]:
from keras.layers.advanced_activations import LeakyReLU
from keras.regularizers import l2, l1

def dense_relu_bn_dropout(x, size, dropout, alpha = 0.1, reg = 0):
    """Append a Dense -> tanh -> BatchNormalization -> Dropout block to `x`.

    Despite the function name, the activation applied is 'tanh', not a
    (leaky) ReLU.

    Args:
        x: Keras tensor the block is applied to.
        size: number of units in the Dense layer.
        dropout: rate passed to the Dropout layer.
        alpha: accepted but not used by the current implementation.
        reg: L2 factor for the Dense layer's kernel regularizer.

    Returns:
        The Keras tensor produced by the Dropout layer.
    """
    block_layers = [
        Dense(size, kernel_regularizer = l2(reg)),
        Activation('tanh'),
        BatchNormalization(),
        Dropout(dropout),
    ]
    out = x
    for layer in block_layers:
        out = layer(out)
    return out

def create_network(reg, dropout, alpha = 0.1):
    """Build the base network that both siamese branches share.

    Architecture: Input(INPUT_SHAPE) -> dense block of 16 units -> dense block
    of 8 units -> Dense(1) with no activation.

    Args:
        reg: L2 kernel-regularization factor for both hidden Dense layers.
        dropout: dropout rate for both hidden blocks.
        alpha: forwarded to dense_relu_bn_dropout, which currently ignores it.

    Returns:
        A Keras Model mapping a vector of length INPUT_SHAPE to one scalar.
    """
    # INPUT_SHAPE is a module-level constant resolved when this function is
    # called, so it must be defined before the first call.
    inputs = Input(shape=(INPUT_SHAPE,))
    # reg and alpha are passed by keyword on purpose: the fourth positional
    # parameter of dense_relu_bn_dropout is `alpha`, so a positional `reg`
    # would land there and the L2 factor would silently stay at its default 0.
    x = dense_relu_bn_dropout(inputs, 16, dropout, alpha = alpha, reg = reg)
    x = dense_relu_bn_dropout(x, 8, dropout, alpha = alpha, reg = reg)
    x = Dense(1)(x)
    base_network = Model(inputs=inputs, outputs = x)
    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (that would emit a stray "None").
    base_network.summary()
    return base_network

In [50]:
from keras import layers
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, merge, Input, Lambda, Reshape

# Length of the feature vector consumed by each branch of the siamese network.
INPUT_SHAPE = 8

# One base network instance is applied to both inputs, so the two branches
# share the same weights.
base_network = create_network(reg = 0.5, dropout = 0.5)
input_a = Input(shape=(INPUT_SHAPE,))
processed_a = base_network(input_a)
input_b = Input(shape=(INPUT_SHAPE,))
processed_b = base_network(input_b)

# The model output is sigmoid(score_a - score_b), a value in (0, 1).
distance = layers.Subtract()([processed_a, processed_b])
out = Activation('sigmoid')(distance)
siamese_net = Model([input_a, input_b], out)

# Save the freshly initialised weights; they are reloaded before training so
# every run starts from the same point.
siamese_net.save_weights(INIT_WEIGHTS)
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
siamese_net.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 8)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 16)                144       
_________________________________________________________________
activation_4 (Activation)    (None, 16)                0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 16)                64        
_________________________________________________________________
dropout_3 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 136       
_________________________________________________________________
activation_5 (Activation)    (None, 8)                 0         
__________

In [51]:
import cPickle as pickle
# NOTE: unpickling can execute arbitrary code -- only load moves_dict.p when it
# comes from a trusted source.
# The context manager closes the file handle as soon as loading finishes.
with open("../moves_dict.p", "rb") as moves_file:
    MOVES = pickle.load(moves_file)

In [38]:
# Replace every value of MOVES with a NumPy array, in place. The keys are
# snapshotted first so the dict is not written to while it is being iterated.
for move_key in list(MOVES):
    MOVES[move_key] = np.array(MOVES[move_key])

In [52]:
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
# All three callbacks watch the same quantity: validation accuracy.
MONITORED_METRIC = 'val_acc'

# Halve the learning rate after 5 epochs without improvement, never going
# below 1e-8.
reduce_lr = ReduceLROnPlateau(
    monitor=MONITORED_METRIC,
    factor=0.5,
    patience=5,
    verbose=1,
    min_lr=1e-8,
)

# Stop training after 25 epochs without an improvement of at least 1e-4.
early_stopping = EarlyStopping(
    monitor=MONITORED_METRIC,
    min_delta=1e-4,
    patience=25,
    verbose=0,
    mode='auto',
)

# Write weights to CHECKPOINTED_WEIGHTS only when the monitored metric improves.
checkpointer = ModelCheckpoint(
    filepath=CHECKPOINTED_WEIGHTS,
    verbose=1,
    save_best_only=True,
    monitor=MONITORED_METRIC,
)

In [56]:
# Nadam optimizer with an initial learning rate of 1e-3 (the reduce_lr
# callback may lower it during training).
nadam = Nadam(lr=1e-3)
# Binary cross-entropy on the model's sigmoid output; accuracy is reported as
# an additional metric.
siamese_net.compile(optimizer=nadam, loss='binary_crossentropy', metrics=['accuracy'])
# Restore the weights saved in INIT_WEIGHTS, so re-running this cell discards
# whatever the model learned in a previous training run.
siamese_net.load_weights(INIT_WEIGHTS)

In [54]:
from utils import DataGenerator

BATCH_SIZE = 512

# Exactly one of the two flags is set: load the experience buffer when the
# file already exists, otherwise ask the generator to save one.
# NOTE(review): DataGenerator is defined in utils.py (not shown); the exact
# meaning of the flags is inferred from their names -- confirm.
if os.path.exists(EXPERIENCE_BUFFER_FILE):
    load_from_file, save_to_file = True, False
else:
    load_from_file, save_to_file = False, True

datagen = DataGenerator(
    MOVES,
    batch_sz=BATCH_SIZE,
    load_from_file=load_from_file,
    save_to_file=save_to_file,
    file=EXPERIENCE_BUFFER_FILE,
)

Experience buffer loaded from /home/ubuntu/quackle/rishabh_code/neural_networks/experience_buffer.p
Train: 2272842 Val: 94702


In [57]:
NUM_TRAIN_PAIRS, NUM_VAL_PAIRS = datagen.get_num_pairs()

# Number of whole batches per pass; a trailing partial batch is not counted.
STEPS_PER_EPOCH, VALIDATION_STEPS = (
    NUM_TRAIN_PAIRS // BATCH_SIZE,
    NUM_VAL_PAIRS // BATCH_SIZE,
)

train_batches = datagen.next_train()
val_batches = datagen.next_val()
training_callbacks = [reduce_lr, checkpointer, early_stopping]

# Train for at most 500 epochs; the early_stopping callback can end the run
# sooner.
history = siamese_net.fit_generator(
    train_batches,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=500,
    validation_data=val_batches,
    validation_steps=VALIDATION_STEPS,
    callbacks=training_callbacks,
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 00012: reducing learning rate to 0.000500000023749.
Epoch 00012: val_acc did not improve
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 00017: reducing learning rate to 0.000250000011874.
Epoch 00017: val_acc did not improve
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 00022: reducing learning rate to 0.000125000005937.
Epoch 00022: val_acc did not improve
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 00027: reducing learning rate to 6.25000029686e-05.
Epoch 00027: val_acc did not improve
Epoch 29/500
Epoch 30/500
Epoch 31/500


Epoch 32/500
Epoch 33/500
Epoch 00032: reducing learning rate to 3.12500014843e-05.
Epoch 00032: val_acc did not improve
