In [1]:
from keras.datasets import cifar10

Using TensorFlow backend.


In [2]:
import tensorflow as tf
import tensorflow.contrib.eager as tfe

In [3]:
import numpy as np
import os
import sys
import time

In [4]:
tf.enable_eager_execution()

In [5]:
tf.set_random_seed(42)

In [17]:
IMG_SHAPE = [256,256,3]
NUM_CLASSES = 10
PERC_VALID = 0.7
NUM_TRAIN = 20000
NUM_TEST = 2000

# training hyperparameters
LEARNING_RATE = 1e-4
MOMENTUM = 0.9
RMSPROP_DECAY = 0.9     
RMSPROP_EPSILON = 1.0              
BATCH_SIZE = 128
EPOCHS = 5
DISPLAY_STEP = 5 #10
VALIDATION_STEP = 100 #1000
SAVE_STEP = 50 #100
CKPT_PATH = './ckpt_concat_cp'
CKPT_PREFIX = os.path.join(CKPT_PATH, 'ckpt')
SUMMARY_PATH = './summary_concat_cp'

# net architecture hyperparamaters
LAMBDA = 5e-4 #for weight decay
DROPOUT = 0.5

# test hyper parameters
K_PATCHES = 5
TOP_K = 5

In [6]:
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

In [7]:
(X_train, Y_train), (X_test, Y_test) = (X_train[:NUM_TRAIN], Y_train[:NUM_TRAIN]), (X_test[:NUM_TEST], Y_test[:NUM_TEST])

In [12]:
from scipy.ndimage.interpolation import zoom

In [13]:
def scale_dataset(x):
    new_x = []
    for i in range(len(x)):
        new_x.append(zoom(x[i], (8.0,8.0,1.0)))
    return new_x

In [14]:
x_train = scale_dataset(X_train)

In [11]:
x_test = scale_dataset(X_test)

In [15]:
x_train = tf.convert_to_tensor(x_train, np.float32)
# x_test = tf.convert_to_tensor(x_test, np.float32)
y_train = tf.convert_to_tensor(Y_train, np.float32)
# y_test = tf.convert_to_tensor(Y_test, np.float32)

In [16]:
total = x_train.numpy().shape[0]
train_len = int(total*(1-PERC_VALID))
val_len = total-train_len
train_len, val_len

(350, 150)

In [18]:
dataset = tf.data.Dataset.from_tensor_slices((x_train[:train_len], y_train[:train_len]))
dataset = dataset.shuffle(100).batch(BATCH_SIZE)
data_it = dataset.make_one_shot_iterator()

In [None]:
testset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
testset = testset.shuffle(10).batch(1)
test_it = testset.make_one_shot_iterator()

In [19]:
valset = tf.data.Dataset.from_tensor_slices((x_train[train_len:], y_train[train_len:]))
valset = valset.shuffle(100).batch(BATCH_SIZE)
val_it = valset.make_one_shot_iterator()

In [38]:
class AlexNet(tfe.Network):

    def __init__(self, training):
        super(AlexNet, self).__init__()
        self.training = training

        # convolutional layers

        conv_init = tf.contrib.layers.xavier_initializer_conv2d()

        self.conv1 = self.track_layer(tf.layers.Conv2D(96, 11, 4, 'SAME', 
                                                        activation=tf.nn.relu, 
                                                        kernel_initializer=conv_init))
        self.pool1 = self.track_layer(tf.layers.MaxPooling2D(3, 2, 'VALID'))

        self.conv2 = self.track_layer(tf.layers.Conv2D(256, 5, 1, 'SAME', 
                                                        activation=tf.nn.relu,
                                                        kernel_initializer=conv_init))
        self.pool2 = self.track_layer(tf.layers.MaxPooling2D(3, 2, 'VALID'))

        self.conv3 = self.track_layer(tf.layers.Conv2D(384, 3, 1, 'SAME', 
                                                        activation=tf.nn.relu,
                                                        kernel_initializer=conv_init))

        self.conv4 = self.track_layer(tf.layers.Conv2D(384, 3, 1, 'SAME', 
                                                        activation=tf.nn.relu,
                                                        kernel_initializer=conv_init))

        self.conv5 = self.track_layer(tf.layers.Conv2D(256, 3, 1, 'SAME', 
                                                        activation=tf.nn.relu,
                                                        kernel_initializer=conv_init))
        self.pool5 = self.track_layer(tf.layers.MaxPooling2D(3, 2, 'VALID'))

        # fully connected layers

        
        fc_init = tf.contrib.layers.xavier_initializer()

        self.fc1 = self.track_layer(tf.layers.Dense(512, 
                                                        activation=tf.nn.relu,
                                                        kernel_initializer=fc_init))
        self.drop1 = self.track_layer(tf.layers.Dropout(DROPOUT))
        
        self.perceptron_u = self.track_layer(tf.layers.Dense(1, 
                                                        activation=tf.nn.relu,
                                                        kernel_initializer=fc_init))

        self.att1 = self.track_layer(tf.layers.Dense(512, 
                                                        activation=tf.nn.relu,
                                                        kernel_initializer=fc_init))
        self.att2 = self.track_layer(tf.layers.Dense(512, 
                                                        activation=tf.nn.relu,
                                                        kernel_initializer=fc_init))
        
        self.out = self.track_layer(tf.layers.Dense(NUM_CLASSES,
                                                        kernel_initializer=fc_init))


    def call(self, x):
        """ Function that executes the model """
        output = self.conv1(x)
        print(f'Conv1: {output.numpy().shape}')
        output = tf.nn.lrn(output, depth_radius=2, bias=1.0, alpha=2e-05, beta=0.75)
        output = self.pool1(output)
        print(f'Pool1: {output.numpy().shape}')

        output = self.conv2(output)
        print(f'Conv2: {output.numpy().shape}')
        output = tf.nn.lrn(output, depth_radius=2, bias=1.0, alpha=2e-05, beta=0.75)
        output = self.pool2(output)
        print(f'Pool2: {output.numpy().shape}')

        output = self.conv3(output)
        print(f'Conv3: {output.numpy().shape}')

        output = self.conv4(output)
        print(f'Conv4: {output.numpy().shape}')
        output_conv4 = output 

        output = self.conv5(output)
        print(f'Conv5: {output.numpy().shape}')
        output_conv5 = output 
        output = self.pool5(output)
        print(f'Pool5: {output.numpy().shape}')

        output = tf.layers.flatten(output)
        print(f'Flatten1: {output.numpy().shape}')

        output = self.fc1(output)
        print(f'FC1: {output.numpy().shape}')
        if self.training:
            output = self.drop1(output)
        
        g = tf.layers.flatten(output)
        g = tf.reshape(g, [g.numpy().shape[0],1,g.numpy().shape[1]])
        print(f'G: {g.numpy().shape}')
        
        ##### Attention 1 - conv 4 #####
        output_att1 = self.att1(output_conv4)
        # reshaping L to match G's dimension to compute compatibility scores
        output_att1_shapes = output_att1.numpy().shape
        output_att1 = tf.reshape(output_att1,  [output_att1_shapes[0], output_att1_shapes[1]*output_att1_shapes[2], output_att1_shapes[3],])
        print(f'Att1: {output_att1.numpy().shape}')
        # compatibility score
#         compat_att1 = tf.matmul(g,output_att1)
        compat_att1 = self.perceptron_u(tf.add(g,output_att1))
        print(f'compatability scores 1: {compat_att1.numpy().shape}')
        # normalising the compatibiltiy scores by softmax
        att_att1 = tf.nn.softmax(compat_att1)
        att_att1 = tf.reshape(att_att1, [att_att1.numpy().shape[0], att_att1.numpy().shape[2], att_att1.numpy().shape[1]])
        print(f'A1: {att_att1.numpy().shape}')
        # reshape L1 for computing g1
        output_conv4_shapes = output_conv4.numpy().shape
        output_conv4 = tf.reshape(output_conv4, [output_conv4_shapes[0], output_conv4_shapes[1]*output_conv4_shapes[2], output_conv4_shapes[3]])
        print(f'L1: {output_conv4.numpy().shape}')
        # computing g1
        g1 = tf.matmul(att_att1, output_conv4)
        g1 = tf.layers.flatten(g1)
        print(f'--- G1: {g1.numpy().shape} --- \n')
        
        ##### Attention 2 -  conv 5 #####
        output_att2 = self.att2(output_conv5)
        print(f'Att2: {output_att2.numpy().shape}')
        # reshaping L to match G's dimension to compute compatibility scores
        output_att2_shapes = output_att2.numpy().shape
        output_att2 = tf.reshape(output_att2, [output_att2_shapes[0],output_att2_shapes[1]*output_att2_shapes[2], output_att2_shapes[3]])
        print(f'Att2: {output_att2.numpy().shape}')
        # compatibility scores
        #compat_att2 = tf.matmul(g,output_att2)
        compat_att2 = self.perceptron_u(tf.add(g,output_att2))
        print(f'compatability scores 2: {compat_att2.numpy().shape}')
        # normalising the compatibiltiy scores by softmax
        att_att2 = tf.nn.softmax(compat_att2)
        att_att2 = tf.reshape(att_att1, [att_att2.numpy().shape[0], att_att2.numpy().shape[2], att_att2.numpy().shape[1]])
        print(f'A2: {att_att2.numpy().shape}')
        # reshape L2 for computing g2
        output_conv5_shapes = output_conv5.numpy().shape
        output_conv5 = tf.reshape(output_conv5, [output_conv5_shapes[0], output_conv5_shapes[1]*output_conv5_shapes[2], output_conv5_shapes[3]])
        print(f'L2: {output_conv5.numpy().shape}')
        # computing g1
        g2 = tf.matmul(att_att2, output_conv5)
        g2 = tf.layers.flatten(g2)
        print(f'--- G2: {g2.numpy().shape} ---\n')
        # Computing final g' by concatenation
        g_ = tf.concat([g1,g2], axis=1)
        print(f'G dash: {g_.numpy().shape}')      
        output = self.out(g_)
        print(f'Logits: {output.numpy().shape}')  

        return output

In [39]:
model = AlexNet(True)


Please inherit from `tf.keras.Model`, and see its documentation for details. `tf.keras.Model` should be a drop-in replacement for `tfe.Network` in most cases, but note that `track_layer` is no longer necessary or supported. Instead, `Layer` instances are tracked on attribute assignment (see the section of `tf.keras.Model`'s documentation on subclassing). Since the output of `track_layer` is often assigned to an attribute anyway, most code can be ported by simply removing the `track_layer` calls.

`tf.keras.Model` works with all TensorFlow `Layer` instances, including those from `tf.layers`, but switching to the `tf.keras.layers` versions along with the migration to `tf.keras.Model` is recommended, since it will preserve variable names. Feel free to import it with an alias to avoid excess typing :).


In [40]:
logits = model(x_train[:5])

Conv1: (5, 64, 64, 96)
Pool1: (5, 31, 31, 96)
Conv2: (5, 31, 31, 256)
Pool2: (5, 15, 15, 256)
Conv3: (5, 15, 15, 384)
Conv4: (5, 15, 15, 384)
Conv5: (5, 15, 15, 256)
Pool5: (5, 7, 7, 256)
Flatten1: (5, 12544)
FC1: (5, 512)
G: (5, 1, 512)
Att1: (5, 225, 512)
compatability scores 1: (5, 225, 1)
A1: (5, 1, 225)
L1: (5, 225, 384)
--- G1: (5, 384) --- 

Att2: (5, 15, 15, 512)
Att2: (5, 225, 512)
compatability scores 2: (5, 225, 1)
A2: (5, 1, 225)
L2: (5, 225, 256)
--- G2: (5, 256) ---

G dash: (5, 640)
Logits: (5, 10)


In [60]:
writer = tf.contrib.summary.create_summary_file_writer(SUMMARY_PATH)



In [61]:
optimizer = tf.train.MomentumOptimizer(learning_rate=LEARNING_RATE, momentum=MOMENTUM)
# optimizer = tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE, RMSPROP_DECAY,momentum=MOMENTUM,epsilon=RMSPROP_EPSILON)
# optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
# opt = tf.train.GradientDescent(learning_rate=LEARNING_RATE)

In [102]:
def loss(model, mode, x, y):
    logits = model(x)
    y = list(y.numpy().reshape(y.numpy().shape[0],))
    y = tf.one_hot(y, NUM_CLASSES)
    loss_value = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)
#     loss_value = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = y), name="cross_entropy_loss")
    weight_decay = tf.reduce_sum(LAMBDA * tf.stack([tf.nn.l2_loss(v) for v in model.variables]))

    total_loss = loss_value + weight_decay

    tf.contrib.summary.scalar(mode, '/loss', total_loss)

    return total_loss

In [107]:
def accuracy(model, mode, x, y):
#     pred = tf.nn.softmax(model(x))

#     accuracy_value = tf.reduce_sum(
#                 tf.cast(
#                     tf.equal(
#                         tf.argmax(pred, axis=1, output_type=tf.int64),
#                         tf.argmax(y, axis=1, output_type=tf.int64)
#                     ),
#                     dtype=tf.float32
#                 ) 
#             ) / float(pred.shape[0].value)
    pred = tf.cast(tf.argmax(tf.nn.softmax(model(x)),axis=1), dtype=tf.float32)
    equality = tf.equal(pred, y)
    accuracy_value = tf.reduce_mean(tf.cast(equality, tf.float32))

    tf.contrib.summary.scalar(mode, '/accuracy', accuracy_value)

    return accuracy_value

In [111]:
def format_time(time):
    """ It formats a datetime to print it
        Args:
            time: datetime
        Returns:
            a formatted string representing time
    """
    m, s = divmod(time, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    return ('{:02d}d {:02d}h {:02d}m {:02d}s').format(int(d), int(h), int(m), int(s))

In [114]:
all_variables = (model.variables + optimizer.variables() + [global_step])

In [112]:
start_time = time.time()
step_time = 0.0
global_step = 0
# with writer.as_default():
#     with tf.contrib.summary.record_summaries_every_n_global_steps(DISPLAY_STEP):

for epoch in range(EPOCHS):
    for (batch_i,datum) in enumerate(data_it):
        global_step = tf.train.get_global_step()
#                 global_step = tf.train.get_or_create_global_step()
        step = global_step.numpy() + 1
#         global_step += 1
        print(f'step: {step}')
        step_start_time = int(round(time.time() * 1000))
        optimizer.minimize(lambda: loss(model, 'train', datum[0], datum[1]), global_step=global_step)

        step_end_time = int(round(time.time() * 1000))
        step_time = step_time + step_end_time - step_start_time

        if (step % DISPLAY_STEP) == 0:
            l = loss(model, 'train', datum[0], datum[1])
            a = accuracy(model, 'train', datum[0], datum[1]).numpy()
            print ('Epoch: {:03d} Step/Batch: {:03d} Step mean time: {:04d}ms \nLoss: {:.7f} Training accuracy: {:.4f}'.format(epoch, step, int(step_time / step), l, a))

        if (step % VALIDATION_STEP) == 0:
            val_images, val_labels = val_it.get_next()
            l = loss(model, 'val', val_images, val_labels)
            a = accuracy(model, 'val', val_images, val_labels).numpy()
            int_time = time.time() - start_time
            print ('Elapsed time: {} --- Loss: {:.7f} Validation accuracy: {:.4f}'.format(format_time(int_time), l, a))

        if (step % SAVE_STEP) == 0:
            tfe.Saver(all_variables).save(os.path.join(CKPT_PATH, 'net.ckpt'), global_step=global_step)
            print('Variables saved')

step: 19
Epoch: 000 Step/Batch: 019 Step mean time: 1039ms 
Loss: nan Training accuracy: 1.0000
step: 20
Epoch: 000 Step/Batch: 020 Step mean time: 1989ms 
Loss: nan Training accuracy: 1.0000
Elapsed time: 00d 00h 01m 17s --- Loss: nan Validation accuracy: 1.0000


NameError: name 'all_variables' is not defined