In [2]:
import os
import matplotlib
import numpy as np
import pandas as pd
from scipy import ndimage
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
import zipfile
import requests, StringIO
from sklearn import preprocessing


BATCH_SIZE = 20
NUM_CLASSES = 200
NUM_IMAGES_PER_CLASS = 500
NUM_IMAGES = NUM_CLASSES * NUM_IMAGES_PER_CLASS
TRAINING_IMAGES_DIR = '/datasets/tmp/cg181fdn/tiny-imagenet-200/train/'
TRAIN_SIZE = NUM_IMAGES

NUM_VAL_IMAGES = 10000
VAL_IMAGES_DIR = '/datasets/tmp/cg181fdn/tiny-imagenet-200/val/'

IMAGE_SIZE = 64
NUM_CHANNELS = 3
IMAGE_ARR_SIZE = IMAGE_SIZE * IMAGE_SIZE * NUM_CHANNELS
IMAGES_URL = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'

def download_images(url):
    if (os.path.isdir(TRAINING_IMAGES_DIR)):
        print ('Images already downloaded...')
        return
    r = requests.get(url, stream=True)
    print ('Downloading ' + url )
    zip_ref = zipfile.ZipFile(StringIO.StringIO(r.content))
    zip_ref.extractall('/datasets/tmp/cg181fdn/')
    zip_ref.close()

def load_training_images(image_dir, batch_size=500):

    image_index = 0
    
    images = np.ndarray(shape=(NUM_IMAGES, IMAGE_ARR_SIZE))
    names = []
    labels = []                       
    
    # Loop through all the types directories
    for type in os.listdir(image_dir):
        if os.path.isdir(image_dir + type + '/images/'):
            type_images = os.listdir(image_dir + type + '/images/')
            # Loop through all the images of a type directory
            batch_index = 0;
            #print ("Loading Class ", type)
            for image in type_images:
                image_file = os.path.join(image_dir, type + '/images/', image)

                # reading the images as they are; no normalization, no color editing
                image_data = mpimg.imread(image_file) 
                #print ('Loaded Image', image_file, image_data.shape)
                if (image_data.shape == (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)):
                    images[image_index, :] = image_data.flatten()

                    labels.append(type)
                    names.append(image)
                    
                    image_index += 1
                    batch_index += 1
                if (batch_index >= batch_size):
                    break;
                    
    return (images, np.asarray(labels), np.asarray(names))

def get_label_from_name(data, name):
    for idx, row in data.iterrows():       
        if (row['File'] == name):
            return row['Class']
        
    return None


def load_validation_images(testdir, validation_data, batch_size=NUM_VAL_IMAGES):
    labels = []
    names = []
    image_index = 0
    
    images = np.ndarray(shape=(batch_size, IMAGE_ARR_SIZE))
    val_images = os.listdir(testdir + '/images/')
           
    # Loop through all the images of a val directory
    batch_index = 0;
    
    
    for image in val_images:
        image_file = os.path.join(testdir, 'images/', image)
        #print (testdir, image_file)

        # reading the images as they are; no normalization, no color editing
        image_data = mpimg.imread(image_file) 
        if (image_data.shape == (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)):
            images[image_index, :] = image_data.flatten()
            image_index += 1
            labels.append(get_label_from_name(validation_data, image))
            names.append(image)
            batch_index += 1
            
        if (batch_index >= batch_size):
            break;
    
    print ("Loaded Validation images ", image_index)
    return (images, np.asarray(labels), np.asarray(names))
   
    

def plot_object(data):
    plt.figure(figsize=(1,1))
    image = data.reshape(IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)
    plt.imshow(image, cmap = matplotlib.cm.binary,
               interpolation="nearest")
    plt.axis("off")
    plt.show()

def plot_objects(instances, images_per_row=10, **options):
    size = IMAGE_SIZE
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size,size,NUM_CHANNELS) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    row_images = []
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    for row in range(n_rows):
        if (row == len(instances)/images_per_row):
            break
        rimages = images[row * images_per_row : (row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    plt.imshow(image, **options)
    plt.axis("off")
    plt.show()
    
def get_next_batch(batchsize=50):
    for cursor in range(0, len(training_images), batchsize):
        batch = []
        batch.append(training_images[cursor:cursor+batchsize])
        batch.append(training_labels[cursor:cursor+batchsize])       
        yield batch
        
def get_val_batch(batchsize=50):
    for cursor in range(0, len(val_images), batchsize):
        batch = []
        batch.append(val_images[cursor:cursor+batchsize])
        batch.append(val_labels[cursor:cursor+batchsize])
        yield batch

def get_next_labels(batchsize=50):
    for cursor in range(0, len(training_images), batchsize):
        yield training_labels[cursor:cursor+batchsize]  
    
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [3]:
import gzip
import cPickle

def load_zipped_pickle(filename):
    with gzip.open(filename, 'rb') as f:
        loaded_object = cPickle.load(f)
        return loaded_object

download_images(IMAGES_URL)
# training_images, training_labels, training_files = load_training_images(TRAINING_IMAGES_DIR, batch_size=500)

# shuffle_index = np.random.permutation(len(training_labels))
# training_images = training_images[shuffle_index]
# training_labels = training_labels[shuffle_index]
# training_files  = training_files[shuffle_index]

# le = preprocessing.LabelEncoder()
# training_le = le.fit(training_labels)
# training_labels_encoded = training_le.transform(training_labels)

# print ("First 30 Training Labels", training_labels_encoded[0:30])
# plot_objects(training_images[0:30])


# val_data = pd.read_csv(VAL_IMAGES_DIR + 'val_annotations.txt', sep='\t', header=None, names=['File', 'Class', 'X', 'Y', 'H', 'W'])
# val_images, val_labels, val_files = load_validation_images(VAL_IMAGES_DIR, val_data, batch_size=50)
# #val_images, val_labels, val_files = load_validation_images(VAL_IMAGES_DIR, val_data)
# val_labels_encoded = training_le.transform(val_labels)
# plot_objects(val_images[0:30])
# print (val_labels_encoded[0:30])

# data = load_zipped_pickle("tinyImageData")

# shuffle_train_index = np.random.permutation(len(data['train']['data']))
# training_images = np.array(data['train']['data'])[shuffle_train_index]
# training_labels = np.array(data['train']['target'])[shuffle_train_index]

# shuffle_val_index = np.random.permutation(len(data['val']['data']))
# val_images = np.array(data['val']['data'])[shuffle_val_index]
# val_labels = np.array(data['val']['target'])[shuffle_val_index]

Downloading http://cs231n.stanford.edu/tiny-imagenet-200.zip


In [9]:
height = IMAGE_SIZE
width = IMAGE_SIZE
channels = NUM_CHANNELS
n_inputs = height * width * channels
n_outputs = 200

reset_graph()

X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
y = tf.placeholder(tf.int32, shape=[None], name="y")

#input shape [-1, 64, 64, 3]
conv1 = tf.layers.conv2d(
            inputs=X_reshaped, 
            filters=64, 
            kernel_size=[11,11],
            padding='SAME',
            activation=tf.nn.relu, 
            name="conv1")

bn1 = tf.layers.batch_normalization(
            inputs=conv1, 
            name="bn1")

pool1 = tf.layers.max_pooling2d(
            inputs=bn1,
            pool_size=[2, 2],
            strides=2,
            name="pool1")

conv2 = tf.layers.conv2d(
            inputs=pool1, 
            filters=128, 
            kernel_size=[7,7],
            padding='SAME',
            activation=tf.nn.relu, 
            name="conv2")

bn2 = tf.layers.batch_normalization(
            inputs=conv2, 
            name="bn2")

pool2 = tf.layers.max_pooling2d(
            inputs=bn2,
            pool_size=[2, 2],
            strides=2,
            name="pool2")

conv3 = tf.layers.conv2d(
            inputs=pool2, 
            filters=192, 
            kernel_size=[3,3],
            padding='SAME',
            activation=tf.nn.relu, 
            name="conv3")

bn3 = tf.layers.batch_normalization(
            inputs=conv3, 
            name="bn3")

pool3 = tf.layers.max_pooling2d(
            inputs=bn3,
            pool_size=[2, 2],
            strides=2,
            name="pool3")

conv4 = tf.layers.conv2d(
            inputs=pool3, 
            filters=256, 
            kernel_size=[3,3],
            padding='SAME',
            activation=tf.nn.relu, 
            name="conv4")

bn4 = tf.layers.batch_normalization(
            inputs=conv4, 
            name="bn4")

pool4 = tf.layers.max_pooling2d(
            inputs=bn4,
            pool_size=[2, 2],
            strides=2,
            name="pool4")

#pool2_flat = tf.reshape(pool1, [-1, 8 * 8 * 64])
# Dense Layer
pool4_flat = tf.contrib.layers.flatten(pool4)

dense1 = tf.layers.dense(inputs=pool4_flat, units=4096, activation=tf.nn.relu)
bn5 = tf.layers.batch_normalization(
            inputs=dense1, 
            name="bn5")

dense2 = tf.layers.dense(inputs=bn5, units=512, activation=tf.nn.relu)
bn6 = tf.layers.batch_normalization(
            inputs=dense2, 
            name="bn6")

# Logits Layer
logits = tf.layers.dense(inputs=bn6, units=200, name='output')
Y_proba = tf.nn.softmax(logits, name="Y_proba")

xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
#optimizer = tf.keras.optimizers.SGD(lr=0.001, momentum=0.9, decay=0.000001, nesterov=True)
training_op = optimizer.minimize(loss)

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [None]:
n_epochs = 10
batch_size = 10

with tf.Session() as sess:
    init.run()
    trainAccuracy = []
    trainLoss = []
    valAccuracy = []
    for epoch in range(n_epochs):       
        # get the accuracy and loss, with the count
        sumAccuracy = 0
        sumLoss = 0
        count = 0
        
        for batch in get_next_batch(1000):
            X_batch, y_batch = batch[0], batch[1]
            #print ('Training set', X_batch.shape, y_batch.shape)
            _, myLoss = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})
            sumAccuracy += accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            sumLoss += myLoss
            count += 1
            if (count * 1000 % 1000 == 0):
                print("Total Batch finished: %d, Accuracy: %f, Loss: %f" % (count * 1000, sumAccuracy/count, sumLoss/count))
                
            save_path = saver.save(sess, "./tiny_imagenet/vgg_like")  

        trainAccuracy.append(sumAccuracy / count)
        trainLoss.append(sumLoss / count)
        print(epoch, "Train Accuracy:", trainAccuracy[-1], "Train Loss:", trainLoss[-1])

        print("Batching Validation...")
        for batch in get_val_batch(1000):        
            X_batch, y_batch = batch[0], batch[1]
            valAccuracy.append(accuracy.eval(feed_dict={X: X_batch, y: y_batch}))
        print("Test Accuracy:", sum(valAccuracy)/len(valAccuracy))
        
    print("Final Train Accuracy: %f" % (trainAccuracy[-1]))

Total Batch finished: 1000, Accuracy: 0.005000, Loss: 22.481092
Total Batch finished: 2000, Accuracy: 0.003500, Loss: 20.707058
Total Batch finished: 3000, Accuracy: 0.004000, Loss: 18.543517
Total Batch finished: 4000, Accuracy: 0.004750, Loss: 16.815016
Total Batch finished: 5000, Accuracy: 0.005400, Loss: 15.306990
Total Batch finished: 6000, Accuracy: 0.004667, Loss: 14.179837
Total Batch finished: 7000, Accuracy: 0.004571, Loss: 13.310828
Total Batch finished: 8000, Accuracy: 0.005250, Loss: 12.632472
Total Batch finished: 9000, Accuracy: 0.005556, Loss: 12.074749
Total Batch finished: 10000, Accuracy: 0.005900, Loss: 11.595311
Total Batch finished: 11000, Accuracy: 0.006273, Loss: 11.203014
Total Batch finished: 12000, Accuracy: 0.006250, Loss: 10.852432
Total Batch finished: 13000, Accuracy: 0.006000, Loss: 10.557579
Total Batch finished: 14000, Accuracy: 0.005929, Loss: 10.293337
Total Batch finished: 15000, Accuracy: 0.005867, Loss: 10.062110
Total Batch finished: 16000, Accur

Total Batch finished: 27000, Accuracy: 0.006037, Loss: 5.477651
Total Batch finished: 28000, Accuracy: 0.006071, Loss: 5.477294
Total Batch finished: 29000, Accuracy: 0.006069, Loss: 5.477197
Total Batch finished: 30000, Accuracy: 0.006067, Loss: 5.475989
Total Batch finished: 31000, Accuracy: 0.006000, Loss: 5.474656
Total Batch finished: 32000, Accuracy: 0.005938, Loss: 5.473199
Total Batch finished: 33000, Accuracy: 0.005848, Loss: 5.472074
Total Batch finished: 34000, Accuracy: 0.005765, Loss: 5.472054
Total Batch finished: 35000, Accuracy: 0.005771, Loss: 5.470270
Total Batch finished: 36000, Accuracy: 0.005750, Loss: 5.469306
Total Batch finished: 37000, Accuracy: 0.005757, Loss: 5.468078
Total Batch finished: 38000, Accuracy: 0.005816, Loss: 5.466606
Total Batch finished: 39000, Accuracy: 0.005897, Loss: 5.465841
Total Batch finished: 40000, Accuracy: 0.005850, Loss: 5.465690
Total Batch finished: 41000, Accuracy: 0.005878, Loss: 5.465513
Total Batch finished: 42000, Accuracy: 0

Total Batch finished: 53000, Accuracy: 0.006849, Loss: 5.374448
Total Batch finished: 54000, Accuracy: 0.006907, Loss: 5.373982
Total Batch finished: 55000, Accuracy: 0.006945, Loss: 5.373558
Total Batch finished: 56000, Accuracy: 0.006982, Loss: 5.373340
Total Batch finished: 57000, Accuracy: 0.006947, Loss: 5.373404
Total Batch finished: 58000, Accuracy: 0.006897, Loss: 5.373503
Total Batch finished: 59000, Accuracy: 0.006831, Loss: 5.373150
Total Batch finished: 60000, Accuracy: 0.006817, Loss: 5.372953
Total Batch finished: 61000, Accuracy: 0.006951, Loss: 5.372764
Total Batch finished: 62000, Accuracy: 0.007016, Loss: 5.372609
Total Batch finished: 63000, Accuracy: 0.007016, Loss: 5.372927
Total Batch finished: 64000, Accuracy: 0.006938, Loss: 5.373056
Total Batch finished: 65000, Accuracy: 0.007077, Loss: 5.372743
Total Batch finished: 66000, Accuracy: 0.007030, Loss: 5.372263
Total Batch finished: 67000, Accuracy: 0.007045, Loss: 5.372188
Total Batch finished: 68000, Accuracy: 0