In [1]:
# Import Modules
from __future__ import print_function
from six.moves import cPickle as pickle
from six.moves import range
from six.moves.urllib.request import urlretrieve
from scipy import ndimage
from PIL import Image
import numpy as np
import os
import sys
import tarfile
import h5py
from numpy import random

# Download data
print('Downloading data...')

# Base URL that every archive name passed to maybe_download() is appended to.
url = 'http://ufldl.stanford.edu/housenumbers/'

def maybe_download(filename, force=False):
  """Download `filename` from `url` unless a local copy already exists.

  The transfer is written to `<filename>.part` and only renamed to
  `filename` once it has finished, so an interrupted download can never be
  mistaken for a complete archive on the next run.

  Args:
    filename: Name of the remote file; also used as the local path.
    force: When True, download even if `filename` already exists.

  Returns:
    The local path of the file (identical to `filename`).

  Raises:
    Whatever `urlretrieve` raises on network failure; the partial file is
    removed before the exception propagates.
  """
  if force or not os.path.exists(filename):
    print('Attempting to download:', filename)
    partial = filename + '.part'
    try:
      urlretrieve(url + filename, partial)
      # Remove any stale copy first: os.rename() refuses to overwrite an
      # existing destination on some platforms.
      if os.path.exists(filename):
        os.remove(filename)
      os.rename(partial, filename)
    finally:
      # After a successful rename the partial file is gone; after a failure
      # this discards the truncated data.
      if os.path.exists(partial):
        os.remove(partial)
    print('Download Complete!')
  return filename

# Fetch the three archives one after another: train, test, then extra.
train_filename = maybe_download(filename='train.tar.gz')
test_filename = maybe_download(filename='test.tar.gz')
extra_filename = maybe_download(filename='extra.tar.gz')

print('Successfully downloaded data!')


# Unzip Data
print('Unzipping data...')
# Fix NumPy's global random state before the remaining steps run.
np.random.seed(seed=8)

def maybe_extract(filename, force=False):
  """Extract a `.tar.gz` archive into the current directory when needed.

  Extraction is skipped when a directory named after the archive (with both
  extensions stripped) already exists, unless `force` is True.

  Args:
    filename: Path of the archive, e.g. 'train.tar.gz'.
    force: When True, extract even if the target directory exists.

  Returns:
    The archive name with both extensions removed, e.g. 'train'.
  """
  # Remove .tar.gz: 'train.tar.gz' -> 'train.tar' -> 'train'
  root = os.path.splitext(os.path.splitext(filename)[0])[0]
  if os.path.isdir(root) and not force:
    # You may override by setting force=True.
    print('%s already present - Skipping extraction of %s.' % (root, filename))
  else:
    print('Extracting data for %s. This may take a while. Please wait.' % root)
    # Flush before the slow part so the message is visible while we wait.
    sys.stdout.flush()
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(filename) as tar:
      tar.extractall()
  data_folders = root
  print(data_folders)
  return data_folders
  
# Unpack each archive; every call returns the name of the extracted folder.
train_folders = maybe_extract(filename=train_filename)
test_folders = maybe_extract(filename=test_filename)
extra_folders = maybe_extract(filename=extra_filename)

print('Successfully unzipped data!')

# Create dictionary for bounding boxes
print('Creating dictionary of bounding boxes...')
class DigitStructFile:
    """Reader for the bounding-box records stored in a `digitStruct.mat` file.

    The file is opened read-only with h5py. Its `digitStruct` group is
    expected to hold a `name` and a `bbox` dataset whose entries are object
    references into the same file.

    Datasets are read with `dataset[()]` rather than the `Dataset.value`
    property, which was removed in h5py 3.0.
    """

    def __init__(self, inf):
        """Open `inf` (path to the .mat file) and cache the two datasets."""
        self.inf = h5py.File(inf, 'r')
        self.digitStructName = self.inf['digitStruct']['name']
        self.digitStructBbox = self.inf['digitStruct']['bbox']

    def close(self):
        """Release the underlying HDF5 file handle."""
        self.inf.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def getName(self,n):
        """Return the file name of record `n` (stored as character codes)."""
        codes = self.inf[self.digitStructName[n][0]][()]
        return ''.join([chr(c[0]) for c in codes])

    def bboxHelper(self,attr):
        """Return the values of one bounding-box attribute as a list.

        With more than one entry, `attr` holds object references that must
        each be dereferenced; with a single entry the value is stored inline.
        """
        if len(attr) > 1:
            return [self.inf[ref.item()][()][0][0] for ref in attr[()]]
        return [attr[()][0][0]]

    def getBbox(self,n):
        """Return a dict of per-digit attribute lists for record `n`."""
        group = self.inf[self.digitStructBbox[n].item()]
        bbox = {}
        for field in ('height', 'label', 'left', 'top', 'width'):
            bbox[field] = self.bboxHelper(group[field])
        return bbox

    def getDigitStructure(self,n):
        """Return the bounding-box dict of record `n` plus its 'name'."""
        s = self.getBbox(n)
        s['name']=self.getName(n)
        return s

    def getAllDigitStructure(self):
        """Return getDigitStructure(i) for every record in the file."""
        return [self.getDigitStructure(i) for i in range(len(self.digitStructName))]

    def getAllDigitStructure_ByDigit(self):
        """Return every record regrouped as one dict per digit.

        Each result item is `{'filename': ..., 'boxes': [...]}` where each
        box is a dict with 'height', 'label', 'left', 'top' and 'width'.
        """
        result = []
        for pict in self.getAllDigitStructure():
            figures = [
                {'height': height, 'label': label, 'left': left,
                 'top': top, 'width': width}
                for height, label, left, top, width in zip(
                    pict['height'], pict['label'], pict['left'],
                    pict['top'], pict['width'])
            ]
            result.append({'filename': pict['name'], 'boxes': figures})
        return result

print("Successfully created dictionary of bounding boxes!")


# Get Digit Structure
# Every extracted folder ships its annotations under the same file name.
struct_basename = 'digitStruct.mat'

print('Getting digit structure for training data...')
digitFileTrain = DigitStructFile(os.path.join('train', struct_basename))
train_data = digitFileTrain.getAllDigitStructure_ByDigit()
print('Success!')

print('Getting digit structure for test data...')
digitFileTest = DigitStructFile(os.path.join('test', struct_basename))
test_data = digitFileTest.getAllDigitStructure_ByDigit()
print('Success!')


print('Getting digit structure for extra data...')
digitFileExtra = DigitStructFile(os.path.join('extra', struct_basename))
extra_data = digitFileExtra.getAllDigitStructure_ByDigit()
print('Success!')

def _collect_image_sizes(data, folder):
    """Return an (n, 2) array holding PIL's `Image.size` for every record.

    Args:
        data: Sequence of dicts with a 'filename' key.
        folder: Directory the file names are relative to.

    Each image is opened in a `with` block so its file handle is released
    as soon as the size has been read.
    """
    sizes = np.ndarray([len(data), 2])
    for idx, record in enumerate(data):
        with Image.open(os.path.join(folder, record['filename'])) as img:
            sizes[idx, :] = img.size[:]
    return sizes


# NOTE: despite the progress messages, the three steps below only record
# the image sizes; nothing is cropped here.

# Crop Training Images
print('Cropping training images...')
train_imsize = _collect_image_sizes(train_data, train_folders)

print('Success!')

# Crop Test Images
print('Cropping test images...')
test_imsize = _collect_image_sizes(test_data, test_folders)

print('Success!')

# Crop Extra Images
print('Cropping extra images...')
extra_imsize = _collect_image_sizes(extra_data, extra_folders)

print('Success!')

# Use extra data
def _sequence_crop_box(boxes, image_size):
    """Return the (left, top, right, bottom) crop box around all digits.

    The box is the union of every digit box, grown by 10% of its height
    above and below and 10% of its width left and right, then clipped to
    the image bounds given by `image_size` (width, height).
    """
    top = np.array([box['top'] for box in boxes], dtype='float32')
    left = np.array([box['left'] for box in boxes], dtype='float32')
    height = np.array([box['height'] for box in boxes], dtype='float32')
    width = np.array([box['width'] for box in boxes], dtype='float32')

    union_top = np.amin(top)
    union_left = np.amin(left)
    # The far edges are the largest top+height / left+width over all boxes,
    # which is not necessarily the box with the largest top / left.
    union_height = np.amax(top + height) - union_top
    union_width = np.amax(left + width) - union_left

    crop_top = np.floor(union_top - 0.1 * union_height)
    crop_left = np.floor(union_left - 0.1 * union_width)
    crop_bottom = min(np.ceil(crop_top + 1.2 * union_height), image_size[1])
    crop_right = min(np.ceil(crop_left + 1.2 * union_width), image_size[0])
    # Clip the near edges too so the crop never reaches outside the image.
    crop_top = max(crop_top, 0)
    crop_left = max(crop_left, 0)
    return int(crop_left), int(crop_top), int(crop_right), int(crop_bottom)


def generate_dataset(data, folder):
    """Build normalised 32x32 grayscale crops and their label rows.

    Args:
        data: Sequence of records, each `{'filename': ..., 'boxes': [...]}`
            where every box has 'label', 'top', 'left', 'height', 'width'.
        folder: Directory the file names are relative to.

    Returns:
        (dataset, labels). `dataset` is float32 of shape (n, 32, 32, 1),
        each image having its mean subtracted and being divided by its
        sample standard deviation. `labels` is an int array of shape (n, 6):
        column 0 is the number of digits, columns 1-5 hold the digits with
        10 meaning "no digit"; a source label of 10 is stored as 0
        (presumably the source data encodes the digit zero as 10).
    """
    dataset = np.ndarray([len(data),32,32,1], dtype='float32')
    labels = np.ones([len(data),6], dtype=int) * 10
    for i, record in enumerate(data):
        boxes = record['boxes']
        labels[i,0] = len(boxes)
        for j, box in enumerate(boxes):
            if j < 5:
                labels[i,j+1] = 0 if box['label'] == 10 else box['label']
            else:
                print('#',i,'image has more than 5 digits.')

        # The context manager releases the file handle once the resized
        # copy has been produced.
        with Image.open(os.path.join(folder, record['filename'])) as im:
            crop_box = _sequence_crop_box(boxes, im.size)
            # convert('RGB') guarantees the three channels the grayscale
            # weights below rely on. Image.LANCZOS is the same filter as the
            # Image.ANTIALIAS alias, which was removed in Pillow 10.
            patch = im.convert('RGB').crop(crop_box).resize(
                (32, 32), Image.LANCZOS)

        # Weighted sum of the R, G, B channels -> one grayscale channel.
        gray = np.dot(np.array(patch, dtype='float32'),
                      [[0.2989],[0.5870],[0.1140]])
        mean = np.mean(gray, dtype='float32')
        std = np.std(gray, dtype='float32', ddof=1)
        # Avoid dividing by (almost) zero on flat images.
        if std < 1e-4: std = 1.
        dataset[i,:,:,:] = (gray - mean) / std

    return dataset, labels

print('Generating training dataset and labels...')
train_dataset, train_labels = generate_dataset(train_data, train_folders)
print('Success! \n Training set: {} \n Training labels: {}'.format(train_dataset.shape, train_labels.shape))


print('Generating testing dataset and labels...')
test_dataset, test_labels = generate_dataset(test_data, test_folders)
print('Success! \n Testing set: {} \n Testing labels: {}'.format(test_dataset.shape, test_labels.shape))

print('Generating extra dataset and labels...')
extra_dataset, extra_labels = generate_dataset(extra_data, extra_folders)
# Report the extra set under its own name (it was previously labelled
# "Testing set", a copy-paste slip from the block above).
print('Success! \n Extra set: {} \n Extra labels: {}'.format(extra_dataset.shape, extra_labels.shape))


# Clean up data by deleting digits more than 5 (very few)
print('Cleaning up training data...')
# Column 0 of the label matrix holds the digit count, so the over-long
# samples can be looked up instead of hard-coding a row index. Unlike a
# fixed index, this is also safe to run more than once.
too_long_rows = np.where(train_labels[:, 0] > 5)[0]
train_dataset = np.delete(train_dataset, too_long_rows, axis=0)
train_labels = np.delete(train_labels, too_long_rows, axis=0)
print('Success!')

# Expand Training Data
print('Expanding training data randomly...')

random.seed(8)

n_labels = 10
valid_index = []
valid_index2 = []
train_index = []
train_index2 = []
# For each possible first digit: the first 400 matching training rows and
# the first 200 matching extra rows go to validation, the rest to training.
for first_digit in np.arange(n_labels):
    train_rows = np.where(train_labels[:,1] == first_digit)[0]
    extra_rows = np.where(extra_labels[:,1] == first_digit)[0]
    valid_index += train_rows[:400].tolist()
    train_index += train_rows[400:].tolist()
    valid_index2 += extra_rows[:200].tolist()
    train_index2 += extra_rows[200:].tolist()

# Shuffle in this fixed order so the seeded random stream is consumed
# identically on every run.
for index_list in (valid_index, train_index, valid_index2, train_index2):
    random.shuffle(index_list)

# Extra-set rows come first, followed by the original training-set rows.
valid_dataset = np.concatenate(
    (extra_dataset[valid_index2,:,:,:], train_dataset[valid_index,:,:,:]),
    axis=0)
valid_labels = np.concatenate(
    (extra_labels[valid_index2,:], train_labels[valid_index,:]),
    axis=0)
train_dataset_new = np.concatenate(
    (extra_dataset[train_index2,:,:,:], train_dataset[train_index,:,:,:]),
    axis=0)
train_labels_new = np.concatenate(
    (extra_labels[train_index2,:], train_labels[train_index,:]),
    axis=0)

print('Success! \n Training set: {} \n Training labels: {}'.format(train_dataset_new.shape, train_labels_new.shape))
print('Success! \n Validation set: {} \n Validation labels: {}'.format(valid_dataset.shape, valid_labels.shape))
print('Success! \n Testing set: {} \n Testing labels: {}'.format(test_dataset.shape, test_labels.shape))


# Create Pickling File
print('Pickling data...')
pickle_file = 'SVHN.pickle'

try:
    # The context manager closes the file even when pickle.dump() fails.
    with open(pickle_file, 'wb') as f:
        save = {
            'train_dataset': train_dataset_new,
            'train_labels': train_labels_new,
            'valid_dataset': valid_dataset,
            'valid_labels': valid_labels,
            'test_dataset': test_dataset,
            'test_labels': test_labels,
            }
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    # Report which file failed, then let the error propagate.
    print('Unable to save data to {}: {}'.format(pickle_file, e))
    raise

statinfo = os.stat(pickle_file)
print('Success!')
print('Compressed pickle size: {}'.format(statinfo.st_size))

Downloading data...
Successfully downloaded data!
Unzipping data...
train already present - Skipping extraction of train.tar.gz.
train
test already present - Skipping extraction of test.tar.gz.
test
Extracting data for extra. This may take a while. Please wait.


EOFError: Compressed file ended before the end-of-stream marker was reached

In [None]:
print('Loading pickled data...')

pickle_file = 'SVHN.pickle'

# NOTE: unpickling executes arbitrary code; only load files you created.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Pull the six arrays out of the dictionary, then drop the dictionary.
X_train, y_train = save['train_dataset'], save['train_labels']
X_val, y_val = save['valid_dataset'], save['valid_labels']
X_test, y_test = save['test_dataset'], save['test_labels']
del save

print('Training data shape:', X_train.shape)
print('Training label shape:', y_train.shape)
print('Validation data shape:', X_val.shape)
print('Validation label shape:', y_val.shape)
print('Test data shape:', X_test.shape)
print('Test label shape:', y_test.shape)

print('Data successfully loaded!')



print('Defining accuracy function...')
def accuracy(predictions, labels):
    """Return the percentage of individual digit predictions that are right.

    `predictions` is indexed as [digit position, sample, class]; the arg-max
    over the class axis is transposed and compared element-wise with
    `labels`, which is indexed as [sample, digit position].
    """
    predicted_digits = np.argmax(predictions, 2).T
    hits = np.sum(predicted_digits == labels)
    n_positions, n_samples = predictions.shape[0], predictions.shape[1]
    return 100.0 * hits / n_samples / n_positions
print('Accuracy function defined!')

# CNN Model
print('Loading data and building computation graph...')

# ---- Basic information ----
image_size = 32      # images were preprocessed to 32 x 32
num_channels = 1     # greyscale, hence a single channel
batch_size = 16      # mini-batch size
num_labels = 11      # number of output labels per digit position

# ---- Filters ----
# depth_*: number of filters (output channels) of each convolution; they
# grow from layer to layer. num_channels (set above) is the input depth.
patch_size = 5
depth_1 = 16
depth_2 = 2 * depth_1
depth_3 = 3 * depth_2

# Number of hidden nodes in fully connected layer 1
num_hidden = 64
# Shape of one training mini-batch.
shape = [batch_size, image_size, image_size, num_channels]

# NOTE(review): `tf` is used from here on but no `import tensorflow as tf`
# is visible in this file — confirm it is imported before this cell runs.
graph = tf.Graph()

# Everything created inside this block is added to `graph`.
with graph.as_default():

    '''Input Data'''
    # One mini-batch of training images, fed at every step.
    # (X_train was (223965, 32, 32, 1) in an earlier run — TODO confirm.)
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))

    # Matching label rows: 6 columns per sample (the loss reads columns 1-5).
    tf_train_labels = tf.placeholder(
        tf.int32, shape=(batch_size, 6))

    # The whole validation set, baked into the graph as a constant.
    # (X_val was (11788, 32, 32, 1) in an earlier run — TODO confirm.)
    tf_valid_dataset = tf.constant(X_val)

    # The whole test set, baked into the graph as a constant.
    # (X_test was (13067, 32, 32, 1) in an earlier run — TODO confirm.)
    tf_test_dataset = tf.constant(X_test)

    # ---- Variables ----

    # Create Variables Function
    def init_weights(shape, name):
        """Return a variable called `name`, drawn from N(0, 0.01^2)."""
        initial_value = tf.random_normal(shape=shape, stddev=0.01)
        return tf.Variable(initial_value, name=name)

    def init_biases(shape, name):
        """Return a variable called `name`, filled with the constant 1.0."""
        initial_value = tf.constant(1.0, shape=shape)
        return tf.Variable(initial_value, name=name)

    # Create Function for Image Size: Pooling
    # 3 Convolutions
    # 2 Max Pooling
    def output_size_pool(input_size, conv_filter_size, pool_filter_size,
                         padding, conv_stride, pool_stride):
        """Return the spatial size after conv-pool-conv-pool-conv.

        Args:
            input_size: Width (= height) of the square input image.
            conv_filter_size: Window size of the three convolutions.
            pool_filter_size: Window size of the two pooling layers.
            padding: 'valid' or 'same' (case-insensitive).
            conv_stride: Stride of every convolution.
            pool_stride: Stride of every pooling layer.

        Returns:
            The output size as an int, or None for an unknown padding mode.

        Sizes are floored after every layer (a partial window produces no
        output), using the usual rules:
            'valid': floor((size - window) / stride) + 1
            'same' : ceil(size / stride)
        """
        mode = padding.lower() if hasattr(padding, 'lower') else padding
        if mode not in ('same', 'valid'):
            return None

        def _after_layer(size, window, stride):
            if mode == 'same':
                # Ceiling division without importing math.
                return -(-size // stride)
            return (size - window) // stride + 1

        size = input_size
        # After convolution 1
        size = _after_layer(size, conv_filter_size, conv_stride)
        # After pool 1
        size = _after_layer(size, pool_filter_size, pool_stride)
        # After convolution 2
        size = _after_layer(size, conv_filter_size, conv_stride)
        # After pool 2
        size = _after_layer(size, pool_filter_size, pool_stride)
        # After convolution 3 (no pooling follows it)
        size = _after_layer(size, conv_filter_size, conv_stride)
        return int(size)

    # Convolution 1
    # Input channels: num_channels = 1
    # Output channels: depth_1
    w_c1 = init_weights([patch_size, patch_size, num_channels, depth_1], 'w_c1')
    b_c1 = init_biases([depth_1], 'b_c1')

    # Convolution 2
    # Input channels: depth_1
    # Output channels: depth_2
    w_c2 = init_weights([patch_size, patch_size, depth_1, depth_2], 'w_c2')
    b_c2 = init_biases([depth_2], 'b_c2')

    # Convolution 3
    # Input channels: depth_2
    # Output channels: depth_3
    w_c3 = init_weights([patch_size, patch_size, depth_2, depth_3], 'w_c3')
    b_c3 = init_biases([depth_3], 'b_c3')

    # Fully Connected Layer 1
    # Its input is the flattened output of convolution 3, so the weight
    # matrix needs the spatial size left after the conv/pool stack.
    final_image_size = output_size_pool(input_size=image_size,
                                        conv_filter_size=5, pool_filter_size=2,
                                        padding='valid', conv_stride=1,
                                        pool_stride=2)
    print('Final image size after convolutions {}'.format(final_image_size))
    w_fc1 = init_weights([final_image_size*final_image_size*depth_3, num_hidden], 'w_fc1')
    b_fc1 = init_biases([num_hidden], 'b_fc1')

    # One softmax head per digit position (1 to 5). All heads read the same
    # fully connected layer and each outputs num_labels logits.

    # Softmax 1
    w_s1 = init_weights([num_hidden, num_labels], 'w_s1')
    b_s1 = init_biases([num_labels], 'b_s1')

    # Softmax 2
    w_s2 = init_weights([num_hidden, num_labels], 'w_s2')
    b_s2 = init_biases([num_labels], 'b_s2')

    # Softmax 3
    w_s3 = init_weights([num_hidden, num_labels], 'w_s3')
    b_s3 = init_biases([num_labels], 'b_s3')

    # Softmax 4
    w_s4 = init_weights([num_hidden, num_labels], 'w_s4')
    b_s4 = init_biases([num_labels], 'b_s4')

    # Softmax 5
    w_s5 = init_weights([num_hidden, num_labels], 'w_s5')
    b_s5 = init_biases([num_labels], 'b_s5')

    def model(data, keep_prob, shape):
        """Build the network on `data` and return the five logits tensors.

        Args:
            data: Batch of input images.
            keep_prob: Keep probability of the dropout applied to the last
                convolution's output.
            shape: Accepted for the callers' sake; the flattened size is
                taken from the tensor's static shape instead.

        Returns:
            A list of five logits tensors, one per digit position.
        """
        unit_strides = [1, 1, 1, 1]
        pool_window = [1, 2, 2, 1]

        with tf.name_scope("conv_layer_1"):
            features = tf.nn.conv2d(
                data, w_c1, strides=unit_strides, padding='VALID')
            features = tf.nn.relu(features + b_c1)
            features = tf.nn.max_pool(
                features, pool_window, pool_window, padding='VALID')
        with tf.name_scope("conv_layer_2"):
            features = tf.nn.conv2d(
                features, w_c2, strides=unit_strides, padding='VALID')
            features = tf.nn.relu(features + b_c2)
            features = tf.nn.max_pool(
                features, pool_window, pool_window, padding='VALID')
        with tf.name_scope("conv_layer_3"):
            features = tf.nn.conv2d(
                features, w_c3, strides=unit_strides, padding='VALID')
            features = tf.nn.relu(features + b_c3)
        with tf.name_scope("fc_layer_1"):
            dropped = tf.nn.dropout(features, keep_prob)
            dims = dropped.get_shape().as_list()
            # Flatten everything except the batch dimension.
            flat = tf.reshape(dropped, [dims[0], dims[1] * dims[2] * dims[3]])
            hidden_fc = tf.nn.relu(tf.matmul(flat, w_fc1) + b_fc1)

        # One linear head per digit position, all fed by the same layer.
        heads = (
            ("softmax_1", w_s1, b_s1),
            ("softmax_2", w_s2, b_s2),
            ("softmax_3", w_s3, b_s3),
            ("softmax_4", w_s4, b_s4),
            ("softmax_5", w_s5, b_s5),
        )
        all_logits = []
        for scope_name, head_weights, head_biases in heads:
            with tf.name_scope(scope_name):
                all_logits.append(
                    tf.matmul(hidden_fc, head_weights) + head_biases)
        return all_logits

    # ---- Training Computation ----
    # Dropout keeps half of the activations while training.
    train_logits = model(tf_train_dataset, 0.5, shape)
    logits_1, logits_2, logits_3, logits_4, logits_5 = train_logits

    # ---- Loss Function ----
    # Sum of the mean cross-entropies of the five digit positions; label
    # column k belongs to the k-th head (column 0 is not used here).
    with tf.name_scope("loss"):
        loss = None
        for label_column, head_logits in enumerate(train_logits, 1):
            head_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    head_logits, tf_train_labels[:, label_column]))
            loss = head_loss if loss is None else loss + head_loss
        # Add scalar summary for cost
        tf.scalar_summary("loss", loss)

    # ---- Optimizer ----
    # global_step counts the optimisation steps taken; minimize() below
    # increments it. The learning rate starts at 0.05 and is multiplied by
    # 0.96 after every 100000 steps (staircase decay).
    global_step = tf.Variable(0)
    start_learning_rate = 0.05
    learning_rate = tf.train.exponential_decay(
        start_learning_rate,
        global_step,
        100000,
        0.96,
        staircase=True)

    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = sgd.minimize(loss, global_step=global_step)

    '''Predictions'''
    def softmax_combine(dataset, shape):
        """Return the stacked softmax outputs of the five heads for `dataset`.

        The network is built once (keep probability 1.0, i.e. no units are
        dropped) and each head's logits are passed through a softmax.
        Previously the whole network was rebuilt for every head, adding five
        identical copies of it to the graph per dataset.

        Returns:
            A tensor stacking the five per-head softmax outputs along a new
            first axis.
        """
        head_logits = model(dataset, 1.0, shape)
        return tf.pack([tf.nn.softmax(logits) for logits in head_logits])

    train_prediction = softmax_combine(tf_train_dataset, shape)
    valid_prediction = softmax_combine(tf_valid_dataset, shape)
    test_prediction = softmax_combine(tf_test_dataset, shape)

    # ---- Save Model (will be initiated later) ----
    saver = tf.train.Saver()

    # ---- Histogram for Weights ----
    # Add one histogram summary per weight and bias variable, in layer order.
    histogram_targets = (
        ("w_c1_summ", w_c1), ("b_c1_summ", b_c1),
        ("w_c2_summ", w_c2), ("b_c2_summ", b_c2),
        ("w_c3_summ", w_c3), ("b_c3_summ", b_c3),
        ("w_fc1_summ", w_fc1), ("b_fc1_summ", b_fc1),
        ("w_s1_summ", w_s1), ("b_s1_summ", b_s1),
        ("w_s2_summ", w_s2), ("b_s2_summ", b_s2),
        ("w_s3_summ", w_s3), ("b_s3_summ", b_s3),
        ("w_s4_summ", w_s4), ("b_s4_summ", b_s4),
        ("w_s5_summ", w_s5), ("b_s5_summ", b_s5),
    )
    for summary_tag, variable in histogram_targets:
        tf.histogram_summary(summary_tag, variable)

print('Data loaded and computation graph built!')

num_steps = 60000

print('Running computation and iteration...')
print('If you are unable to save the summary, please change the path to where you want it to write.')

with tf.Session(graph=graph) as session:
    # Write the event files to ./log_trial_1, the directory the closing
    # message tells the user to point TensorBoard at (this used to be
    # "/log_trial_1", i.e. the filesystem root).
    writer = tf.train.SummaryWriter("log_trial_1", session.graph)  # for 0.8
    merged = tf.merge_all_summaries()

    '''If you want to restore model'''
    # saver.restore(session, "model_trial_1.ckpt")
    # print("Model restored!")

    tf.initialize_all_variables().run()
    print('Initialized')
    try:
        for step in range(num_steps):
            # Walk through the training set in consecutive mini-batches,
            # wrapping around before running past the end.
            offset = (step * batch_size) % (y_train.shape[0] - batch_size)
            batch_data = X_train[offset:(offset + batch_size), :, :, :]
            batch_labels = y_train[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data,
                         tf_train_labels: batch_labels}
            _, l, predictions, summary = session.run(
                [optimizer, loss, train_prediction, merged],
                feed_dict=feed_dict)
            # Record the step so TensorBoard can plot the values over time.
            writer.add_summary(summary, step)
            if (step % 500 == 0):
                print('Minibatch loss at step {}: {}'.format(step, l))
                # Label columns 1-5 hold the digits; column 0 is the count.
                print('Minibatch accuracy: {}%'.format(
                    accuracy(predictions, batch_labels[:,1:6])))
                print('Validation accuracy: {}%'.format(
                    accuracy(valid_prediction.eval(), y_val[:,1:6])))
        print('Test accuracy: {}%'.format(
            accuracy(test_prediction.eval(), y_test[:,1:6])))

        save_path = saver.save(session, "model_trial_1.ckpt")
        print('Model saved in file: {}'.format(save_path))
    finally:
        # Flush pending events and release the event file.
        writer.close()


print('Successfully completed computation and iterations!')

print('To view Tensorboard\'s visualizations, please run \
\'tensorboard --logdir=log_trial_1\' in your terminal')
