In [None]:
%matplotlib inline
from __future__ import print_function
import os
import sys
import copy
import random
import traceback
import numpy as np
import tensorflow as tf
import lxml.etree as et

import convnet
import mutate
import convevo
import darwin

In [None]:
# Re-import the project modules so edits made on disk are picked up
# without restarting the kernel.
reload(convnet)
reload(mutate)
reload(convevo)
reload(darwin)

In [None]:
# http://stackoverflow.com/questions/29772158/make-ipython-notebook-print-in-real-time
class flushfile(object):
    """Stream proxy that flushes the wrapped stream after every write.

    Attributes that the proxy does not define itself are looked up on the
    wrapped stream.

    Args:
        f: The stream to wrap; it must provide write() and flush().
    """

    # Marker used below to detect that sys.stdout is already wrapped, even if
    # this cell (and therefore this class) has been re-executed.
    _is_flushfile = True

    def __init__(self, f):
        self.f = f

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. Refuse to resolve
        # 'f' here so an instance without 'f' set raises AttributeError
        # instead of recursing forever.
        if name == "f":
            raise AttributeError(name)
        # getattr (rather than object.__getattribute__) also honours
        # attributes the wrapped object provides through its own __getattr__.
        return getattr(self.f, name)

    def write(self, x):
        """Write x to the wrapped stream and flush immediately."""
        self.f.write(x)
        self.f.flush()

    def flush(self):
        """Flush the wrapped stream."""
        self.f.flush()


# Wrap only once: re-running this cell must not stack proxies or overwrite
# oldsysstdout with an already wrapped stream.
if not getattr(sys.stdout, "_is_flushfile", False):
    oldsysstdout = sys.stdout
    sys.stdout = flushfile(oldsysstdout)

In [None]:
from six.moves import cPickle as pickle
pickle_file = '../ud730/notMNIST.pickle'

# SECURITY NOTE: unpickling can execute arbitrary code, so only point this at
# a pickle file you trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Pull the three splits out of the loaded dictionary.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

In [None]:
# Image metadata shared by reformat() and the graph builders below.
datasets = {"image_size": 28, "label_count": 10, "channel_count": 1}
# Number of pixels in one square image.
datasets["total_image_size"] = datasets["image_size"] ** 2

def reformat(dataset, labels, name):
    """Reshape images to 4-D float32 and one-hot encode the labels.

    Image side length, channel count and label count are read from the
    module-level `datasets` dictionary.

    Args:
        dataset: Array of images; reshaped to
            (-1, image_size, image_size, channel_count).
        labels: 1-D array of integer class ids.
        name: Prefix for the shape summary that is printed.

    Returns:
        Tuple (dataset, labels) of float32 arrays.
    """
    side = datasets["image_size"]
    image_shape = (-1, side, side, datasets["channel_count"])
    dataset = dataset.reshape(image_shape).astype(np.float32)
    # Comparing a row of class ids against a column of labels yields one
    # boolean row per example: 2 -> [0.0, 0.0, 1.0 ...], 3 -> [0.0, 0.0, 0.0, 1.0 ...]
    class_ids = np.arange(datasets["label_count"])
    labels = (class_ids == labels[:, None]).astype(np.float32)
    print(name + " set", dataset.shape, labels.shape)
    return dataset, labels
# Reformat every split and store the images and one-hot labels in `datasets`
# next to the image metadata.
datasets["train"], datasets["train_labels"] = reformat(train_dataset, train_labels, "Training")
datasets["valid"], datasets["valid_labels"] = reformat(valid_dataset, valid_labels, "Validation")
datasets["test"], datasets["test_labels"] = reformat(test_dataset, test_labels, "Test")

# Show which entries are now available in `datasets`.
print(datasets.keys())

In [None]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max matches the label's.

    Args:
        predictions: 2-D array of per-class scores, one row per example.
        labels: 2-D array with the same number of rows (e.g. one-hot labels).

    Returns:
        100 * (number of rows where both arg-max indices agree) / row count.
    """
    predicted = np.argmax(predictions, 1)
    expected = np.argmax(labels, 1)
    hits = np.sum(predicted == expected)
    return 100.0 * hits / predictions.shape[0]

In [None]:
def batch_accuracy(session, graph_info, inputs, labels, batch_size):
    """Average accuracy() over consecutive full batches of `inputs`.

    Each batch is fed through graph_info["verify"] and scored from
    graph_info["verify_predictions"]. Examples past the last full batch are
    not evaluated.

    Args:
        session: Object whose run(fetches, feed_dict=...) evaluates the graph.
        graph_info: Dict providing the "verify" and "verify_predictions" entries.
        inputs: Sliceable sequence of input examples.
        labels: Labels aligned with `inputs`.
        batch_size: Number of examples fed per run.

    Returns:
        Mean of the per-batch accuracy percentages, or 0.0 when `inputs`
        holds fewer than `batch_size` examples.
    """
    # Floor division keeps the count an integer even when `/` means true
    # division (Python 3, or `from __future__ import division`).
    batch_count = len(inputs) // batch_size
    if batch_count == 0:
        return 0.0

    total_accuracy = 0.0
    for b in range(batch_count):
        start = b * batch_size
        stop = start + batch_size
        predictions = session.run(
            [graph_info["verify_predictions"]],
            feed_dict={graph_info["verify"]: inputs[start:stop]})[0]
        total_accuracy += accuracy(predictions, labels[start:stop])
    # Divide once at the end instead of once per batch.
    return total_accuracy / batch_count

In [None]:
def run_graph(graph_info, data, step_count, report_every=50):
    """Train the graph on minibatches and return its test accuracy.

    Args:
        graph_info: Dict with "graph", "batch_size", "train", "labels",
            "optimizer", "loss" and "predictions" entries, plus the entries
            batch_accuracy() reads.
        data: Dict with "train", "train_labels", "valid", "valid_labels",
            "test" and "test_labels" arrays.
        step_count: Index of the last training step; step_count + 1 steps run.
        report_every: Print minibatch and validation results every this many
            steps.

    Returns:
        The test accuracy from batch_accuracy(), or 0 as soon as a minibatch
        loss evaluates to NaN.
    """
    with tf.Session(graph=graph_info["graph"]) as session:
        tf.initialize_all_variables().run()
        print("Initialized")
        batch_size = graph_info["batch_size"]
        for step in xrange(step_count + 1):
            # Walk through the training data (already randomized) in
            # batch-sized strides, wrapping around near the end.
            # Note: we could use better randomization across epochs.
            train_labels = data["train_labels"]
            start = (step * batch_size) % (train_labels.shape[0] - batch_size)
            stop = start + batch_size
            batch_data = data["train"][start:stop, :, :, :]
            batch_labels = train_labels[start:stop, :]
            # Map each placeholder to the numpy array that feeds it.
            fetches = [graph_info["optimizer"], graph_info["loss"], graph_info["predictions"]]
            feeds = {graph_info["train"]: batch_data, graph_info["labels"]: batch_labels}
            _, loss_value, predictions = session.run(fetches, feed_dict=feeds)
            if np.isnan(loss_value):
                print("Error computing loss")
                return 0
            if step % report_every != 0:
                continue
            print("Minibatch loss at step", step, ":", loss_value)
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            valid_accuracy = batch_accuracy(session, graph_info, data["valid"], data["valid_labels"], batch_size)
            print("Validation accuracy: %.1f%%" % valid_accuracy)
        test_accuracy = batch_accuracy(session, graph_info, data["test"], data["test_labels"], batch_size)
        print("Test accuracy: %.1f%%" % test_accuracy)
        return test_accuracy

In [None]:
def shape_test(shape, options, func):
    """Return the static output shape `func` produces for an input of `shape`.

    A throwaway graph is built with a float32 placeholder of `shape`;
    `func` is applied to it with parameters from convnet.setup_matrix(options).

    Args:
        shape: Shape of the placeholder to create.
        options: Options passed to convnet.setup_matrix and to `func`.
        func: Callable invoked as func(placeholder, False, parameters, options).

    Returns:
        Tuple of ints taken from the result's get_shape().
    """
    scratch_graph = tf.Graph()
    with scratch_graph.as_default():
        placeholder = tf.placeholder(tf.float32, shape=shape)
        parameters = convnet.setup_matrix(options)
        output = func(placeholder, False, parameters, options)
        return tuple([int(dim) for dim in output.get_shape()])
    
# Cross-check convnet.image_output_shape against the shape reported by
# shape_test for every width / patch / stride / padding combination below.
default_init = convnet.setup_initializer()
correct = 0
for w in xrange(1, 7):
    for p in xrange(1, w + 1):
        for s in xrange(1, p + 1):
            for pad in ["SAME", "VALID"]:
                for func in [convnet.apply_pool, convnet.apply_conv]:
                    options = {
                        "size": (p, p, 1, 1),
                        "stride": (s, s),
                        "padding": pad,
                        "pool_type": "max",
                        "bias": False,
                        "init": default_init,
                    }
                    calc = convnet.image_output_shape([1, w, w, 1], options)
                    shape = shape_test([1, w, w, 1], options, func)
                    if not (calc == shape):
                        print("mismatch for", w, p, s, pad, shape, calc)
                    else:
                        correct += 1
# Display the number of combinations that agreed.
correct

In [None]:
def convnet_two_layer(batch_size, patch_size, depth, hidden_size, data):
    """Build a graph with two strided conv layers followed by two matrix layers.

    Args:
        batch_size: Leading dimension of the train/labels/verify placeholders.
        patch_size: Side length passed to both conv layers.
        depth: Output depth of both conv layers.
        hidden_size: Output size of the first matrix layer.
        data: Dict providing "image_size", "label_count" and "channel_count".

    Returns:
        Dict with "graph", "batch_size", the "train", "labels" and "verify"
        placeholders, and the "loss", "optimizer", "predictions" and
        "verify_predictions" nodes.
    """
    image_size = data["image_size"]
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels = tf.placeholder(tf.float32, shape=output_shape)
        verify = tf.placeholder(tf.float32, shape=input_shape)

        stride = 2

        # Size of the flattened conv output fed to the first matrix layer.
        # Presumably each strided conv divides both image sides by `stride`
        # (confirm against convnet); floor division keeps the size an integer.
        flat_size = image_size * image_size * depth // pow(stride, 4)

        layers = [
            # The first conv consumes the placeholder's channels, so use
            # channel_count rather than a hard-coded 1.
            convnet.create_conv_layer((patch_size, patch_size), (stride, stride), channel_count, depth),
            convnet.create_relu_layer(),
            convnet.create_conv_layer((patch_size, patch_size), (stride, stride), depth, depth),
            convnet.create_relu_layer(),
            convnet.create_flatten_layer(),
            convnet.create_matrix_layer(flat_size, hidden_size),
            convnet.create_relu_layer(),
            convnet.create_matrix_layer(hidden_size, label_count)
        ]

        for layer in layers:
            layer.setup_parameters()

        def model(nodes, is_training):
            # Chain the layers, appending every intermediate node to `nodes`.
            for layer in layers:
                nodes.append(layer.connect(nodes[-1], is_training))
            return nodes[-1]

        logits = model([train], True)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))

        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(0.05).minimize(loss),

            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            # Second pass over the same layers, connected with the flag False.
            "verify_predictions": tf.nn.softmax(model([verify], False))
        }
    return info

In [None]:
# Build the hand-wired two-conv-layer graph.
graph_2conv = convnet_two_layer(batch_size=16, patch_size=5, depth=16, hidden_size=64, data=datasets)

# Train through step 1000; the cell shows run_graph's return value.
run_graph(graph_2conv, datasets, 1000)

In [None]:
def convnet_two_layer_stack(batch_size, patch_size, depth, hidden_size, data):
    """Build the two-conv-layer graph from a mutated convevo.LayerStack.

    A stack of two "conv_bias" image layers and two hidden layers is
    assembled, deep-copied, mutated and reseeded; the graph is constructed
    from that copy.

    Args:
        batch_size: Leading dimension of the train/labels/verify placeholders.
        patch_size: Patch size passed to both image layers.
        depth: Depth passed to both image layers.
        hidden_size: Size of the first hidden layer.
        data: Dict providing "image_size", "label_count" and "channel_count".

    Returns:
        Dict with "graph", "batch_size", the "train", "labels" and "verify"
        placeholders, and the "loss", "optimizer", "predictions" and
        "verify_predictions" nodes.
    """
    side = data["image_size"]
    label_count = data["label_count"]
    channels = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Placeholders for the inputs.
        input_shape = (batch_size, side, side, channels)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels = tf.placeholder(tf.float32, shape=output_shape)
        verify = tf.placeholder(tf.float32, shape=input_shape)

        stride = 2

        evo_stack = convevo.LayerStack(flatten=True)

        def make_init():
            # Every layer gets its own Initializer instance.
            return convevo.Initializer("normal", scale=1)

        # Two identical strided conv layers, then the hidden and output layers.
        for _ in range(2):
            conv = convevo.ImageLayer("conv_bias", patch_size, stride, depth, "SAME", make_init())
            evo_stack.add_layer(conv, relu=True)
        evo_stack.add_layer(convevo.HiddenLayer(hidden_size, bias=True, initializer=make_init()), relu=True)
        evo_stack.add_layer(convevo.HiddenLayer(label_count, bias=True, initializer=make_init()), relu=False)

        # Debug aid: print(et.tostring(evo_stack.to_xml(), pretty_print=True))

        # Mutate and reseed a copy so the stack built above stays untouched.
        mutated = copy.deepcopy(evo_stack)
        mutated.mutate(55)
        mutated.reseed(random.Random(101))

        layers = mutated.construct(input_shape)
        for layer in layers:
            layer.setup_parameters()

        def model(nodes, is_training):
            # Chain the layers, appending every intermediate node to `nodes`.
            for layer in layers:
                nodes.append(layer.connect(nodes[-1], is_training))
            return nodes[-1]

        logits = model([train], True)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

        # Predictions for the training, validation, and test data.
        predictions = tf.nn.softmax(logits)
        verify_predictions = tf.nn.softmax(model([verify], False))

        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": optimizer,
            "predictions": predictions,
            "verify": verify,
            "verify_predictions": verify_predictions,
        }
    return info

In [None]:
# NOTE(review): tf.set_random_seed applies to the current default graph,
# while convnet_two_layer_stack builds its nodes inside its own tf.Graph().
# Confirm that this seed has the intended effect on the graph trained below.
tf.set_random_seed(42)

graph_2conv_stack = convnet_two_layer_stack(batch_size=16, patch_size=5, depth=16, hidden_size=64, data=datasets)

# Train the stack-built graph created above. This cell used to pass
# graph_2conv, so graph_2conv_stack was built but never run.
run_graph(graph_2conv_stack, datasets, 1000)

In [None]:
def convnet_optimize(
    batch_size,
    patch_size,
    depth,
    hidden_size,
    data,
    rate_alpha=0.05,
    decay_rate=1.0,
    decay_steps=1000
):
    """Build the two-conv-layer stack graph with a decaying learning rate.

    The loss is the mean softmax cross entropy plus the value accumulated
    through each layer's update_loss().

    Args:
        batch_size: Leading dimension of the train/labels/verify placeholders.
        patch_size: Patch size passed to both image layers.
        depth: Depth passed to both image layers.
        hidden_size: Size of the first hidden layer.
        data: Dict providing "image_size", "label_count" and "channel_count".
        rate_alpha: Initial learning rate given to tf.train.exponential_decay.
        decay_rate: Decay rate given to tf.train.exponential_decay.
        decay_steps: Decay steps given to tf.train.exponential_decay.

    Returns:
        Dict with "graph", "batch_size", the "train", "labels" and "verify"
        placeholders, and the "loss", "optimizer", "predictions" and
        "verify_predictions" nodes.
    """
    image_size = data["image_size"]
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels = tf.placeholder(tf.float32, shape=output_shape)
        verify = tf.placeholder(tf.float32, shape=input_shape)

        stride = 2

        evo_stack = convevo.LayerStack(flatten=True)
        # Every layer gets its own Initializer instance.
        default_init = lambda: convevo.Initializer("normal", scale=0.1)

        evo_stack.add_layer(convevo.ImageLayer("conv_bias", patch_size, stride, depth, "SAME", default_init()), relu=True)
        evo_stack.add_layer(convevo.ImageLayer("conv_bias", patch_size, stride, depth, "SAME", default_init()), relu=True)
        evo_stack.add_layer(convevo.HiddenLayer(hidden_size, bias=True, initializer=default_init()), relu=True)
        evo_stack.add_layer(convevo.HiddenLayer(label_count, bias=True, initializer=default_init()), relu=False)

        layers = evo_stack.construct(input_shape)
        l2_loss = 0

        for layer in layers:
            layer.setup_parameters()
            # Each layer returns the updated running loss term.
            l2_loss = layer.update_loss(l2_loss)

        def model(nodes, is_training):
            # Chain the layers, appending every intermediate node to `nodes`.
            for layer in layers:
                nodes.append(layer.connect(nodes[-1], is_training))
            return nodes[-1]

        logits = model([train], True)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels)) + l2_loss

        # The step counter is bookkeeping advanced by minimize(), not a model
        # parameter, so keep it out of the trainable variables.
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(rate_alpha, global_step, decay_steps, decay_rate)

        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step),

            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            "verify_predictions": tf.nn.softmax(model([verify], False))
        }
    return info

In [None]:
# Total number of training steps for the long run below.
optimal_steps = 200000
# NOTE(review): this seeds the current default graph, while convnet_optimize
# builds its nodes inside its own tf.Graph(). Confirm the seed takes effect.
tf.set_random_seed(45654)

# The learning rate starts at 0.02; decay_steps is a quarter of the run.
graph_connive = convnet_optimize(
    batch_size=16, patch_size=5, depth=64, hidden_size=128,
    rate_alpha=0.02, decay_rate=0.9, decay_steps=optimal_steps/4,
    data=datasets)

# Report progress every 1000 steps; the cell shows run_graph's return value.
run_graph(graph_connive, datasets, optimal_steps, report_every=1000)

In [None]:
# Smoke test of darwin.Darwin on a toy population of the integers 0..9:
# serialize with str, score each member as itself, and breed by adding a
# random integer in [1, 20] to the first parent.
evo = darwin.Darwin(
    range(10),
    str,
    lambda i, e: i,
    lambda ps, e: ps[0] + e.randint(1, 20))
results = evo.evaluate(random.Random(42))
evo.repopulate(0.1, 2, results, random.Random(42))
evo.evaluate(random.Random(42))
print(evo.best())

In [None]:
def setup_evo_graph(
    batch_size,
    data,
    evo_stack,
    rate_alpha=0.05,
    decay_rate=1.0,
    decay_steps=1000
):
    """Build a trainable graph from an existing convevo layer stack.

    The loss is the mean softmax cross entropy plus the value accumulated
    through each layer's update_loss().

    Args:
        batch_size: Leading dimension of the train/labels/verify placeholders.
        data: Dict providing "image_size", "label_count" and "channel_count".
        evo_stack: Stack whose construct(input_shape) yields the layers.
        rate_alpha: Initial learning rate given to tf.train.exponential_decay.
        decay_rate: Decay rate given to tf.train.exponential_decay.
        decay_steps: Decay steps given to tf.train.exponential_decay.

    Returns:
        Dict with "graph", "batch_size", the "train", "labels" and "verify"
        placeholders, and the "loss", "optimizer", "predictions" and
        "verify_predictions" nodes.
    """
    image_size = data["image_size"]
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels = tf.placeholder(tf.float32, shape=output_shape)
        verify = tf.placeholder(tf.float32, shape=input_shape)

        layers = evo_stack.construct(input_shape)
        l2_loss = 0

        for layer in layers:
            layer.setup_parameters()
            # Each layer returns the updated running loss term.
            l2_loss = layer.update_loss(l2_loss)

        def model(nodes, is_training):
            # Chain the layers, appending every intermediate node to `nodes`.
            for layer in layers:
                nodes.append(layer.connect(nodes[-1], is_training))
            return nodes[-1]

        logits = model([train], True)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels)) + l2_loss

        # The step counter is bookkeeping advanced by minimize(), not a model
        # parameter, so keep it out of the trainable variables.
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(rate_alpha, global_step, decay_steps, decay_rate)

        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step),

            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            "verify_predictions": tf.nn.softmax(model([verify], False))
        }
    return info

In [None]:
def create_evo_stack(patch_size, stride, depth, hidden_size, label_count, init_scale):
    """Assemble a LayerStack of two conv layers and two hidden layers.

    Args:
        patch_size: Patch size passed to both "conv_bias" image layers.
        stride: Stride passed to both image layers.
        depth: Depth passed to both image layers.
        hidden_size: Size of the first hidden layer (added with relu=True).
        label_count: Size of the last hidden layer (added with relu=False).
        init_scale: Scale of the "normal" Initializer created for each layer.

    Returns:
        The populated convevo.LayerStack.
    """
    def new_init():
        # Every layer gets its own Initializer instance.
        return convevo.Initializer("normal", scale=init_scale)

    stack = convevo.LayerStack(flatten=True)
    for _ in range(2):
        conv = convevo.ImageLayer("conv_bias", patch_size, stride, depth, "SAME", new_init())
        stack.add_layer(conv, relu=True)
    for width, use_relu in ((hidden_size, True), (label_count, False)):
        hidden = convevo.HiddenLayer(width, bias=True, initializer=new_init())
        stack.add_layer(hidden, relu=use_relu)

    return stack

In [None]:
# Number of training steps used to score one candidate stack.
eval_steps = 100000


def _report_eval_failure(stack):
    """Write the exception currently being handled to the "temp" error output."""
    lines = traceback.format_exception(*sys.exc_info())
    convevo.output_error(stack, lines, "temp")


def eval_stack(stack, entropy):
    """Score a layer stack by building and training its graph.

    Args:
        stack: Layer stack to evaluate; it is reseeded with `entropy` first.
        entropy: Random source passed to stack.reseed().

    Returns:
        The value returned by run_graph(), -10 if building the graph raised,
        or -1 if training raised. Failures are reported through
        convevo.output_error.
    """
    stack.reseed(entropy)

    # Catch Exception rather than everything, so KeyboardInterrupt and
    # SystemExit still stop a long evaluation instead of being recorded as a
    # failed candidate.
    try:
        evo_graph = setup_evo_graph(16, datasets, stack)
    except Exception:
        _report_eval_failure(stack)
        return -10

    try:
        return run_graph(evo_graph, datasets, eval_steps, report_every=eval_steps)
    except Exception:
        _report_eval_failure(stack)
        return -1

In [None]:
# Separate random sources for population changes and for evaluation.
mutate_entropy = random.Random(42)
eval_entropy = random.Random(57)

# Initial population of 20 derived from one prototype stack.
conv_population = convevo.init_population(create_evo_stack(5, 2, 64, 128, 10, 0.1), 20, mutate_entropy)

# Evaluate, repopulate once, then evaluate again; both result sets go to "temp".
conv_darwin = darwin.Darwin(conv_population, convevo.serialize, eval_stack, convevo.breed)
results = conv_darwin.evaluate(eval_entropy)
convevo.output_results(results, "temp")
conv_darwin.repopulate(0.1, 2, results, mutate_entropy)
results = conv_darwin.evaluate(eval_entropy)
convevo.output_results(results, "temp")

In [None]:
# Index of the population member to inspect and re-evaluate by hand.
failed = 1
print(convevo.serialize(conv_darwin.population[failed]))
eval_stack(conv_darwin.population[failed], eval_entropy)

In [None]:
# Print the score (best[1]) and the serialized form of best[0] for the entry
# returned by conv_darwin.best().
best = conv_darwin.best()
print("Best score:", best[1])
print(convevo.serialize(best[0]))

In [None]:
# Sort the values of conv_darwin.history with darwin.descending_score and
# pass them to convevo.output_results.
results = darwin.descending_score(conv_darwin.history.values())
convevo.output_results(results, "test", "first_run.xml")
# Display how many results were passed on.
len(results)