diff --git a/create_data_pickles.py b/create_data_pickles.py
new file mode 100644
index 0000000..206fab4
--- /dev/null
+++ b/create_data_pickles.py
@@ -0,0 +1,100 @@
+"""
+################################################################################
+                PROCESSES THE RAW IMAGE FILES AND MAT FILES
+################################################################################
+
+Processes the image and mat files taken from the following urls:
+
+- http://ufldl.stanford.edu/housenumbers/train.tar.gz
+- http://ufldl.stanford.edu/housenumbers/test.tar.gz
+- http://ufldl.stanford.edu/housenumbers/extra.tar.gz
+
+And generates Pickle files that contain numpy arrays of the cropped/resized
+images as well as of the labels.
+
+See the README file for details on how to use this script.
+################################################################################
+"""
+from __future__ import print_function, absolute_import
+
+import os
+from process_data import merge_train_extra, process_the_data, \
+                         create_increased_representation_data
+
+if __name__ == "__main__":
+    # Command line entry point: parse the flags, then run the processing
+    # step(s) selected by --data on the directories given by -i / -o.
+    import argparse
+    # --------------------------------------------------------------------------
+    #                                                     PROCESS ARGUMENT FLAGS
+    # --------------------------------------------------------------------------
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input_dir", default=None, type=str,
+                        help="The parent directory containing the directories "
+                             "for train, test and extra data. If no argument "
+                             "given, then it assumes that the current working "
+                             "directory is where the data is.")
+    parser.add_argument("-o", "--output_dir", default=None, type=str,
+                        help="The directory to output the pickled data, by "
+                             "default it places the files in same directory as "
+                             "the input data directory")
+    parser.add_argument("-d", "--data", default=None, type=str,
+                        help="Which dataset to use. Legal options are: \n"
+                             "    'train' To process train data. \n"
+                             "    'test'  To process test data. \n"
+                             "    'extra' To process extra data. \n"
+                             "    'merge' To merge the train and extra data. \n"
+                             "    'rep'   To create the increased "
+                             "            representation data. \n"
+                             "     None   To process ALL data. \n"
+                             "If no argument provided, it processes ALL the "
+                             "data.")
+    parser.add_argument("--debug", action='store_true', help="Go into debug mode")
+
+    opts = parser.parse_args()
+
+    # PLACE LIMIT ON NUMBER OF DATA SAMPLES - for debugging purposes
+    if opts.debug:
+        limit = 1024
+        print("#"*70)
+        print("NOTE: YOU ARE IN DEBUG MODE")
+        print("#" * 70)
+    else:
+        limit = None
+
+    #  SET DEFAULT VALUES
+    if opts.input_dir is None:
+        opts.input_dir = os.path.abspath("")  # Abs path to current working dir
+    if opts.output_dir is None:
+        opts.output_dir = opts.input_dir  # Output, same as input dir
+
+    # --------------------------------------------------------------------------
+    #                                    PROCESS THE RELEVANT DATASET TO WORK ON
+    # --------------------------------------------------------------------------
+    # ALL DATASETS FROM START TO FINISH
+    if opts.data is None:
+        for dataset in ["extra", "train", "test"]:
+            process_the_data(data=dataset,
+                             data_dir=opts.input_dir,
+                             out_dir=opts.output_dir,
+                             limit=limit)
+        # The merge and "rep" steps are pointed at the output directory only
+        merge_train_extra(opts.output_dir, shuffle=True)
+        create_increased_representation_data(opts.output_dir)
+
+    # JUST TRAIN, TEST OR EXTRA
+    elif opts.data in ["train", "test", "extra"]:
+        process_the_data(data=opts.data,
+                         data_dir=opts.input_dir,
+                         out_dir=opts.output_dir,
+                         limit=limit)
+
+    # MERGED DATA
+    elif opts.data == "merge":
+        merge_train_extra(opts.output_dir, shuffle=True)
+
+    # INCREASED REPRESENTATION DATA
+    elif opts.data == "rep":
+        create_increased_representation_data(opts.output_dir)
+
+    # HANDLE WRONG DATASET OPTION
+    else:
+        # Report through argparse (usage message + exit status 2) rather than
+        # `assert False`: asserts are stripped under `python -O`, which would
+        # let a mistyped --data value silently do nothing.
+        parser.error("Incorrect argument for data provided")
+
diff --git a/create_data_pickles.sh b/create_data_pickles.sh
new file mode 100644
index 0000000..ed912eb
--- /dev/null
+++ b/create_data_pickles.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Runs the data preparation script on the "data" directory, showing its
+# line-buffered stdout on the terminal while appending it to a log file.
+
+# Report a failure of the python step as the pipeline's exit status instead
+# of the status of `tee`.
+set -o pipefail
+
+# `tee -a` cannot create the parent directory of the log file by itself.
+mkdir -p "logs"
+
+stdbuf -oL python create_data_pickles.py -i "data" | tee -a "logs/prepare_data.log"
diff --git a/evals.py b/evals.py
new file mode 100644
index 0000000..36a2424
--- /dev/null
+++ b/evals.py
@@ -0,0 +1,339 @@
+from __future__ import print_function
+from main import np
+from main import os
+import copy
+
+from nnet.graphops import GraphOps
+from file_support import pickle2obj, obj2pickle
+from support import verbose_print, verbose_print_done, limit_string
+from main import PRINT_WIDTH
+
+
+# ==============================================================================
+#                                                           PER_ELEMENT_ACCURACY
+# ==============================================================================
+def per_element_accuracy(a, b, axis=None):
+    """ Proportion of positions at which `a` and `b` hold equal values.
+
+        The comparison is symmetric, so either argument can be the ground
+        truth and the other one the predictions.
+
+    Args:
+        a:      (numpy array) truth or predictions
+        b:      (numpy array) predictions or truth
+        axis:   (int or None) forwarded to `mean()` of the element-wise
+                equality mask. None (default) averages over all elements,
+                axis=0 gives a separate accuracy for each column.
+    Returns:
+        if axis=None, a scalar.
+        if axis=0, a numpy array with one entry per column of the mask.
+    """
+    is_match = (a == b)
+    accuracy = is_match.mean(axis=axis)
+    return accuracy
+
+
+# ==============================================================================
+#                                                              FULL_ROW_ACCURACY
+# ==============================================================================
+def full_row_accuracy(a, b):
+    """ Proportion of rows of `a` and `b` that agree in every column.
+
+        A row only counts as correct when all of its elements match; a
+        single mismatch makes the whole row count as wrong.
+
+    Args:
+        a:      (numpy array) truth or predictions
+        b:      (numpy array) predictions or truth
+
+    Returns:
+        Float
+    """
+    element_match = (a == b)
+    row_match = element_match.all(axis=1)
+    return row_match.mean()
+
+
+# ==============================================================================
+#                                                                      BATCH_IOU
+# ==============================================================================
+def batch_iou(a, b, epsilon=1e-8):
+    """ Row-wise Intersection over Union of two sets of bounding boxes.
+
+        Row i of `a` is paired with row i of `b`. Each row describes one box
+        as four numbers:
+            [x1,y1,x2,y2]
+        where:
+            x1,y1 represent the upper left corner
+            x2,y2 represent the lower right corner
+
+    Args:
+        a:          (numpy array) each row containing [x1,y1,x2,y2] coordinates
+        b:          (numpy array) each row containing [x1,y1,x2,y2] coordinates
+        epsilon:    (float) Small value added to the denominator to prevent
+                    division by zero
+
+    Returns:
+        (numpy array) One IoU score for each pair of boxes.
+    """
+    # CORNERS OF THE INTERSECTION BOXES
+    left = np.maximum(a[:, 0], b[:, 0])
+    top = np.maximum(a[:, 1], b[:, 1])
+    right = np.minimum(a[:, 2], b[:, 2])
+    bottom = np.minimum(a[:, 3], b[:, 3])
+
+    # SIDES OF THE INTERSECTION - a negative side means the pair is disjoint
+    # along that axis, so it is clamped to zero
+    overlap_w = right - left
+    overlap_h = bottom - top
+    overlap_w[overlap_w < 0] = 0
+    overlap_h[overlap_h < 0] = 0
+    intersection = overlap_w * overlap_h
+
+    # UNION - the overlap is part of both boxes, so it is subtracted once
+    box_a_area = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
+    box_b_area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
+    union = box_a_area + box_b_area - intersection
+
+    return intersection / (union + epsilon)
+
+
+# ==============================================================================
+#                                                         BATCH_MULTI_COLUMN_IOU
+# ==============================================================================
+def batch_multi_column_iou(a, b, epsilon=1e-5):
+    """ Given two arrays `a` and `b` where each row contains several bounding
+        boxes in groups of 4 columns, where each group of 4 columns represents
+        a single bounding box as:
+            [x1,y1,x2,y2]
+        where:
+            x1,y1 represent the upper left corner
+            x2,y2 represent the lower right corner
+        It returns the Intersection over Union score of each bounding box in
+        `a` against the box in the same row and column group of `b`.
+
+        The shape of output is [n_samples, n_bboxes], with
+        n_bboxes = number of columns of `a` // 4.
+
+    Args:
+        a:          (numpy array) rows of consecutive [x1,y1,x2,y2] groups
+        b:          (numpy array) rows of consecutive [x1,y1,x2,y2] groups
+        epsilon:    (float) Small value to prevent division by zero. It is
+                    forwarded to `batch_iou()`.
+
+    Returns:
+        (numpy array) The Intersection over Union scores for each bounding box.
+    """
+    n_samples = a.shape[0]
+    n_bboxes = a.shape[1] // 4
+    ious = np.empty(shape=[n_samples, n_bboxes])
+
+    for i in range(n_bboxes):
+        cols = slice(4 * i, 4 * i + 4)  # the 4 columns of the i'th box
+        # Pass `epsilon` along; previously the argument was accepted but
+        # ignored, so batch_iou always ran with its own default.
+        ious[:, i] = batch_iou(a[:, cols], b[:, cols], epsilon=epsilon)
+
+    return ious
+
+
+# ==============================================================================
+#                                                                        AVG_IOU
+# ==============================================================================
+def avg_iou(preds, Y, axis=None):
+    """ Mean of the `batch_iou()` scores between `preds` and `Y`.
+
+    Args:
+        preds:  (numpy array) passed as first argument to `batch_iou()`
+        Y:      (numpy array) passed as second argument to `batch_iou()`
+        axis:   forwarded to `mean()` of the scores
+    """
+    scores = batch_iou(preds, Y)
+    return scores.mean(axis=axis)
+
+
+# ==============================================================================
+#                                                           AVG_MULTI_COLUMN_IOU
+# ==============================================================================
+def avg_multi_column_iou(preds, Y, axis=None):
+    """ Mean of the `batch_multi_column_iou()` scores between `preds` and `Y`.
+
+    Args:
+        preds:  (numpy array) first argument to `batch_multi_column_iou()`
+        Y:      (numpy array) second argument to `batch_multi_column_iou()`
+        axis:   forwarded to `mean()` of the scores. None (default) averages
+                over every entry of the score array.
+    """
+    scores = batch_multi_column_iou(preds, Y)
+    return scores.mean(axis=axis)
+
+
+# ==============================================================================
+#                                                                  STR_ELSE_NONE
+# ==============================================================================
+def str_else_none(a, f, e):
+    """ Formats `a` with the format string `f`, unless `a` is None, in which
+        case the alternative string `e` is returned as is.
+    """
+    if a is None:
+        return e
+    return f.format(a)
+
+
+# ##############################################################################
+#                                                                          EVALS
+# ##############################################################################
+class Evals(object):
+    """ Stores the evaluation metrics of each epoch.
+
+        Every metric is a list kept in the dictionary `self.stuff`. A metric
+        can be read and assigned with dot notation (`evals.loss`) as well as
+        with dictionary notation (`evals["loss"]`).
+    """
+    def __init__(self, d=None, pickle=None, verbose=False):
+        """ Creates an Evals object to store evaluation metrics for each epoch.
+
+        Args:
+            d:          (dict or None)(optional) - initialize Evals object from
+                        a dictionary. A deep copy of it is used. Takes
+                        precedence over `pickle`.
+            pickle:     (str or None) (optional) path to a pickle file of a
+                        dictionary to initialize the Evals object. If the file
+                        does not exist, the Evals object is left blank.
+            verbose:    (bool) passed to the `verbose_print` helpers
+        """
+        # These two are the only real instance attributes. As soon as
+        # `_initialized` exists, __setattr__ redirects every later assignment
+        # into the `stuff` dictionary.
+        self.stuff = dict()
+        self._initialized = True
+
+        # INITIAL BLANK VALUES
+        self.pda_train = []
+        self.pda_valid = []
+        self.wna_train = []
+        self.wna_valid = []
+        self.iou_train = []
+        self.iou_valid = []
+        self.time_pred = []
+        self.time_train = []
+        self.loss = []
+        self.alpha = []
+
+        # LOAD EVALS FROM DICTIONARY
+        if d is not None:
+            verbose_print("Loading evals from a dictionary", verbose=verbose, end="")
+            self.stuff.update(copy.deepcopy(d))
+
+        # LOAD EVALS FROM PICKLE FILE (of a dictionary)
+        elif pickle is not None:
+            short_path = limit_string(pickle, tail=PRINT_WIDTH-32)
+            verbose_print("Loading evals from " + short_path, verbose, end="")
+            if os.path.exists(pickle):
+                # NOTE(review): presumably this unpickles the file - only load
+                # files from a trusted source.
+                d = pickle2obj(pickle)
+                self.stuff.update(copy.deepcopy(d))
+            else:
+                verbose_print("\n-- file does not exist. Creating blank Evals", verbose, end="")
+        else:
+            verbose_print("Creating blank Evals", verbose, end="")
+
+        verbose_print_done(verbose)
+
+    def __getattr__(self, key):
+        """ Get items using dot notation.
+
+        Only called when normal attribute lookup fails.
+
+        Raises:
+            AttributeError: if `key` is not stored in `stuff`. Raising
+                AttributeError (not KeyError) is what `hasattr()`,
+                `getattr(obj, name, default)` and the `copy` module rely on.
+        """
+        # Go through __dict__ so that an instance that has no `stuff` yet
+        # (e.g. one made with __new__) cannot recurse back into __getattr__.
+        stuff = self.__dict__.get("stuff")
+        if stuff is None or key not in stuff:
+            raise AttributeError("'{}' object has no attribute '{}'".format(
+                type(self).__name__, key))
+        return stuff[key]
+
+    def __setattr__(self, key, value):
+        """ Set attributes (that will be stored in the dict `stuff`) using dot
+            notation
+        """
+        # `in` instead of dict.has_key(), which does not exist in Python 3
+        if '_initialized' in self.__dict__:
+            self.stuff[key] = value
+        else:
+            # allows attributes to be set in the __init__ method
+            object.__setattr__(self, key, value)
+
+    def __getitem__(self, key):
+        """ Get items using dictionary notation """
+        return self.stuff[key]
+
+    def __setitem__(self, key, val):
+        """ Set items using dictionary notation """
+        self.stuff[key] = val
+
+    def make_copy(self):
+        """Returns a deep copy of this object"""
+        return Evals(d=self.stuff)
+
+    def save_dict_pickle(self, f, verbose=False):
+        """ Saves the `stuff` dictionary to the file path `f` using
+            `obj2pickle`.
+        """
+        short_path = limit_string(f, front=10, tail=31)
+        verbose_print("Saving Evals to " + short_path, verbose, end="")
+        obj2pickle(self.stuff, file=f)
+        verbose_print_done(verbose)
+
+    def as_dict(self):
+        """ Returns the internal dictionary itself (not a copy) """
+        return self.stuff
+
+    def newest_is_max(self):
+        """ Returns true, if the latest entry has the highest Whole Number
+            Accuracy when evaluated on the validation dataset.
+
+        Raises:
+            ValueError: if `wna_valid` is still empty.
+        """
+        return max(self.wna_valid) == self.wna_valid[-1]
+
+    @property
+    def epochs(self):
+        """ Number of entries in the `loss` list """
+        return len(self.loss)
+
+    def append(self, **kwargs):
+        """ Given a set of keyword, value pairs, it appends the value to the
+            end of the list that is in the attribute specified by the keyword.
+
+            If the keyword is not an already existing attribute of the Evals
+            object, then it creates one, and initializes a new list, with the
+            first item being the value provided.
+
+            A warning is printed for stored keys that were not provided, and
+            for provided keys that were not stored yet.
+
+        Args:
+            **kwargs: metric name = value to append
+        """
+        # GIVE FEEDBACK ABOUT MISSING OR EXTRANEOUS KEYS
+        inner_keys = set(self.stuff)
+        new_keys = set(kwargs)
+        missing = inner_keys - new_keys
+        extraneous = new_keys - inner_keys
+        overlapping = inner_keys & new_keys
+        if missing:
+            print("WARNING!: Missing the following keys: ", sorted(missing))
+        if extraneous:
+            print("WARNING!: The following keys are extraneous: ", sorted(extraneous))
+
+        # APPEND VALUES TO RELEVANT LIST
+        for key in overlapping:
+            self[key].append(kwargs[key])
+
+        # INITIALIZE EXTRANEOUS KEY VALUES IN A NEW LIST
+        for key in extraneous:
+            self[key] = [kwargs[key]]
+
+    def print_header(self):
+        """ Prints out a header string that details the column names that will
+            be used in `print_line()`
+        """
+        h  = "-------------------------+--------------+----------------+----------------+\n"
+        h += "               TIME (ms) |       IoU    |      PDA       |       WNA      |\n"
+        h += " ep  LOSS    train  pred | train  valid | train    valid | train    valid |\n"
+        h += "-------------------------+--------------+----------------+----------------+"
+        print(h)
+
+    def print_loss(self, loss,end="\n"):
+        """ Prints a line that only has the LOSS column filled in, using the
+            `loss` value provided.
+        """
+        line = "     {:1.4f}              |              |                |                |"
+        print(line.format(loss), end=end)
+
+    def print_line(self, end="\n"):
+        """ Prints a line of the evaluation metrics from the latest epoch.
+            (To be used in conjunction with print_header() to print out the
+            labels of each column)
+
+            A metric whose latest value is None is printed as blank space.
+        """
+        s = ""
+        s += str_else_none(self.epochs,         f="{:3.0f} ",   e="    ")
+        s += str_else_none(self.loss[-1],       f=" {:1.4f} ",  e="        ")
+        s += str_else_none(self.time_train[-1], f=" {:2.2f} ",  e="       ")
+        s += str_else_none(self.time_pred[-1],  f=" {:1.2f} |", e="      |")
+        s += str_else_none(self.iou_train[-1],  f=" {:1.3f} ",  e="        ")
+        s += str_else_none(self.iou_valid[-1],  f=" {:1.3f} |", e="        |")
+        s += str_else_none(self.pda_train[-1],  f=" {:2.3f} ",  e="        ")
+        s += str_else_none(self.pda_valid[-1],  f=" {:2.3f} |", e="        |")
+        s += str_else_none(self.wna_train[-1],  f=" {:2.3f} ",  e="        ")
+        s += str_else_none(self.wna_valid[-1],  f=" {:2.3f} |", e="        |")
+
+        print(s, end=end)
+
+
+    
+    
+# ==============================================================================
+#                                                               COMPARISONS_FILE
+# ==============================================================================
+def comparisons_file(opts, paths):
+    """ Writes a text file named
+            accuracy__iou__modelname.txt
+        into `paths.model_comparisons_dir`. The file lists, for every key of
+        the Evals loaded from `paths.evals_max`, the key followed by a list
+        holding the last value stored under it.
+
+    Args:
+        opts:   object whose `output_name` attribute is the model name part
+                of the file name
+        paths:  object with the attributes `evals_max` and
+                `model_comparisons_dir`
+    """
+    best = Evals(pickle=paths.evals_max)
+
+    # Latest validation scores, rounded to 3 decimals and zero padded
+    acc_str = str(round(best.wna_valid[-1], 3)).zfill(7)
+    iou_str = str(round(best.iou_valid[-1], 3)).zfill(5)
+    filename = "{acc}__{iou}__{name}.txt".format(acc=acc_str,
+                                                 iou=iou_str,
+                                                 name=opts.output_name)
+    out_path = os.path.join(paths.model_comparisons_dir, filename)
+
+    with open(out_path, mode="w") as text_file:
+        metrics = best.as_dict()
+        for metric_name in sorted(metrics.keys()):
+            latest = best[metric_name][-1:]
+            text_file.write("{}   {}\n".format(metric_name, latest))
+
+
diff --git a/graphs.py b/graphs.py
new file mode 100644
index 0000000..ecd0f61
--- /dev/null
+++ b/graphs.py
@@ -0,0 +1,442 @@
+from __future__ import print_function, absolute_import, division
+from main import tf
+from main import PRINT_WIDTH
+from support import print_headers
+
+# Neural Net Imports
+from nnet.graphops import GraphOps
+from nnet.initializers import he_weights_initializer
+from nnet.model_components import multi_digit_loss
+from nnet.nnet_components import fc_layer, flatten
+from nnet.nnet_components import conv_battery, fc_battery
+from nnet.nnet_components import trainer
+from nnet.misc import print_tensor_shape
+
+
+# ##############################################################################
+#                                                                          GRAPH
+# ##############################################################################
+def create_graph(logit_func, settings):
+    """ Creates a Tensorflow graph for the multi-digit classification + bounding
+        box task.
+        
+        The graph holds the input placeholders ("X", "Y", "BBOX"), the
+        optional placeholders ("alpha", "is_training"), the model built by
+        `logit_func`, the losses ("bbox_loss", "digit_loss", "loss"), the
+        "train" operation and the "digit_preds" predictions.
+        
+        Only the graph object is returned, so the tensors and operations
+        are presumably meant to be fetched from it by these names.
+        
+    Args:
+        logit_func: (function) A function that returns two tensors:
+                    - digit_logits
+                    - bbox_logits
+                    It is called with the keyword arguments `x`,
+                    `is_training`, `settings` and `global_step`.
+        settings:   (object) A Settings object that contains attribute values
+                    for the model. It is only passed on to `logit_func`.
+    Returns:
+        (tensorflow graph)
+    """
+    print_headers("GRAPH", border="=", width=PRINT_WIDTH)
+    graph = tf.Graph()
+    with graph.as_default():
+        # PLACEHOLDERS
+        X = tf.placeholder(tf.float32, shape=[None, 54, 54], name="X")  # Images
+        Y = tf.placeholder(tf.int32, shape=[None, 5], name="Y")         # Digits
+        BBOX = tf.placeholder(tf.float32, shape=[None, 24], name="BBOX")# Bboxes
+        
+        # OPTIONAL PLACEHOLDERS
+        # These fall back to the default value when they are not fed.
+        # `alpha` is handed to `trainer()` - presumably the learning rate.
+        alpha = tf.placeholder_with_default(0.001, shape=None, name="alpha")
+        is_training = tf.placeholder_with_default(False,
+                                                  shape=None,
+                                                  name="is_training")
+        
+        # VARIABLES
+        # Not trainable; passed to both the model function and the trainer
+        global_step = tf.Variable(0, name='global_step', trainable=False)
+        
+        # PREPROCESS
+        x = X / 255.0  # Rescale values to be 0-1
+        x = tf.reshape(x, shape=[-1, 54, 54, 1])  # Reshape for Conv Layers
+        print("x after reshaping to 4D: ", x.get_shape().as_list())
+        
+        # MODEL
+        digit_logits, bbox_logits = logit_func(x=x,
+                                               is_training=is_training,
+                                               settings=settings,
+                                               global_step=global_step)
+        
+        # BBOX LOSS
+        # Square root of the mean squared difference between the predicted
+        # and the true bounding box values. The factor of 1 leaves the
+        # difference unscaled.
+        bbox_loss = tf.sqrt(tf.reduce_mean(tf.square(1 * (bbox_logits - BBOX))),
+                            name="bbox_loss")
+        
+        # DIGITS LOSS
+        digits_loss = multi_digit_loss(digit_logits, Y,
+                                       max_digits=5,
+                                       name="digit_loss")
+        
+        # TOTAL LOSS
+        # Plain sum of the two losses
+        loss = tf.add(bbox_loss, digits_loss, name="loss")
+        
+        # TRAIN
+        train = trainer(loss, alpha=alpha, global_step=global_step,name="train")
+        
+        # PREDICTIONS
+        # Argmax over dimension 2 of the digit logits, then transposed.
+        # assumes digit_logits is [n_digits, n_samples, n_classes], making
+        # the result [n_samples, n_digits] like `Y` - TODO confirm
+        digit_preds = tf.transpose(tf.argmax(digit_logits, dimension=2))
+        digit_preds = tf.to_int32(digit_preds, name="digit_preds")
+    
+    return graph
+
+
+# ==============================================================================
+#                                                                        MODEL_A
+# ==============================================================================
+def model_a(x, is_training, global_step, settings=None, verbose=True):
+    """ Builds the model A layers on top of `x`: a trunk of eight
+        `conv_battery` stages followed by one `fc_battery` of size 1024,
+        which feeds five digit branches and one bounding box branch.
+
+    Args:
+        x:           input tensor of the first conv_battery
+        is_training: boolean tensor, passed to every battery and used as the
+                     predicate of the dropout `tf.cond`s
+        global_step: passed to every battery
+        settings:    object with the attributes `conv_dropout` and
+                     `fc_dropout`. Despite the None default it is
+                     dereferenced, so it has to be provided.
+        verbose:     passed to the batteries and to `print_tensor_shape`
+    Returns:
+        (digits, bboxes) where `digits` is the five `fc_layer(n=11)` branch
+        outputs packed along axis 0 (named "digit_logits") and `bboxes` is
+        the output of an `fc_layer(n=24)` named "bbox_logits".
+    """
+    # NOTE(review): the batch norm, bias and leak values, `conv_winit` and
+    # the two dropout tensors below are not referenced again in this function
+    bn_offset, bn_scale = 0.0, 1.0  # BATCH NORM SETTINGS
+    bval = 0.01                     # Bias value
+    leak = 0.01                     # leakiness of leaky relus
+
+    # WEIGHTS INITIALIZERS
+    conv_winit = he_weights_initializer()
+    fc_winit = he_weights_initializer()
+
+    # DROPOUT SETTINGS - the settings value when training, otherwise 0.0
+    conv_dropout = tf.cond(is_training,
+                           lambda: tf.constant(settings.conv_dropout),
+                           lambda: tf.constant(0.0))
+    fc_dropout = tf.cond(is_training,
+                         lambda: tf.constant(settings.fc_dropout),
+                         lambda: tf.constant(0.0))
+
+    # --------------------------------------------------------------------------
+    #                                                                      TRUNK
+    # --------------------------------------------------------------------------
+    # CONV LAYERS - one conv_battery per row, applied from top to bottom
+    conv_specs = [
+        # (convk, n, mpk, mpstride)
+        (5, 48, 2, 2),
+        (5, 64, 2, 1),
+        (5, 128, 2, 2),
+        (5, 160, 2, 1),
+        (3, 192, 2, 2),
+        (3, 192, 2, 1),
+        (3, 192, 2, 2),
+        (2, 192, 2, 1),
+    ]
+    for convk, n, mpk, mpstride in conv_specs:
+        x = conv_battery(x, global_step=global_step, convk=convk, n=n,
+                         mpk=mpk, mpstride=mpstride,
+                         is_training=is_training, verbose=verbose)
+
+    # FC LAYER
+    x = flatten(x)
+    print_tensor_shape(x, verbose=verbose)
+    x = fc_battery(x, global_step=global_step, n=1024, bias=None,
+                   is_training=is_training, dropout=settings.fc_dropout,
+                   winit=fc_winit, verbose=verbose, name="FC")
+
+    # --------------------------------------------------------------------------
+    #                                                             DIGIT BRANCHES
+    # --------------------------------------------------------------------------
+    max_digits = 5
+    branches = []
+    for branch_id in range(1, max_digits + 1):
+        branch = fc_layer(x, n=11, bias=0.1, winit=fc_winit,
+                          name="branch_{}".format(branch_id))
+        print_tensor_shape(branch, verbose=verbose)
+        branches.append(branch)
+
+    digits = tf.pack(branches, axis=0, name="digit_logits")
+    print_tensor_shape(digits, verbose=verbose)
+
+    # --------------------------------------------------------------------------
+    #                                                              BBOX BRANCHES
+    # --------------------------------------------------------------------------
+    bboxes = fc_layer(x, n=24, bias=0.1, winit=fc_winit, name="bbox_logits")
+    print_tensor_shape(bboxes, verbose=verbose)
+
+    return digits, bboxes
+
+
+# ==============================================================================
+#                                                                        MODEL_B
+# ==============================================================================
+# Similar to model A, but has an additional conv layer at the beginning with:
+# - k=2, n = 2
+# - maxpool k =3, stride=2
+# This is intended to reduce the dimensionality early on, while preserving
+# important information.
+def model_b(x, is_training, global_step, settings=None, verbose=True):
+    """ Builds the model B layers on top of `x`: a trunk of nine
+        `conv_battery` stages (the first one with convk=2, n=2, mpk=3,
+        mpstride=2) followed by one `fc_battery` of size 1024, which feeds
+        five digit branches and one bounding box branch.
+
+    Args:
+        x:           input tensor of the first conv_battery
+        is_training: boolean tensor, passed to every battery and used as the
+                     predicate of the dropout `tf.cond`s
+        global_step: passed to every battery
+        settings:    object with the attributes `conv_dropout` and
+                     `fc_dropout`. Despite the None default it is
+                     dereferenced, so it has to be provided.
+        verbose:     passed to the batteries and to `print_tensor_shape`
+    Returns:
+        (digits, bboxes) where `digits` is the five `fc_layer(n=11)` branch
+        outputs packed along axis 0 (named "digit_logits") and `bboxes` is
+        the output of an `fc_layer(n=24)` named "bbox_logits".
+    """
+    # NOTE(review): the batch norm, bias and leak values, `conv_winit` and
+    # the two dropout tensors below are not referenced again in this function
+    bn_offset, bn_scale = 0.0, 1.0  # BATCH NORM SETTINGS
+    bval = 0.01                     # Bias value
+    leak = 0.01                     # leakiness of leaky relus
+
+    # WEIGHTS INITIALIZERS
+    conv_winit = he_weights_initializer()
+    fc_winit = he_weights_initializer()
+
+    # DROPOUT SETTINGS - the settings value when training, otherwise 0.0
+    conv_dropout = tf.cond(is_training,
+                           lambda: tf.constant(settings.conv_dropout),
+                           lambda: tf.constant(0.0))
+    fc_dropout = tf.cond(is_training,
+                         lambda: tf.constant(settings.fc_dropout),
+                         lambda: tf.constant(0.0))
+
+    # --------------------------------------------------------------------------
+    #                                                                      TRUNK
+    # --------------------------------------------------------------------------
+    # CONV LAYERS - one conv_battery per row, applied from top to bottom
+    conv_specs = [
+        # (convk, n, mpk, mpstride)
+        (2, 2, 3, 2),
+        (5, 48, 2, 2),
+        (5, 64, 2, 1),
+        (5, 128, 2, 2),
+        (5, 160, 2, 1),
+        (3, 192, 2, 2),
+        (3, 192, 2, 1),
+        (3, 192, 2, 2),
+        (2, 192, 2, 1),
+    ]
+    for convk, n, mpk, mpstride in conv_specs:
+        x = conv_battery(x, global_step=global_step, convk=convk, n=n,
+                         mpk=mpk, mpstride=mpstride,
+                         is_training=is_training, verbose=verbose)
+
+    # FC LAYER
+    x = flatten(x)
+    print_tensor_shape(x, verbose=verbose)
+    x = fc_battery(x, global_step=global_step, n=1024, bias=None,
+                   is_training=is_training, dropout=settings.fc_dropout,
+                   winit=fc_winit, verbose=verbose, name="FC")
+
+    # --------------------------------------------------------------------------
+    #                                                             DIGIT BRANCHES
+    # --------------------------------------------------------------------------
+    max_digits = 5
+    branches = []
+    for branch_id in range(1, max_digits + 1):
+        branch = fc_layer(x, n=11, bias=0.1, winit=fc_winit,
+                          name="branch_{}".format(branch_id))
+        print_tensor_shape(branch, verbose=verbose)
+        branches.append(branch)
+
+    digits = tf.pack(branches, axis=0, name="digit_logits")
+    print_tensor_shape(digits, verbose=verbose)
+
+    # --------------------------------------------------------------------------
+    #                                                              BBOX BRANCHES
+    # --------------------------------------------------------------------------
+    bboxes = fc_layer(x, n=24, bias=0.1, winit=fc_winit, name="bbox_logits")
+    print_tensor_shape(bboxes, verbose=verbose)
+
+    return digits, bboxes
+
+
+# ==============================================================================
+#                                                                        MODEL_C
+# ==============================================================================
+# Similar to model A, but has an additional conv layer at the beginning with:
+# - k=2, n = 16
+# - maxpool k =2, stride=2
+# This is intended to reduce the dimensionality early on, while preserving
+# important information.
+def model_c(x, is_training, global_step, settings=None, verbose=True):
+    """Build the model C graph and return its digit and bbox outputs.
+
+    The trunk chains nine `conv_battery` calls (the first one being the
+    extra convk=2, n=16 battery with mpk=2, mpstride=2), flattens the result
+    and feeds it through an n=1024 `fc_battery`.  Five n=11 `fc_layer` digit
+    branches and one n=24 `fc_layer` bbox branch hang off the trunk output.
+
+    Args:
+        x:           Input tensor handed to the first conv battery.
+        is_training: Predicate of the `tf.cond` dropout switches; also
+                     forwarded to every battery.
+        global_step: Forwarded unchanged to every battery.
+        settings:    Object exposing `conv_dropout` and `fc_dropout`.
+                     `fc_dropout` is read unconditionally, so the `None`
+                     default cannot actually be used.
+        verbose:     Forwarded to the batteries and `print_tensor_shape`.
+
+    Returns:
+        Tuple `(digits, bboxes)`: the five branch outputs packed along
+        axis 0 under the name "digit_logits", and the bbox layer output.
+    """
+    # Batch norm / misc constants (none is referenced again below).
+    bn_offset, bn_scale = 0.0, 1.0
+    bval = 0.01  # Bias value
+    leak = 0.01  # leakiness of leaky relus
+
+    # Weight initializers (only `fc_winit` is used below).
+    conv_winit = he_weights_initializer()
+    fc_winit = he_weights_initializer()
+
+    # Dropout rates gated on `is_training`: configured rate, else 0.0.
+    # NOTE(review): neither tensor is consumed below -- the FC battery gets
+    # the raw `settings.fc_dropout` value.  Confirm that this is intended.
+    conv_dropout = tf.cond(is_training,
+                           lambda: tf.constant(settings.conv_dropout),
+                           lambda: tf.constant(0.0))
+    fc_dropout = tf.cond(is_training,
+                         lambda: tf.constant(settings.fc_dropout),
+                         lambda: tf.constant(0.0))
+
+    # ------------------------------------------------------------------ TRUNK
+    # One (convk, n, mpstride) triple per conv battery; mpk is 2 throughout.
+    conv_specs = [
+        (2, 16, 2),
+        (5, 48, 2),
+        (5, 64, 1),
+        (5, 128, 2),
+        (5, 160, 1),
+        (3, 192, 2),
+        (3, 192, 1),
+        (3, 192, 2),
+        (2, 192, 1),
+    ]
+    for kernel, width, pool_stride in conv_specs:
+        x = conv_battery(x, global_step=global_step, convk=kernel, n=width,
+                         mpk=2, mpstride=pool_stride,
+                         is_training=is_training, verbose=verbose)
+
+    # Flatten, then the single fully connected battery.
+    x = flatten(x)
+    print_tensor_shape(x, verbose=verbose)
+    x = fc_battery(x, global_step=global_step, n=1024, bias=None,
+                   is_training=is_training, dropout=settings.fc_dropout,
+                   winit=fc_winit, verbose=verbose, name="FC")
+
+    # --------------------------------------------------------- DIGIT BRANCHES
+    max_digits = 5
+    branches = []
+    for position in range(1, max_digits + 1):
+        logits = fc_layer(x, n=11, bias=0.1, winit=fc_winit,
+                          name="branch_{}".format(position))
+        print_tensor_shape(logits, verbose=verbose)
+        branches.append(logits)
+    digits = tf.pack(branches, axis=0, name="digit_logits")
+    print_tensor_shape(digits, verbose=verbose)
+
+    # ---------------------------------------------------------- BBOX BRANCHES
+    bboxes = fc_layer(x, n=24, bias=0.1, winit=fc_winit, name="bbox_logits")
+    print_tensor_shape(bboxes, verbose=verbose)
+
+    return digits, bboxes
+
+
+# ==============================================================================
+#                                                                        MODEL_D
+# ==============================================================================
+# Similar to model A, but has an additional conv layer at the beginning with:
+# - k=2, n = 16
+# - maxpool k =2, stride=2
+#
+#  AND ALSO:
+#  There is no fully connected layer just before the branches, it goes directly
+#  from convolutional layers, to the branches.
+#
+# This is intended to reduce the dimensionality early on, while preserving
+# important information.
+def model_d(x, is_training, global_step, settings=None, verbose=True):
+    """Build the model D graph and return its digit and bbox outputs.
+
+    The trunk chains nine `conv_battery` calls (the first one being the
+    extra convk=2, n=16 battery with mpk=2, mpstride=2) and flattens the
+    result.  With no fully connected battery in between, five n=11
+    `fc_layer` digit branches and one n=24 `fc_layer` bbox branch are fed
+    the flattened tensor directly.
+
+    Args:
+        x:           Input tensor handed to the first conv battery.
+        is_training: Predicate of the `tf.cond` dropout switches; also
+                     forwarded to every conv battery.
+        global_step: Forwarded unchanged to every conv battery.
+        settings:    Object exposing `conv_dropout` and `fc_dropout`; both
+                     are only read inside the `tf.cond` branch lambdas.
+        verbose:     Forwarded to the batteries and `print_tensor_shape`.
+
+    Returns:
+        Tuple `(digits, bboxes)`: the five branch outputs packed along
+        axis 0 under the name "digit_logits", and the bbox layer output.
+    """
+    # Batch norm / misc constants (none is referenced again below).
+    bn_offset, bn_scale = 0.0, 1.0
+    bval = 0.01  # Bias value
+    leak = 0.01  # leakiness of leaky relus
+
+    # Weight initializers (only `fc_winit` is used below).
+    conv_winit = he_weights_initializer()
+    fc_winit = he_weights_initializer()
+
+    # Dropout rates gated on `is_training`: configured rate, else 0.0.
+    # NOTE(review): neither tensor is passed to any layer built below.
+    # Confirm whether dropout was meant to be wired in.
+    conv_dropout = tf.cond(is_training,
+                           lambda: tf.constant(settings.conv_dropout),
+                           lambda: tf.constant(0.0))
+    fc_dropout = tf.cond(is_training,
+                         lambda: tf.constant(settings.fc_dropout),
+                         lambda: tf.constant(0.0))
+
+    # ------------------------------------------------------------------ TRUNK
+    # One (convk, n, mpstride) triple per conv battery; mpk is 2 throughout.
+    conv_specs = [
+        (2, 16, 2),
+        (5, 48, 2),
+        (5, 64, 1),
+        (5, 128, 2),
+        (5, 160, 1),
+        (3, 192, 2),
+        (3, 192, 1),
+        (3, 192, 2),
+        (2, 192, 1),
+    ]
+    for kernel, width, pool_stride in conv_specs:
+        x = conv_battery(x, global_step=global_step, convk=kernel, n=width,
+                         mpk=2, mpstride=pool_stride,
+                         is_training=is_training, verbose=verbose)
+
+    # Flatten; the branches are attached directly, with no FC battery.
+    x = flatten(x)
+    print_tensor_shape(x, verbose=verbose)
+
+    # --------------------------------------------------------- DIGIT BRANCHES
+    max_digits = 5
+    branches = []
+    for position in range(1, max_digits + 1):
+        logits = fc_layer(x, n=11, bias=0.1, winit=fc_winit,
+                          name="branch_{}".format(position))
+        print_tensor_shape(logits, verbose=verbose)
+        branches.append(logits)
+    digits = tf.pack(branches, axis=0, name="digit_logits")
+    print_tensor_shape(digits, verbose=verbose)
+
+    # ---------------------------------------------------------- BBOX BRANCHES
+    bboxes = fc_layer(x, n=24, bias=0.1, winit=fc_winit, name="bbox_logits")
+    print_tensor_shape(bboxes, verbose=verbose)
+
+    return digits, bboxes
+
diff --git a/settings.py b/settings.py
new file mode 100644
index 0000000..9afca6d
--- /dev/null
+++ b/settings.py
@@ -0,0 +1,87 @@
+import argparse
+import os
+from file_support.paths import PathsObj
+
+
+def parse_settings():
+    """Parse the command line flags and attach the fixed image_size, image_chanels and max_digits values."""
+    flag_specs = [
+        (("-o", "--output_name"), dict(required=True, type=str, help="Name that will be used to create output directory and zip file")),
+        (("-m", "--model"), dict(required=True, type=str, help="Which model to use (single letter character)")),
+        (("-i", "--input_dir"), dict(default="data", type=str, help="Directory containing the input data")),
+        (("-e", "--epochs"), dict(type=int, default=1, help="Number of epochs")),
+        (("-a", "--alpha"), dict(type=float, default=0.001, help="alpha value")),
+        (("--conv_dropout",), dict(type=float, default=0.1, help="Dropout rate for Conv Layers")),
+        (("--fc_dropout",), dict(type=float, default=0.1, help="Dropout rate for FC Layers")),
+        (("-n", "--data_size"), dict(type=int, default=None, help="Number of training samples to use")),
+        (("-v", "--valid_size"), dict(type=int, default=1024, help="Number of validation samples to use")),
+        (("-b", "--batch_size"), dict(type=int, default=128, help="Batch Size")),
+    ]
+    parser = argparse.ArgumentParser()
+    for flags, options in flag_specs:
+        parser.add_argument(*flags, **options)
+    settings = parser.parse_args()
+    settings.image_size = [54, 54]  # Image dimensions to use in model
+    settings.image_chanels = 1      # Number of color channels for input images
+    settings.max_digits = 5         # Number of digits to recognize
+    return settings
+
+
+def establish_paths(output_name, input):
+    """Build the `PathsObj` with every input, output and bookkeeping path.
+
+    Args:
+        output_name: Output name; added as-is under "output_name" and also
+                     used as the "output" entry rooted at "train_results".
+        input:       Directory where the data is stored ("data_dir").
+
+    Returns:
+        The populated `PathsObj`.
+    """
+    paths = PathsObj()
+    add = paths.add  # every entry below goes through `PathsObj.add`
+
+    # Working dir: the current working directory, not the script's folder.
+    add("script_file", os.path.abspath(__file__))
+    add("working_dir", os.getcwd())
+    add("working_dir_name", os.path.basename(paths.working_dir))
+
+    # Input data pickles, all rooted at "data_dir".
+    add("data_dir", input)
+    add("X_train", "X_aug_train_extra_cropped64.pickle", root="data_dir")
+    add("Y_train", "Y_aug_train_extra.pickle", root="data_dir")
+    add("X_test", "X_test_cropped64.pickle", root="data_dir")
+    add("Y_test", "Y_test.pickle", root="data_dir")
+
+    # Outputs: "train_results" rooted at "working_dir", "output" below it.
+    add("train_results", "results", root="working_dir", create_dir=True)
+    add("output_name", output_name)
+    add("output", output_name, root="train_results", create_dir=True)
+
+    # Checkpoints.
+    add("checkpoint_dir", "checkpoints", root="output", create_dir=True)
+    add("checkpoint", "checkpoint.chk", root="checkpoint_dir")
+    add("checkpoint_max", "checkpoint_max.chk", root="checkpoint_dir")
+
+    # Evaluation records and the learning curves image.
+    add("evals_dir", "evals", root="output", create_dir=True)
+    add("evals", "evals.pickle", root="evals_dir")
+    add("evals_max", "evals_max.pickle", root="evals_dir")
+    add("learning_curves", "learning_curves.png", root="evals_dir")
+
+    # Saved settings.
+    add("settings_text_file", "settings.txt", root="output")
+    add("settings_pickle_file", "settings.pickle", root="output")
+
+    # Model comparisons ("train_results" is passed positionally here).
+    add("model_comparisons_dir", "comparisons", "train_results", create_dir=True)
+
+    # Tensorboard and per-epoch visualisations.
+    add("tensorboard_dir", "tensorboard", root="output", create_dir=True)
+    add("epoch_vis", "epoch_vis", root="output", create_dir=True)
+
+    return paths
+
+
+
+
diff --git a/vis.py b/vis.py
new file mode 100644
index 0000000..2756818
--- /dev/null
+++ b/vis.py
@@ -0,0 +1,439 @@
+from main import np
+from main import os
+
+from matplotlib import pyplot as plt
+from image_processing import draw_boundingboxes
+from image_processing import array2pil, pil2array
+import copy
+
+from support import array_of_digit_arrays_to_ints
+from file_support import maybe_mkdir
+
+from evals import batch_iou, batch_multi_column_iou
+
+
+#  =============================================================================
+#                                                                        GLOBALS
+#  =============================================================================
+GREEN = "#52F900"       # default `outline` of grid_of_sample_bboxes()
+RED = "#FF0D02"         # default `outline2` of grid_of_sample_bboxes()
+BLUE = "#0060F9"
+ORANGE = "#F98E00"
+DARK_GREEN = "#417D08"  # default `label_color` of grid_of_sample_images()
+DARK_RED = "#A80A00"    # default `label2_color` of grid_of_sample_images()
+# Colors used by plot_training_curves()
+NICE_GREEN = "#73AD21"
+NICE_BLUE = "#307EC7"
+GUAVA = "#FF4F40"
+
+
+# ==============================================================================
+#                                                           PLOT_TRAINING_CURVES
+# ==============================================================================
+def plot_training_curves(evals, crop=(None, None), saveto=None):
+    """Draw a 2x2 figure of per-epoch training metrics.
+
+    Panels: WNA and IoU on the top row, PDA and loss on the bottom row.  The
+    WNA panel also marks the argmax of the cropped validation WNA.
+
+    Args:
+        evals:  Dictionary-like object holding one sequence per key:
+                "wna_train", "wna_valid", "pda_train", "pda_valid",
+                "iou_train", "iou_valid" and "loss".
+        crop:   (low, high) slice bounds applied to every sequence before
+                plotting; `None` leaves that side open.
+        saveto: Optional file path.  When given, the figure is saved there
+                (creating the parent directory if needed) and closed
+                without being shown; otherwise the figure is shown.
+    """
+    window = slice(crop[0], crop[1])
+
+    def draw_pair(ax, metric, train_label, valid_label):
+        # Training curve first, validation curve second, on the same axis.
+        ax.plot(evals[metric + "_train"][window], color=GUAVA,
+                label=train_label)
+        ax.plot(evals[metric + "_valid"][window], color=NICE_GREEN,
+                label=valid_label)
+
+    def finish(ax, title):
+        ax.legend(loc="lower right", frameon=False)
+        ax.set_title(title)
+
+    if saveto:
+        plt.ioff()  # prevent figure from displaying
+
+    fig, axes = plt.subplots(2, 2)
+    fig.suptitle('Evaluation per epoch of training', fontsize=15)
+    (wna_ax, iou_ax), (pda_ax, loss_ax) = axes
+
+    # WNA, plus a marker at the highest validation value inside the window.
+    draw_pair(wna_ax, "wna", "WNA train", "WNA valid")
+    best_index = np.array(evals["wna_valid"][window]).argmax()
+    wna_ax.axvline(x=best_index, color=NICE_BLUE, ls="solid", label="Best")
+    finish(wna_ax, "WNA")
+
+    # PDA
+    draw_pair(pda_ax, "pda", "PDA train", "PDA valid")
+    finish(pda_ax, "PDA")
+
+    # Loss (single curve, no legend)
+    loss_ax.plot(evals["loss"][window], color=GUAVA)
+    loss_ax.set_title("Loss")
+
+    # IoU
+    draw_pair(iou_ax, "iou", "train", "valid")
+    finish(iou_ax, "IoU")
+
+    if not saveto:
+        fig.show()
+        return
+    parent_dir = os.path.abspath(os.path.join(saveto, os.pardir))
+    if not os.path.exists(parent_dir):
+        os.makedirs(parent_dir)
+    fig.savefig(saveto)
+    plt.close(fig)  # needed to prevent image from displaying
+
+
+# ==============================================================================
+#                                                                  ARRAY_TO_PLOT
+# ==============================================================================
+def array_to_plot(a, reshape=None, ax=None, cmap=None, vrange=(0,255), ticks=False):
+    """Draw a single image array with matplotlib's `imshow()`.
+
+    The image goes onto `ax` when one is supplied; otherwise a new figure
+    (figsize=(1, 1)) is created for it and shown.
+
+    Args:
+        a:       numpy array containing a single image.
+        reshape: (None or tuple) (default=None)
+                 Shape to reshape `a` to before plotting.  Use it when the
+                 array is not already in image shape, eg a flat vector.
+        ax:      (matplotlib axis, or None) (default=None)
+                 Axis of an existing figure to draw into.  `None` creates a
+                 new figure.
+        cmap:    (default=None)
+                 Colormap, eg "gray" for grayscale.  `None` uses the default
+                 colormap of `imshow()`.
+        vrange:  (tuple of two numbers) (default=(0, 255))
+                 Range of values the pixels COULD take, eg (0, 255) or
+                 (0, 1).  Passed to `imshow()` as `vmin` and `vmax`.
+        ticks:   (boolean) (default=False)
+                 Should the x and y axes be visible?
+
+    Returns:
+        The axis holding the image: `ax` itself when it was supplied,
+        otherwise the axis of the newly created figure.
+    """
+    vmin, vmax = vrange  # Range of values that pixel data comes from
+
+    # Remember whether the figure is ours BEFORE `ax` gets rebound below.
+    # Testing `ax` after that point would always succeed, and a freshly
+    # created figure would never be shown.
+    fig = None
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(1, 1))
+
+    array = a if reshape is None else a.reshape(reshape)
+    ax.imshow(array, cmap=cmap, vmin=vmin, vmax=vmax)
+
+    # Hide the ticks and tick labels
+    if not ticks:
+        ax.get_yaxis().set_visible(False)
+        ax.get_xaxis().set_visible(False)
+
+    if fig is not None:
+        fig.show()
+
+    # The axis is returned in both cases, so callers keep receiving it.
+    return ax
+
+
+# ==============================================================================
+#                                                   OVERLAY_BOUNDING_BOXES_ARRAY
+# ==============================================================================
+def overlay_bounding_boxes_array(a, bboxes,
+                                 outline="#FF0000FF",
+                                 fill=None,
+                                 proportional=True):
+    """Return an RGB copy of the images in `a` with bounding boxes drawn on.
+
+    Each image is converted with `array2pil`, handed to `draw_boundingboxes`
+    (with `to_rgb=True`) and the result of `pil2array` is stored in the
+    output array.
+
+    Args:
+        a:            (array) Array of images.  Its first three axes are read
+                      as [n_samples, dim_x, dim_y].
+        bboxes:       (array) Row `i` contains the bounding boxes for image
+                      `i`.  The row length is a multiple of 4, with each
+                      group of four columns being a single bounding box:
+                      [x1, y1, x2, y2]
+        outline:      (str) Color for bounding box outline (RGB or RGBA).
+        fill:         (str) Color for bounding box fill (RGB or RGBA).
+        proportional: (bool) True if the bbox values are proportional to the
+                      image dimensions (values between 0-1).
+                      False if they are absolute pixel coordinates.
+
+    Returns:
+        uint8 array with the following dimensions:
+            [n_samples, dim_x, dim_y, 3]
+    """
+    n_samples, dim_x, dim_y = a.shape[:3]
+    # Output buffer; every slot is overwritten in the loop below.
+    overlaid = np.empty(shape=[n_samples, dim_x, dim_y, 3], dtype=np.uint8)
+
+    # Drawing options shared by every image.
+    draw_options = dict(to_rgb=True, outline=outline, fill=fill,
+                        proportional=proportional)
+
+    for index in range(n_samples):
+        pil_image = array2pil(a[index], mode=None)
+        pil_image = draw_boundingboxes(pil_image, bboxes=bboxes[index],
+                                       **draw_options)
+        overlaid[index] = pil2array(pil_image)
+
+    return overlaid
+
+
+
+# ==============================================================================
+#                                                          GRID_OF_SAMPLE_IMAGES
+# ==============================================================================
+def grid_of_sample_images(a, labels=None, labels2=None, gridsize=(4,4),
+                          reshape=None,
+                          cmap="gray", saveto=None, show=True, random=False,
+                          vrange=(0,255), title="", label_font_size=9,
+                          label_color=DARK_GREEN,
+                          label2_color=DARK_RED,
+                          seed=None):
+    """Plot a grid of images, optionally with one or two labels per image.
+
+    At most gridsize[0]*gridsize[1] images of `a` are drawn; grid cells left
+    over when there are fewer images stay blank.
+
+    Args:
+        a:          The array of images, eg [num samples, image]
+        labels:     (list of strings) label for each image.
+        labels2:    (list of strings) second label for each image.  With both
+                    given they are drawn with `ax.text()`, else as cell title.
+        gridsize:   (tuple of two ints) grid size (num columns, num rows)
+        reshape:    The dimensions to reshape each image to (width, height)
+        cmap:       Colormap. "gray" or None.
+        saveto:     (optional) file to save the output image to.
+        show:       (bool) Show image on screen?
+        random:     (bool) randomly sample the images (without replacement)?
+        vrange:     (two numbers) range pixels COULD take, eg [0,255] or [0,1]
+        title:      (string) Title for this diagram.
+        label_font_size: (int) font size for the labels
+        label_color:  color used for `labels`
+        label2_color: color used for `labels2`
+        seed:       (int) seed for random generator.  NOTE: it is passed to
+                    `np.random.seed()` on every call, even when it is None.
+
+    Returns:
+        None, it plots the image.
+    """
+    np.random.seed(seed=seed)
+    if not show:
+        plt.ioff()
+
+    n = gridsize[0]*gridsize[1]     # Number of grid cells to fill
+    indices = np.arange(a.shape[0])
+    im_shape = a.shape[1:]          # Shape of a single image
+
+    # SAMPLE N IMAGES
+    if random:
+        # Capped: drawing more than exist without replacement would raise.
+        indices = np.random.choice(indices, size=min(n, indices.shape[0]),
+                                   replace=False)
+    images = a[indices][:n]
+    actual_n_images = images.shape[0]   # Number of actual images in array
+    labels = None if labels is None else np.array(labels)[indices][:n]
+    labels2 = None if labels2 is None else np.array(labels2)[indices][:n]
+
+    # HANDLE GRAYSCALE IMAGES with a chanels axis
+    # Sized by the images actually present, which may be fewer than `n`.
+    if len(im_shape) == 3 and im_shape[-1] == 1:
+        images = images.reshape(actual_n_images, im_shape[0], im_shape[1])
+
+    # PLOT
+    fig, axes = plt.subplots(gridsize[1], gridsize[0])  # figsize=(10,10)
+    axes = np.array(axes).flatten() # Unroll axes to a flat list
+    fig.suptitle(title, fontsize=15,
+                 fontdict={"fontweight": "extra bold"})
+
+    # PLOT EACH IMAGE
+    for i, ax in enumerate(axes):
+        if i >= actual_n_images:
+            # FILL WITH BLANK IMAGES IF NOT ENOUGH IMAGES TO FIT GRID
+            ax.get_yaxis().set_visible(False)
+            ax.get_xaxis().set_visible(False)
+            ax.set_aspect('equal')
+            continue
+        ax = array_to_plot(images[i], reshape=reshape, cmap=cmap,
+                           vrange=vrange, ax=ax)
+        ax.set_aspect('equal')
+
+        # CELL LABEL
+        if labels is not None and labels2 is not None:
+            pos2 = -(label_font_size/2)
+            pos1 = pos2-(1.2*label_font_size)
+            ax.text(0, pos1, labels[i], fontsize=label_font_size, color=label_color, ha='left')
+            ax.text(0, pos2, labels2[i], fontsize=label_font_size, color=label2_color,ha='left')
+            fig.subplots_adjust(wspace=0.01, hspace=0.4)
+        elif labels is not None:
+            ax.set_title(labels[i], color=label_color, fontsize=label_font_size)
+            fig.subplots_adjust(wspace=0.01, hspace=0.3)
+        elif labels2 is not None:
+            ax.set_title(labels2[i], color=label2_color, fontsize=label_font_size)
+            fig.subplots_adjust(wspace=0.01, hspace=0.3)
+
+    if saveto:
+        fig.savefig(saveto)
+    if show:
+        fig.show()
+    else:
+        plt.close(fig)
+
+
+# ==============================================================================
+#                                                          GRID_OF_SAMPLE_BBOXES
+# ==============================================================================
+def grid_of_sample_bboxes(a, bboxes, bboxes2=None, gridsize = (5,5),
+                          fill=None, outline=GREEN,
+                          fill2=None, outline2=RED,
+                          proportional=True, **kwargs):
+    """Plot a grid of images with one or two sets of bounding boxes drawn on.
+
+    Only the first gridsize[0]*gridsize[1] images and bbox rows are used.
+    A deep copy of that slice of `a` is passed through
+    `overlay_bounding_boxes_array()` once for `bboxes` and, when given, once
+    more for `bboxes2`; the result goes to `grid_of_sample_images()`.
+
+    Args:
+        a:            (numpy array) Images array, in one of these shapes:
+                      - [n_samples, width, height]
+                      - [n_samples, width, height, n_chanels]
+        bboxes:       (numpy array) Bounding boxes for each image, shaped
+                      [n_samples, 4*num_bounding_boxes], where each group of
+                      4 columns represents one bbox as [x1, y1, x2, y2].
+        bboxes2:      (numpy array) Optional second set of bounding boxes,
+                      eg predictions to compare against the true boxes.
+        gridsize:     (tuple of two ints) Grid size, also passed on to
+                      `grid_of_sample_images()`.
+        fill:         (str) fill color for first bbox
+        outline:      (str) outline color for first bbox
+        fill2:        (str) fill color for second bbox
+        outline2:     (str) outline color for second bbox
+        proportional: (bool) Whether the bounding box data is proportional
+                      to the image dimensions (ie, in the range 0-1)
+        **kwargs:     Additional key word arguments to pass on to
+                      `grid_of_sample_images()`
+    """
+    limit = gridsize[0]*gridsize[1]
+    canvas = copy.deepcopy(a[:limit])
+
+    # (boxes, outline, fill) per layer, in drawing order; the second layer
+    # only exists when `bboxes2` was supplied.
+    layers = [(bboxes[:limit], outline, fill)]
+    if bboxes2 is not None:
+        layers.append((bboxes2[:limit], outline2, fill2))
+    for layer_boxes, layer_outline, layer_fill in layers:
+        canvas = overlay_bounding_boxes_array(canvas, bboxes=layer_boxes,
+                                              proportional=proportional,
+                                              outline=layer_outline,
+                                              fill=layer_fill)
+
+    grid_of_sample_images(canvas, gridsize=gridsize, **kwargs)
+
+
+# ==============================================================================
+#                                                           EPOCH_VISUALISATIONS
+# ==============================================================================
+def epoch_visualisations(path, epoch, data, bboxes_pred, digits_pred):
+    """Draw and save the visualisations made at the end of each epoch.
+
+    One PNG named after the zero-padded epoch number is saved in each of
+    these sub directories of `path`: "whole_bbox", "digit_bbox",
+    "digit_preds" and "difficult_digits".
+
+    The first two grids overlay true and predicted bounding boxes on the
+    first 25 samples, the third labels those samples with true and predicted
+    digits, and the last does the same for up to 36 mispredicted samples.
+
+    Args:
+        path:   (str) path to the directory that will hold all the epoch
+                visualisations
+        epoch:  (int) epoch number
+        data:   (DataObj) sample data.  The attributes read here are `X`,
+                `Y`, `whole_bboxes` and `digit_bboxes`, plus the method
+                `extract_items()`.
+        bboxes_pred: (numpy array) predicted bboxes for the sample images.
+                Its first 4 columns are paired with `whole_bboxes`, the
+                remaining columns with `digit_bboxes`.
+        digits_pred: (numpy array) predicted digits for the sample images
+    """
+    n_grid = 25   # images in each 5x5 grid
+    n_hard = 36   # images in the 6x6 grid of difficult cases
+
+    # ESTABLISH PATHS
+    sub_dirs = ("whole_bbox", "digit_bbox", "digit_preds", "difficult_digits")
+    out_dirs = [os.path.join(path, sub_dir) for sub_dir in sub_dirs]
+    for out_dir in out_dirs:
+        maybe_mkdir(out_dir)
+    whole_bbox_dir, digit_bbox_dir, digits_pred_dir, dificult_digits_dir = out_dirs
+    file_name = "{}.png".format(str(epoch).zfill(4))
+
+    # Keyword arguments shared by the two bounding box grids.
+    bbox_grid = dict(gridsize=[5, 5], label_font_size=8, show=False,
+                     outline=GREEN + "BB", fill=GREEN + "22",
+                     outline2=RED, fill2=None)
+
+    # WHOLE BOUNDING BOXES (labels come from `batch_iou`)
+    true_whole = data.whole_bboxes[:n_grid]
+    pred_whole = bboxes_pred[:n_grid, :4]
+    grid_of_sample_bboxes(data.X[:n_grid],
+                          bboxes=true_whole,
+                          bboxes2=pred_whole,
+                          labels=batch_iou(pred_whole, true_whole),
+                          saveto=os.path.join(whole_bbox_dir, file_name),
+                          **bbox_grid)
+
+    # DIGIT BOUNDING BOXES (labels: row means of `batch_multi_column_iou`)
+    true_digit = data.digit_bboxes[:n_grid]
+    pred_digit = bboxes_pred[:n_grid, 4:]
+    digit_ious = batch_multi_column_iou(pred_digit, true_digit)
+    grid_of_sample_bboxes(data.X[:n_grid],
+                          bboxes=true_digit,
+                          bboxes2=pred_digit,
+                          labels=digit_ious.mean(axis=1),
+                          saveto=os.path.join(digit_bbox_dir, file_name),
+                          **bbox_grid)
+
+    # DIGIT PREDICTIONS
+    labels = array_of_digit_arrays_to_ints(data.Y[:n_grid], null=10)
+    labels_pred = array_of_digit_arrays_to_ints(digits_pred[:n_grid], null=10)
+    grid_of_sample_images(data.X[:n_grid],
+                          labels=labels,
+                          labels2=labels_pred,
+                          gridsize=[5, 5],
+                          label_font_size=8,
+                          saveto=os.path.join(digits_pred_dir, file_name),
+                          show=False)
+
+    # VISUALISE DIFICULT CASES: rows where any predicted digit is wrong
+    is_wrong = (digits_pred != data.Y).any(axis=1)
+    hard_data = data.extract_items(indices=is_wrong, deepcopy=True)
+    hard_preds = digits_pred[is_wrong]
+    labels = array_of_digit_arrays_to_ints(hard_data.Y[:n_hard], null=10)
+    labels_pred = array_of_digit_arrays_to_ints(hard_preds[:n_hard], null=10)
+    grid_of_sample_images(hard_data.X[:n_hard],
+                          labels=labels,
+                          labels2=labels_pred,
+                          gridsize=[6, 6],
+                          label_font_size=7,
+                          saveto=os.path.join(dificult_digits_dir, file_name),
+                          show=False)
+
+
+