In [1]:
import numpy as np
from gtsrb import batch_generator

# Load the GTSRB dataset archive; later cells pass `data` to batch_generator.
data = np.load('gtsrb_dataset.npz')

### File format

**npz** is a simple zip archive produced by NumPy, similar to a typical zip file.
From this I can guess there are two files in the dataset: one contains the "test images" and the other has the "train images".

### What format does TensorFlow want images in?
[source](https://www.tensorflow.org/api_docs/python/tf/summary/image)

The images are built from a tensor, which must be 4-D with shape [batch_size, height, width, channels], where channels can be:

1: tensor is interpreted as Grayscale.
3: tensor is interpreted as RGB.
4: tensor is interpreted as RGBA.

### batch_generator function simplified

* batch size - images per batch
* chunk - a set of indices
* the batch generator function divides the images into batches of 100 (the batch size)
* x are the images
* y is the label corresponding to the images.

**The dataset**
* The entire y training data is an array of size (39209, 43). I think 39209 is the number of images and 43 is the length of each image's label vector (one entry per class).
* x training (with chunk) data returns an array of size (100, 32, 32, 3)
    * 100 - chunk size (from batch_size).
    * 32, 32 is the 32x32 image size; going by the TensorFlow shape above, the order should be height then width (both are 32 here, so it makes no practical difference)
    * 3 is the number of colour channels
    * images are already in the correct 32x32x3 format
* y training data (with chunk) returns an array (100, 43)
    * 100 - chunk size (from batch_size)
    * 43 is a binary feature vector 
---

```python
group = 'train'
'y_{0:s}'.format(group) == 'y_train' # I assume this is the y training labels
```

```python
# produce indices for the number of images
indices = range(dataset_size)
# shuffle these indices, essentially to pick out images in a random order
np.random.shuffle(indices)
```

[slice](https://docs.python.org/2/library/functions.html#slice) returns a read-only slice object representing a set of indices.
The set is defined by the inputs (start, stop, step). The idea is similar to linspace in MATLAB. It is used to select a specified range of the shuffled indices.


```python
dataset['X_{0:s}'.format(group)][chunk], dataset['y_{0:s}'.format(group)][chunk]
```
Accesses the training/testing images with the training/testing labels respectively


```python
>>> dataset['y_{0:s}'.format(group)][chunk][10, :]
[0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]```
this is a label for each image, it is a binary vector of length 43

In [2]:
import os
import tensorflow as tf

# NOTE(review): this session is opened before main() calls
# tf.reset_default_graph(); the TensorFlow docs describe resetting the default
# graph while a session is active as undefined behaviour - confirm this
# ordering is intended.
sess = tf.InteractiveSession()

# settings
# Command-line style flags; values are read back through FLAGS.<name>.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('log-frequency', 10,
                            'Number of steps between logging results to the console and saving summaries.' +
                            ' (default: %(default)d)')
tf.app.flags.DEFINE_integer('flush-frequency', 50,
                            'Number of steps between flushing summary results. (default: %(default)d)')
tf.app.flags.DEFINE_integer('save-model-frequency', 100,
                            'Number of steps between model saves. (default: %(default)d)')
tf.app.flags.DEFINE_string('log-dir', '{cwd}/logs/'.format(cwd=os.getcwd()),
                           'Directory where to write event logs and checkpoint. (default: %(default)s)')

# Optimisation hyperparameters
# for coding purposes set max_steps to a small value
# max_steps_real is the intended full-length value; it is not wired into any
# flag in this cell.
max_steps_real = 10000
max_steps = 1000
tf.app.flags.DEFINE_integer('max-steps', max_steps,
                            'Number of mini-batches to train on. (default: %(default)d)')
tf.app.flags.DEFINE_integer('batch-size', 100, 'Number of examples per mini-batch. (default: %(default)d)')
# Help text previously duplicated an unrelated description and formatted the
# float default with %d (which would display 0.001 as 0).
tf.app.flags.DEFINE_float('learning-rate', 1e-3, 'Learning rate for the optimiser. (default: %(default)g)')
tf.app.flags.DEFINE_integer('img-width', 32, 'Image width (default: %(default)d)')
tf.app.flags.DEFINE_integer('img-height', 32, 'Image height (default: %(default)d)')
tf.app.flags.DEFINE_integer('img-channels', 3, 'Image channels (default: %(default)d)')
tf.app.flags.DEFINE_integer('num-classes', 43, 'Number of classes (default: %(default)d)')

In [17]:
# each iteration returns a batch of (batch_size) images

# code in main start here

In [30]:
def deep_nn(x_image, class_count, is_training=False):
    """Build the CNN and return its unscaled class scores (logits).

    Layer stack (after https://www.tensorflow.org/tutorials/layers):
    conv1 (5x5, 32 filters) -> batch norm -> ReLU -> 2x2 max-pool ->
    conv2 (5x5, 64 filters) -> batch norm -> ReLU -> 2x2 max-pool ->
    flatten -> fc1 (1024 units, ReLU) -> fc2 (class_count units).

    Args:
        x_image: 4-D image tensor laid out as [batch, height, width, channels].
            Height, width and channels must be statically known so the
            flattened size can be computed while building the graph.
        class_count: number of units in the final dense layer.
        is_training: forwarded to the batch-normalisation layers as
            `training`. The default (False) keeps the behaviour this function
            had before the parameter existed. When passing True, the
            moving-average update ops collected in tf.GraphKeys.UPDATE_OPS
            must be run together with the training op.

    Returns:
        The output tensor of the final dense layer ('fc2').

    Raises:
        ValueError: if the pooled feature map has an unknown (None)
            height, width or channel dimension.
    """
    # first convolutional layer - maps an RGB image to 32 feature maps
    conv1 = tf.layers.conv2d(
        inputs=x_image,
        filters=32,
        kernel_size=[5, 5],
        padding='same',
        use_bias=False,  # batch norm follows directly after this layer
        name='conv1'
    )

    # normalise batch, then apply the activation
    conv1_bn = tf.layers.batch_normalization(conv1, training=is_training, name='conv1_bn')
    conv1_bn = tf.nn.relu(conv1_bn)

    # pool layer 1
    pool1 = tf.layers.max_pooling2d(
        inputs=conv1_bn,
        pool_size=[2, 2],
        strides=2,
        name='pool1'
    )

    # convolutional layer 2
    # No activation here: ReLU is applied once, after batch norm, exactly as
    # for conv1 (previously ReLU ran both before and after the normalisation).
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding='same',
        use_bias=False,
        name='conv2'
    )

    # normalise batch, then apply the activation
    conv2_bn = tf.layers.batch_normalization(conv2, training=is_training, name='conv2_bn')
    conv2_bn = tf.nn.relu(conv2_bn)

    # pool layer 2
    pool2 = tf.layers.max_pooling2d(
        inputs=conv2_bn,
        pool_size=[2, 2],
        strides=2,
        name='pool2'
    )

    # Flatten using the real static shape of pool2 rather than a hard-coded
    # 7 * 7 * 64 (the value for 28x28 inputs). For 32x32 inputs the pooled map
    # is 8x8x64, so the old constant did not match the tensor size.
    pool2_dims = pool2.get_shape().as_list()[1:]
    if any(dim is None for dim in pool2_dims):
        raise ValueError(
            'deep_nn needs statically known height, width and channels, got '
            'pooled shape {}'.format(pool2_dims))
    flat_size = pool2_dims[0] * pool2_dims[1] * pool2_dims[2]
    pool2_flat = tf.reshape(pool2, [-1, flat_size], name='pool2_flattened')

    # fully connected layer 1
    fc1 = tf.layers.dense(
        inputs=pool2_flat,
        activation=tf.nn.relu,
        units=1024,
        name='fc1'
    )

    # fully connected layer 2; its raw outputs are the logits
    logits = tf.layers.dense(
        inputs=fc1,
        units=class_count,
        name='fc2'
    )

    return logits

In [39]:
# Development switch: when False only the first training batch is kept, which
# keeps graph-building experiments quick.
# NOTE(review): `args` is not created in any cell visible here - confirm it is
# defined before this cell runs on a fresh kernel.
args.production = False

# Generate batches
for x_train, y_train in batch_generator(data, 'train'):
    if args.production:
        # production path: run main on every batch (not implemented yet)
        continue
    # development path: keep this single batch and stop iterating
    x_image = x_train
    break

In [41]:
def main(_):
    """Build the computation graph: input placeholders followed by the CNN.

    Args:
        _: unused positional argument; the function never reads it.
    """
    # clear graph so repeated calls start from an empty default graph
    tf.reset_default_graph()

    with tf.name_scope('input'):
        # Build the computation graph by creating nodes for the input images
        # and target output classes. x and y_ are placeholders: they carry no
        # values until fed when TensorFlow is asked to run a computation.
        x = tf.placeholder(tf.float32, shape=[None, FLAGS.img_width * FLAGS.img_height * FLAGS.img_channels])
        # -1 lets the batch size be inferred from the fed values. The image
        # dimensions come from the same FLAGS that size the placeholder so the
        # two cannot drift apart (they were previously hard-coded as 32, 32, 3).
        x_image = tf.reshape(x, [-1, FLAGS.img_height, FLAGS.img_width, FLAGS.img_channels])
        # one column per class
        y_ = tf.placeholder(tf.float32, shape=[None, FLAGS.num_classes])

    with tf.variable_scope('model'):
        # build graph
        logits = deep_nn(x_image, FLAGS.num_classes)


    
   

# main ignores its argument, so pass None explicitly instead of IPython's `_`
# (the previous cell output), which depends on kernel history.
main(None)