## Train a CNN to Recognise Words

The stamp process in the '_Data' notebook has produced some nice-looking spectrograms with a uniform (64,32) shape. 

Let's just recognise the words the stamps represent by learning to differentiate between the 'stamp' images: a task for which the MNIST CNN is almost perfect...

In [None]:
"""Convolutional Neural Network Estimator, built with tf.keras (originally for MNIST)."""

import os, sys

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import pickle

# nvidia-smi --gpu-reset
import tensorflow as tf
from tensorflow import keras

# Presumably a switch for (re)running the training cells; it is not referenced
# anywhere in the cells visible here -- TODO confirm where it is consumed.
do_training = True

In [None]:
# Report the interpreter and library versions (compare with the 'Expecting' note below)
print(sys.version)
for lib_name, lib_module in (('Tensorflow', tf), ('Keras', keras)):
    print(lib_name + ':', lib_module.__version__)

Expecting:
```
3.6.6 (default, Jul 19 2018, 14:25:17) 
[GCC 8.1.1 20180712 (Red Hat 8.1.1-5)]
Tensorflow: 1.12.0
Keras: 2.1.6-tf
```

In [None]:
# Stretch the notebook's main container to the full browser width.
# `IPython.display` is the public home of `display` and `HTML`; importing
# `display` from `IPython.core.display` is deprecated in newer IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Basename of the pickled datasets : the loading cells read
# 'data/<prefix>.pkl' and 'data/<prefix>-test.pkl'
prefix='num'

In [None]:
# Load training and validation data
# NOTE: unpickling can execute arbitrary code -- only load files you produced yourself
# (these are presumably the output of the '_Data' notebook).
# The 'with' block guarantees the file handle is closed once the data is read.
with open(os.path.join('data', prefix+'.pkl'), 'rb') as pkl_file:
    dataset = pickle.load(pkl_file)

# dataset['rand'] holds one number per item : items with rand<=0.9 go to training,
# the remainder form the validation ('check') split
train_indices = [ i for i,r in enumerate(dataset['rand']) if r<=0.9 ]
check_indices = [ i for i,r in enumerate(dataset['rand']) if r>0.9 ]

print("Training and Validation(='check_') data loaded, %d items total " % (len(dataset['stamp']),))

In [None]:
num_classes = 10  # width of the one-hot labels and of the final softmax layer

input_shape = (64, 32, 1)  # a (64,32) stamp plus one channel; tf backend is channels_last

batch_size = 20  # default batch size of make_dataset(); also used to derive the steps per epoch
num_epochs = 20  # epochs passed to model.fit(); also the repeat count of the datasets

In [None]:
# MNIST-style CNN : two (5x5 conv -> 2x2 max-pool) stages feeding a small dense
# classifier with dropout, finishing in a softmax over the classes
model = keras.models.Sequential([
    keras.layers.Conv2D(16, kernel_size=(5, 5), activation='relu', input_shape=input_shape),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(16, (5, 5), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(num_classes, activation='softmax'),
])

In [None]:
# Categorical cross-entropy pairs with the softmax output layer and the one-hot
# labels produced in make_dataset(); accuracy is tracked as an extra metric
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(lr=0.001),
              metrics=['accuracy'])

In [None]:
# Create Dataset iterator

def make_dataset(data, indices, seed=None, num_epochs=1, batch_size=batch_size):
    """Build a batched `tf.data.Dataset` of (image, one-hot label) pairs.

    Args:
        data: mapping providing 'stamp' (images) and 'label' (integer classes).
        indices: positions of the items of `data` to include.
        seed: when not None, the items are shuffled (buffer of `batch_size*4`)
            and this value seeds the shuffle so runs are reproducible;
            when None the original order is kept.
        num_epochs: how many times the selected items are repeated.
        batch_size: items per batch (the default is the module-level value
            captured when this function was defined).

    Returns:
        The (optionally shuffled) repeated and batched dataset.
    """

    # Get the data into tensorflow
    stamps = np.array( data['stamp'] )[indices]
    print("stamps.shape:", stamps.shape)
    # Scale to [0,1] (assumes the stamps are stored as 0..255 values -- TODO confirm)
    # and append the trailing channel axis expected by the CNN
    stamps_with_channel = np.expand_dims( stamps / 255.0, -1)

    labels = np.array( data['label'] )[indices]
    print("labels.shape:", labels.shape)
    labels_one_hot = keras.utils.to_categorical(labels, num_classes)

    all_images = tf.constant( stamps_with_channel, shape=stamps_with_channel.shape, dtype=tf.float32 )
    all_labels = tf.constant( labels_one_hot, shape=labels_one_hot.shape, verify_shape=True )

    ds = tf.data.Dataset.from_tensor_slices( (all_images, all_labels) )
    if seed is not None:
        # Previously the seed only switched shuffling on; it is now actually
        # handed to the shuffle op so the ordering is reproducible
        ds = ds.shuffle(batch_size*4, seed=seed)

    ds = ds.repeat(num_epochs).batch(batch_size)

    return ds

In [None]:
# Training stream : a seed is given, so make_dataset() shuffles; repeated num_epochs times
ds_train = make_dataset(dataset, train_indices, num_epochs=num_epochs, seed=100)  # shuffles...

In [None]:
# Validation stream : no seed, so the order is kept; one item per batch, repeated num_epochs times
ds_check = make_dataset(dataset, check_indices, num_epochs=num_epochs, batch_size=1)

In [None]:
# Steps per epoch : the number of whole batches in the training split
# (the integer division drops any partial final batch)
spe = len(train_indices) // batch_size
# Shown as the cell output for a quick sanity check
spe, len(train_indices)

In [None]:
# Train from the batched training stream. Validation uses one step per validation
# item, since ds_check was built with batch_size=1
model.fit(ds_train, steps_per_epoch=spe, epochs=num_epochs, 
          validation_data=ds_check, validation_steps=len(check_indices), 
          verbose=1)

In [None]:
# Evaluate on the validation stream, one step per item (batch_size=1 there).
# Given the compile settings the result is expected to be [loss, accuracy].
score = model.evaluate(ds_check, steps=len(check_indices), verbose=1)
score

... comment on results ...

### Now let's look at some 'live examples'

In [None]:
# Load the ad-hoc test set; the 'with' block closes the file handle after reading.
# NOTE: unpickling can execute arbitrary code -- only load files you produced yourself.
with open(os.path.join('data', prefix+'-test.pkl'), 'rb') as pkl_file:
    dataset_test = pickle.load(pkl_file)

print("Ad-hoc test data loaded")

In [None]:
def get_predictions_for_dataset( data ):
    """Run the trained model over every item of `data` and describe each result.

    Args:
        data: mapping providing 'stamp', 'label' and 'words' entries.

    Returns:
        A list with one dict per item, holding:
          'classes'       : index of the most probable class (argmax of the probabilities),
          'probabilities' : the model output for the item,
          'logits'        : log of the probabilities (1e-20 is added to avoid log(0)),
          'label'         : the item's integer label,
          'word'          : data['words'][label] when label>=0, otherwise data['words'][i].
    """
    n_points = len(data['stamp'])
    # One item per batch and a single pass, so `n_points` steps cover the data exactly once
    ds = make_dataset(data, range( n_points ), num_epochs=1, batch_size=1)

    pred_arr = model.predict(ds, steps=n_points, verbose=0)
    #print(pred_arr)  # This is an array of predictions, each with n_classes of probs

    # 'classes' is the predicted class, i.e. the argmax of the probabilities
    # (it used to be the item's position in the list, which carried no information)
    predictions = [ dict(classes=int(np.argmax(p)), probabilities=p, logits=np.log(p+1e-20)) 
                    for p in pred_arr ]
    
    for i, p in enumerate(predictions):
        label = int(data['label'][i])
        if label>=0:
            p['word'] = data['words'][label]
        else:
            # Negative label : fall back to the entry of 'words' at the item's own position
            p['word'] = data['words'][i]
        p['label'] = label
    
    return predictions

# Run the model over the ad-hoc test set, then list each item with its per-class percentages
predictions = get_predictions_for_dataset(dataset_test)

print()
for idx, pred in enumerate(predictions):
    percentages = [ "%6.2f%%" % (prob*100,) for prob in pred['probabilities'] ]
    line = "%s == %d  p=[%s]" % (dataset_test['words'][idx], pred['classes'],  ','.join(percentages),)
    print( line )

In [None]:
def show_heat_map(heat_map, yticks=None):
    """Display a matrix (one row per item, one column per class) as a blue heat map.

    Args:
        heat_map: sequence of equal-length rows of values.
        yticks: optional row labels; when omitted (or empty) rows are numbered.
    """
    fig, ax = plt.subplots()
    ax.xaxis.tick_top()  # column ticks along the top edge
    plt.imshow(heat_map, interpolation='nearest', cmap=plt.cm.Blues, aspect='auto')
    # One tick per column, taken from the data instead of a hard-coded 10
    plt.xticks( range(len(heat_map[0])) )
    if yticks:
        plt.yticks( range(len(heat_map)), yticks )
    else:
        plt.yticks( range(len(heat_map)) )
    plt.show()

In [None]:
# Heat map of the test-set predictions : one row per item, one column per class
heat_map = [ pred['probabilities'] for pred in predictions ]

show_heat_map(heat_map)

### Extra... 

What happens if we try to look at the 'animals' test with the 'num' network?

In [None]:
# Load the 'animals' data; the 'with' block closes the file handle after reading.
# NOTE: unpickling can execute arbitrary code -- only load files you produced yourself.
with open(os.path.join('data', 'animals.pkl'), 'rb') as pkl_file:
    dataset_animals = pickle.load(pkl_file)

# Push the animal sounds through the network trained on prefix='num'
predictions_animals = get_predictions_for_dataset(dataset_animals)

heat_map = [ p['probabilities'] for p in predictions_animals]

# Rows are labelled with the word attached to each prediction
show_heat_map(heat_map, [ p['word'] for p in predictions_animals])

In [None]:
# Same plot using the 'logits' entries (log of the probabilities) instead
heat_map = [ pred['logits'] for pred in predictions_animals ]
show_heat_map(heat_map, yticks=[ pred['word'] for pred in predictions_animals ])

In [None]:
from sklearn import svm

# Features are the CNN's 'logits' for each animal sound (the 'probabilities'
# entry would be the alternative); targets are the animal labels
animal_features = [ pred['logits'] for pred in predictions_animals ]
animal_targets  = [ pred['label']  for pred in predictions_animals ]

# A linear SVM on top of the network outputs -- learn from the data (QUICK!)
animals_from_numbers_svm_classifier = svm.LinearSVC()
animals_from_numbers_svm_classifier.fit(animal_features, animal_targets)

In [None]:
# Load the animals test set; the 'with' block closes the file handle after reading.
# NOTE: unpickling can execute arbitrary code -- only load files you produced yourself.
with open(os.path.join('data', 'animals-test.pkl'), 'rb') as pkl_file:
    dataset_animals_test = pickle.load(pkl_file)

predictions_animals_test = get_predictions_for_dataset(dataset_animals_test)

print('\n\nanimals class predictions from SVM classifier based on digits-CNN output')
for i,p in enumerate(predictions_animals_test):
    # The SVM was fitted on 'logits', so feed it the same feature (as a 1-row matrix)
    svm_prediction = animals_from_numbers_svm_classifier.predict( p['logits'].reshape(1,-1) )
    # predict() returns a 1-element array : extract a plain int for formatting and indexing
    predicted_class = int(svm_prediction[0])

    # The actual word comes from the test item itself (p['word']), not from the
    # training set indexed by the test position; the predicted word is looked up
    # in the training set's 'words' by class, since the SVM targets were its labels
    print("Sound[%d] is '%s' - predicted class[%d] = '%s'" % (
            i, p['word'], 
            predicted_class, dataset_animals['words'][predicted_class],))


... Any good? ...