# Updatable Neural Network Classifier
This notebook demonstrates the process of creating a simple fully connected neural network classifier that suggests the next pick on a TicTacToe gameboard. The model is created using Keras and then converted to the Core ML format using the coremltools Keras converter (`keras_converter`). Once in Core ML format, we mark all four fully connected (dense) layers as updatable. Lastly, we attach a categorical cross-entropy loss layer to the softmax output of the last layer and use SGD as the optimizer. 

In [14]:
def create_keras_base_model(url):
    """Build, compile and save the untrained Keras base model.

    The network is a small fully connected (dense) classifier, not a
    convolutional one: a 9-value input vector, three Dense(9) layers with
    relu activation and a final Dense(9) layer with softmax activation.
    It is compiled with categorical cross-entropy loss and an SGD optimizer
    and then written to disk. No training happens here.

    url - The path the keras model will be saved to as h5 file.
    """
    import keras
    from keras.layers import Dense
    from keras.models import Sequential

    # Width of the input vector, of every hidden layer and of the output layer.
    num_cells = 9

    # Reset Keras' global state before building. The later cells address the
    # layers by their auto-generated names (dense_1 ... dense_4).
    # NOTE(review): presumably this is what keeps those names stable across
    # re-runs of the cell - confirm against the installed Keras version.
    keras.backend.clear_session()

    model = Sequential()
    model.add(Dense(num_cells, input_shape=(num_cells, ), activation='relu'))
    model.add(Dense(num_cells, activation='relu'))
    model.add(Dense(num_cells, activation='relu'))
    # Softmax output: the loss layer attached after conversion reads this output.
    model.add(Dense(num_cells, activation='softmax'))

    # NOTE(review): `lr` is the argument name used by the Keras version this
    # notebook was written for; newer releases call it `learning_rate`.
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.SGD(lr=0.05),
                  metrics=['accuracy'])

    model.save(url)

# Path of the saved Keras model (h5 file); the conversion cell below loads the model from here.
keras_model_path = './KerasNNClassifier.h5'
create_keras_base_model(keras_model_path)

In [15]:
def convert_keras_to_mlmodel(keras_url, mlmodel_url):
    """Load a saved Keras model and write it back out as a Core ML classifier.

    keras_url - path of the h5 file the keras model is loaded from.
    mlmodel_url - path the converted Core ML model is saved to.
    """
    from keras.models import load_model
    source_model = load_model(keras_url)

    from coremltools.converters import keras as keras_converter

    # Class labels handed to the converter, laid out as the 3x3 board they name.
    board_positions = [
        'topLeft', 'top', 'topRight',
        'midLeft', 'mid', 'midRight',
        'bottomLeft', 'bottom', 'bottomRight',
    ]

    # Feature names of the converted model; later cells refer to
    # 'gameboard' and 'pickProbabilities' by these exact names.
    conversion_options = dict(
        input_names=['gameboard'],
        output_names=['pickProbabilities'],
        class_labels=board_positions,
        predicted_feature_name='pick',
    )

    converted = keras_converter.convert(source_model, **conversion_options)
    converted.save(mlmodel_url)
     
# Path the converted (not yet updatable) Core ML model is written to; later cells load the spec from here.
coreml_model_path = './NNClassifier.mlmodel'
convert_keras_to_mlmodel(keras_model_path , coreml_model_path)

0 : dense_1_input, <keras.engine.input_layer.InputLayer object at 0xa2ac24450>
1 : dense_1, <keras.layers.core.Dense object at 0xa2ac242d0>
2 : dense_1__activation__, <keras.layers.core.Activation object at 0xa2acd3f10>
3 : dense_2, <keras.layers.core.Dense object at 0xa2ac24dd0>
4 : dense_2__activation__, <keras.layers.core.Activation object at 0xa2acb60d0>
5 : dense_3, <keras.layers.core.Dense object at 0xa2ac2c1d0>
6 : dense_3__activation__, <keras.layers.core.Activation object at 0xa2ac4fd50>
7 : dense_4, <keras.layers.core.Dense object at 0xa2ac2ca10>
8 : dense_4__activation__, <keras.layers.core.Activation object at 0xa2ab251d0>


In [16]:
# let's inspect the last few layers of this model
import coremltools
# Load the spec of the converted model from disk and wrap it in a builder.
# `builder` is module-level state: the following cells keep using it.
spec = coremltools.utils.load_spec(coreml_model_path)
builder = coremltools.models.neural_network.NeuralNetworkBuilder(spec=spec)
builder.inspect_layers(last=3)

[Id: 7], Name: dense_4__activation__ (Type: softmax)
          Updatable: False
          Input blobs: ['dense_4_output']
          Output blobs: ['pickProbabilities']
[Id: 6], Name: dense_4 (Type: innerProduct)
          Updatable: False
          Input blobs: ['dense_3__activation___output']
          Output blobs: ['dense_4_output']
[Id: 5], Name: dense_3__activation__ (Type: activation)
          Updatable: False
          Input blobs: ['dense_3_output']
          Output blobs: ['dense_3__activation___output']


In [17]:
# let's inspect the input of the model as we need this information later on the make_updatable method
builder.inspect_input_features()

# The descriptions and metadata below are written directly into the builder's spec.
neuralnetwork_spec = builder.spec

# The multi-array input is used as-is (this notebook performs no input-type
# conversion), so inspecting it again prints the same description as above.
builder.inspect_input_features()

# Set input and output description
neuralnetwork_spec.description.input[0].shortDescription = (
        "TicTacToe gameboard, where a value of 1 is .a (AIPlayer's pick), "
        "0 is nil/empty and -1 is .b (opponent's pick)")
neuralnetwork_spec.description.output[0].shortDescription = 'Probabilities / score for each pick'
neuralnetwork_spec.description.output[1].shortDescription = 'Predicted pick'

# Provide metadata
neuralnetwork_spec.description.metadata.author = 'Max Obermeier'
neuralnetwork_spec.description.metadata.license = 'MIT'
neuralnetwork_spec.description.metadata.shortDescription = (
        'An updatable TicTacToe-pick classifier (suggests next pick).')


[Id: 0] Name: gameboard
          Type: multiArrayType {
  shape: 9
  dataType: DOUBLE
}

[Id: 0] Name: gameboard
          Type: multiArrayType {
  shape: 9
  dataType: DOUBLE
}



In [19]:
def make_updatable(builder, mlmodel_url, mlmodel_updatable_path):
    """This method makes an existing non-updatable mlmodel updatable.

    The layers are marked updatable, a loss layer and an optimizer are attached
    and the number of epochs is set through ``builder``; the builder's spec is
    then saved as a new Core ML model.

    builder - the NeuralNetworkBuilder holding the spec of the model to make updatable.
    mlmodel_url - the path the Core ML model is stored. Currently unused (the spec
                  is taken from builder); kept so existing callers keep working.
    mlmodel_updatable_path - the path the updatable Core ML model will be saved.
    """
    from coremltools.models import MLModel
    from coremltools.models.neural_network import SgdParams

    model_spec = builder.spec

    # make_updatable method is used to make a layer updatable. It requires a list of layer names.
    # dense_1 ... dense_4 are the four innerProduct layers of this model; all of them are made updatable.
    builder.make_updatable(['dense_1', 'dense_2', 'dense_3', 'dense_4'])

    # Categorical Cross Entropy or Mean Squared Error can be chosen for the loss layer.
    # Categorical Cross Entropy is used in this example. CCE requires two inputs: 'name' and 'input'.
    # name must be a string and will be the name associated with the loss layer
    # input must be the output of a softmax layer in the case of CCE.
    # The loss's target will be provided automatically as a part of the model's training inputs.
    builder.set_categorical_cross_entropy_loss(name='lossLayer', input='pickProbabilities')

    # In addition to the loss layer, an optimizer must also be defined. SGD and Adam optimizers are supported.
    # SGD is used in this example. To use SGD, one must set lr (learningRate) and batch (miniBatchSize);
    # momentum is an optional parameter.
    builder.set_sgd_optimizer(SgdParams(lr=0.05, batch=1))

    # Finally, the number of epochs must be set as follows.
    builder.set_epochs(10)

    # Set training inputs descriptions
    model_spec.description.trainingInput[0].shortDescription = 'Example gameboard.'
    model_spec.description.trainingInput[1].shortDescription = 'Associated true label (best pick) of example gameboard'

    # save the updated spec
    mlmodel_updatable = MLModel(model_spec)
    mlmodel_updatable.save(mlmodel_updatable_path)

# Path the updatable Core ML model is written to; the inspection cell below reloads it from here.
coreml_updatable_model_path = './UpdatableNNClassifier.mlmodel'
make_updatable(builder, coreml_model_path, coreml_updatable_model_path)

Now adding input pickProbabilities_true as target for categorical cross-entropy loss layer.


In [20]:
# let's inspect the loss layer of the Core ML model
import coremltools
# Reload the updatable model that was just saved, so the inspection reflects the file on disk.
# This rebinds the module-level `spec` and `builder` used by the following cells.
spec = coremltools.utils.load_spec(coreml_updatable_model_path)
builder = coremltools.models.neural_network.NeuralNetworkBuilder(spec=spec)

builder.inspect_loss_layers()

[Id: 0], Name: lossLayer (Type: categoricalCrossEntropyLossLayer)
          Loss Input: pickProbabilities
          Loss Target: pickProbabilities_true


In [21]:
# let's inspect the optimizer of the Core ML model
# (`builder` here wraps the spec reloaded from the updatable model file)
builder.inspect_optimizer()

Optimizer Type: sgdOptimizer
lr: 0.05, min: 0.0, max: 1.0
batch: 1, allowed_set: [1]
momentum: 0.0, min: 0.0, max: 1.0


In [22]:
# let's see which layers are updatable
builder.inspect_updatable_layers()

Name: dense_4 (Type: innerProduct)
          Input blobs: ['dense_3__activation___output']
          Output blobs: ['dense_4_output']
Name: dense_3 (Type: innerProduct)
          Input blobs: ['dense_2__activation___output']
          Output blobs: ['dense_3_output']
Name: dense_2 (Type: innerProduct)
          Input blobs: ['dense_1__activation___output']
          Output blobs: ['dense_2_output']
Name: dense_1 (Type: innerProduct)
          Input blobs: ['gameboard']
          Output blobs: ['dense_1_output']
