## Prune a pre-trained model

Following this [tutorial](https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras)

In [1]:
import numpy as np
import tensorflow_model_optimization as tfmot

import tempfile

from keras.models import load_model
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, TerminateOnNaN

import keras.backend as K
import tensorflow as tf

from decimal import Decimal

### Get Data and Load Model

In [2]:
X_train = np.load('../train_and_val/X_train_ext.npy')
X_val = np.load('../train_and_val/X_val_ext.npy')
y_train = np.load('../train_and_val/y_train_ext.npy')
y_val = np.load('../train_and_val/y_val_ext.npy')

In [3]:
print('Maximum Scaled Duration for X_train: {}'.format(X_train[:, :, -1].max()))
print('Maximum Scaled Duration for X_val: {}'.format(X_val[:, :, -1].max()))
print('Maximum Scaled Duration for y_train: {}'.format(y_train[:, -1].max()))
print('Maximum Scaled Duration for y_val: {}'.format(y_val[:, -1].max()))

Maximum Scaled Duration for X_train: 0.860215053763441
Maximum Scaled Duration for X_val: 0.5516975308641976
Maximum Scaled Duration for y_train: 1.0
Maximum Scaled Duration for y_val: 0.9166666666666667


In [4]:
print('Train-Validation Ratio of the Mean of the Scaled Duration: ', y_train[:, -1].mean() / y_val[:, -1].mean())
print('Train-Validation Ratio of the Stdv of the Scaled Duration: ', y_train[:, -1].std() / y_val[:, -1].std())

Train-Validation Ratio of the Mean of the Scaled Duration:  0.9898257021922111
Train-Validation Ratio of the Stdv of the Scaled Duration:  0.9209054376139967


In [2]:
def maestro_loss_wr(harshness, n_dur_nodes): 
    """A loss function which, in addition to penalizing for misclassification on the 
    first n_keys_piano elements, includes a term proportional to the relative
    error in the prediction of the last n_dur_nodes elements (whose mean represents
    the duration). The proportionality constant is the 'harshness' of the maestro in 
    regards to timing."""
    def maestro_loss(ytrue, ypred):
        # Standard binary cross-entropy
        bce_loss = - K.mean(ytrue[:, :-n_dur_nodes] * K.log(ypred[:, :-n_dur_nodes]) + \
                            (1 - ytrue[:, :-n_dur_nodes]) * K.log(1 - ypred[:, :-n_dur_nodes]))

        # Duration error term
        dur_loss = 2 * harshness * K.mean(K.abs(K.mean(ytrue[:, -n_dur_nodes:], axis = 1) - \
                                                K.mean(ypred[:, -n_dur_nodes:], axis = 1)) / \
                                      (K.mean(ytrue[:, -n_dur_nodes:], axis = 1) + \
                                       K.mean(ypred[:, -n_dur_nodes:], axis = 1) + K.epsilon()))
        
        if (dur_loss > bce_loss):   # Often times, ytrue[:, :-n_dur_nodes] elements will be zero
            return bce_loss * 2     # (for a rest). This may spike dur_loss. To control, I limit it
                                    # so that it never exceeds the bce_loss.
        return bce_loss + dur_loss
    
    return maestro_loss
def precision_mod_wr(n_dur_nodes):
    def precision_mod(ytrue, ypred):
        """Just a modified precision excluding the last n_dur_nodes elements (which are not
        classification nodes)"""

        true_positives = K.sum(K.round(ytrue[:, :-n_dur_nodes] * ypred[:, :-n_dur_nodes]))
        pred_positives = K.sum(K.round(ypred[:, :-n_dur_nodes]))
        return true_positives / (pred_positives + K.epsilon())
    return precision_mod

def recall_mod_wr(n_dur_nodes):
    def recall_mod(ytrue, ypred):
        """Just a modified recall excluding the last n_dur_nodes elements (which are not
        classification nodes)"""

        true_positives = K.sum(K.round(ytrue[:, :-n_dur_nodes] * ypred[:, :-n_dur_nodes]))
        poss_positives = K.sum(ytrue[:, :-n_dur_nodes])
        return true_positives / (poss_positives + K.epsilon())
    return recall_mod

def f1_score_mod_wr(n_dur_nodes):
    def f1_score_mod(ytrue, ypred):
        """Just a modified f1_score excluding the last n_dur_nodes elements (which are not
        classification nodes)"""

        precision = precision_mod_wr(n_dur_nodes)(ytrue, ypred)
        recall = recall_mod_wr(n_dur_nodes)(ytrue, ypred)   
        return 2 * (precision * recall) / (precision + recall + K.epsilon())
    return f1_score_mod

def dur_error_wr(n_dur_nodes):
    def dur_error(ytrue, ypred):
        """A new metric that only gives information on the error in duration predictions"""
    
        return 2 * K.mean(K.abs((K.mean(ytrue[:, -n_dur_nodes:], axis = 1) - \
                   K.mean(ypred[:, -n_dur_nodes:], axis = 1)) / (K.mean(ytrue[:, -n_dur_nodes:], \
                    axis = 1) + K.mean(ypred[:, -n_dur_nodes:], axis = 1) + K.epsilon())))
    return dur_error

def maestro_dur_loss_wr(harshness, n_dur_nodes):
    """The second term of the maestro loss, based purely on error in duration predictions.
    To be used as a metric in order to decompose the loss components during analysis"""
    def maestro_dur_loss(ytrue, ypred):

        return 2 * harshness * K.mean(K.abs((K.mean(ytrue[:, -n_dur_nodes:], axis = 1) - \
                                      K.mean(ypred[:, -n_dur_nodes:], axis = 1)) / \
                                      (K.mean(ytrue[:, -n_dur_nodes:], axis = 1) + \
                                      K.mean(ypred[:, -n_dur_nodes:], axis = 1) + K.epsilon())))
    return maestro_dur_loss

In [3]:
harshness = 0.05

In [4]:
def load_model_from_file(file_path, harshness = harshness, n_dur_nodes = 20):
    
    custom_objects = {'maestro_loss': maestro_loss_wr(harshness, \
        n_dur_nodes), 'f1_score_mod': f1_score_mod_wr(n_dur_nodes), \
        'recall_mod': recall_mod_wr(n_dur_nodes), 'precision_mod': \
        precision_mod_wr(n_dur_nodes), 'dur_error': \
        dur_error_wr(n_dur_nodes), 'maestro_dur_loss': \
        maestro_dur_loss_wr(harshness, n_dur_nodes)}

    return load_model(file_path, custom_objects = custom_objects)

In [9]:
opt = RMSprop()

In [10]:
def prune_model_with_checkpoint(model, filename = 'best_pruned_maestro_model_ext20_2_1_1024_0pt4_mnv_2.h5', \
                                harshness = 0.05, n_dur_nodes = 20, batch_size = 512, epochs = 2, \
                                initial_sparsity = 0.5, final_sparsity = 0.8):
    
    prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
    end_step = np.ceil(X_train.shape[0] / batch_size) * epochs
    
    # Define model for pruning.
    pruning_params = {
          'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
              initial_sparsity = initial_sparsity, final_sparsity = final_sparsity,
              begin_step=0, end_step=end_step)
    }
    model_for_pruning = prune_low_magnitude(model, **pruning_params)
    
    # `prune_low_magnitude` requires a recompile.
    model_for_pruning.compile(loss = maestro_loss_wr(harshness, n_dur_nodes), 
                          optimizer = opt, 
                          metrics = [f1_score_mod_wr(n_dur_nodes), recall_mod_wr(n_dur_nodes), \
                                     precision_mod_wr(n_dur_nodes), dur_error_wr(n_dur_nodes), \
                                     maestro_dur_loss_wr(harshness, n_dur_nodes)])

    model_for_pruning.summary()
    
    logdir = tempfile.mkdtemp()

    mc = ModelCheckpoint('../models/' + filename, monitor = 'val_loss', mode = 'min', \
                                                            save_best_only = True, verbose = 1)
    callbacks = [
      tfmot.sparsity.keras.UpdatePruningStep(),
      tfmot.sparsity.keras.PruningSummaries(log_dir = logdir),
      mc, 
      TerminateOnNaN()
    ]

    model_for_pruning.fit(X_train, y_train, batch_size = batch_size, epochs = epochs, 
                      validation_data = (X_val, y_val), verbose = 2, callbacks = callbacks)
    
    return model

In [17]:
model = load_model_from_file('../models/best_maestro_model_ext20_2_1_1024_0pt4_mnv_2.h5')
pruned_model = prune_model_with_checkpoint(model)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
prune_low_magnitude_lstm_4 ( (None, 16, 1024)          9277443   
_________________________________________________________________
prune_low_magnitude_dropout_ (None, 16, 1024)          1         
_________________________________________________________________
prune_low_magnitude_lstm_5 ( (None, 1024)              16781315  
_________________________________________________________________
prune_low_magnitude_dropout_ (None, 1024)              1         
_________________________________________________________________
prune_low_magnitude_dense_4  (None, 512)               1049090   
_________________________________________________________________
prune_low_magnitude_activati (None, 512)               1         
_________________________________________________________________
prune_low_magnitude_dropout_ (None, 512)              

RuntimeError: Unable to create link (name already exists)

In [11]:
def prune_model(model, filename = 'best_pruned_maestro_model_ext20_2_1_1024_0pt4_mnv_2', harshness = 0.05, \
                n_dur_nodes = 20, batch_size = 512, epochs = 50, initial_sparsity = 0.5, final_sparsity = 0.8):
    
    prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
    end_step = np.ceil(X_train.shape[0] / batch_size) * epochs
    
    # Define model for pruning.
    pruning_params = {
          'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
              initial_sparsity = initial_sparsity, final_sparsity = final_sparsity,
              begin_step=0, end_step=end_step)
    }
    model_for_pruning = prune_low_magnitude(model, **pruning_params)
    
    # `prune_low_magnitude` requires a recompile.
    model_for_pruning.compile(loss = maestro_loss_wr(harshness, n_dur_nodes), 
                          optimizer = opt, 
                          metrics = [f1_score_mod_wr(n_dur_nodes), recall_mod_wr(n_dur_nodes), \
                                     precision_mod_wr(n_dur_nodes), dur_error_wr(n_dur_nodes), \
                                     maestro_dur_loss_wr(harshness, n_dur_nodes)])

    #model_for_pruning.summary()

    logdir = tempfile.mkdtemp()

    callbacks = [
      tfmot.sparsity.keras.UpdatePruningStep(),
      tfmot.sparsity.keras.PruningSummaries(log_dir = logdir),
      TerminateOnNaN()
    ]
    filepath = '../models/' + filename + '_{0}_{1}'.format(str(initial_sparsity).replace('.', 'pt'), \
                                         '{0:.1f}'.format(final_sparsity).replace('.', 'pt')) + '.h5'
    # ModelCheckpoint is giving a funny error (RuntimeError: Unable to create link (name already exists), 
    # so here is my workaround:
    print('Epoch 1/{}'.format(epochs))
    history = model_for_pruning.fit(X_train, y_train, batch_size = batch_size, epochs = 1, 
                validation_data = (X_val, y_val), verbose = 2, callbacks = callbacks)
    if (np.isnan(history.history['val_loss'][0])): # NaN failure in first epoch
        return model
    else:
        min_val_loss = history.history['val_loss'][0]
        print('val_loss is {a:2.5f}, saving model to {b}'.format(a = min_val_loss, b = filepath))
        model.save(filepath, save_format = 'h5')
        
    for i in range(epochs - 1):
        print('Epoch {}/{}'.format(i + 2, epochs))
        history = model_for_pruning.fit(X_train, y_train, batch_size = batch_size, epochs = 1, 
                      validation_data = (X_val, y_val), verbose = 2, callbacks = callbacks)
        if (np.isnan(history.history['val_loss'][0])): # NaN failure  
            break
        else:
            if (history.history['val_loss'][0] < min_val_loss):
                print('val_loss improved from {a:2.5f} to {b:2.5f}, saving model to {c}'.format(\
                            a = min_val_loss, b = history.history['val_loss'][0], c = filepath))
                model.save(filepath, save_format = 'h5')
                min_val_loss = history.history['val_loss'][0]
    return model

In [12]:
model = load_model_from_file('../models/best_maestro_model_ext20_2_1_1024_0pt4_mnv_2.h5')
pruned_model = prune_model(model, epochs = 5)

Instructions for updating:
Please use `layer.add_weight` method instead.
Epoch 1/5
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
50/50 - 465s - loss: 0.0584 - f1_score_mod: 0.6254 - recall_mod: 0.5019 - precision_mod: 0.8332 - dur_error: 0.1663 - maestro_dur_loss: 0.0083 - val_loss: 0.0769 - val_f1_score_mod: 0.5730 - val_recall_mod: 0.4681 - val_precision_mod: 0.7386 - val_dur_error: 0.1661 - val_maestro_dur_loss: 0.0083
val_loss is 0.07686, saving model to ../models/best_pruned_maestro_model_ext20_2_1_1024_0pt4_mnv_2_0pt5_0pt8.h5
Epoch 2/5
50/50 - 466s - loss: 0.0535 - f1_score_mod: 0.6722 - recall_mod: 0.5621 - precision_mod: 0.8362 - dur_error: 0.1587 - maestro_dur_loss: 0.0079 - val_loss: 0.0775 - val_f1_score_mod: 0.5849 - val_recall_mod: 0.4870 - val_precision_mod: 0.7321 - val_dur_error: 0.1651 - val_maestro_dur_loss: 0.0083
Epoch 3/5
50/50 - 495s - loss: 0.0779 - f1_score_mod: 0.4704 - recall_mod: 0.3395 - precision_mod: 0.7686 - dur_error: 0.2103 - m

The initial model's minimum val_loss was 0.07825, so we have actually done slightly better with 80% of the weights! However, the saved model file is still 109 MB (exactly the same as before!). What happens if I try to save just the weights?

In [13]:
pruned_model.save_weights('../models/best_pruned_maestro_model_weights_ext20_2_1_1024_0pt4_mnv_2_0pt5_0pt8.h5')

Wow, the weights this file is 54.5 MB, exactly as large as the weights file from the input model here. So the 0 weights must be explicitly being saved instead of severing the connections. How to save space and time on inference?

Now following this [tutorial](https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras#create_3x_smaller_models_from_pruning):

In [5]:
model = load_model_from_file('../models/best_maestro_model_ext20_2_1_1024_0pt4_mnv_2.h5')
pruned_model = load_model_from_file('../models/best_pruned_maestro_model_ext20_2_1_1024_0pt4_mnv_2_0pt5_0pt8.h5')

In [6]:
pruned_model.save('../models/best_pruned_maestro_model_ext20_2_1_1024_0pt4_mnv_2_0pt5_0pt8')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: ../models/best_pruned_maestro_model_ext20_2_1_1024_0pt4_mnv_2_0pt5_0pt8/assets


In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(pruned_model)
tflite_pruned_model = converter.convert()

INFO:tensorflow:Assets written to: /var/folders/ps/lfq5vmk5793cw8f0kjkpnc3m0000gn/T/tmphrtwbgzb/assets


In [6]:
import tempfile
model_for_export = tfmot.sparsity.keras.strip_pruning(pruned_model)

_, pruned_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer = False)
print('Saved pruned Keras model to:', pruned_keras_file)

Saved pruned Keras model to: /var/folders/ps/lfq5vmk5793cw8f0kjkpnc3m0000gn/T/tmpjpvqf2wm.h5


In [10]:
!pip install jupyter_contrib_nbextensions

Collecting jupyter_contrib_nbextensions
  Downloading jupyter_contrib_nbextensions-0.5.1-py2.py3-none-any.whl (20.9 MB)
[K     |████████████████████████████████| 20.9 MB 1.7 MB/s eta 0:00:011
Collecting jupyter-contrib-core>=0.3.3
  Downloading jupyter_contrib_core-0.3.3-py2.py3-none-any.whl (18 kB)
Collecting jupyter-highlight-selected-word>=0.1.1
  Downloading jupyter_highlight_selected_word-0.2.0-py2.py3-none-any.whl (11 kB)
Collecting jupyter-latex-envs>=1.3.8
  Downloading jupyter_latex_envs-1.4.6.tar.gz (861 kB)
[K     |████████████████████████████████| 861 kB 10.5 MB/s eta 0:00:01
Collecting jupyter-nbextensions-configurator>=0.4.0
  Downloading jupyter_nbextensions_configurator-0.4.1.tar.gz (479 kB)
[K     |████████████████████████████████| 479 kB 4.9 MB/s eta 0:00:01
Building wheels for collected packages: jupyter-latex-envs, jupyter-nbextensions-configurator
  Building wheel for jupyter-latex-envs (setup.py) ... [?25ldone
[?25h  Created wheel for jupyter-latex-envs: file

In [7]:
# Last time the next cell wrote way too many characters
from notebook.services.config import ConfigManager
cm = ConfigManager().update('notebook', {'limit_output': 100000})

In [None]:
# Causes error and writes way too much to the screen. Can't just get a piece because the buffer size must be > 10,000 characters
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

In [None]:
_, pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file, 'wb') as f:
  f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_file)

In [35]:
import tempfile
import zipfile
from os import path

models = [model, pruned_model]
model_strings = ['model', 'pruned_model']
for i in range(len(models)):

    _, filepath = tempfile.mkstemp(".h5")
    print("Saving {} to: ".format(model_strings[i]), filepath)
    models[i].save_weights(filepath)

    # Zip the .h5 model file
    _, zip3 = tempfile.mkstemp(".zip")
    with zipfile.ZipFile(zip3, "w", compression=zipfile.ZIP_DEFLATED) as f:
        f.write(filepath)

    print('Size of {0} before compression: {1:.2f} MB'.format(cur_model, path.getsize(filepath) / float(2 ** 20)))
    print('Size of {0} after compression: {1:.2f} MB'.format(cur_model, path.getsize(zip3) / float(2 ** 20)))

Saving model to:  /var/folders/ps/lfq5vmk5793cw8f0kjkpnc3m0000gn/T/tmp2yprf4pu.h5
Size of model before compression: 51.96 MB
Size of model after compression: 48.16 MB
Saving pruned_model to:  /var/folders/ps/lfq5vmk5793cw8f0kjkpnc3m0000gn/T/tmpjmb3dqr7.h5
Size of model before compression: 51.96 MB
Size of model after compression: 48.16 MB
