In [25]:
import argparse
import numpy as np
import os
import pandas as pd
import tensorflow as tf
import zlib
import tensorflow_model_optimization as tfmot

# Experiment variant; read further down to choose the forecast horizon and the model.
version = "a"

# Seed TensorFlow and NumPy so that runs are repeatable.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Download (and extract) the Jena climate archive into ./data.
zip_path = tf.keras.utils.get_file(
    fname='jena_climate_2009_2016.csv.zip',
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    cache_dir='.',
    cache_subdir='data',
    extract=True)
# Dropping the ".zip" suffix gives the path of the extracted CSV file.
csv_path, _ = os.path.splitext(zip_path)
df = pd.read_csv(csv_path)

# Keep only the two columns used as features (positions 2 and 5 of the CSV).
column_indices = [2, 5]
columns = df.columns[column_indices]
data = df[columns].values.astype(np.float32)

# Chronological 70% / 20% / 10% split into train / validation / test.
n = len(data)
train_end = int(n*0.7)
val_end = int(n*0.9)
train_data = data[0:train_end]
val_data = data[train_end:val_end]
test_data = data[val_end:]

# Per-feature statistics computed on the training split only.
mean = np.mean(train_data, axis=0)
std = np.std(train_data, axis=0)

# Number of past time steps fed to the model.
input_width = 6

# Forecast horizon (number of predicted time steps) for each supported version.
OUTPUT_STEPS_BY_VERSION = {"a": 3, "b": 9}
if version not in OUTPUT_STEPS_BY_VERSION:
    # Fail here with a clear message instead of leaving `output_steps`
    # undefined and hitting a NameError in a later cell.
    raise ValueError(
        "Unsupported version {!r}; expected one of {}".format(
            version, sorted(OUTPUT_STEPS_BY_VERSION)))
output_steps = OUTPUT_STEPS_BY_VERSION[version]

In [26]:
# Class to deal with windows in the temperature and humidity forecasting
class WindowGenerator:
    """Build (inputs, labels) window datasets from a two-feature time series.

    Each window spans ``input_width + output_steps`` consecutive samples:
    the first ``input_width`` samples are the model inputs and the last
    ``output_steps`` samples are the labels.
    """

    def __init__(self, input_width, output_steps, mean, std):
        """Store the window geometry and the normalization statistics.

        Args:
            input_width: number of time steps given to the model.
            output_steps: number of time steps to predict.
            mean: per-feature mean (two values), reshaped to [1, 1, 2].
            std: per-feature standard deviation (two values), reshaped to [1, 1, 2].
        """
        self.input_width = input_width
        self.output_steps = output_steps
        self.mean = tf.reshape(tf.convert_to_tensor(mean), [1, 1, 2])
        self.std = tf.reshape(tf.convert_to_tensor(std), [1, 1, 2])

    def split_window(self, features):
        """Split a batch of windows into inputs (head) and labels (tail)."""
        inputs = features[:, :self.input_width, :]
        labels = features[:, -self.output_steps:, :]

        # Slicing loses the static shape; restore it so Keras layers can be built.
        inputs.set_shape([None, self.input_width, 2])
        labels.set_shape([None, self.output_steps, 2])

        return inputs, labels

    def normalize(self, features):
        """Standardize features with the stored mean/std (epsilon avoids division by zero)."""
        features = (features - self.mean) / (self.std + 1.e-6)

        return features

    def preprocess(self, features):
        """Split a window batch and normalize the inputs only; labels keep their raw values."""
        inputs, labels = self.split_window(features)
        inputs = self.normalize(inputs)

        return inputs, labels

    def make_dataset(self, data, train):
        """Create a batched dataset of (inputs, labels) windows.

        Args:
            data: array of samples to window (time along the first axis).
            train: when exactly ``True`` the dataset elements are shuffled.

        Returns:
            A dataset of batches of 32 windows, preprocessed and cached.
        """
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
                data=data,
                targets=None,
                # Use the instance's own width, not a notebook-level global, so
                # the generator works for whatever width it was constructed with.
                sequence_length=self.input_width + self.output_steps,
                sequence_stride=1,
                batch_size=32)
        ds = ds.map(self.preprocess)
        ds = ds.cache()
        if train is True:
            # Applied after batching, so whole batches are shuffled.
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds

In [27]:
# Class to deal with two values of MAE (temperature and humidity)
class MultiOutputMAE(tf.keras.metrics.Metric):
    """Mean absolute error reported separately for each of the two output features.

    The metric accumulates the per-batch mean absolute error (averaged over
    the batch and time axes) and divides by the number of batches seen, so
    every batch contributes equally regardless of its size.
    """

    def __init__(self, name='mean_absolute_error', **kwargs):
        super().__init__(name=name, **kwargs)
        # Keyword arguments keep these calls independent of the positional
        # order of `add_weight`, which is not the same in every Keras version.
        self.total = self.add_weight(name='total', shape=(2,), initializer='zeros')
        self.count = self.add_weight(name='count', shape=(), initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add one batch to the running sums; `sample_weight` is ignored."""
        error = tf.abs(y_pred - y_true)
        # Average over axes 0 and 1, leaving one value per feature.
        error = tf.reduce_mean(error, axis=[0, 1])
        self.total.assign_add(error)
        self.count.assign_add(1.)

    def reset_state(self):
        """Zero both accumulators."""
        self.count.assign(tf.zeros_like(self.count))
        self.total.assign(tf.zeros_like(self.total))

    def result(self):
        """Return the two-element MAE; zeros if no batch has been seen yet."""
        return tf.math.divide_no_nan(self.total, self.count)

In [38]:
# Load and evaluate quantized models
def load_and_evaluation(path, dataset):
    """Evaluate a zlib-compressed TFLite model and return its per-feature MAE.

    Args:
        path: path of a file containing a zlib-compressed TFLite flatbuffer.
        dataset: batched dataset yielding (inputs, labels) pairs; it is
            re-batched to size 1 before inference.

    Returns:
        NumPy array with one mean absolute error per feature (last axis),
        averaged over all samples and all predicted time steps.

    Raises:
        ValueError: if the dataset yields no samples.
    """
    # Context manager so the file handle is always closed.
    with open(path, 'rb') as f:
        decompressed_model = zlib.decompress(f.read())

    interpreter = tf.lite.Interpreter(model_content=decompressed_model)
    interpreter.allocate_tensors()
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    # set batch size to 1 when running inference with TFLite models
    dataset = dataset.unbatch().batch(1)

    outputs = []
    labels = []

    for sample_inputs, sample_labels in dataset:
        my_input = np.array(sample_inputs, dtype=np.float32)
        label = np.array(sample_labels, dtype=np.float32)

        interpreter.set_tensor(input_index, my_input)
        interpreter.invoke()
        my_output = interpreter.get_tensor(output_index)

        # Drop the size-1 batch axis explicitly. A blanket squeeze would also
        # remove the sample axis when the dataset holds a single window.
        outputs.append(my_output[0])
        labels.append(label[0])

    if not outputs:
        raise ValueError('dataset is empty: nothing to evaluate')

    # NOTE(review): assumes each prediction/label is (steps, features) — confirm.
    outputs = np.array(outputs)
    labels = np.array(labels)

    # Average over samples (axis 0) and time steps (axis 1). The divisor now
    # follows the actual number of output steps instead of a hard-coded 3.
    mae = np.mean(np.absolute(outputs - labels), axis=(0, 1))
    return mae

# Calibration data generator for weights+activations quantization
def representative_dataset_generator():
    """Yield the inputs of up to 1000 elements of `train_ds`, each wrapped in a list."""
    calibration_elements = train_ds.take(1000)
    for inputs, _labels in calibration_elements:
        yield [inputs]

# Build the windowed datasets; only the training split is shuffled.
generator = WindowGenerator(
    input_width=input_width,
    output_steps=output_steps,
    mean=mean,
    std=std)
train_ds = generator.make_dataset(data=train_data, train=True)
val_ds = generator.make_dataset(data=val_data, train=False)
test_ds = generator.make_dataset(data=test_data, train=False)

In [None]:
# Baseline (full-width) models. Each maps an (input_width, 2) window to an
# (output_steps, 2) forecast.
mlp = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(input_width, 2), name='flatten'),
    tf.keras.layers.Dense(128, activation='relu', name='dense1'),
    tf.keras.layers.Dense(128, activation='relu', name='dense2'),
    tf.keras.layers.Dense(units = 2*output_steps, name='output_layer'),
    tf.keras.layers.Reshape([output_steps, 2])
])

cnn = tf.keras.Sequential([
    tf.keras.layers.Conv1D(input_shape=(input_width, 2), filters=64, kernel_size=3, activation='relu', name='convolution'),
    tf.keras.layers.Flatten(name='flatten'),
    tf.keras.layers.Dense(units=64, activation='relu', name='dense1'),
    tf.keras.layers.Dense(units=2*output_steps, name='output_layer'),
    tf.keras.layers.Reshape([output_steps, 2])
])

lstm = tf.keras.Sequential([
    tf.keras.layers.LSTM(input_shape=(input_width, 2), units=64, name='lstm'),
    tf.keras.layers.Flatten(name='flatten'),
    tf.keras.layers.Dense(units=2*output_steps, name='output_layer'),
    tf.keras.layers.Reshape([output_steps, 2])
])

# Select model to train (from `version`).
# NOTE(review): the key -> architecture mapping now matches PRUNED_MODELS
# (a: MLP, b: CNN, c: LSTM), so the baseline and the width-scaled model chosen
# for a given version share the same architecture. The mapping was previously
# reversed ('c': mlp, 'a': lstm) — confirm this is the intended assignment.
MODELS = {'a': mlp, 'b': cnn, 'c': lstm}
model = MODELS[version]
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam()
metrics = [MultiOutputMAE()]

### **Training without optimization**

In [None]:
# Train the selected baseline model
model.compile(loss = loss, optimizer = optimizer, metrics = metrics)
model.fit(train_ds, epochs=5, validation_data=val_ds)
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (which would add a stray "None" line).
model.summary()

# Test model
test_loss, test_error = model.evaluate(test_ds)
print('Test error: ', test_error)

In [None]:
# Save the model on disk
output_dir = os.path.join('.', 'models', 'no_optimization')
# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(output_dir, exist_ok=True)

# Export a serving signature with a fixed batch size of 1; the window length
# follows `input_width` instead of repeating the literal 6.
run_model = tf.function(lambda x: model(x))
concrete_func = run_model.get_concrete_function(tf.TensorSpec([1, input_width, 2], tf.float32))
saving_path = os.path.join(output_dir, 'Group2_th_{}'.format(version))
model.save(saving_path, signatures=concrete_func)

# Convert the saved model to a TFLite model
converter = tf.lite.TFLiteConverter.from_saved_model(saving_path)
tflite_model = converter.convert()

# Path of the .tflite file (written uncompressed)
tflite_model_dir = os.path.join(output_dir, 'Group2_th_{}.tflite'.format(version))

with open(tflite_model_dir, 'wb') as fp:
    fp.write(tflite_model)

### **Training with pruning**

**1) Structured Pruning via Width Scaling**

In [34]:
# Width multiplier applied to the hidden layers; expected in (0, 1].
alpha = 0.03


def _scaled_units(base_units):
    """Scale a layer width by `alpha`, never going below one unit."""
    # int() truncates, so a small alpha could otherwise give 0 units.
    return max(1, int(base_units * alpha))


# Width-scaled models (version a and b)
pruned_mlp = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(input_width, 2), name='flatten'),
    tf.keras.layers.Dense(_scaled_units(128), activation='relu', name='dense1'),
    tf.keras.layers.Dense(_scaled_units(128), activation='relu', name='dense2'),
    tf.keras.layers.Dense(units=2*output_steps, name='output_layer'),
    tf.keras.layers.Reshape([output_steps, 2])
])

pruned_cnn = tf.keras.Sequential([
    tf.keras.layers.Conv1D(input_shape=(input_width, 2), filters=_scaled_units(64), kernel_size=3, activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=2*output_steps),
    tf.keras.layers.Reshape([output_steps, 2])
])

pruned_lstm = tf.keras.Sequential([
    tf.keras.layers.LSTM(input_shape=(input_width, 2), units=_scaled_units(64), name='lstm'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=2*output_steps, name='output_layer'),
    tf.keras.layers.Reshape([output_steps, 2])
])

# Select model to train (from `version`)
PRUNED_MODELS = {'a': pruned_mlp, 'b': pruned_cnn, 'c': pruned_lstm}
model = PRUNED_MODELS[version]
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam()
metrics = [MultiOutputMAE()]

In [40]:
class CustomCallback(tf.keras.callbacks.Callback):
    """Print the two components of the "mean_absolute_error" log after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # `logs` defaults to None and may lack the metric; skip printing
        # instead of raising TypeError/KeyError in the middle of training.
        mae = (logs or {}).get("mean_absolute_error")
        if mae is None:
            return
        t = mae[0]
        h = mae[1]
        print("\t T mae={:.3f}, H mae={:.3f}".format(t, h))

mycallback = CustomCallback()

**2) Magnitude-based Pruning**

In [41]:
epochs = 50

# `train_ds` is already batched, so its length is the number of optimizer
# steps per epoch; no further division by the batch size is needed.
steps_per_epoch = len(train_ds)

# Sparsity goes from 25% to 50% between the step counts of epoch 2 and epoch 20.
pruning_params = {'pruning_schedule':
    tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.25,
        final_sparsity=0.5,
        begin_step=2*steps_per_epoch,
        end_step=20*steps_per_epoch
        )
    }

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
model = prune_low_magnitude(model, **pruning_params)

# Early stopping callback
es_callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5)

# UpdatePruningStep is the pruning callback; the other two log the
# per-feature MAE and stop early on a stalled validation loss.
callbacks = [tfmot.sparsity.keras.UpdatePruningStep(), mycallback, es_callback]

# Train the model (the window length follows `input_width`, not a literal 6)
input_shape = [32, input_width, 2]
model.build(input_shape)
model.compile(loss = loss, optimizer = optimizer, metrics = metrics)
model.fit(train_ds, epochs=epochs, validation_data=val_ds, callbacks=callbacks)

test_loss, test_error = model.evaluate(test_ds)
print('Test error: ', test_error)

# Strip the pruning wrappers so that only the plain layers remain
model = tfmot.sparsity.keras.strip_pruning(model)

  trainable=False)
  aggregation=tf.VariableAggregation.MEAN)
  aggregation=tf.VariableAggregation.MEAN)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Test error:  [0.3104353 1.222645 ]


### Quantization on trained models

**1) Weights-only PTQ**

In [None]:
# Weights-only post-training quantization of the current Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Allow both built-in TFLite ops and selected TensorFlow ops in the output.
allowed_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
converter.target_spec.supported_ops = allowed_ops

tflite_quantized_model = converter.convert()

**2) Weights+Activations PTQ**

In [None]:
# Weights+activations post-training quantization, calibrated on training inputs.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_generator
#converter.target_spec.supported_ops = [
#    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
#]
tflite_quantized_model2 = converter.convert()

# The target directory is not created anywhere else, so create it here.
quantized_dir = os.path.join('.', 'models', 'quantized')
os.makedirs(quantized_dir, exist_ok=True)
quantized_model_dir2 = os.path.join(quantized_dir, 'Group2_th_{}_quantized_wa'.format(version))

# load_and_evaluation() zlib-decompresses the file it reads, so the model
# must be stored compressed; writing the raw flatbuffer makes it fail.
with open(quantized_model_dir2, 'wb') as fp:
    fp.write(zlib.compress(tflite_quantized_model2))

# Size on disk of the compressed quantized model (weights and activations)
print('Quantized model size (weights and activations): {:.2f}kB'.format(os.path.getsize(quantized_model_dir2)/1000))

# Evaluation of the PTQ model
mae = load_and_evaluation(quantized_model_dir2, test_ds)
print('MAE quantized model (weight and activations)', mae)

**3) Quantization-aware training**

In [None]:
# Wrap the current model for quantization-aware training.
# `model` is rebound to the wrapped model, so the cells below operate on it.
quantize_model = tfmot.quantization.keras.quantize_model
model = quantize_model(model)

# Show the layers of the wrapped model.
model.summary()

In [None]:
# Fine-tune the quantization-aware model for one epoch (no early stopping).
# The window length follows `input_width` instead of repeating the literal 6.
input_shape = [32, input_width, 2]
model.build(input_shape)
model.compile(loss = loss, optimizer = optimizer, metrics = metrics)
model.fit(train_ds, epochs=1, validation_data=val_ds)

# The quantization-aware model is bound to `model`; no `q_aware_model`
# variable is defined in this notebook, so evaluating it raised a NameError.
test_loss, test_error = model.evaluate(test_ds)
print('Test error: ', test_error)

**Save model**

In [44]:
# Convert the current model to TFLite with the default optimizations.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_model = converter.convert()

model_dir = os.path.join('models', 'prova', 'Group2_th_1.tflite')
# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(os.path.dirname(model_dir), exist_ok=True)

# Store the flatbuffer zlib-compressed; load_and_evaluation() decompresses it.
tflite_compressed = zlib.compress(tflite_model)
with open(model_dir, 'wb') as fp:
    fp.write(tflite_compressed)

# Size on disk of the compressed model
print('Model size: {:.2f}kB'.format(os.path.getsize(model_dir)/1000))

# Evaluate the compressed TFLite model on the test set
mae = load_and_evaluation(model_dir, test_ds)
print('MAE quantized model: [t_mae={:.3f}, h_mae={:.3f}]'.format(mae[0], mae[1]))

INFO:tensorflow:Assets written to: /tmp/tmpai3aup8r/assets


INFO:tensorflow:Assets written to: /tmp/tmpai3aup8r/assets


Model size: 1.45kB
MAE quantized model: [t_mae=0.310, h_mae=1.223]
