In [None]:
# Clone the feature-extractor-quantization branch of TyBox.
# `git clone` needs the repository URL; the branch is selected with --branch
# (a GitHub ".../tree/<branch>" web URL is not a clonable remote).
!git clone --branch feature-extractor-quantization https://github.com/pavmassimo/TyBox.git

In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from keras import layers
from keras.models import Sequential
from keras.utils import np_utils
from keras import backend as K 
K.set_image_data_format('channels_last')

from sklearn.preprocessing import OneHotEncoder

import sys
# make the cloned TyBox repository importable (appended to the end of the module search path)
sys.path.append('.//TyBox')

from TyBox import TyBox
from TyBox import profiler


from matplotlib import pyplot as plt

## Download and preparation of the dataset

In [None]:
from keras.datasets import cifar10

# Fetch CIFAR-10 as (images, labels) array pairs for the train and test splits.
(train_examples, train_labels), (test_examples, test_labels) = cifar10.load_data()

# Convert the images to float32 and divide by 255 to rescale the pixel values.
train_examples = train_examples.astype('float32') / 255.0
test_examples = test_examples.astype('float32') / 255.0

# Labels are intentionally left as integer class ids (no one-hot encoding here):
# the model is compiled with a sparse categorical cross-entropy loss.

BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 100

# Training pipeline: shuffle within a small buffer, then batch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_examples, train_labels))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

# Test pipeline: batched in the original order, no shuffling.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_examples, test_labels))
    .batch(BATCH_SIZE)
)

## Load model

In [4]:
# Load the previously saved Keras model from the local "input_model" path.
model = keras.models.load_model("input_model")

In [5]:
# Optimizer configuration: Adam with a fixed learning rate.
pruning_lrate = 0.00135
pruning_adam = tf.keras.optimizers.Adam(learning_rate=pruning_lrate)

# Sparse categorical cross-entropy is used because labels are integer class ids.
model.compile(
    optimizer=pruning_adam,
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

#### Model summary

In [None]:
# Print the Keras summary of the loaded model.
model.summary()

## Network Profiler
It is possible now to initialize the profiler with the defined model.
The profiler can provide useful information to the designer of the network on the memory occupations in terms of weights and activations, in total and at a per-layer granularity. 

In [None]:
# The profiler instance is stored under the name `profiler`, which is also the
# name of the module imported at the top of the notebook. Import the module
# under an alias here so this cell can be re-run: otherwise the second run
# would look up `Profiler` on the instance instead of on the module.
from TyBox import profiler as tybox_profiler

# Profile the loaded model for two precision settings ([8, 8] and [32, 32]).
profiler = tybox_profiler.Profiler(
    network_name="model",
    model=model,
    precisions=[[8, 8], [32, 32]],
)

In [None]:
# Report the memory occupation computed by the profiler.
profiler.print_occupations()

In [None]:
# Report the profiler results at per-layer granularity.
profiler.print_per_layer()

## Introduce concept drift: class swap

In the data used for incremental training and testing, classes 4 and 6 are swapped to simulate an abrupt concept drift.


In [None]:
def swap_class(num):
    """Exchange class ids 4 and 6; every other id is returned unchanged."""
    if num == 4:
        return 6
    if num == 6:
        return 4
    return num

# CIFAR-10 labels are loaded as an (N, 1) column, so flatten them and convert
# each entry to a plain int before swapping. Otherwise the list mixes Python
# ints (swapped classes) with 1-element arrays (untouched classes).
test_inverted_labels = np.array(
    [swap_class(int(label)) for label in test_labels.reshape(-1)],
    dtype='float32',
)

# The images already carry their channel axis (32, 32, 3), so they are passed
# as they are: adding a trailing axis would no longer match the image shape
# used by `test_dataset`.
test_inverted_dataset = tf.data.Dataset.from_tensor_slices((test_examples, test_inverted_labels))
test_inverted_dataset = test_inverted_dataset.batch(BATCH_SIZE)

In [None]:
# Evaluate on the original test labels first...
model.evaluate(test_dataset)
# ...then on the test set whose classes 4 and 6 have been swapped.
model.evaluate(test_inverted_dataset)

## Python Incremental solution

To generate the python version of the incremental solution, it's sufficient to call 

*TyBox.create_python_learning_solution(tf_model, mem_available, precision, representative_dataset)*

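where mem_available is the amount of memory (in bytes) that can be dedicated to machine learning on the device, and precision is the data precision of the model and activations in bits. The cell below requests 8-bit precision and therefore also passes a representative-dataset generator as the fourth argument, which is used for the integer-only quantization of the TFLite model.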


In [None]:
# Build the incremental-learning solution at 8-bit precision. The converter is
# given a representative dataset so that the TFLite model can be produced with
# integer-only quantization.
def representative_data_gen():
    """Yield 500 single-image batches taken from the training images."""
    sample_batches = tf.data.Dataset.from_tensor_slices(train_examples).batch(1).take(500)
    for image_batch in sample_batches:
        yield [image_batch]


yield_representative_dataset = representative_data_gen

# Arguments: model, available memory in bytes, precision in bits, dataset generator.
Mf_lite, Mc_python = TyBox.create_python_learning_solution(
    model,
    1250000,
    8,
    yield_representative_dataset,
)

### Feature extraction with tf_lite

The tf_lite model is saved and used to extract the latent features from the data

In [None]:
# Persist the converted feature extractor so the TFLite interpreter can load it.
with open("mf.tflite", "wb") as file:
    file.write(Mf_lite)

# tf-lite model preparation
interpreter = tf.lite.Interpreter('mf.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()[0]
print(input_details)
output_details = interpreter.get_output_details()[0]
print(output_details)

# Any integer input type (uint8 or int8) means the model expects quantized
# inputs; float inputs are fed unchanged.
input_dtype = input_details["dtype"]
input_is_quantized = np.issubdtype(input_dtype, np.integer)
if input_is_quantized:
    input_scale, input_zero_point = input_details["quantization"]
    input_limits = np.iinfo(input_dtype)

# feature extraction with the tf-lite model, one image at a time
test_feature = []
for example in test_examples:
    # Add the leading batch axis expected by the interpreter.
    input_data = example[np.newaxis, ...]

    if input_is_quantized:
        # Round to the nearest quantization level and clip to the representable
        # range; a bare cast would truncate values such as 254.99999 to 254.
        input_data = np.clip(
            np.round(input_data / input_scale + input_zero_point),
            input_limits.min,
            input_limits.max,
        )

    input_data = input_data.astype(input_dtype)
    interpreter.set_tensor(input_details["index"], input_data)
    interpreter.invoke()
    test_feature.append(interpreter.get_tensor(output_details['index']))

# Drop the per-sample batch axis: (n_samples, 1, n_features) -> (n_samples, n_features).
test_feature = np.array(test_feature)[:,0,:]
# print(test_feature.shape, test_feature.dtype)

In [14]:
# Generate the one-hot encoding of the original and the swapped test labels.
# Rows of the identity matrix are the one-hot vectors, so indexing it with the
# integer class ids encodes every label at once. The number of rows follows the
# number of labels instead of being hard-coded.
NUM_CLASSES = 10
one_hot_rows = np.eye(NUM_CLASSES)

# Labels are flattened first so that both (N,) and (N, 1) label arrays work.
categorical_test_labels_inverted = one_hot_rows[
    np.asarray(test_inverted_labels).reshape(-1).astype(int)
]
categorical_test_labels = one_hot_rows[
    np.asarray(test_labels).reshape(-1).astype(int)
]

### TyBox: python model usage

In [None]:
import random

# number of times the experiment will be repeated
n_repetitions = 5

# number of samples streamed to the model in each repetition
exp_l = 1200

# step at which the labels switch to the swapped classes (concept drift)
concept_drift_event_time = 100

# Saved model that is reloaded at the start of every repetition.
# NOTE(review): this is an absolute Kaggle path and differs from the
# "input_model" path loaded earlier in the notebook; confirm which one is intended.
MODEL_PATH = "/kaggle/input/7948kerassurgeon-prunedmodel/7948kerasSurgeon_prunedModel"

# Streamed samples are drawn from the first 5000 extracted test features;
# accuracy is measured on the fixed slice 5000:5200, which is never streamed.
STREAM_POOL_SIZE = 5000
eval_slice = slice(5000, 5200)


def representative_data_gen():
    """Yield 500 single-image batches taken from the training images."""
    for input_value in tf.data.Dataset.from_tensor_slices(train_examples).batch(1).take(500):
        yield [input_value]


# Defined once, outside the loop: the generator does not depend on the repetition.
yield_representative_dataset = representative_data_gen

accuracies_tb = []

for repetition in range(n_repetitions):
    # per-step accuracies of this repetition
    res_acc = []

    # reload the saved model and convert it, so every repetition starts fresh
    model = keras.models.load_model(MODEL_PATH)
    fe_model, python_model = TyBox.create_python_learning_solution(model, 1250000, 8, yield_representative_dataset)
    python_model.set_lr(0.001)

    # fix the seed for reproducibility (a different seed for each repetition)
    random.seed(395 + repetition*52)

    # sample without repetition from the streaming pool of extracted features
    indices = random.sample(range(STREAM_POOL_SIZE), exp_l)

    for step, sample in enumerate(indices):
        datum = test_feature[sample]

        # original labels before the drift event, swapped labels from then on
        if step < concept_drift_event_time:
            label_source = categorical_test_labels
        else:
            label_source = categorical_test_labels_inverted
        label = label_source[sample]
        test = label_source[eval_slice]

        # push the datum into the buffer and train on the whole buffer
        python_model.push_and_train(datum, label)

        # evaluate accuracy on the held-out slice
        accuracy = python_model.evaluate(test_feature[eval_slice], test, output_details["quantization"])
        res_acc.append(accuracy)

        print(step, accuracy)
    accuracies_tb.append(res_acc)

In [16]:
# Mean accuracy at each step, averaged across all repetitions.
# The number of steps is taken from the first repetition.
avg_accuracies_tb = [
    sum(run[t] for run in accuracies_tb) / len(accuracies_tb)
    for t in range(len(accuracies_tb[0]))
]

In [17]:
import math

# Population standard deviation of the accuracy at each step across the
# repetitions (the divisor is the number of repetitions).
std_dev_accuracies_tb = [
    math.sqrt(
        sum((step_mean - run[t]) ** 2 for run in accuracies_tb) / len(accuracies_tb)
    )
    for t, step_mean in enumerate(avg_accuracies_tb)
]

In [None]:
plt.figure(figsize=(10,7))

# Mean accuracy over the repetitions.
plt.plot(avg_accuracies_tb, label="toolbox")

# Band of +/- 2 standard deviations around the mean, clamped to [0, 1]
# on both sides (the original clamped only the upper bound).
std_low = [max(mean - 2*std, 0) for mean, std in zip(avg_accuracies_tb, std_dev_accuracies_tb)]
std_high = [min(mean + 2*std, 1) for mean, std in zip(avg_accuracies_tb, std_dev_accuracies_tb)]

# The x-axis follows the number of recorded steps, so it always matches the
# band lengths even if `exp_l` is changed without re-running the experiment.
time_steps = list(range(len(avg_accuracies_tb)))
plt.fill_between(time_steps, std_low, std_high, alpha=0.5)

plt.title("Abrupt concept drift")
plt.xlabel("Time")
plt.ylabel("Accuracy")
plt.legend(loc=4)