In [None]:
!git clone https://github.com/pavmassimo/TyBox/tree/feature-extractor-quantization.git

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import keras
import numpy as np
import pandas as pd
import os

import sys
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.append('.//TyBox')

from TyBox import TyBox
!apt-get -qq install xxd
import math

## Prepare data

### Cifar10

In [None]:
from keras.datasets import cifar10

BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 100

# Fetch the CIFAR-10 train/test split as (images, labels) pairs.
(x_train_cifar10, y_train_cifar10), (x_test_cifar10, y_test_cifar10) = cifar10.load_data()

# Convert the images to float32 and divide by 255; labels are left untouched
# (no one-hot encoding is applied here).
x_train_cifar10 = x_train_cifar10.astype('float32') / 255.0
x_test_cifar10 = x_test_cifar10.astype('float32') / 255.0

# Batched tf.data pipelines: the training stream is shuffled before batching,
# the test stream keeps its original order.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train_cifar10, y_train_cifar10))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test_cifar10, y_test_cifar10))
    .batch(BATCH_SIZE)
)

### Imagenette

In [None]:
# NOTE(review): this star import is the only visible source of `untar_data`, `URLs`,
# `get_image_files` and `parent_label`, which the notebook calls without importing them by name.
from fastai.data.all import *

# `path` is whatever untar_data returns for the Imagenette-160 URL; later cells join 'train' onto it.
path = untar_data(URLs.IMAGENETTE_160)
print(path)

In [None]:
# Maps each Imagenette label string to its class index, stored as a decimal string
# ('0' .. '9'). The index is simply the position of the label in the tuple below.
labels_dict = {
    wnid: str(class_id)
    for class_id, wnid in enumerate((
        'n01440764',
        'n02102040',
        'n02979186',
        'n03000684',
        'n03028079',
        'n03394916',
        'n03417042',
        'n03425413',
        'n03445777',
        'n03888257',
    ))
}

In [None]:
from os import listdir

# Location of the 'train' sub-directory inside the dataset root stored in `path`.
train_dir = os.path.join(path, 'train')

def get_labels(files):
    """Return the result of ``parent_label`` for every entry of ``files``, in order.

    ``parent_label`` is not defined in this notebook; it is presumably supplied by the
    fastai star import (confirm).
    """
    return [parent_label(image_path) for image_path in files]

# Enumerate the training images and derive one label string per file.
train_files = get_image_files(train_dir)
imagenette_labels = get_labels(train_files)

# Translate each label string into its integer class id via labels_dict.
y_train_imagenette = np.array(
    [int(labels_dict[label_name]) for label_name in imagenette_labels]
)

In [None]:
import cv2

# Load every Imagenette training image as a 32x32 float32 array scaled to [0, 1], i.e. with the
# same scaling that is applied to the CIFAR-10 arrays, so both datasets reach the feature
# extractor in the same value range.
x_train_imagenette = []
for image in train_files:
    im = cv2.imread(str(image))
    if im is None:
        # cv2.imread reports an unreadable file by returning None. Skipping the file would
        # misalign images and labels (both are indexed by position in train_files), so fail
        # with an explicit message instead.
        raise ValueError(f"Could not read image file: {image}")
    # OpenCV decodes colour images in BGR channel order; convert to RGB before resizing.
    rgb_im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    resized_im = cv2.resize(rgb_im, (32, 32))
    x_train_imagenette.append(resized_im.astype('float32') / 255.0)

## Load model

In [None]:
# Load the Keras model stored at the relative path 'input_model'. Nothing in the visible
# notebook creates it, so it must already exist in the working directory.
model = keras.models.load_model('input_model')

In [None]:
# Compile model
# Adam with a fixed learning rate and a sparse categorical cross-entropy loss, tracking accuracy.
# NOTE(review): despite its name, `pruning_lrate` is only used here as the optimizer learning rate.
pruning_lrate = 0.00135
adam = tf.keras.optimizers.Adam(learning_rate=pruning_lrate)

model.compile(loss=keras.losses.SparseCategoricalCrossentropy(), optimizer=adam, metrics=['accuracy'])

## TyBox transfer learning

In [None]:
# Create a compressible model for TFLite using integer-only quantization
def representative_data_gen():
    """Yield the first 500 CIFAR-10 training images, each as a one-element list holding a batch of size 1."""
    for input_value in tf.data.Dataset.from_tensor_slices(x_train_cifar10).batch(1).take(500):
        yield [input_value]
# The generator function itself (not a generator object) is what gets handed to TyBox.
yield_representative_dataset = representative_data_gen
# NOTE(review): the meaning of the positional arguments 2509645 and 8 is defined by TyBox and is
# not visible in this notebook — confirm against the TyBox API before changing them.
# The first returned value is written to 'mf.tflite' in the next cell.
Mf_lite, Mc_python = TyBox.create_python_learning_solution(model, 2509645, 8, yield_representative_dataset)

In [None]:
# Write the bytes returned by TyBox to disk so the TFLite interpreter can load them by file name.
with open("mf.tflite", "wb") as file:
    file.write(Mf_lite)

#tf-lite model preparation
interpreter = tf.lite.Interpreter('mf.tflite')
interpreter.allocate_tensors()
# Only the first input and the first output tensor descriptions are kept; later cells read
# their "dtype", "quantization" and "index" entries.
input_details = interpreter.get_input_details()[0]
print(input_details)
output_details = interpreter.get_output_details()[0]
print(output_details)

In [None]:
# Run every CIFAR-10 training image through the TFLite model and keep the raw output tensor.
# The input dtype and its quantization parameters do not change between samples, so they are
# read once instead of on every iteration.
input_dtype = input_details["dtype"]
# Any integer input type (uint8, int8, ...) needs the float image mapped through the tensor's
# scale / zero point; a float input is passed through unchanged.
input_is_quantized = np.issubdtype(input_dtype, np.integer)
if input_is_quantized:
    input_scale, input_zero_point = input_details["quantization"]

extracted_features = []
for i in range(len(y_train_cifar10)):
    input_data = x_train_cifar10[i].astype('float32').reshape((1, 32, 32, 3))

    if input_is_quantized:
        input_data = input_data / input_scale + input_zero_point
    input_data = input_data.astype(input_dtype)

    interpreter.set_tensor(input_details["index"], input_data)
    interpreter.invoke()
    out = interpreter.get_tensor(output_details["index"])
    extracted_features.append(out)
# Each collected output is indexed at position 0 of its first axis, leaving one row per image.
extracted_features = np.array(extracted_features)[:,0,:]

In [None]:
# Run every Imagenette training image through the TFLite model and keep the raw output tensor.
# The input dtype and its quantization parameters do not change between samples, so they are
# read once instead of on every iteration.
input_dtype = input_details["dtype"]
# Any integer input type (uint8, int8, ...) needs the float image mapped through the tensor's
# scale / zero point; a float input is passed through unchanged.
input_is_quantized = np.issubdtype(input_dtype, np.integer)
if input_is_quantized:
    input_scale, input_zero_point = input_details["quantization"]

extracted_imagenette_features = []
for i in range(len(y_train_imagenette)):
    input_data = x_train_imagenette[i].astype('float32').reshape((1, 32, 32, 3))

    if input_is_quantized:
        input_data = input_data / input_scale + input_zero_point
    input_data = input_data.astype(input_dtype)

    interpreter.set_tensor(input_details["index"], input_data)
    interpreter.invoke()
    out = interpreter.get_tensor(output_details["index"])
    extracted_imagenette_features.append(out)
# Each collected output is indexed at position 0 of its first axis, leaving one row per image.
extracted_imagenette_features = np.array(extracted_imagenette_features)[:,0,:]

In [None]:
# One-hot encode the integer class ids of both datasets. The number of rows is taken from the
# label arrays themselves rather than hard-coded, so the cell keeps working if a dataset size
# changes. Both datasets use 10 classes.
n_classes = 10

# np.ravel flattens the label array to one id per sample (the original loop indexed it row by row).
cifar10_ids = np.ravel(y_train_cifar10).astype(int)
categorical_cifar10_labels = np.zeros(shape=(len(cifar10_ids), n_classes))
categorical_cifar10_labels[np.arange(len(cifar10_ids)), cifar10_ids] = 1

imagenette_ids = np.ravel(y_train_imagenette).astype(int)
categorical_imagenette_labels = np.zeros((len(imagenette_ids), n_classes))
categorical_imagenette_labels[np.arange(len(imagenette_ids)), imagenette_ids] = 1

In [None]:
import random

#n of times the experiment will be repeated.
n_repetitions = 5

#n of data used in the experiment
exp_l = 700

# First row of the held-out evaluation slice of the Imagenette features; every row before it
# belongs to the pool that training samples are drawn from, so the two sets are adjacent and
# disjoint.
# NOTE(review): the slice is taken in file-listing order, so its class balance depends on how
# get_image_files orders the files — confirm it is representative.
test_start = 9269


# Create a compressible model for TFLite using integer-only quantization
def representative_data_gen():
    """Yield the first 500 CIFAR-10 training images, each as a one-element list holding a batch of size 1."""
    for input_value in tf.data.Dataset.from_tensor_slices(x_train_cifar10).batch(1).take(500):
        yield [input_value]
yield_representative_dataset = representative_data_gen

# repetitions_tb[r][s] is the accuracy measured in repetition r after training step s.
repetitions_tb = []
for repetition in range(n_repetitions):
    #fix seed for reproducibility
    # Seeding must happen before any random draw of this repetition; seeding after
    # random.sample would leave the sampled indices outside the seed's control.
    random.seed(395 + repetition*52)

    # A fresh learning solution per repetition, so repetitions do not share trained state.
    fe_model, python_model = TyBox.create_python_learning_solution(model, 2509645, 8, yield_representative_dataset)
    python_model.set_lr(0.0015)

    #sample without repetition from the training dataset
    indices = random.sample(range(test_start), exp_l)

    res_acc1 = []
    for step, sample in enumerate(indices):
        datum = extracted_imagenette_features[sample]
        label = categorical_imagenette_labels[sample]

        #push datum into buffer and train on the whole buffer
        python_model.push_and_train(datum, label)

        #evaluate accuracy on test set
        accuracy = python_model.evaluate(extracted_imagenette_features[test_start:],
                                         categorical_imagenette_labels[test_start:],
                                         output_details["quantization"])
        res_acc1.append(accuracy)
        print(step, accuracy)
    repetitions_tb.append(res_acc1)

## Aggregate, plot, and save experiment results

In [None]:
# Mean accuracy at every training step, averaged over all repetitions.
# The number of steps is taken from the first repetition.
avg_repetitions_tb = [
    sum(run[step_idx] for run in repetitions_tb) / len(repetitions_tb)
    for step_idx in range(len(repetitions_tb[0]))
]

In [None]:
# Standard deviation of the accuracy at every training step across repetitions:
# square root of the mean squared deviation from the per-step mean (divides by the
# number of repetitions, not by that number minus one).
std_dev_repetitions_tb = [
    math.sqrt(
        sum((step_mean - run[step_idx]) ** 2 for run in repetitions_tb) / len(repetitions_tb)
    )
    for step_idx, step_mean in enumerate(avg_repetitions_tb)
]

In [None]:
plt.figure(figsize=(10,7))

# x positions are derived from the plotted data, so the shaded band always has the same length
# as the curve even if exp_l was changed after the experiment cell ran.
steps = list(range(len(avg_repetitions_tb)))

plt.plot(avg_repetitions_tb, label="TyBox")
# Shaded band: mean +/- 2 standard deviations, clipped to the valid accuracy range [0, 1].
std_low = [max(mean - 2*std, 0) for mean, std in zip(avg_repetitions_tb, std_dev_repetitions_tb)]
std_high = [min(mean + 2*std, 1) for mean, std in zip(avg_repetitions_tb, std_dev_repetitions_tb)]
plt.fill_between(steps, std_low, std_high, alpha=0.5)

# plt.plot(avg_repetitions_tf, label="tensorflow")
# std_low = [avg_repetitions_tf[i] - 2*std_dev_repetitions_tf[i] for i in range(len(avg_repetitions_tf))]
# std_high = [min(avg_repetitions_tf[i] + 2*std_dev_repetitions_tf[i], 1) for i in range(len(avg_repetitions_tf))]
# plt.fill_between([i for i in range(500)], std_low, std_high, alpha=0.5)

plt.title("Transfer learning Imagenette")
plt.xlabel("Time")
plt.ylabel("Accuracy")
plt.legend(loc=4)
# plt.savefig('/content/drive/MyDrive/TyBox_experiments/Transfer_mnist/accuracy_192_1tf.png')

In [None]:
# Disabled: pickle the raw and aggregated results of the experiment.
# NOTE(review): this references repetitions_tf / avg_repetitions_tf / std_dev_repetitions_tf, which
# are not assigned anywhere in the visible notebook, and it writes to an absolute /content/drive
# path under "Transfer_mnist" although this notebook trains on Imagenette — confirm both before
# re-enabling.
# import pickle as pkl
# experiment = {
#     'repetitions_tb' : repetitions_tb,
#     'repetitions_tf' : repetitions_tf,
#     'avg_repetitions_tf' : avg_repetitions_tf,
#     'avg_repetitions_tb' : avg_repetitions_tb,
#     'std_dev_repetitions_tf' : std_dev_repetitions_tf,
#     'std_dev_repetitions_tb' : std_dev_repetitions_tb
# }

# with open('/content/drive/MyDrive/TyBox_experiments/Transfer_mnist/experiment_data_192.pickle', 'wb') as handle:
#     pkl.dump(experiment, handle, protocol=pkl.HIGHEST_PROTOCOL)