In [2]:
import tempfile
import os

import tensorflow as tf
import numpy as np

from tensorflow import keras
from tensorflow.keras import layers

import tensorflow_model_optimization as tfmot
from distiller import Distiller
#import datetime

%load_ext tensorboard

In [3]:
tf.random.set_seed(1234)

In [5]:
# Load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input image so that each pixel value is between 0 and 1.
train_images = train_images / 255.0
test_images = test_images / 255.0

# Define the model architecture.
model = keras.Sequential([
        keras.Input(shape=(28, 28, 1)),
        layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding="same"),
        layers.Conv2D(256, (3, 3), strides=(2, 2), padding="same"),
        layers.Flatten(),
        layers.Dense(100),
        layers.Dropout(0.5),
        layers.Dense(10),
    ],)

quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(keras.models.clone_model(model))

# Train the digit classification model
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

#logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
#tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_split=0.1,
)
#%tensorboard --logdir logs

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x260f2742580>

In [6]:
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

q_aware_model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_split=0.1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x260f09818e0>

In [35]:
q_aware_16_model = quantize_model(# Define the model architecture.
        keras.Sequential([
        keras.Input(shape=(28, 28, 1)),
        layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding="same"),
        layers.Conv2D(256, (3, 3), strides=(2, 2), padding="same"),
        layers.Flatten(),
        layers.Dense(100),
        layers.Dropout(0.5),
        layers.Dense(10),
    ],))

q_aware_16_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

q_aware_16_model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_split=0.1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2002e912520>

In [6]:
# Create the student
student = keras.Sequential(
    [
        keras.Input(shape=(28, 28, 1)),
        layers.Conv2D(16, (3, 3), strides=(2, 2), padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding="same"),
        layers.Conv2D(32, (3, 3), strides=(2, 2), padding="same"),
        layers.Flatten(),
        layers.Dense(10),
    ],
    name="student",
)

# Clone student for later comparison
student_scratch = keras.models.clone_model(student)
q_aware_student = quantize_model(keras.models.clone_model(student))
q_aware_16_student = quantize_model(keras.models.clone_model(student))

# Train student
student_scratch.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

# Train and evaluate student trained from scratch.
student_scratch.fit(train_images, train_labels, epochs=3)
student_scratch.evaluate(test_images, test_labels)

Epoch 1/3
Epoch 2/3
Epoch 3/3


[0.0810793787240982, 0.9751999974250793]

In [8]:
# Initialize and compile distiller
distiller = Distiller(student=student, teacher=model)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    student_loss_fn=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=40,
)

# Distill teacher to student
distiller.fit(train_images, train_labels, epochs=3)

# Evaluate student on test dataset
distiller.evaluate(test_images, test_labels)

Epoch 1/3
Epoch 2/3
Epoch 3/3


[0.9729999899864197, 0.012272889725863934]

In [9]:
#quantize -> distill
# Initialize and compile distiller
qd = Distiller(student=student, teacher=q_aware_model)
qd.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    student_loss_fn=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=40,
)

# Distill teacher to student
qd.fit(train_images, train_labels, epochs=3)

# Evaluate student on test dataset
qd.evaluate(test_images, test_labels)

Epoch 1/3
Epoch 2/3
Epoch 3/3


[0.9761000275611877, 0.004243148490786552]

In [10]:
# Initialize and compile distiller
dq = Distiller(student=q_aware_student, teacher=model)
dq.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    student_loss_fn=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=40,
)

# Distill teacher to student
dq.fit(train_images, train_labels, epochs=3)

# Evaluate student on test dataset
dq.evaluate(test_images, test_labels)

Epoch 1/3
Epoch 2/3
Epoch 3/3


[0.9754999876022339, 0.013444222509860992]

In [11]:
# Initialize and compile distiller
dq_16 = Distiller(student=q_aware_16_student, teacher=model)
dq_16.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    student_loss_fn=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=40,
)

# Distill teacher to student
dq_16.fit(train_images, train_labels, epochs=3)

# Evaluate student on test dataset
dq_16.evaluate(test_images, test_labels)

Epoch 1/3
Epoch 2/3
Epoch 3/3


[0.9775000214576721, 0.002722498495131731]

In [12]:
_, baseline_model_accuracy = model.evaluate(
    test_images, test_labels, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy)

_, keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('Saved baseline model to:', keras_file)

Baseline test accuracy: 0.977400004863739
Saved baseline model to: C:\Users\yeswe\AppData\Local\Temp\tmp05qxa71e.h5


In [13]:
_, q_aware_model_accuracy = q_aware_model.evaluate(
    test_images, test_labels, verbose=0)

print('Q Aware test accuracy:', q_aware_model_accuracy)

_, q_aware_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(q_aware_model, q_aware_keras_file, include_optimizer=False)
print('Saved Q Aware model to:', q_aware_keras_file)

Q Aware test accuracy: 0.9811000227928162
Saved Q Aware model to: C:\Users\yeswe\AppData\Local\Temp\tmpl61yrowz.h5


In [14]:
distiller_accuracy, _ = distiller.evaluate(
    test_images, test_labels, verbose=0)

print('Distiller test accuracy:', distiller_accuracy)

_, distiller_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(distiller.student, distiller_keras_file, include_optimizer=False)
print('Saved Distiller model to:', distiller_keras_file)

Distiller test accuracy: 0.9761000275611877
Saved Distiller model to: C:\Users\yeswe\AppData\Local\Temp\tmpvywapgq3.h5


In [15]:
qd_accuracy, _ = qd.evaluate(
    test_images, test_labels, verbose=0)

print('Quant -> Distill test accuracy:', qd_accuracy)

_, qd_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(qd.student, qd_keras_file, include_optimizer=False)
print('Saved Quant -> Distill model to:', qd_keras_file)

Quant -> Distill test accuracy: 0.9761000275611877
Saved Quant -> Distill model to: C:\Users\yeswe\AppData\Local\Temp\tmpcqs_9yps.h5


In [16]:
dq_accuracy, _ = dq.evaluate(
    test_images, test_labels, verbose=0)

print('Distill -> Quant test accuracy:', dq_accuracy)

_, dq_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(dq.student, dq_keras_file, include_optimizer=False)
print('Saved Distill -> Quant model to:', dq_keras_file)

Distill -> Quant test accuracy: 0.9754999876022339
Saved Distill -> Quant model to: C:\Users\yeswe\AppData\Local\Temp\tmpghqnzaf7.h5


In [17]:
def get_gzipped_model_size(file):
    # Returns size of gzipped model, in bytes.
    import os
    import zipfile

    _, zipped_file = tempfile.mkstemp('.zip')
    with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(file)

    return os.path.getsize(zipped_file)

In [18]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped Q Aware Keras model: %.2f bytes" % (get_gzipped_model_size(q_aware_keras_file)))
print("Size of gzipped Distiller Keras model: %.2f bytes" % (get_gzipped_model_size(distiller_keras_file)))
print("Size of gzipped Quant -> Distill Keras model: %.2f bytes" % (get_gzipped_model_size(qd_keras_file)))
print("Size of gzipped Distill -> Quant Keras model: %.2f bytes" % (get_gzipped_model_size(dq_keras_file)))

Size of gzipped baseline Keras model: 5758115.00 bytes
Size of gzipped Q Aware Keras model: 5762168.00 bytes
Size of gzipped Distiller Keras model: 78789.00 bytes
Size of gzipped Quant -> Distill Keras model: 78789.00 bytes
Size of gzipped Distill -> Quant Keras model: 80747.00 bytes


In [53]:
model_converter = tf.lite.TFLiteConverter.from_keras_model(model)
model_tflite_model = model_converter.convert()

q_aware_converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
q_aware_converter.optimizations = [tf.lite.Optimize.DEFAULT]
q_aware_tflite_model = q_aware_converter.convert()

q8_converter = tf.lite.TFLiteConverter.from_keras_model(model)
q8_converter.optimizations = [tf.lite.Optimize.DEFAULT]
q8_tflite_model = q8_converter.convert()

q16_converter = tf.lite.TFLiteConverter.from_keras_model(model)#q_aware_16_model)
q16_converter.optimizations = [tf.lite.Optimize.DEFAULT]
q16_converter.target_spec.supported_types = [tf.float16]
q16_tflite_model = q16_converter.convert()

distiller_converter = tf.lite.TFLiteConverter.from_keras_model(distiller.student)
#distiller_converter.optimizations = [tf.lite.Optimize.DEFAULT]
distiller_tflite_model = distiller_converter.convert()

qd_converter = tf.lite.TFLiteConverter.from_keras_model(qd.student)
#qd_converter.optimizations = [tf.lite.Optimize.DEFAULT]
qd_tflite_model = qd_converter.convert()

dq_converter = tf.lite.TFLiteConverter.from_keras_model(dq.student)
dq_converter.optimizations = [tf.lite.Optimize.DEFAULT]
dq_tflite_model = dq_converter.convert()

dq_8_converter = tf.lite.TFLiteConverter.from_keras_model(distiller.student)#dq_16.student)
dq_8_converter.optimizations = [tf.lite.Optimize.DEFAULT]
dq_8_tflite_model = dq_8_converter.convert()

dq_16_converter = tf.lite.TFLiteConverter.from_keras_model(distiller.student)#dq_16.student)
dq_16_converter.optimizations = [tf.lite.Optimize.DEFAULT]
dq_16_converter.target_spec.supported_types = [tf.float16]
dq_16_tflite_model = dq_16_converter.convert()


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmpmysu49u0\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmpmysu49u0\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmp1xdn7gus\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmp1xdn7gus\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmps_170lid\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmps_170lid\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmp3dw9xsjv\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmp3dw9xsjv\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmpsz5fuf75\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmpsz5fuf75\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmp2jeq7xys\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmp2jeq7xys\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmpc9t7nw3j\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmpc9t7nw3j\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmpw_332k7k\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmpw_332k7k\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmp1ndw_ote\assets


INFO:tensorflow:Assets written to: C:\Users\yeswe\AppData\Local\Temp\tmp1ndw_ote\assets


In [54]:
# Measure sizes of models.
_, model_file = tempfile.mkstemp('.tflite')
_, quant_file = tempfile.mkstemp('.tflite')
_, q16_file = tempfile.mkstemp('.tflite')
_, q8_file = tempfile.mkstemp('.tflite')
_, distiller_file = tempfile.mkstemp('.tflite')
_, qd_file = tempfile.mkstemp('.tflite')
_, dq_file = tempfile.mkstemp('.tflite')
_, dq_8_file = tempfile.mkstemp('.tflite')
_, dq_16_file = tempfile.mkstemp('.tflite')

In [55]:
with open(quant_file, 'wb') as f:
  f.write(q_aware_tflite_model)

with open(q8_file, 'wb') as f:
  f.write(q8_tflite_model)

with open(q16_file, 'wb') as f:
  f.write(q16_tflite_model)

with open(model_file, 'wb') as f:
  f.write(model_tflite_model)

with open(distiller_file, 'wb') as f:
  f.write(distiller_tflite_model)

with open(qd_file, 'wb') as f:
  f.write(qd_tflite_model)

with open(dq_file, 'wb') as f:
  f.write(dq_tflite_model)

with open(dq_8_file, 'wb') as f:
  f.write(dq_8_tflite_model)

with open(dq_16_file, 'wb') as f:
  f.write(dq_16_tflite_model)

In [56]:
print("Original model in Mb:", os.path.getsize(model_file) / float(2**20))
print("Q Aware model in Mb:", os.path.getsize(quant_file) / float(2**20))
print("Quantized 8 model in Mb:", os.path.getsize(q8_file) / float(2**20))
print("Quantized 16 model in Mb:", os.path.getsize(q16_file) / float(2**20))
print("Distiller model in Mb:", os.path.getsize(distiller_file) / float(2**20))
print("Quant -> Distill model in Mb:", os.path.getsize(qd_file) / float(2**20))
print("Distill -> Quant model in Mb:", os.path.getsize(dq_file) / float(2**20))
print("Distill -> Quant 8 model in Mb:", os.path.getsize(dq_8_file) / float(2**20))
print("Distill -> Quant 16 model in Mb:", os.path.getsize(dq_16_file) / float(2**20))

Original model in Mb: 5.9225311279296875
Q Aware model in Mb: 1.4943008422851562
Quantized 8 model in Mb: 1.494232177734375
Quantized 16 model in Mb: 2.9636688232421875
Distiller model in Mb: 0.08052825927734375
Quant -> Distill model in Mb: 0.08052825927734375
Distill -> Quant model in Mb: 0.024322509765625
Distill -> Quant 8 model in Mb: 0.0233306884765625
Distill -> Quant 16 model in Mb: 0.0423583984375


In [57]:
print('original accuracy: ' + str(baseline_model_accuracy))
print('q aware accuracy: ' + str(q_aware_model_accuracy))
print('distiller accuracy: ' + str(distiller_accuracy))
print('qd accuracy: ' + str(qd_accuracy))
print('dq accuracy: ' + str(dq_accuracy))

original accuracy: 0.977400004863739
q aware accuracy: 0.9811000227928162
distiller accuracy: 0.9761
qd accuracy: 0.9761
dq accuracy: 0.9755


In [63]:
# A helper function to evaluate the TF Lite model using "test" dataset.
def evaluate_model(interpreter):
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    # Run predictions on every image in the "test" dataset.
    prediction_digits = []
    for i, test_image in enumerate(test_images):
        if i%100 == 0:
            print('got to ' + str(i))
        # Pre-processing: add batch dimension and convert to float32 to match with
        # the model's input data format.
        test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, test_image.reshape(1, 28, 28, 1))

        # Run inference.
        interpreter.invoke()

        # Post-processing: remove batch dimension and find the digit with highest
        # probability.
        output = interpreter.tensor(output_index)
        digit = np.argmax(output()[0])
        prediction_digits.append(digit)
        
    # Compare prediction results with ground truth labels to calculate accuracy.
    accurate_count = 0
    for index in range(len(prediction_digits)):
        if prediction_digits[index] == test_labels[index]:
            accurate_count += 1
    accuracy = accurate_count * 1.0 / len(prediction_digits)

    return accuracy

In [59]:
original_interpreter = tf.lite.Interpreter(model_content=model_tflite_model)
original_interpreter.allocate_tensors()

q_aware_interpreter = tf.lite.Interpreter(model_content=q_aware_tflite_model)
q_aware_interpreter.allocate_tensors()

q8_interpreter = tf.lite.Interpreter(model_content=q8_tflite_model)
q8_interpreter.allocate_tensors()

q16_interpreter = tf.lite.Interpreter(model_content=q16_tflite_model)
q16_interpreter.allocate_tensors()

distiller_interpreter = tf.lite.Interpreter(model_content=distiller_tflite_model)
distiller_interpreter.allocate_tensors()

qd_interpreter = tf.lite.Interpreter(model_content=qd_tflite_model)
qd_interpreter.allocate_tensors()

dq_interpreter = tf.lite.Interpreter(model_content=dq_tflite_model)
dq_interpreter.allocate_tensors()

dq_8_interpreter = tf.lite.Interpreter(model_content=dq_8_tflite_model)
dq_8_interpreter.allocate_tensors()

dq_16_interpreter = tf.lite.Interpreter(model_content=dq_16_tflite_model)
dq_16_interpreter.allocate_tensors()

In [64]:
q_aware_accuracy = evaluate_model(q_aware_interpreter)

got to 0


KeyboardInterrupt: 

In [68]:
print('evaluating q8...')
q8_accuracy = evaluate_model(q8_interpreter)

evaluating q8...
got to 0
got to 100
got to 200
got to 300
got to 400
got to 500
got to 600
got to 700
got to 800
got to 900
got to 1000
got to 1100
got to 1200
got to 1300
got to 1400
got to 1500
got to 1600
got to 1700
got to 1800
got to 1900
got to 2000
got to 2100
got to 2200
got to 2300
got to 2400
got to 2500
got to 2600
got to 2700
got to 2800
got to 2900
got to 3000
got to 3100
got to 3200
got to 3300
got to 3400
got to 3500
got to 3600
got to 3700
got to 3800
got to 3900
got to 4000
got to 4100
got to 4200
got to 4300
got to 4400
got to 4500
got to 4600
got to 4700
got to 4800
got to 4900
got to 5000
got to 5100
got to 5200
got to 5300
got to 5400
got to 5500
got to 5600
got to 5700
got to 5800
got to 5900
got to 6000
got to 6100
got to 6200
got to 6300
got to 6400
got to 6500
got to 6600
got to 6700
got to 6800
got to 6900
got to 7000
got to 7100
got to 7200
got to 7300
got to 7400
got to 7500
got to 7600
got to 7700
got to 7800
got to 7900
got to 8000
got to 8100
got to 8200

In [66]:
print('evaluating original...')
original_accuracy = evaluate_model(original_interpreter)
#print('evaluating q aware...')
#q_aware_accuracy = evaluate_model(q_aware_interpreter)
#print('evaluating q8...')
#q8_accuracy = evaluate_model(q8_interpreter)
print('evaluating q16...')
q16_accuracy = evaluate_model(q16_interpreter)
print('evaluating distilled...')
distiller_accuracy = evaluate_model(distiller_interpreter)
print('evaluating qd...')
qd_accuracy = evaluate_model(qd_interpreter)
print('evaluating dq...')
dq_accuracy = evaluate_model(dq_interpreter)
print('evaluating dq 8...')
dq_8_accuracy = evaluate_model(dq_8_interpreter)
print('evaluating dq 16...')
dq_16_accuracy = evaluate_model(dq_16_interpreter)

evaluating original...
got to 0
got to 100
got to 200
got to 300
got to 400
got to 500
got to 600
got to 700
got to 800
got to 900
got to 1000
got to 1100
got to 1200
got to 1300
got to 1400
got to 1500
got to 1600
got to 1700
got to 1800
got to 1900
got to 2000
got to 2100
got to 2200
got to 2300
got to 2400
got to 2500
got to 2600
got to 2700
got to 2800
got to 2900
got to 3000
got to 3100
got to 3200
got to 3300
got to 3400
got to 3500
got to 3600
got to 3700
got to 3800
got to 3900
got to 4000
got to 4100
got to 4200
got to 4300
got to 4400
got to 4500
got to 4600
got to 4700
got to 4800
got to 4900
got to 5000
got to 5100
got to 5200
got to 5300
got to 5400
got to 5500
got to 5600
got to 5700
got to 5800
got to 5900
got to 6000
got to 6100
got to 6200
got to 6300
got to 6400
got to 6500
got to 6600
got to 6700
got to 6800
got to 6900
got to 7000
got to 7100
got to 7200
got to 7300
got to 7400
got to 7500
got to 7600
got to 7700
got to 7800
got to 7900
got to 8000
got to 8100
got t

got to 8700
got to 8800
got to 8900
got to 9000
got to 9100
got to 9200
got to 9300
got to 9400
got to 9500
got to 9600
got to 9700
got to 9800
got to 9900


In [69]:
print('original accuracy ', original_accuracy)
print('q aware accuracy ', q_aware_accuracy)
print('q8 accuracy ', q8_accuracy)
print('q16 accuracy ', q16_accuracy)
print('distiller accuracy ', distiller_accuracy)
print('qd accuracy ', qd_accuracy)
print('dq accuracy ', dq_accuracy)
print('dq 8 accuracy ', dq_8_accuracy)
print('dq 16 accuracy ', dq_16_accuracy)


original accuracy  0.9774
q aware accuracy  0.9812
q8 accuracy  0.9775
q16 accuracy  0.9774
distiller accuracy  0.9761
qd accuracy  0.9761
dq accuracy  0.9755
dq 8 accuracy  0.9762
dq 16 accuracy  0.9761
