In [1]:
def add_to_class(Class):
    def wrapper(obj):
        setattr(Class, obj.__name__, obj)
    return wrapper

In [2]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt

import os
import time
import IPython.display as ipd

In [3]:
import sys
LIB_PATH = '/content/drive/MyDrive/GSC/GSC_helper'

sys.path.append(LIB_PATH)
#from BCResNet import BCResNet
from utils import unzipzip, zipzip
from GSC import download_GSC
from GSC12 import SpeechCommands12

## Download data

In [4]:
ZIP_MAP = download_GSC('https://drive.google.com/file/d/1-8quY_z264H0kgWqbrQPWQIvuAfioBLh/view?usp=drive_link',
                       'https://drive.google.com/file/d/1-B1vD4fz2kZR9It2xBmq1PJ0afHnaJVg/view?usp=drive_link',
                       'https://drive.google.com/file/d/1-CFA8mlCdT4lgnIuX4-S_Vs0ATtRBbSo/view?usp=drive_link',
                       '/content/GSC_12',
                       end = '.zip')
CSV_MAP = download_GSC('https://drive.google.com/file/d/1-DYeiWPis6npYe8Z22Sa38nB2RdokMWH/view?usp=drive_link',
                       'https://drive.google.com/file/d/1-FB8YPbdvL2Vrhur94nWOGHL2M7RwzW0/view?usp=drive_link',
                       'https://drive.google.com/file/d/1-H7ZkCrzEl9VxfdOuH5YmCNSC6YP8CFw/view?usp=drive_link',
                       '/content/GSC_12',
                       end = '.csv')

Downloading...
From (original): https://drive.google.com/uc?id=1-8quY_z264H0kgWqbrQPWQIvuAfioBLh
From (redirected): https://drive.google.com/uc?id=1-8quY_z264H0kgWqbrQPWQIvuAfioBLh&confirm=t&uuid=de8bcc82-2302-4460-98fa-57a24bb99afb
To: /content/GSC_12/train.zip
100%|██████████| 2.18G/2.18G [00:22<00:00, 98.5MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1-B1vD4fz2kZR9It2xBmq1PJ0afHnaJVg
From (redirected): https://drive.google.com/uc?id=1-B1vD4fz2kZR9It2xBmq1PJ0afHnaJVg&confirm=t&uuid=1ce1255c-4d69-4470-bb49-0c0a6282e9d1
To: /content/GSC_12/val.zip
100%|██████████| 64.6M/64.6M [00:00<00:00, 148MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1-CFA8mlCdT4lgnIuX4-S_Vs0ATtRBbSo
From (redirected): https://drive.google.com/uc?id=1-CFA8mlCdT4lgnIuX4-S_Vs0ATtRBbSo&confirm=t&uuid=d50669b2-309b-43c9-9df5-35b3ccb49cbd
To: /content/GSC_12/test.zip
100%|██████████| 65.7M/65.7M [00:00<00:00, 138MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-DYe

## Data generator

### Sample Code

In [6]:
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, batch_size = 32, dim = (32, 32, 32),
                 n_channels = 1, n_classes = 10, shuffle = True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes

    def __len__(self):
        'Denotes the numbber of batches per epoch'
        return int(np.floor(len(self.list_IDs)/self.batch_size))

    def __getitem__(self, index):
        'Generate on batch of data'
        # Generate indexes of the batcch
        indexes = self.indexes[index*self.batch_size: (index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Undates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X: (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype = int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            X[i,] = np.load('data/' + ID + '.npy')

            # Store class
            y[i] = self.labels[ID]

        return X, y

In [7]:
!pip install tensorflow-io

Collecting tensorflow-io
  Downloading tensorflow_io-0.36.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-io
Successfully installed tensorflow-io-0.36.0


In [8]:
import tensorflow_io as tfio

In [5]:
import pandas as pd

class SC_12(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self,
                 root: str,
                 zip_map: dict,
                 csv_map: dict,
                 unzip: bool = True,
                 subset: str = 'train',
                 batch_size: int = 32,
                 shuffle: bool = True):
        'Initialization'
        super().__init__()
        local_path = os.path.join(root, subset)
        self.root = root
        if not os.path.exists(local_path):
            os.mkdir(local_path)
            unzipzip(zip_map[subset], local_path)
        if unzip:
            unzipzip(zip_map[subset], local_path)
        self.csv = pd.read_csv(csv_map[subset])
        self.subset = subset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the numbber of batches per epoch'
        return int(np.floor(len(self.csv)/self.batch_size))

    def __getitem__(self, index):
        'Generate on batch of data'
        # Generate indexes of the batcch
        indexes = self.indexes[index*self.batch_size: (index+1)*self.batch_size]

        # Generate data
        X, y = self.__data_generation(indexes)

        return X, y

    def on_epoch_end(self):
        'Undates indexes after each epoch'
        self.indexes = np.arange(len(self.csv))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        'Generates data containing batch_size samples' # X: (n_samples, *dim, n_channels)
        # Initialization
        X = []
        y = []

        # Generate data
        for i in indexes:
            # Store sample
            row = self.csv.iloc[i]
            X.append(np.load(os.path.join(self.root, row['link']))['arr_0'])

            # Store class
            y.append(row['label'])
        X = np.stack(X, axis = 0)
        y = np.stack(y, axis = 0)
        return tf.convert_to_tensor(X)[..., tf.newaxis], tf.convert_to_tensor(y)

In [6]:
train_dataloader = SC_12('/content/GSC_12', ZIP_MAP, CSV_MAP, unzip = False, subset = 'train', batch_size = 100, shuffle = True)
val_dataloader = SC_12('/content/GSC_12', ZIP_MAP, CSV_MAP, unzip = False, subset = 'val', batch_size = 100, shuffle = False)
test_dataloader = SC_12('/content/GSC_12', ZIP_MAP, CSV_MAP, unzip = False, subset = 'test', batch_size = 10, shuffle = False)

Extracted /content/GSC_12/train.zip
Extracted /content/GSC_12/val.zip
Extracted /content/GSC_12/test.zip


## Toy model

In [7]:
model = keras.models.Sequential([
    keras.layers.Input(shape = (40, 101, 1)),
    keras.layers.Conv2D(32, 3, activation = 'relu'),
    keras.layers.Conv2D(64, 3, activation = 'relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Dropout(0.25),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation = 'relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(12)
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 38, 99, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 36, 97, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2  (None, 18, 48, 64)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 18, 48, 64)        0         
                                                                 
 flatten (Flatten)           (None, 55296)             0         
                                                                 
 dense (Dense)               (None, 128)               7078016   
                                                        

In [12]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

model.fit(train_dataloader, validation_data = val_dataloader, epochs = 5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7b81a8535c00>

In [14]:
model.evaluate(test_dataloader)



[0.4233097732067108, 0.8740286231040955]

## Test Convert to TFLite

In [9]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

model.fit(val_dataloader, validation_data = test_dataloader, epochs = 1)



<keras.src.callbacks.History at 0x79d113ca4f10>

In [15]:
from tqdm import tqdm

test_specs = []
test_labels = []

for i in tqdm(range(len(test_dataloader.csv))):
    row = test_dataloader.csv.iloc[i]
    spec = np.load(os.path.join('/content/GSC_12', row['link']))['arr_0']
    test_specs.append(spec)
    test_labels.append(row['label'])

test_specs = tf.convert_to_tensor(np.stack(test_specs, axis = 0))[..., tf.newaxis]
test_labels = tf.convert_to_tensor(np.stack(test_labels))

100%|██████████| 4890/4890 [00:03<00:00, 1553.52it/s]


In [17]:
def representative_data_gen():
    for input_value in tf.data.Dataset.from_tensor_slices(test_specs).batch(1).take(100):
        yield [input_value]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint 8 (APIs added in r2.3)
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model_quant = converter.convert()



In [18]:
import pathlib

tflite_models_dir = pathlib.Path("/tmp/mnist_tflite_models/")
tflite_models_dir.mkdir(exist_ok=True, parents=True)

# Save the dynamic range quantized model:
tflite_model_quant_file = tflite_models_dir/"mnist_model_quant.tflite"
tflite_model_quant_file.write_bytes(tflite_model_quant)

7105224

In [20]:
# Helper function to run inference on a TFLite model
def run_tflite_model(tflite_file, test_image_indices):
    global test_specs

    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path = str(tflite_file))
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    predictions = np.zeros((len(test_image_indices), ), dtype = int)
    for i, test_image_index in enumerate(test_image_indices):
        test_image = test_specs[test_image_index]

        # Check if the input type is quantized, the rescale input data to to uint8
        if input_details['dtype'] == np.uint8:
            input_scale, input_zero_point = input_details['quantization']
            test_image = test_image/input_scale + input_zero_point

        test_image = np.expand_dims(test_image, axis = 0).astype(input_details['dtype'])
        interpreter.set_tensor(input_details['index'], test_image)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details['index'])[0]

        predictions[i] = output.argmax()

    return predictions

# Helper function to evaluate a TFLite model on all images
def evaluate_model(tflite_file, model_type):
  global test_specs
  global test_labels

  test_image_indices = range(test_specs.shape[0])
  predictions = run_tflite_model(tflite_file, test_image_indices)

  accuracy = (np.sum(test_labels== predictions) * 100) / len(test_specs)

  print('%s model accuracy is %.4f%% (Number of test samples=%d)' % (
      model_type, accuracy, len(test_specs)))

In [21]:
evaluate_model(tflite_model_quant_file, model_type="Quantized")

Quantized model accuracy is 40.4908% (Number of test samples=4890)


In [26]:
interpreter = tf.lite.Interpreter(model_path = str(tflite_model_quant_file))
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
print(input_details)
print(output_details)

{'name': 'serving_default_input_1:0', 'index': 0, 'shape': array([  1,  40, 101,   1], dtype=int32), 'shape_signature': array([ -1,  40, 101,   1], dtype=int32), 'dtype': <class 'numpy.uint8'>, 'quantization': (0.5479903817176819, 182), 'quantization_parameters': {'scales': array([0.5479904], dtype=float32), 'zero_points': array([182], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}
{'name': 'StatefulPartitionedCall:0', 'index': 17, 'shape': array([ 1, 12], dtype=int32), 'shape_signature': array([-1, 12], dtype=int32), 'dtype': <class 'numpy.uint8'>, 'quantization': (0.060032136738300323, 131), 'quantization_parameters': {'scales': array([0.06003214], dtype=float32), 'zero_points': array([131], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}
