In [1]:
from typing import Tuple, List

import tensorflow as tf
from tensorflow import keras

import numpy as np

## SSN

In [2]:
class SubSpectralNorm(keras.Model):
    def __init__(self, num_features, spec_groups = 16, affine = 'Sub', batch = True):
        super().__init__()
        self.spec_groups = spec_groups
        self.affine_all = False
        if affine == 'Sub':
            affine_norm = True
        else:
            assert affine == 'Sub', "Haven't supported yet"

        if batch:
            self.ssnorm = keras.layers.BatchNormalization(axis = -1)
        else:
            self.ssnorm = keras.layers.InstanceNormalization(axis = [0, 1])

    def call(self, x):
        """
        x: (N, F, T, C)
        """
        b, h, w, c = x.shape
        x = tf.transpose(x, perm = [0, 3, 1, 2])
        #b, c, h, w = x.shape
        x = tf.reshape(x, shape = [b, c*self.spec_groups, h//self.spec_groups, w])
        x = tf.transpose(x, perm = [0, 2, 3, 1])
        x = self.ssnorm(x)
        x = tf.transpose(x, perm = [0, 3, 1, 2])
        x = tf.reshape(x, shape = [b, c, h, w])
        x = tf.transpose(x, perm = [0, 2, 3, 1])
        return x

In [None]:
x = np.random.rand(128, 40, 101, 3)
ssn = SubSpectralNorm(3, spec_groups = 5, affine = 'Sub')
ssn(x).shape

TensorShape([128, 40, 101, 3])

In [None]:
x_ = tf.transpose(x, perm = [0, 3, 1, 2])
x_.shape

TensorShape([100, 1, 40, 101])

## BCResNet

### BaseBlock

In [None]:
def get_padding(kernel_size: Tuple[int, int],
                dilation: int):
    kh, kw = kernel_size
    ph = (kh-1)*dilation//2
    pw = (kw-1)*dilation//2
    return (ph, pw)

In [None]:
x = np.random.rand(128, 40, 101, 3)
conv2d = keras.layers.Conv2D(filters = 3,
                             kernel_size = (1, 3),
                             strides = (1, 1),
                             padding = "same",
                             groups = 3,
                             dilation_rate = (1, 2),
                             use_bias = False)
conv2d(x).shape

TensorShape([128, 40, 101, 3])

In [None]:
x.mean(axis = 1, keepdims = True).shape

(128, 1, 101, 3)

In [3]:
class AdaptiveAvgPool2d(keras.Model):
    def __init__(self,
                 axis: list,
                 keepdims:bool = True) -> None:
        super().__init__()
        self.axis = axis
        self.keepdims = keepdims

    def call(self, input):
        return tf.reduce_mean(input, axis = self.axis, keepdims = self.keepdims)

In [None]:
adpavgpool = AdaptiveAvgPool2d(axis = [1, 2])
adpavgpool(x).shape

TensorShape([128, 1, 1, 3])

In [None]:
silu = keras.layers.Activation('swish')
silu(x)

In [4]:
class BaseBlock(keras.Model):
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Tuple[int, int],
                 stride: Tuple[int, int],
                 dilation: Tuple[int, int],
                 bias: bool = True,
                 ssn_kwargs: dict = None,
                 dropout: float = 0.1
                 ) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        f_kernel = (kernel_size[0], 1)
        t_kernel = (1, kernel_size[1])
        f_stride = (stride[0], 1)
        t_stride = (1, stride[1])
        f_dilation = (dilation[0], 1)
        t_dilation = (1, dilation[1])

        # Freq_conv
        f2 = []
        if in_channels != out_channels:
            f2.extend([keras.layers.Conv2D(filters = out_channels,
                                           kernel_size = 1,
                                           use_bias = bias),
                        keras.layers.BatchNormalization(axis = -1),
                        keras.layers.ReLU()])


        self.f2 = keras.Sequential([*f2,
                                keras.layers.Conv2D(filters = out_channels,
                                                    kernel_size = f_kernel,
                                                    strides = f_stride,
                                                    padding = 'same',
                                                    dilation_rate = f_dilation,
                                                    groups = out_channels,
                                                    use_bias = bias
                                          ),
                                SubSpectralNorm(out_channels, **ssn_kwargs)
                                if ssn_kwargs else keras.layers.BatchNormalization(axis = -1)])

        # Temporal_conv
        self.f1 = keras.Sequential([AdaptiveAvgPool2d(axis = 1, keepdims = True),
                                keras.layers.Conv2D(filters = out_channels,
                                                    kernel_size = t_kernel,
                                                    strides = t_stride,
                                                    dilation_rate = t_dilation,
                                                    padding = 'same',
                                                    groups = out_channels,
                                                    use_bias = bias,
                                                    activation = 'swish'),
                                keras.layers.BatchNormalization(axis = -1),
                                keras.layers.Activation("swish"),
                                keras.layers.Conv2D(filters = out_channels,
                                                    kernel_size = 1,
                                                    use_bias = bias),
                                keras.layers.Dropout(dropout)])
        self.relu = keras.layers.ReLU()

    def call(self,
             input: tf.Tensor) -> tf.Tensor:
        auxiliary_x = self.f2(input)
        output = self.f1(auxiliary_x)
        if self.in_channels == self.out_channels:
            output = output + auxiliary_x + input
        else:
            output = output + auxiliary_x
        return self.relu(output)

In [None]:
input = np.random.rand(128, 20, 101, 8)
bcblock = BaseBlock(8,
                    12,
                    kernel_size = (3, 3),
                    stride = (2, 1),
                    dilation = (1, 2),
                    bias = False,
                    ssn_kwargs = {
                        'spec_groups': 5
                    })
bcblock(input).shape

TensorShape([128, 10, 101, 12])

## BC-ResBlock

In [49]:
class BCResBlock(keras.Model):
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Tuple[int, int],
                 stride: Tuple[int, int],
                 bias: bool = True,
                 ssn_kwargs: dict = None,
                 dropout: float = 0.1,
                 num_blks: int = 1,
                 idx: int = 0
                 ) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_blks = num_blks

        blks = []
        for i in range(num_blks):
            blks.append(BaseBlock(in_channels if i==0 else out_channels,
                                  out_channels,
                                  kernel_size,
                                  stride if i == 0 else (1, 1),
                                  # dilation = (1, 2**idx),
                                  dilation = (1, 1),
                                  bias = False,
                                  ssn_kwargs = ssn_kwargs))
        self.blks = keras.Sequential(blks)

    def call(self,
                input: tf.Tensor) -> tf.Tensor:
        return self.blks(input)

In [None]:
input = np.random.rand(128, 20, 101, 8)
bcblock = BCResBlock(8,
                    12,
                    kernel_size = (3, 3),
                    stride = (2, 1),
                    bias = False,
                    ssn_kwargs = {
                        'spec_groups': 5
                    })
bcblock(input).shape

TensorShape([128, 10, 101, 12])

## BC-ResNet

In [50]:
class BCResNet(keras.Model):
    def __init__(self,
                 in_channels: int,
                 num_classes: int,
                 bias: bool = False,
                 mul_factor: int = 1) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.mul_factor = mul_factor
        self.net = keras.Sequential([keras.layers.Conv2D(filters = int(16*mul_factor),
                                                     kernel_size = 5,
                                                     padding = 'same',
                                                     strides = (2, 1),
                                                     use_bias = bias),
                                 keras.layers.BatchNormalization(axis = -1),
                                 keras.layers.ReLU(),
                                 BCResBlock(int(16*mul_factor),
                                            int(8*mul_factor),
                                            kernel_size = (3, 3),
                                            stride = (1, 1),
                                            bias = bias,
                                            #ssn_kwargs = {
                                            #    'spec_groups': 5
                                            #},
                                            num_blks = 2,
                                            idx = 0),
                                 BCResBlock(int(8*mul_factor),
                                            int(12*mul_factor),
                                            kernel_size = (3, 3),
                                            stride = (1, 1),
                                            bias = bias,
                                            #ssn_kwargs = {
                                            #    'spec_groups': 5
                                            #},
                                            num_blks = 2,
                                            idx = 1),
                                 BCResBlock(int(12*mul_factor),
                                            int(16*mul_factor),
                                            kernel_size = (3, 3),
                                            stride = (1, 1),
                                            bias = bias,
                                            #ssn_kwargs = {
                                            #    'spec_groups': 5
                                            #},
                                            num_blks = 4,
                                            idx = 2),
                                 BCResBlock(int(16*mul_factor),
                                            int(20*mul_factor),
                                            kernel_size = (3, 3),
                                            stride = (1, 1),
                                            bias = False,
                                            #ssn_kwargs = {
                                            #    'spec_groups': 5
                                            #},
                                            num_blks = 4,
                                            idx = 3),
                                 keras.layers.Conv2D(filters = int(20*mul_factor),
                                                     kernel_size = 5,
                                                     padding = 'same',
                                                     use_bias  = bias,
                                                     groups = int(20*mul_factor)),
                                 keras.layers.Conv2D(filters = int(32*mul_factor),
                                                     kernel_size = 1,
                                                     use_bias = bias),
                                 keras.layers.BatchNormalization(axis = -1),
                                 keras.layers.ReLU(),
                                 AdaptiveAvgPool2d([1, 2]),
                                 keras.layers.Conv2D(filters = num_classes,
                                           kernel_size = 1)]
                                 )
    def call(self, input: tf.Tensor) -> tf.Tensor:
        x = self.net(input)
        return tf.squeeze(x)

In [51]:
#input = np.random.rand(128, 40, 101, 1)
model = BCResNet(1,
                   12,
                   False,
                    3)
#model(input).shape

In [52]:
model.build(input_shape = (1, 40, 101, 1))
model.summary()

Model: "bc_res_net_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_115 (Sequential  (1, 1, 1, 12)             52584     
 )                                                               
                                                                 
Total params: 52584 (205.41 KB)
Trainable params: 49752 (194.34 KB)
Non-trainable params: 2832 (11.06 KB)
_________________________________________________________________


## Try to convert to TFLite

In [3]:
import sys

LIB_PATH = '/content/drive/MyDrive/GSC/GSC_helper'
sys.path.append(LIB_PATH)

from utils import unzipzip, zipzip
from GSC import download_GSC
from GSC12 import SpeechCommands12

In [4]:
ZIP_MAP = download_GSC('https://drive.google.com/file/d/1-8quY_z264H0kgWqbrQPWQIvuAfioBLh/view?usp=drive_link',
                       'https://drive.google.com/file/d/1-B1vD4fz2kZR9It2xBmq1PJ0afHnaJVg/view?usp=drive_link',
                       'https://drive.google.com/file/d/1-CFA8mlCdT4lgnIuX4-S_Vs0ATtRBbSo/view?usp=drive_link',
                       '/content/GSC_12',
                       end = '.zip')
CSV_MAP = download_GSC('https://drive.google.com/file/d/1-DYeiWPis6npYe8Z22Sa38nB2RdokMWH/view?usp=drive_link',
                       'https://drive.google.com/file/d/1-FB8YPbdvL2Vrhur94nWOGHL2M7RwzW0/view?usp=drive_link',
                       'https://drive.google.com/file/d/1-H7ZkCrzEl9VxfdOuH5YmCNSC6YP8CFw/view?usp=drive_link',
                       '/content/GSC_12',
                       end = '.csv')

Downloading...
From (original): https://drive.google.com/uc?id=1-8quY_z264H0kgWqbrQPWQIvuAfioBLh
From (redirected): https://drive.google.com/uc?id=1-8quY_z264H0kgWqbrQPWQIvuAfioBLh&confirm=t&uuid=d0737887-c2d6-4a01-86f8-11f858c9a14a
To: /content/GSC_12/train.zip
100%|██████████| 2.18G/2.18G [00:29<00:00, 74.8MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1-B1vD4fz2kZR9It2xBmq1PJ0afHnaJVg
From (redirected): https://drive.google.com/uc?id=1-B1vD4fz2kZR9It2xBmq1PJ0afHnaJVg&confirm=t&uuid=b5def961-6be0-4467-b715-39080b2b842b
To: /content/GSC_12/val.zip
100%|██████████| 64.6M/64.6M [00:00<00:00, 89.8MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1-CFA8mlCdT4lgnIuX4-S_Vs0ATtRBbSo
From (redirected): https://drive.google.com/uc?id=1-CFA8mlCdT4lgnIuX4-S_Vs0ATtRBbSo&confirm=t&uuid=c66eca0c-d8ab-421a-aa48-9e68a70bbc05
To: /content/GSC_12/test.zip
100%|██████████| 65.7M/65.7M [00:00<00:00, 123MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-DY

In [5]:
import pandas as pd
import os

class SC_12(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self,
                 root: str,
                 zip_map: dict,
                 csv_map: dict,
                 unzip: bool = True,
                 subset: str = 'train',
                 batch_size: int = 32,
                 shuffle: bool = True):
        'Initialization'
        super().__init__()
        local_path = os.path.join(root, subset)
        self.root = root
        if not os.path.exists(local_path):
            os.mkdir(local_path)
            unzipzip(zip_map[subset], local_path)
        if unzip:
            unzipzip(zip_map[subset], local_path)
        self.csv = pd.read_csv(csv_map[subset])
        self.subset = subset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the numbber of batches per epoch'
        return int(np.floor(len(self.csv)/self.batch_size))

    def __getitem__(self, index):
        'Generate on batch of data'
        # Generate indexes of the batcch
        indexes = self.indexes[index*self.batch_size: (index+1)*self.batch_size]

        # Generate data
        X, y = self.__data_generation(indexes)

        return X, y

    def on_epoch_end(self):
        'Undates indexes after each epoch'
        self.indexes = np.arange(len(self.csv))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        'Generates data containing batch_size samples' # X: (n_samples, *dim, n_channels)
        # Initialization
        X = []
        y = []

        # Generate data
        for i in indexes:
            # Store sample
            row = self.csv.iloc[i]
            X.append(np.load(os.path.join(self.root, row['link']))['arr_0'])

            # Store class
            y.append(row['label'])
        X = np.stack(X, axis = 0)
        y = np.stack(y, axis = 0)
        return tf.convert_to_tensor(X)[..., tf.newaxis], tf.convert_to_tensor(y)

In [6]:
train_dataloader = SC_12('/content/GSC_12', ZIP_MAP, CSV_MAP, unzip = False, subset = 'train', batch_size = 128, shuffle = True)
val_dataloader = SC_12('/content/GSC_12', ZIP_MAP, CSV_MAP, unzip = False, subset = 'val', batch_size = 128, shuffle = False)
test_dataloader = SC_12('/content/GSC_12', ZIP_MAP, CSV_MAP, unzip = False, subset = 'test', batch_size = 10, shuffle = False)

Extracted /content/GSC_12/train.zip
Extracted /content/GSC_12/val.zip
Extracted /content/GSC_12/test.zip


In [None]:
x, y = next(iter(val_dataloader))
x.shape

TensorShape([128, 40, 101, 1])

In [None]:
model(x).shape

TensorShape([100, 12])

In [None]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

model.fit(val_dataloader, validation_data = test_dataloader, epochs = 5)

In [53]:
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits = True)
optimizer = keras.optimizers.Adam()

In [54]:
train_loss = keras.metrics.Mean(name = 'train_loss')
train_acc = keras.metrics.SparseCategoricalAccuracy(name = 'train_acc')

test_loss = keras.metrics.Mean(name = 'test_loss')
test_acc = keras.metrics.SparseCategoricalAccuracy(name = 'test_acc')

In [55]:
@tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    # training=True is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(images, training=True)
    #loss_ = keras.losses.categorical_crossentropy(labels, predictions, from_logits = True)
    loss_ = loss_object(labels, predictions)
  gradients = tape.gradient(loss_, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(loss_)
  train_acc(labels, predictions)

@tf.function
def test_step(images, labels):
    # training = False is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout)
    predictions = model(images, training = False)
    #loss_ = keras.losses.categorical_crossentropy(labels, predictions, from_logits = True)
    loss_ = loss_object(labels, predictions)

    test_loss(loss_)
    test_acc(labels, predictions)

In [56]:
from tqdm import tqdm

EPOCHS = 1

for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_state()
    train_acc.reset_state()
    test_loss.reset_state()
    test_acc.reset_state()

    for images, labels in tqdm(train_dataloader):
        train_step(images, labels)

    for test_images, test_labels in tqdm(test_dataloader):
        test_step(test_images, test_labels)

    print(
        f'Epoch {epoch + 1}, '
        f'Loss: {train_loss.result():0.2f}, '
        f'Accuracy: {train_acc.result() * 100:0.2f}, '
        f'Test Loss: {test_loss.result():0.2f}, '
        f'Test Accuracy: {test_acc.result() * 100:0.2f}'
    )

100%|██████████| 1153/1153 [05:33<00:00,  3.46it/s]
100%|██████████| 489/489 [00:10<00:00, 44.74it/s]

Epoch 1, Loss: 1.18, Accuracy: 60.71, Test Loss: 1.33, Test Accuracy: 68.02





In [7]:
from tqdm import tqdm

test_specs = []
test_labels = []

for i in tqdm(range(len(test_dataloader.csv))):
    row = test_dataloader.csv.iloc[i]
    spec = np.load(os.path.join('/content/GSC_12', row['link']))['arr_0']
    test_specs.append(spec)
    test_labels.append(row['label'])

test_specs = tf.convert_to_tensor(np.stack(test_specs, axis = 0))[..., tf.newaxis]
test_labels = tf.convert_to_tensor(np.stack(test_labels))

100%|██████████| 4890/4890 [00:05<00:00, 977.22it/s] 


In [None]:
model.save('bcresnet3_tf.keras')

In [60]:
def representative_data_gen():
    for input_value in tf.data.Dataset.from_tensor_slices(test_specs).batch(1).take(100):
        yield [input_value]

#model._set_inputs(tf.random.uniform((1, 40, 101, 1)))
model.trainable = True
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint 8 (APIs added in r2.3)
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model_quant = converter.convert()



In [None]:
model.summary()

Model: "bc_res_net"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_28 (Sequential)  (1, 1, 1, 12)             61416     
                                                                 
Total params: 61416 (239.91 KB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 61416 (239.91 KB)
_________________________________________________________________


In [61]:
import pathlib

tflite_models_dir = pathlib.Path("/tmp/mnist_tflite_models/")
tflite_models_dir.mkdir(exist_ok=True, parents=True)

# Save the dynamic range quantized model:
tflite_model_quant_file = tflite_models_dir/"mnist_model_quant3.tflite"
tflite_model_quant_file.write_bytes(tflite_model_quant)

166904

In [62]:
import shutil
shutil.copy2(str(tflite_model_quant_file), '/content')

'/content/mnist_model_quant3.tflite'

In [44]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

In [45]:
tflite_model_file = tflite_models_dir/"gsc_model.tflite"
tflite_model_file.write_bytes(tflite_model)

254448

In [8]:
# Helper function to run inference on a TFLite model
def run_tflite_model(tflite_file, test_image_indices):
    global test_specs

    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path = str(tflite_file))
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    predictions = np.zeros((len(test_image_indices), ), dtype = int)
    for i, test_image_index in enumerate(test_image_indices):
        test_image = test_specs[test_image_index]

        # Check if the input type is quantized, the rescale input data to to uint8
        if input_details['dtype'] == np.uint8:
            input_scale, input_zero_point = input_details['quantization']
            test_image = test_image/input_scale + input_zero_point

        test_image = np.expand_dims(test_image, axis = 0).astype(input_details['dtype'])
        interpreter.set_tensor(input_details['index'], test_image)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details['index'])[0]

        predictions[i] = output.argmax()

    return predictions

# Helper function to evaluate a TFLite model on all images
def evaluate_model(tflite_file, model_type):
  global test_specs
  global test_labels

  test_image_indices = range(test_specs.shape[0])
  predictions = run_tflite_model(tflite_file, test_image_indices)

  accuracy = (np.sum(test_labels== predictions) * 100) / len(test_specs)

  print('%s model accuracy is %.4f%% (Number of test samples=%d)' % (
      model_type, accuracy, len(test_specs)))

In [9]:
evaluate_model('/content/mnist_model_quant (1).tflite', model_type="Quantized")

Quantized model accuracy is 8.3436% (Number of test samples=4890)


In [10]:
evaluate_model('/content/mnist_model_quant2.tflite', model_type="Quantized")

Quantized model accuracy is 8.3436% (Number of test samples=4890)


In [11]:
evaluate_model('/content/mnist_model_quant3.tflite', model_type="Quantized")

Quantized model accuracy is 8.3436% (Number of test samples=4890)


In [33]:
interpreter = tf.lite.Interpreter(model_path = str(tflite_model_quant_file))
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
print(input_details)
print(output_details)

{'name': 'serving_default_input_1:0', 'index': 0, 'shape': array([  1,  40, 101,   1], dtype=int32), 'shape_signature': array([ -1,  40, 101,   1], dtype=int32), 'dtype': <class 'numpy.uint8'>, 'quantization': (0.5479903817176819, 182), 'quantization_parameters': {'scales': array([0.5479904], dtype=float32), 'zero_points': array([182], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}
{'name': 'StatefulPartitionedCall:0', 'index': 293, 'shape': array([12], dtype=int32), 'shape_signature': array([12], dtype=int32), 'dtype': <class 'numpy.uint8'>, 'quantization': (0.1329231858253479, 128), 'quantization_parameters': {'scales': array([0.13292319], dtype=float32), 'zero_points': array([128], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}


In [34]:
evaluate_model(tflite_model_file, model_type="Float32")

Float32 model accuracy is 8.3436% (Number of test samples=4890)
