[GPU supported operations](https://www.tensorflow.org/lite/performance/gpu)

In [None]:
!pip install tensorflow==2.10.0
!pip install coremltools
!pip install pytablewriter

In [None]:
#@markdown libs

import os
# stop tf warning logs
os.environ["KMP_SETTINGS"] = "false"
import coremltools as ct 

from google.colab import files

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import pickle
from google.colab import files

import numpy as np
from shutil import copyfile
from numpy.linalg import norm

import time

import json
from glob import glob

import sys
import time

import matplotlib.pyplot as plt
import random
from os.path import join
import json

from math import ceil

from datetime import datetime
import gdown


plt.rcParams['figure.figsize'] = [25, 4]

In [None]:
#@markdown funcs

def get_tflite_from_keras(keras_model, tflite_name='model.tflite', optimize_default=False):
    """Export a Keras model to TFLite, write it to disk and load it back.

    The model is first saved as a SavedModel in the ``model_di`` directory,
    converted with ``tf.lite.TFLiteConverter`` and written to ``tflite_name``.
    The input and output details of the resulting interpreter are printed.

    Args:
        keras_model: Keras model to export.
        tflite_name: Path of the ``.tflite`` file to write.
        optimize_default: When equal to ``True``, the converter uses
            ``tf.lite.Optimize.DEFAULT``.

    Returns:
        A ``tf.lite.Interpreter`` loaded from ``tflite_name`` with its tensors
        already allocated.
    """
    saved_model_dir = 'model_di'
    keras_model.save(saved_model_dir)

    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    if optimize_default == True:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_bytes = converter.convert()

    with open(tflite_name, 'wb') as tflite_file:
        tflite_file.write(tflite_bytes)

    tflite_interpreter = tf.lite.Interpreter(model_path=tflite_name)
    tflite_interpreter.allocate_tensors()
    print(tflite_interpreter.get_input_details())
    print(tflite_interpreter.get_output_details())
    return tflite_interpreter

def convertKeras2MLModel(kerasModel):
    """Convert a Keras model to Core ML and save it as ``<fileName>.mlmodel``.

    Args:
        kerasModel: Keras model exposing a ``fileName`` attribute, which is
            used as the stem of the saved file.

    Returns:
        The string form of the converted model's ``input_description`` with
        the leading ``'Features('`` and the last character stripped.
    """
    mlmodel = ct.convert(kerasModel)
    mlmodel.save(kerasModel.fileName+'.mlmodel')
    description = str(mlmodel.input_description)
    prefix_length = len('Features(')
    return description[prefix_length:-1]

In [None]:
#@title Models
from tensorflow.keras.layers import Dense, Conv2D, LSTM, MultiHeadAttention, LayerNormalization

#@markdown DenseModel

class DenseModel(keras.Model):
    """A stack of ``n_layers`` bias-free ReLU ``Dense`` layers of ``n_units`` each.

    ``fileName`` holds the stem used when the model is exported.
    """

    def __init__(self, n_units=512, n_layers=10):
        super().__init__()
        dense_stack = []
        for _ in range(n_layers):
            dense_stack.append(Dense(units=n_units, activation='relu', use_bias=False))
        self.ffn = keras.Sequential(dense_stack)
        self.fileName = f'DenseModel_units_{n_units}_layers_{n_layers}_'

    def call(self, inputs):
        """Run ``inputs`` through the dense stack."""
        return self.ffn(inputs)

#@markdown ConvModel

class ConvModel(keras.Model):
    """A stack of ``n_layers`` bias-free ReLU ``Conv2D`` layers with "same" padding.

    ``fileName`` holds the stem used when the model is exported.
    """

    def __init__(self, filters=32, n_layers=10, kernel_size=3):
        super().__init__()
        conv_stack = []
        for _ in range(n_layers):
            conv_stack.append(
                Conv2D(
                    filters=filters,
                    kernel_size=kernel_size,
                    activation='relu',
                    use_bias=False,
                    padding="same",
                )
            )
        self.seq = keras.Sequential(conv_stack)
        self.fileName = f'Conv2D_filters_{filters}_kernel_{kernel_size}_layers_{n_layers}_'

    def call(self, inputs):
        """Run ``inputs`` through the convolution stack."""
        return self.seq(inputs)


#@markdown Transformer


class TransformerDecoderLayer(tf.keras.layers.Layer):
    """Decoder block: self-attention, attention over the encoder output, then a feed-forward net.

    Args:
        embed_dim: Output width of the feed-forward network.
        num_heads: Number of heads for both attention layers.
        feed_forward_dim: Hidden width of the feed-forward network.
        key_dim: Per-head key size for both attention layers.
    """

    def __init__(self, embed_dim=512,
                 num_heads=6, feed_forward_dim=2048,
                 key_dim=64):
        super().__init__()
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = layers.LayerNormalization(epsilon=1e-6)
        self.self_att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)
        self.enc_att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)
        feed_forward_layers = [
            layers.Dense(feed_forward_dim, activation='relu', use_bias=False),
            layers.Dense(embed_dim, use_bias=False),
        ]
        self.ffn = keras.Sequential(feed_forward_layers)

    def call(self, enc_out, target):
        """Apply the block to ``target`` while attending over ``enc_out``."""
        # Self-attention on the normalised target, added back onto the raw target.
        normed_target = self.layernorm1(target)
        self_attended = self.self_att(normed_target, normed_target)
        query = self.layernorm2(target + self_attended)

        # The normalised target queries the encoder output.
        cross_attended = self.enc_att(query, enc_out)
        normed_cross = self.layernorm3(cross_attended + query)

        # Feed-forward network with a residual connection (no final normalisation).
        feed_forward_out = self.ffn(normed_cross)
        return normed_cross + feed_forward_out
        
class TransformerEncoderLayer(tf.keras.layers.Layer):
    """Post-norm Transformer encoder block: self-attention followed by a feed-forward net.

    Args:
        embed_dim: Output width of the feed-forward network.
        num_heads: Number of attention heads.
        feed_forward_dim: Hidden width of the feed-forward network.
        key_dim: Per-head key size of the attention layer.
    """

    def __init__(self, embed_dim=512,
                 num_heads=6, feed_forward_dim=2048,
                 key_dim=64):
        super(TransformerEncoderLayer, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads,
                                      key_dim=key_dim)
        self.ffn = keras.Sequential(
            [
                Dense(feed_forward_dim, activation='relu', use_bias=False),
                Dense(embed_dim, use_bias=False),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Apply self-attention and the feed-forward net, each with residual + layer norm."""
        attn_output = self.att(inputs, inputs)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        # The second residual must start from the attention sub-layer output
        # (`out1`). Adding the raw `inputs` here would bypass the attention
        # result in the skip connection.
        output = self.layernorm2(out1 + ffn_output)
        return output

class TransformerEncoder(keras.Model):
    """``n_layers`` identical ``TransformerEncoderLayer`` blocks applied in sequence."""

    def __init__(self, n_layers=8, embed_dim=512,
                 num_heads=6, feed_forward_dim=2048,
                 key_dim=64):
        super().__init__()

        encoder_blocks = []
        for _ in range(n_layers):
            encoder_blocks.append(
                TransformerEncoderLayer(
                    embed_dim=embed_dim,
                    num_heads=num_heads,
                    feed_forward_dim=feed_forward_dim,
                    key_dim=key_dim,
                )
            )
        self.encoder = keras.Sequential(encoder_blocks)

    def call(self, inputs):
        """Run ``inputs`` through every encoder block."""
        return self.encoder(inputs)

class Transformer(keras.Model):
    """Encoder/decoder Transformer followed by a ``Dense`` classifier.

    The same tensor is fed to the encoder and used as the decoder target.
    Decoder layers are stored as attributes named ``dec_layer_<i>``.
    ``fileName`` holds the stem used when the model is exported.
    """

    def __init__(
        self,
        embed_dim=512,
        num_heads=6,
        feed_forward_dim=2048,
        key_dim=64,
        num_layers_enc=1,
        num_layers_dec=1,
        num_classes=2,
    ):
        super().__init__()
        self.num_layers_dec = num_layers_dec
        self.encoder = TransformerEncoder(
            n_layers=num_layers_enc,
            embed_dim=embed_dim,
            num_heads=num_heads,
            feed_forward_dim=feed_forward_dim,
            key_dim=key_dim,
        )

        for layer_index in range(num_layers_dec):
            decoder_layer = TransformerDecoderLayer(
                embed_dim=embed_dim,
                num_heads=num_heads,
                feed_forward_dim=feed_forward_dim,
                key_dim=key_dim,
            )
            setattr(self, f"dec_layer_{layer_index}", decoder_layer)

        self.classifier = layers.Dense(num_classes)
        self.fileName = f'Transformer_embedDim_{embed_dim}_nHeads_{num_heads}_ffn_{feed_forward_dim}_keyDim_{key_dim}_layers_{num_layers_enc}_deLayers_{num_layers_dec}_'

    def decode(self, enc_out, target):
        """Pass ``target`` through every decoder layer, each attending over ``enc_out``."""
        for layer_index in range(self.num_layers_dec):
            decoder_layer = getattr(self, f"dec_layer_{layer_index}")
            target = decoder_layer(enc_out, target)
        return target

    def call(self, inputs):
        """Encode ``inputs``, decode with ``inputs`` as the target, then classify."""
        encoded = self.encoder(inputs)
        decoded = self.decode(encoded, inputs)
        return self.classifier(decoded)


#@markdown Mel spectrogram

from librosa.filters import mel as mel_filter
from librosa.util import pad_center
from scipy.signal import get_window

def get_forward_basis(filter_length=2048, window='hann'):
    """Build the windowed DFT basis used as a ``conv2d`` kernel to compute an STFT.

    The real and imaginary parts of the first ``filter_length / 2 + 1`` DFT
    rows are stacked, multiplied by the analysis window and rearranged into a
    4-D kernel.

    Args:
        filter_length: FFT size; also used as the window length.
        window: Window specification passed to ``scipy.signal.get_window``.

    Returns:
        A float32 ``tf.Tensor`` of shape
        ``(1, filter_length, 1, 2 * (filter_length // 2 + 1))``.
    """
    win_length = filter_length
    fourier_basis = np.fft.fft(np.eye(filter_length))

    # Keep only the non-redundant half of the spectrum: real rows first, then
    # the matching imaginary rows.
    cutoff = int((filter_length / 2 + 1))
    fourier_basis = np.vstack([np.real(fourier_basis[:cutoff, :]),
                               np.imag(fourier_basis[:cutoff, :])])

    forward_basis = fourier_basis[:, None, :]

    fft_window = get_window(window, win_length, fftbins=True)
    # `size` is keyword-only in librosa >= 0.10; the keyword form also works
    # with older releases.
    fft_window = pad_center(fft_window, size=filter_length)
    forward_basis *= fft_window
    forward_basis = forward_basis.T.astype(np.float32)
    forward_basis = np.expand_dims(forward_basis, 0)
    forward_basis = tf.convert_to_tensor(forward_basis)

    return forward_basis

def get_mel_filter(sample_rate=16000, filter_length=2048, n_mels=229, mel_fmin=30, mel_fmax=8000, htk=True):
    """Create a mel filter bank with librosa and return it as a ``tf.Tensor``.

    Args:
        sample_rate: Sampling rate passed to librosa as ``sr``.
        filter_length: FFT size passed to librosa as ``n_fft``.
        n_mels: Number of mel bands.
        mel_fmin: Lowest filter frequency passed as ``fmin``.
        mel_fmax: Highest filter frequency passed as ``fmax``.
        htk: Forwarded to librosa's ``htk`` option.

    Returns:
        The filter bank produced by ``librosa.filters.mel`` converted with
        ``tf.convert_to_tensor``.
    """
    # librosa >= 0.10 accepts these arguments by keyword only; keywords are
    # also valid for older releases, so this call works with both.
    mel_basis = mel_filter(
        sr=sample_rate,
        n_fft=filter_length,
        n_mels=n_mels,
        fmin=mel_fmin,
        fmax=mel_fmax,
        htk=htk,
    )
    mel_basis = tf.convert_to_tensor(mel_basis)

    return mel_basis

class mel_tf_for_tflite(tf.keras.Model):
    """Log-mel spectrogram computed with a strided ``conv2d`` STFT.

    Args:
        filters: STFT kernel; its second dimension is taken as the window size.
        mel_basis: Mel filter bank multiplied with the STFT magnitude.
        hop_size: Stride of the convolution.
        num_samples: Number of input samples; used to derive the frame count
            and the duration (at 16000 samples per second) in ``fileName``.
    """

    def __init__(self, filters, mel_basis, hop_size=512, num_samples=1535):
        super().__init__()
        self.filters = filters
        self.mel_basis = mel_basis
        self.hop_size = hop_size
        self.num_samples = num_samples
        self.window_size = filters.shape[1]
        duration_ms = int((num_samples/16000) * 1000)
        self.fileName = f'MelSpecTF_{self.window_size}_{duration_ms}_'

    def call(self, input_data):
        """Return the transposed log-mel spectrogram of ``input_data``.

        The reshape below hard-codes a leading dimension of 1.
        """
        n_frames = int(self.num_samples/self.hop_size) + 1
        half_window = int(self.window_size / 2)
        n_basis_rows = self.window_size + 2
        split_at = n_basis_rows // 2

        # Reflect-pad half a window on each side of the last axis.
        # (original note: norm with torch zero)
        padded = tf.pad(input_data, ([0, 0], [half_window, half_window]), mode='REFLECT')
        padded = tf.expand_dims(padded, -1)
        padded = tf.expand_dims(padded, 1)

        # The convolution against the DFT basis yields stacked real/imag parts.
        spectrum = tf.nn.conv2d(padded, self.filters, self.hop_size, 'VALID')
        spectrum = tf.squeeze(spectrum)
        spectrum = tf.transpose(spectrum)
        spectrum = tf.reshape(spectrum, [1, n_basis_rows, n_frames])

        real_part = spectrum[:, :split_at, :]
        imag_part = spectrum[:, split_at:, :]
        magnitude = tf.math.sqrt(real_part**2 + imag_part**2)

        mel_output = tf.matmul(self.mel_basis, magnitude)
        # Clip from below so the logarithm stays finite.
        mel_output = tf.math.log(tf.clip_by_value(mel_output, clip_value_min =1e-5 , clip_value_max=np.inf))

        return tf.transpose(mel_output)

In [None]:
#@markdown test all model's classes
# Smoke test: build each model once, run a random input through it and print
# the output shape together with the export file stem.
model = ConvModel(n_layers=12, filters=256, kernel_size=3)
print(model(np.random.random((1, 10, 512, 3)).astype(float)).shape, model.fileName)
model = DenseModel(n_layers=10, n_units=3)
print(model(np.random.random((1, 10, 512)).astype(float)).shape, model.fileName)
model = Transformer(embed_dim=512,
                    num_heads=6,
                    feed_forward_dim=2048,
                    key_dim=64,
                    num_layers_enc=1,
                    num_layers_dec=1,
                    num_classes=2,
                    )
print(model(np.random.random((1, 10, 512)).astype(float)).shape, model.fileName)

# `EncoderLstm` is not defined in this notebook. Calling it unconditionally
# raises NameError on a fresh kernel (Restart & Run All), so it is only
# exercised when the class has been provided.
if 'EncoderLstm' in globals():
    model = EncoderLstm(embed_dim=512,
                        num_heads=6,
                        feed_forward_dim=2048,
                        key_dim=64,
                        num_layers_enc=1,
                        n_lstm_units=512,
                        num_classes=2,
                        )
    print(model(np.random.random((1, 10, 512)).astype(float)).shape, model.fileName)

filters = get_forward_basis(filter_length=4096)
mel_basis = get_mel_filter(n_mels=512, filter_length=4096)
mel_tf_model = mel_tf_for_tflite(filters,
                    mel_basis,
                    num_samples=512*32 - 1)

mel_tf_model(np.random.random((1, 512*32 - 1 ))).shape, mel_tf_model.fileName

In [None]:
# Create models
# Every configured model is built, run once on an all-ones input, exported to
# both Core ML and TFLite, and described by one row appended to `Models_info`.

# Models parameters
Dense_models_config = [[6, 128], [3, 256]]  # [n_layers, n_units]
Conv_models_config = [[6, 128, 3], [3, 36, 12]]  # [n_layers, filters, kernel_size]
# [embed_dim, num_heads, feed_forward_dim, key_dim, num_layers_enc, num_layers_dec, num_classes]
Transformer_models_config = [[512, 6, 1024, 64, 8, 8, 512]]

Models_info = []
Input = np.ones((1, 10, 512, 3)).astype(float)

mlmodel_input_name='input_1'


def _model_info_row(model, mlmodel_input_name, input_shape, output):
    """Build one `Models_info` row: file names, Core ML input name, input shape, parameter count, output sum."""
    return [model.fileName+'.tflite', model.fileName+'.mlmodel', mlmodel_input_name, input_shape, model.count_params(), output.sum()]


# NOTE: the sample input is called `model_input` (not `input`) so the builtin
# `input` is not shadowed for the rest of the notebook session.
for n_layers, filters, kernel_size in Conv_models_config:
    model_input = Input
    model = ConvModel(n_layers=n_layers, filters=filters, kernel_size=kernel_size)
    output = model(model_input).numpy()

    mlmodel_input_name = convertKeras2MLModel(model)
    get_tflite_from_keras(model, model.fileName+'.tflite')
    Models_info.append(_model_info_row(model, mlmodel_input_name, model_input.shape, output))
    del model


for embed_dim, num_heads, feed_forward_dim, key_dim, num_layers_enc, num_layers_dec, num_classes in Transformer_models_config:
    # Sequence models take a 3-D input, so the last axis of `Input` is dropped.
    model_input = Input[:,:,:,0]
    model = Transformer(embed_dim=embed_dim,
                        num_heads=num_heads,
                        feed_forward_dim=feed_forward_dim,
                        key_dim=key_dim,
                        num_layers_enc=num_layers_enc,
                        num_layers_dec=num_layers_dec,
                        num_classes=num_classes,
                        )
    output = model(model_input).numpy()

    get_tflite_from_keras(model, model.fileName+'.tflite')
    mlmodel_input_name = convertKeras2MLModel(model)
    Models_info.append(_model_info_row(model, mlmodel_input_name, model_input.shape, output))
    del model

for n_layers, n_units in Dense_models_config:
    model_input = Input[:,:,:,0]
    model = DenseModel(n_layers=n_layers, n_units=n_units)
    output = model(model_input).numpy()

    mlmodel_input_name = convertKeras2MLModel(model)
    get_tflite_from_keras(model, model.fileName+'.tflite')
    Models_info.append(_model_info_row(model, mlmodel_input_name, model_input.shape, output))
    del model


# Mel spectro part
filters = get_forward_basis(filter_length=4096)
mel_basis = get_mel_filter(n_mels=512, filter_length=4096)
model = mel_tf_for_tflite(filters,
                    mel_basis,
                    num_samples=512*32 - 1)
mel_input = np.ones((1, 512*32 - 1 ))
output = model(mel_input).numpy()
mlmodel_input_name = convertKeras2MLModel(model)
get_tflite_from_keras(model, model.fileName+'.tflite')

Models_info.append(_model_info_row(model, mlmodel_input_name, mel_input.shape, output))

Models_info

In [None]:
# Stringify every field of every row so the table can be written as plain text.
Models_info_str = [[str(field) for field in model_row] for model_row in Models_info]

# '<<<' is used as the field separator when writing the rows.
np.savetxt('First_Batch_Models_Info.txt',np.array(Models_info_str).astype(str), fmt='%s', delimiter='<<<')

In [None]:
# Read the saved model table back as a 2-D array of strings.
# `np.loadtxt` only accepts single-character delimiters from NumPy 1.23 on, so
# the '<<<'-separated rows are split by hand; blank lines are skipped.
with open('First_Batch_Models_Info.txt') as models_info_file:
    Models_info_str = np.array(
        [line.strip('\r\n').split('<<<') for line in models_info_file if line.strip()]
    )
Models_info_str[0]

In [None]:

#@markdown **swiftMLModelFuncString** is a template for a Swift function that runs a Core ML model (mlmodel) several times and reports its timings
swiftMLModelFuncString = '''
func run-MODELNAME-NTimes(input: MLMultiArray, n_rounds: Int = 10, ignoreFirst: Bool=true, mlConfig: MLModelConfiguration) -> String{
    
    var invokingDurations = [] as [Double]
    var firtInvokeDuration = 0.0
    var initDuration = 0.0
    let timer = Timer()
    var _sum = Float(0.0)
    
    do{
        /// Initialize model
        timer.reset()
        let mlModel = try -MODELNAME-(configuration: mlConfig)
        initDuration = timer.getDurationAndReset()
        
        /// Run same input n times
        for i in 0..<n_rounds{
            timer.reset()
            let output = try mlModel.prediction(input_1: input).Identity
            if i==0 {
                firtInvokeDuration=timer.getDuration()
                if ignoreFirst {
                    continue
                }
            }
            invokingDurations.append(timer.getDuration())
            _sum = sumMultiArray(array:output)
        }
    }catch {
        print("runCoreMLModelNTimes error", error)
    }
  
    let invokeAvgDuration = invokingDurations.avg()
    let invokeStdDuration = invokingDurations.std()
    
    return "-MODELNAME-, \(invokeAvgDuration), \(invokeStdDuration), \(firtInvokeDuration), \(initDuration), \(_sum)"
        
}
'''


#@markdown **swiftMLModelCallingString** is a template for the Swift code that calls the function generated from the template above
swiftMLModelCallingString = '''

modelDurations = run-MODELNAME-NTimes(input: -MODELINPUT-, n_rounds: n_rounds, mlConfig:mlConfig)
lines.append(modelDurations+computeUnitsName)

'''

#@markdown -MODELNAME- must be replaced with the model name.
#@markdown -MODELINPUT- must be replaced, in the calling code, with an MLMultiArray that has the model's input shape.

# swiftMLModelFuncString.replace("-MODELNAME-", 't')

In [None]:
# Generate the Swift source for every created model: one benchmark function
# per model plus the snippet that calls it.
swift_func_chunks = []
swift_call_chunks = []
for item in Models_info:
    # The model name is the .tflite file name up to its first dot.
    name = item[0].split('.')[0]
    # Report any model whose Core ML input name is not 'input_1'.
    if item[2] != 'input_1':
        print('ERRORR ', item)

    swift_func_chunks.append('\n\n\n' + swiftMLModelFuncString.replace("-MODELNAME-", name))
    swift_call_chunks.append(swiftMLModelCallingString.replace("-MODELNAME-", name))

Swift_new_funcs_str = ''.join(swift_func_chunks)
Swift_call_str = ''.join(swift_call_chunks)

In [None]:
Swift_call_str

In [None]:
Swift_new_funcs_str

In [None]:
# -f: do not complain when the archive does not exist yet (e.g. on a fresh runtime).
!rm -f First_Batch_of_models_speed_test_on_iOS.zip

In [None]:
!zip First_Batch_of_models_speed_test_on_iOS.zip *tflite *mlmodel

In [None]:
!mv First_Batch_of_models_speed_test_on_iOS.zip path/to/drive # faster to download from drive than from colab directly

In [None]:
# Load the benchmark results file as a 2-D array of strings.
# `np.loadtxt` only accepts single-character delimiters from NumPy 1.23 on, so
# the ', '-separated rows are split by hand; blank lines are skipped.
with open('CA1748D9-13CF-40EA-B048-B6CA584ADD5A.txt') as results_file:
    data = np.array(
        [line.strip('\r\n').split(', ') for line in results_file if line.strip()]
    )

# Compute-unit labels expected in the last column of `data`; the insertion
# order of this dict defines the column order of the results table.
computeUnits_dic = {
    'MLComputeUnits.cpuOnly': 1,
    'MLComputeUnits.cpuAndGPU': 2,
    'MLComputeUnits.all': 3,
    'MLComputeUnits.cpuAndNeuralEngine': 4,
    'Tflite-CPU': 5,
    'Tflite-GPU': 6,
}

# Row 0 is skipped (presumably a header line; confirm against the results file).
modelNames = data[1:,0]
# NOTE(review): the table cell below reads column 1 as the average and column 2
# as the standard deviation, so column 2 here may not be the intended
# "duration" column; confirm.
invokeDurations = data[1:, 2].astype(float)

In [None]:
def _format_latency(avg_field, std_field):
    """Format one table cell as "<avg ms>/<std ms>" from two string fields in seconds.

    Returns 'NaN' when a field cannot be parsed as a float, or when the
    standard deviation is 0 and the average field is the literal 'nan'.
    A zero standard deviation is otherwise rendered as '<0.1'.
    """
    try:
        invoke_avg = float(avg_field)  # in sec
        invoke_std = float(std_field)
    except ValueError:
        return 'NaN'
    if invoke_std == 0.0:
        if avg_field == 'nan':
            return 'NaN'
        return f'{invoke_avg*1000:.1f}/<0.1'
    return f'{invoke_avg*1000:.1f}/{invoke_std*1000:.1f}'


# Sorted so that the row order is reproducible between runs (set order is not).
models_name = sorted(set(data[1:,0]))
n_models = len(models_name)
# One leading "Model name" column plus one column per compute unit. This is
# derived from `computeUnits_dic` so the cell does not depend on `headers`,
# which is only defined in a later cell.
n_headers = 1 + len(computeUnits_dic)

table_content = []
for model_name in models_name:
    # All result rows of this model; the last column holds the compute-unit label.
    models_rows = data[np.where(data[:,0]==model_name)]
    models_computeUnitesNames = list(models_rows[:,-1])

    row = [model_name]
    for computeUnite_name in computeUnits_dic:
        if computeUnite_name in models_computeUnitesNames:
            # First matching row wins when a compute unit appears more than once.
            result_row = models_rows[models_computeUnitesNames.index(computeUnite_name)]
            row.append(_format_latency(result_row[1], result_row[2]))
        else:
            # No measurement for this model on this compute unit.
            row.append('-/-')

    table_content.append(row)

len(table_content), table_content[0]

In [None]:
from pytablewriter import MarkdownTableWriter

# Column titles: the model name followed by one column per compute unit.
headers = [
    "Model name",
    "CoreML-CPU",
    "CoreML-CPU&GPU",
    "CoreML-All",
    "CoreML-CPU&NeuralEngine",
    "Tflite-CPU",
    "Tflite-GPU",
]

# The table name is the second '-'-separated token of the last field of the
# first line of `data`.
table_title = data[0][-1].split('-')[1]

writer = MarkdownTableWriter(
    table_name=table_title,
    headers=headers,
    value_matrix=table_content,
)
s = writer.write_table()
print('')