In [None]:

import wave
import numpy as np
import tflite_runtime.interpreter as tflite
from pprint import pprint

# Paths of the two TFLite model files loaded by the interpreter cells below.
melspec_model_path = "models/melspectrogram.tflite"
embedding_model_path = "models/embedding_model.tflite"
# Thread count handed to each tflite.Interpreter through `num_threads`.
ncpu = 4

In [None]:


# Build the melspectrogram interpreter and give input tensor 0 an initial
# shape of [1, 1280]; melspec() resizes it again when fed another length.
melspec_model = tflite.Interpreter(model_path=melspec_model_path, num_threads=ncpu)
melspec_model.resize_tensor_input(0, [1, 1280], strict=True)
# Tensors are allocated only after the input shape has been set.
melspec_model.allocate_tensors()
# Print the input/output tensor descriptions for inspection.
pprint([melspec_model.get_input_details(), melspec_model.get_output_details()])
# Cache the tensor indices that melspec() passes to set_tensor()/get_tensor().
melspec_input_index = melspec_model.get_input_details()[0]['index']
melspec_output_index = melspec_model.get_output_details()[0]['index']

def melspec(x):
    """Run the melspectrogram TFLite model on an audio buffer.

    Parameters
    ----------
    x : numpy.ndarray
        Audio samples. A 1-D array gets a leading batch axis prepended;
        the data is converted to float32 before being fed to the model.

    Returns
    -------
    numpy.ndarray
        The model's output tensor.

    Raises
    ------
    ValueError
        If the interpreter input has to be resized and the requested length
        is below 640 or not a multiple of 160.
    """
    batch = x[None, ] if len(x.shape) == 1 else x
    batch = batch.astype(np.float32)

    allocated_len = melspec_model.get_input_details()[0]['shape'][1]
    requested_len = batch.shape[1]

    # Resize (and re-allocate) only when the length actually changes.
    if allocated_len != requested_len:
        length_ok = requested_len >= 640 and requested_len % 160 == 0
        if not length_ok:
            raise ValueError("Input length must be a multiple of 160 and at least 640")
        melspec_model.resize_tensor_input(0, [1, requested_len], strict=True)
        melspec_model.allocate_tensors()

    melspec_model.set_tensor(melspec_input_index, batch)
    melspec_model.invoke()
    spectrogram = melspec_model.get_tensor(melspec_output_index)
    return spectrogram

In [None]:
# Build the embedding interpreter; its input shape is left as stored in the
# model file (no resize_tensor_input call here).
embedding_model = tflite.Interpreter(model_path=embedding_model_path, num_threads=ncpu)
embedding_model.allocate_tensors()
# Print the input/output tensor descriptions for inspection.
pprint([embedding_model.get_input_details(), embedding_model.get_output_details()])
# Cache the tensor indices that embedding() passes to set_tensor()/get_tensor().
embedding_input_index = embedding_model.get_input_details()[0]['index']
embedding_output_index = embedding_model.get_output_details()[0]['index']

def embedding(x):
    """Run the embedding TFLite model on one input tensor.

    `x` is written unchanged into the interpreter's input tensor (no
    reshaping or dtype conversion is done here), the model is invoked, and
    the content of its output tensor is returned.
    """
    interpreter = embedding_model
    interpreter.set_tensor(embedding_input_index, x)
    interpreter.invoke()
    result = interpreter.get_tensor(embedding_output_index)
    return result


# Calculations

`160 * N + 480` (`10 ms * N + 30 ms`) wave samples ==> generate `N` spectrogram vectors

76 vectors (76 × 160 = 12160 samples, 760 ms; 12640 samples / 790 ms including the 480-sample overhead from the formula above) ==> generate `1` embedding

Embeddings overlap, with a step of 8 vectors (1280 samples, 80 ms)

In [None]:
# Load the recording, truncated to a whole number of 160-sample hops because
# melspec() rejects lengths that are not a multiple of 160.
# NOTE(review): the bytes are interpreted as int16 — presumably the file is
# 16-bit mono PCM; confirm, since the header is not validated here.
with wave.open("data/sample1.wav", "rb") as wf:
    nframes = (wf.getparams().nframes // 160) * 160
    x = np.frombuffer(wf.readframes(nframes), dtype=np.int16)

spec = melspec(x).squeeze()

# Number of complete 76-vector windows at a stride of 8 vectors.
# Clamped at 0 so a spectrogram shorter than one window yields an empty
# result instead of a negative dimension passed to np.empty().
samples = max(0, (spec.shape[0] - 76) // 8 + 1)

embeddings = np.empty((samples, 96), dtype=np.float32)

# Iterate over window indices rather than range(0, n - 76, 8): that range
# excluded the last valid start offset whenever (n - 76) was a multiple of 8,
# leaving the final row of the np.empty() buffer uninitialised.
for row in range(samples):
    start = row * 8
    window = spec[start:start + 76]
    # The window gets a leading and a trailing singleton axis before it is fed
    # to the model: (76, F) -> (1, 76, F, 1).
    features = embedding(window[None, :, :, None])
    embeddings[row, :] = features.squeeze()

# bb = melspec(b)
# cc = melspec(c)

# print("A shape:", a.shape, aa.shape)
# print("B shape:", b.shape, bb.shape)
# print("C shape:", c.shape, cc.shape)

# aaa = np.reshape(aa, (32,)) - np.reshape(cc[:,:,0,:], (32,))
# bbb = np.reshape(bb, (32,)) - np.reshape(cc[:,:,1,:], (32,))
# print("AAA shape:", aaa.shape)
# # Sum all elements of aaa and bbb
# print("AAA sum:", np.sum(aaa))
# print("BBB sum:", np.sum(bbb))

# y = melspec(x).squeeze()
# y = y[None,:,:,None]
# print("Spec shape:", y.shape, y.dtype)
# z = embedding(y)
# print("Output shape:", z.shape, z.dtype, z)

