In [None]:
import matplotlib.pyplot as plt
import os
import pathlib
from scipy.io import wavfile
from collections import defaultdict, Counter
from scipy import signal
import numpy as np
import librosa
import sklearn
import random
from unicodedata import normalize
from tensorflow.keras import layers,models
import librosa.display
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Folder (relative to the notebook) where the extracted dataset is expected.
DATASET_PATH = 'data/mini_speech_commands'
data_dir = pathlib.Path(DATASET_PATH)

# Download and unpack the archive only when the folder is missing, so
# re-running this cell does not fetch the data again.
if not data_dir.exists():
    tf.keras.utils.get_file(
        fname='mini_speech_commands.zip',
        origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
        cache_dir='.',
        cache_subdir='data',
        extract=True,
    )

Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip
[1m182082353/182082353[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [None]:
# Every entry of the dataset folder except the README is kept as a command name.
entries = np.array(tf.io.gfile.listdir(str(data_dir)))
commands = entries[entries != 'README.md']
print('Commands:', commands)

Commands: ['left' 'go' 'yes' 'no' 'stop' 'up' 'right' 'down']


In [None]:
# Accumulators for the per-clip features (x) and integer labels (y).
x, y = [], []

In [None]:
def pad1d(a, i):
    """Return `a` with length exactly `i` along axis 0.

    Longer inputs are truncated to their first `i` elements; shorter (or
    equal-length) inputs are right-padded with zeros.
    """
    if a.shape[0] <= i:
        return np.hstack((a, np.zeros(i - a.shape[0])))
    return a[:i]


def pad2d(a, i):
    """Return the 2-D array `a` with exactly `i` columns.

    Wider inputs keep only their first `i` columns; narrower (or equal-width)
    inputs get zero columns appended on the right.
    """
    if a.shape[1] <= i:
        return np.hstack((a, np.zeros((a.shape[0], i - a.shape[1]))))
    return a[:, :i]

In [None]:
# Load the data: one STFT magnitude spectrogram per clip, labelled with the
# position of its folder name in `target`.
target = ['up', 'down', 'left', 'right','go','stop','yes','no']

# Start from empty lists so that re-running this cell never duplicates samples
# (and still works after x / y have been converted to arrays further down).
x = []
y = []

# os.listdir returns entries in arbitrary order; sorting makes the sample
# order reproducible across runs and machines.
for item in sorted(os.listdir(DATASET_PATH)):
    sub_folder = os.path.join(DATASET_PATH, item)
    if not os.path.isdir(sub_folder):
        continue  # loose files in the dataset root are not class folders
    print(sub_folder)
    for filename in sorted(os.listdir(sub_folder)):
        filename = normalize('NFC', filename)
        try:
            # Use only WAV files. Checking the extension (rather than a
            # substring match) also rejects names such as "clip.wav.bak".
            if not filename.endswith('.wav'):
                continue

            wav, sr = librosa.load(os.path.join(sub_folder, filename), sr=16000)

            # Magnitude spectrogram; pad2d then fixes the second axis at
            # exactly 120 columns (truncating or zero-padding).
            stft = np.abs(librosa.stft(y=wav, n_fft=256, hop_length=128))
            padded_stft = pad2d(stft, 120)

            x.append(padded_stft)
            # Raises ValueError for a folder whose name is not in `target`.
            y.append(target.index(item))
        except Exception as e:
            # Report which file failed, then re-raise so the error is not hidden.
            print(filename, e)
            raise

data/mini_speech_commands/left
data/mini_speech_commands/go
data/mini_speech_commands/yes
data/mini_speech_commands/no
data/mini_speech_commands/stop
data/mini_speech_commands/up
data/mini_speech_commands/right
data/mini_speech_commands/down


In [None]:
# Stack the per-clip features and labels into arrays.
x, y = np.array(x), np.array(y)

In [None]:
# Shape of the stacked features: (number of clips, STFT rows, 120 padded columns).
x.shape

(8000, 129, 120)

In [None]:
# One integer label (an index into `target`) per clip.
y.shape

(8000,)

In [None]:
# Load the TFLite model from the working directory and allocate its tensors,
# which must happen before set_tensor / invoke are called below.
interpreter = tf.lite.Interpreter(model_path='./model.tflite')
interpreter.allocate_tensors()

In [None]:
# Tensor metadata of the model; later cells read the 'index' and
# 'quantization' entries of the first input and first output tensor.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

In [None]:
# Read the (scale, zero_point) pair of the first input and first output tensor.
first_input, first_output = input_details[0], output_details[0]
input_scale, input_zero_point = first_input['quantization']
output_scale, output_zero_point = first_output['quantization']

# Display all four parameters as the cell result.
(input_scale, input_zero_point, output_scale, output_zero_point)

(0.1303439885377884, -128, 0.00390625, -128)

In [None]:
# Quantize the float features into the model's int8 input domain:
#     q = round(real / scale + zero_point), saturated to [-128, 127]
# - Rounding: a bare astype() truncates toward zero, which biases every value
#   and increases the quantization error.
# - Clipping: values outside the representable range must saturate; casting
#   an out-of-range float to int8 does not saturate and yields wrapped /
#   unspecified values.
# The dtype guard keeps the cell idempotent, so re-running it does not
# quantize already-quantized data a second time.
if x.dtype != np.int8:
    int8_range = np.iinfo(np.int8)
    x = np.clip(
        np.round(x / input_scale + input_zero_point),
        int8_range.min,
        int8_range.max,
    ).astype(np.int8)

In [None]:
# Number of clips to run through the interpreter.
batch_size = len(x)

In [None]:
# Run the interpreter on one clip at a time and keep the predicted class index.
input_index = input_details[0]['index']
output_index = output_details[0]['index']

output = []
for clip_idx in range(batch_size):
    # Slice (instead of index) so the clip keeps a leading axis of size 1.
    clip = x[clip_idx:clip_idx + 1]
    interpreter.set_tensor(input_index, clip)
    interpreter.invoke()
    raw_scores = interpreter.get_tensor(output_index)
    # Dequantize the output: real = (q - zero_point) * scale.
    scores = (raw_scores.astype(np.float32) - output_zero_point) * output_scale
    output.append(np.argmax(scores))


In [None]:
# Convert the list of predicted class indices to an array so it can be
# compared element-wise with the labels.
output = np.array(output)


In [None]:
# Accuracy: fraction of clips whose predicted class index equals the label.
np.mean(output == y)

0.72025