In [1]:
from typing import List, Tuple
import tensorflow as tf
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import InputLayer, GRU, Dense
import glob
import os
from scipy.io.wavfile import read
import numpy as np

def esr(y_path: str, y_hat_path: str) -> float:
    '''Returns the Error-to-Signal Ratio.

    Keyword arguments:
    y -- the groundtruth file
    y_hat -- the prediction file
    '''
    y_sample_rate, y = read(y_path)
    y_hat_sample_rate, y_hat = read(y_hat_path)
    power = 2.0
    numerator = sum(abs(y - y_hat)**power)
    denominator = sum(abs(y)**power)
    return numerator / denominator  


def normalize(array: List, scale_max: int=1, scale_min: int=0) -> List:
    '''Returns a normalized array.
    
    Keyword arguments:
    array -- array to normalize
    scale_max -- maximum value to scale between
    scale_min -- minimum value to scale between

    Source: https://www.geeksforgeeks.org/how-to-normalize-an-array-in-numpy-in-python/
    '''
    norm_arr = []
    diff = scale_max - scale_min
    diff_arr = np.max(array) - np.min(array)
    for i in array:
        temp = (((i - np.min(array))*diff)/diff_arr) + scale_min
        norm_arr.append(temp)
    return norm_arr


def partition_dataset(data_path: str='data/simple_dataset/*', train_perc: float=0.8) -> Tuple[List, List, List, List]: 
    '''Partition into train, train_labels & test, test_labels datasets.

    Keyword arguments:
    data_path -- where the data lives
    '''
    assert train_perc < 1, 'train_perc must be less than 1'
    data_paths = glob.glob(data_path)
    split_idx = int(0.8 * len(data_paths))
    train_paths, test_paths = data_paths[:split_idx], data_paths[split_idx:]
    train_data, test_data = [], []
    train_labels, test_labels = [], []
    for file in train_paths:
        _, data = read(file)
        train_data.append(data)
        train_labels.append(os.path.basename(file).split('.')[0])
    for file in test_paths:
        _, data = read(file)
        test_data.append(data)
        test_labels.append(os.path.basename(file).split('.')[0])
    return train_data, train_labels, test_data, test_labels


def make_model(num_output_nodes: int=3) -> Sequential:
    '''Return a model.
    
    Keyword arguments:
    num_output_nodes -- number of nodes on the output layer
    '''
    model = Sequential([
        InputLayer,
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(num_output_nodes)
    ])
    return model

In [2]:
# partition dataset
train_data, train_labels, test_data, test_labels = partition_dataset()

In [3]:
# normalize datasets
train_data_norm = []
for i, d in enumerate(train_data):
    if i % (len(train_data) * 0.1) == 0:
        print(f'{i/len(train_data):.2f}% train data normalization complete')
    train_data_norm.append(normalize(d))
test_data_norm = []
for i, d in enumerate(test_data):
    if i % (len(test_data) * 0.1) == 0:
        print(f'{i/len(test_data):.2f}% test data normalization complete')
    test_data_norm.append(normalize(d))

0.00% train data normalization complete
0.10% train data normalization complete
0.20% train data normalization complete
0.30% train data normalization complete
0.40% train data normalization complete
0.50% train data normalization complete
0.60% train data normalization complete
0.70% train data normalization complete
0.80% train data normalization complete
0.90% train data normalization complete
0.00% test data normalization complete
0.10% test data normalization complete
0.20% test data normalization complete
0.30% test data normalization complete
0.40% test data normalization complete
0.50% test data normalization complete
0.60% test data normalization complete
0.70% test data normalization complete
0.80% test data normalization complete
0.90% test data normalization complete


In [4]:
# make and compile ML model
model = make_model()
model.compile(optimizer='adam',
              loss=esr(),
              metrics=['accuracy'])

2022-03-23 16:20:28.704625: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-23 16:20:28.704930: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



TypeError: The added layer must be an instance of class Layer. Found: <class 'tensorflow.python.keras.engine.input_layer.InputLayer'>

In [None]:
# #train ML model
# model.fit(train_images, train_labels, epochs=10)

# #evaluate ML model on test set
# test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

# #setup stop time
# t1 = time.time()
# total_time = t1-t0

# #print results
# print('\n')
# print(f'Training set contained {train_set_count} images')
# print(f'Testing set contained {test_set_count} images')
# print(f'Model achieved {test_acc:.2f} testing accuracy')
# print(f'Training and testing took {total_time:.2f} seconds')