# Training pipeline for MFCC (train + save)

In [1]:
import sys
import os
from git_root import git_root

import numpy as np
import tensorflow as tf

from tensorflow.keras.optimizers import Adam

sys.path.append(git_root("utils"))
from utils import load_params

from fetch_data import fetch_data_local
from fetch_data import to_numpy_arrays, prepare_tf_dataset

sys.path.append(git_root("models"))
from MFCC_model import setup_model

from validation_utils import plot_history

In [2]:
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

2.0.0


In [3]:
# Load the project parameters through the shared `utils.load_params` helper.
# NOTE(review): `params` is not referenced by any later cell visible in this
# notebook — confirm whether it is still needed.
params = load_params()

In [4]:
# Fetch the MFCC train and test splits via `fetch_data_local`.
# NOTE(review): the output saved with this cell is
# `ValueError: Expected object or value`, i.e. the recorded run stopped here
# and none of the following cells were executed.
train = fetch_data_local(train=True, map_type="mfcc")
test = fetch_data_local(train=False, map_type="mfcc")

Fetching: data_mfcc_train.json


ValueError: Expected object or value

In [None]:
# Show the shape of each fetched split, one labelled pair of lines per split.
print("train", train.shape, sep="\n")
print("test", test.shape, sep="\n")

In [None]:
# Shape of the 'maps' entry stored in the row labelled 0 of the train split.
np.shape(train.loc[0, 'maps'])

In [None]:
# Length of the value at row position 0, column position 1 of the train split.
# NOTE(review): presumably column 1 is the 'maps' column inspected above —
# confirm against the frame's column order.
len(train.iloc[0, 1])

In [None]:
# Convert both splits with `to_numpy_arrays` in MFCC mode.
# NOTE(review): `train` / `test` are rebound from the fetched objects to the
# converted results, so re-running this cell would pass the converted values
# back into `to_numpy_arrays`. Re-run the fetch cell first when iterating.
train = to_numpy_arrays(train, mfcc=True)
test = to_numpy_arrays(test, mfcc=True)

In [None]:
# For each split, print the number of entries in element 0 and element 1 of
# the converted result (used below as samples and labels respectively).
print("train", len(train[0]), len(train[1]), sep="\n")
print("test", len(test[0]), len(test[1]), sep="\n")

In [None]:
# Unstack every train sample along its last axis into individual
# (40, 50, 1) maps and repeat the sample's label once per extracted map, so
# the two output lists stay index-aligned.
#
# Samples and labels are walked together and each sample's own slice count is
# used. Relying on the first sample's count would crash on a sample with
# fewer slices and silently drop slices from one with more.
assert len(train[0]) == len(train[1]), (
    "train samples and labels must have the same length"
)

train_sample_unstacked = []
train_label_unstacked = []

for sample, label in zip(train[0], train[1]):
    n_slices = sample.shape[-1]
    # reshape(40, 50, 1) adds the trailing channel axis to each 2-D slice.
    train_sample_unstacked += [
        sample[:, :, k].reshape(40, 50, 1) for k in range(n_slices)
    ]
    train_label_unstacked += [label] * n_slices

In [None]:
# Unstack every test sample along its last axis into individual
# (40, 50, 1) maps and repeat the sample's label once per extracted map, so
# the two output lists stay index-aligned.
#
# Samples and labels are walked together and each sample's own slice count is
# used. Relying on the first sample's count would crash on a sample with
# fewer slices and silently drop slices from one with more.
assert len(test[0]) == len(test[1]), (
    "test samples and labels must have the same length"
)

test_sample_unstacked = []
test_label_unstacked = []

for sample, label in zip(test[0], test[1]):
    n_slices = sample.shape[-1]
    # reshape(40, 50, 1) adds the trailing channel axis to each 2-D slice.
    test_sample_unstacked += [
        sample[:, :, k].reshape(40, 50, 1) for k in range(n_slices)
    ]
    test_label_unstacked += [label] * n_slices

In [None]:
# Wrap the unstacked samples and labels with `prepare_tf_dataset`.
# NOTE(review): `train` / `test` are rebound once more here. From this cell on
# they hold the prepared datasets, not the converted arrays, so the unstacking
# cells above cannot be re-run without re-running the conversion first.
train = prepare_tf_dataset(train_sample_unstacked, train_label_unstacked)
test = prepare_tf_dataset(test_sample_unstacked, test_label_unstacked)

In [None]:
# Sanity check: take the first (samples, labels) element from each dataset
# and print the shapes of both parts.
tr_sample_batch, tr_label_batch = next(iter(train))
te_sample_batch, te_label_batch = next(iter(test))

print("train", f"{tr_sample_batch.shape} {tr_label_batch.shape}", sep="\n")
print("test", f"{te_sample_batch.shape} {te_label_batch.shape}", sep="\n")

In [None]:
# Create the network with `setup_model` (from models/MFCC_model) and display
# its summary.
net = setup_model()
net.summary()

In [None]:
# Configure training: Adam with a 1e-4 learning rate, sparse categorical
# cross-entropy as the loss, and accuracy as the tracked metric.
net.compile(
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
    optimizer=Adam(learning_rate=1e-4),
)

In [None]:
# The datasets are already batched, so `fit` gets no `batch_size` argument.
# `test` is passed as the validation data evaluated after each epoch.
history = net.fit(
    x=train,
    validation_data=test,
    epochs=1000,
)

In [None]:
# Plot the training history returned by `fit` using the project's
# `validation_utils.plot_history` helper.
plot_history(history)

In [None]:
# Report the validation accuracy recorded for the last epoch, to 3 decimals.
final_val_accuracy = history.history['val_accuracy'][-1]
print(f"Final validation accuracy is: {final_val_accuracy:.3f}")

In [None]:
# Save the trained network to models/saved_models/mfcc.h5, with the path
# resolved through `git_root`.
file_path = git_root("models", "saved_models", "mfcc.h5")
# Create the target directory first: a missing folder would otherwise make
# the save fail only after the whole training run has finished.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
net.save(file_path)