# Baseline perceptron experiment
In this notebook we implement a baseline neural network with a single hidden layer that takes _raw audio samples_ as input and produces one _note probability vector_ as output. After one epoch, the model reaches 30% accuracy on the test set. Accuracy on the train set is very low (23%), which suggests that the model capacity is too low.

Metric | Result
--- | ---
Precision | 62.05%
Recall | 37.33%
Accuracy | 30.39%


In [None]:
# Automatically reload imported modules before each cell is executed, so that
# edits to the local libraries are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import datetime
import os

import numpy as np
import tensorflow as tf

import datasets
import visualization as vis
from model import Network

In [None]:
# Root directory of the MusicNet dataset. Set the MUSICNET_ROOT environment
# variable to point the notebook at another location; when it is unset, the
# original author's local path is used as the fallback.
MUSICNET_ROOT = os.environ.get(
    "MUSICNET_ROOT",
    "/home/jirka/bakalarka/melody_extraction/datasets/musicnet",
)

# Load the "test" and "train" parts of the dataset from the same root.
test_data = datasets.musicnet_dataset(MUSICNET_ROOT, "test")
train_data = datasets.musicnet_dataset(MUSICNET_ROOT, "train")

In [None]:
# Windowing parameters shared by every dataset built in this notebook.
annotations_per_window = 1
context_width = 944
window_params = (annotations_per_window, context_width)

# Only the training dataset shuffles its batches; the test dataset does not.
test_dataset = datasets.AADataset(test_data, *window_params, shuffle_batches=False)
train_dataset = datasets.AADataset(train_data, *window_params, shuffle_batches=True)

In [None]:
# Small dataset for manual evaluation: a few short excerpts cut out of the
# test recordings. Each row is (index into test_data, slice start, slice end).
# NOTE(review): the slice bounds are presumably in seconds - confirm against
# the `slice` implementation in the datasets module.
small_test_excerpts = [
    (3, 15, 20.8),     # solo cello
    (9, 56, 61.4),     # solo piano
    (5, 55.6, 61.6),   # orchestra
    (2, 17.65, 27),    # violin + string section
]
small_test_data = [
    test_data[index].slice(start, end)
    for index, start, end in small_test_excerpts
]
small_test_dataset = datasets.AADataset(
    small_test_data,
    annotations_per_window,
    context_width,
    shuffle_batches=False,
)

In [None]:
def create_model(self, args):
    """Build the perceptron baseline graph on the given network object.

    This function is handed to ``network.construct`` together with ``args``;
    presumably it is then invoked with the network instance as ``self``.

    Reads from ``self``:
        window: input tensor fed to the first dense layer.
        ref_notes: reference annotations, flattened for the loss.
        note_range, annotations_per_window: determine the output size.

    Sets on ``self``:
        note_probabilites: sigmoid of the output logits, reshaped to
            ``[-1, annotations_per_window, note_range]``.
        est_notes: float32 tensor, 1.0 where the probability exceeds 0.5.
        loss: sigmoid cross-entropy between the flattened references and
            the raw logits.
        training: Adam minimisation op that also advances the global step.

    Args:
        self: object providing the attributes listed above.
        args: hyperparameter dictionary; not used by this model, kept so the
            signature matches what the caller passes.
    """
    # Perceptron baseline: one hidden ReLU layer followed by a linear output layer.
    hidden = tf.layers.dense(self.window, 1000, activation=tf.nn.relu)
    logits = tf.layers.dense(hidden, self.note_range*self.annotations_per_window, activation=None, name="output")
    ref_notes_flat = tf.layers.flatten(self.ref_notes)

    # The output layer has no activation, so it produces logits. Squash them
    # with a sigmoid so the attribute really holds probabilities and the 0.5
    # threshold below means "probability above one half" (thresholding the raw
    # logits at 0.5 would correspond to a probability of roughly 0.62).
    logits_per_window = tf.reshape(logits, [-1, self.annotations_per_window, self.note_range])
    self.note_probabilites = tf.sigmoid(logits_per_window)
    self.est_notes = tf.cast(tf.greater(self.note_probabilites, 0.5), tf.float32)

    # sigmoid_cross_entropy applies the sigmoid itself, so it must be given
    # the raw logits rather than the probabilities.
    self.loss = tf.losses.sigmoid_cross_entropy(ref_notes_flat, logits)

    global_step = tf.train.create_global_step()
    self.training = tf.train.AdamOptimizer().minimize(self.loss, global_step=global_step)

In [None]:
# Name of an earlier run to continue from: when set, it replaces the generated
# run name and network.restore() is called after construction.
# None starts a fresh run.
restore = None

# Settings handed to network.construct; the dataset-dependent values are read
# from the test dataset.
args = dict(
    threads=6,
    batch_size=64,
    logdir=None,
    annotations_per_window=test_dataset.annotations_per_window,
    window_size=test_dataset.window_size,
    note_range=96,
    samplerate=test_dataset.samplerate,
)

# Create logdir name: reuse the restored run's name, otherwise build a new one
# from the current time and the main settings.
if restore:
    name = restore
else:
    timestamp = datetime.datetime.now().strftime("%m-%d_%H%M%S")
    name = "{}-bs{}-apw{}-wsiz{}".format(
        timestamp,
        args["batch_size"],
        args["annotations_per_window"],
        args["window_size"],
    )
args["logdir"] = "models/" + name

print(name)

# Construct the network
network = Network(threads=args["threads"])
network.construct(args, create_model)

if restore:
    network.restore()

In [None]:
# Number of epochs passed to network.train.
epochs = 1

# Train on the training set; the test set and the small manual-evaluation set
# are passed along with the evaluation and saving intervals (in batches).
network.train(
    train_dataset,
    test_dataset,
    small_test_dataset,
    args["batch_size"],
    epochs,
    eval_every_n_batches=10000,
    save_every_n_batches=20000,
)

In [None]:
# Save the network, then evaluate it on the full test dataset and on the small
# manual-evaluation dataset, printing detailed metrics for both.
network.save()
print("Testing dataset metrics:")
acc = network.evaluate(test_dataset, args["batch_size"], print_detailed=True)
print("\nSmall testing dataset metrics:")
# NOTE(review): `acc` is reassigned here, so the value returned for the full
# test dataset above is overwritten. visual_output=True is requested only for
# the small dataset.
acc = network.evaluate(small_test_dataset, args["batch_size"], visual_output=True, print_detailed=True)

In [None]:
# Play the small testing dataset: pass all of its audio samples and its
# sample rate to the player helper from the visualization module.
vis.samplesplayer(small_test_dataset.all_samples(), small_test_dataset.samplerate)