In [109]:
import os
import logging
import pickle

import pandas as pd
import numpy as np

from string import digits

In [232]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense, LSTM, Bidirectional, TimeDistributed
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split

In [275]:
class ModelConfig:
    """Central place for the hyper-parameters used by the model cells below."""

    # --- data / architecture ---
    max_sequence_length = 50  # passed as the padding length to ETL.get_data
    lstm_units = 10           # size of the LSTM wrapped by the Bidirectional layer

    # --- compile() settings ---
    loss_function = CategoricalCrossentropy()
    optimizer = Adam()
    metrics = ['accuracy']

    # --- fit() settings ---
    batch_size = 128
    epochs = 50
    validation_split = 0.2
    verbosity = 1

In [58]:
class Semantic:
    """Converts semantic note tokens into per-note features for the model.

    Every note is encoded as ``[is_black_key, semitone_distance]`` where the
    distance is measured from the previous note (0 for the first note).
    """

    # Every spelling that lands on a black piano key. Enumerated explicitly:
    # checking for '#' or 'b' in the name would be wrong, because composers
    # sometimes write things like E# or Cb, which are white keys.
    black_keys = {'C#', 'Db', 'D#', 'Eb', 'F#', 'Gb', 'G#', 'Ab', 'A#', 'Bb'}
    # Semitone offset of each pitch name from C of the octave it is written in.
    # Cb (-1) and B# (12) deliberately fall outside 0..11 so that they resolve
    # to the neighbouring octave.
    key_map = {'Cb': -1, 'C': 0, 'C#': 1, 'Db': 1, 'D': 2, 'D#': 3, 'Eb': 3,
               'E': 4, 'Fb': 4, 'E#': 5, 'F': 5, 'F#': 6, 'Gb': 6, 'G': 7, 'G#': 8,
               'Ab': 8, 'A': 9, 'A#': 10, 'Bb': 10, 'B': 11, 'B#': 12}
    # Translation table that deletes all digits (i.e. the octave number).
    remove_digits = str.maketrans('', '', digits)
    octave_semitones = 12
    # Octave assumed for a note written without an octave digit.
    default_octave = '4'

    @classmethod
    def is_black_key(cls, key):
        """Return True if ``key`` (e.g. ``'F#4'`` or ``'Bb'``) is a black key.

        The octave digits are ignored. str.translate() is used to drop them
        because it is faster than iterating over the characters manually.
        """
        return key.translate(cls.remove_digits) in cls.black_keys

    @classmethod
    def _absolute_semitone(cls, note):
        """Return the semitone index of ``note`` counted from C of octave 0."""
        # fill in a missing octave digit
        if not any(ch.isdigit() for ch in note):
            note += cls.default_octave
        # the last character is the octave, everything before it the pitch name
        return int(note[-1]) * cls.octave_semitones + cls.key_map[note[:-1]]

    @classmethod
    def get_semitone_distances(cls, notes):
        """Return the signed semitone step from each note to its predecessor.

        Args:
            notes: iterable of note names such as ``'C4'``, ``'Eb5'`` or ``'G'``.

        Returns:
            A list with one int per note. The first entry is always 0; an
            empty input yields an empty list.

        Raises:
            KeyError: if a pitch name is not present in ``key_map``.
        """
        pitches = [cls._absolute_semitone(note) for note in notes]
        if not pitches:
            # nothing to compare (e.g. a file without any notes)
            return []
        return [0] + [after - before for before, after in zip(pitches, pitches[1:])]

    @classmethod
    def get_embedding(cls, tokens):
        """Embed the note tokens found in ``tokens`` as model inputs.

        For the piano model we don't really care which octave we're at, so each
        note is reduced to the semitone distance from the previous note and to
        whether it is a white or black key (which affects ease of playing).
        Note that the primus dataset does not have double-sharp/flat as inputs.

        Args:
            tokens: list of lines, each a tab-separated sequence of tokens.

        Returns:
            A list of ``[is_black_key, semitone_distance]`` pairs, one per
            ``note-`` / ``gracenote-`` token, in order of appearance.
        """
        names = []
        for line in tokens:
            for token in line.split('\t'):
                # drop line endings so they cannot leak into the note name
                token = token.strip()
                # only use notes
                if token.startswith('gracenote-'):
                    prefix = 'gracenote-'
                elif token.startswith('note-'):
                    prefix = 'note-'
                else:
                    continue
                # ignore note lengths (the part after the first '_')
                names.append(token.split('_')[0][len(prefix):])

        distances = cls.get_semitone_distances(names)
        return [[int(cls.is_black_key(name)), distance]
                for name, distance in zip(names, distances)]

    @classmethod
    def get_embedding_from_file(cls, path):
        """Read a semantic file and return its embedding.

        Returns:
            The result of ``get_embedding`` on the file's lines, or ``None``
            (after logging a warning) when the file does not exist.
        """
        # open() itself raises FileNotFoundError, so it must sit inside the try
        try:
            with open(path, "r") as f:
                lines = f.readlines()
        except FileNotFoundError:
            logging.warning("Semantic Data file not found: {}".format(path))
            return None
        return cls.get_embedding(lines)

In [217]:
class ETL:
    """Loads paired ``<name>.semantic`` / ``<name>.finger`` files into arrays."""

    # Directories whose immediate sub-folders each hold one sample.
    data_dirs = ['../data/package_aa']# , '../data/package_ab']

    @classmethod
    def parse_finger_file(cls, path):
        """Parse the first line of a finger file into a list of ints.

        Returns:
            The whitespace-separated integers of the first line, or ``None``
            (after logging a warning) when the file does not exist.
        """
        # open() itself raises FileNotFoundError, so it must sit inside the try
        try:
            with open(path, "r") as f:
                first_line = f.readline()
        except FileNotFoundError:
            logging.warning("Finger Data file not found: {}".format(path))
            return None
        # split() without arguments tolerates repeated/trailing spaces and the
        # newline, which split(' ') would turn into un-parsable '' tokens
        return [int(token) for token in first_line.split()]

    @classmethod
    def _iter_sample_paths(cls):
        """Yield (semantic_file, finger_file) for every complete sample folder."""
        for data_dir in cls.data_dirs:
            # os.walk yields nothing for a missing directory; the default keeps
            # next() from raising StopIteration in that case
            folders = next(os.walk(data_dir), (data_dir, [], []))[1]
            # sorted so the sample order does not depend on the file system
            for folder in sorted(folders):
                semantic_file = os.path.join(data_dir, folder, "{}.semantic".format(folder))
                finger_file = os.path.join(data_dir, folder, "{}.finger".format(folder))
                if os.path.exists(semantic_file) and os.path.exists(finger_file):
                    yield semantic_file, finger_file

    def get_data(self, n=-1, pad = 50):
        """Build the feature and label arrays.

        Args:
            n: stop once this many samples were collected; -1 means no limit.
            pad: fixed sequence length. Shorter samples are padded with
                ``[-1, 0]`` (features) and ``0`` (labels); longer samples are
                truncated so that the result is always rectangular.

        Returns:
            ``(X, y)`` as numpy arrays built from the per-sample lists: X holds
            ``pad`` feature pairs per sample, y holds ``pad`` labels per sample.
            Both are empty when no sample is found.
        """
        X, y = [], []
        for semantic_file, finger_file in self._iter_sample_paths():
            X_i = Semantic.get_embedding_from_file(semantic_file)
            y_i = ETL.parse_finger_file(finger_file)
            if X_i is None or y_i is None:
                # file disappeared between the existence check and the read
                continue

            # pad (or truncate) sequences to exactly `pad` steps
            X.append(X_i[:pad] + [[-1, 0] for _ in range(pad - len(X_i))])
            y.append(y_i[:pad] + [0 for _ in range(pad - len(y_i))])

            if n != -1 and len(X) >= n:
                break

        return np.array(X), np.array(y)
                

Main Workflow

In [251]:
# Directory and pickle file in which the arrays returned by ETL.get_data are cached.
store_path = './cache'
store_file_path = './cache/etl_cache.pkl'

# Fraction of the samples passed to train_test_split as the test set size.
test_proportion = 0.1

# getLogger() without a name returns the root logger; DEBUG lets every message
# through, including those of other libraries that log via the root logger.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [None]:
# Parse every sample found under ETL.data_dirs (no sample limit is passed),
# padding each sequence to the length the model is configured for.
etl = ETL()
# X.shape = (samples, length, features)
# y.shape = (samples, length)
X, y = etl.get_data(pad = ModelConfig.max_sequence_length)

In [183]:
# Cache the parsed arrays on disk.
# exist_ok=True creates the directory only when it is missing, without a
# separate existence check that another process could race with.
os.makedirs(store_path, exist_ok=True)
# X and y are written back-to-back into the same file, so they have to be read
# back with two pickle.load calls in this order. Write-only mode is enough here.
with open(store_file_path, 'wb') as f:
    pickle.dump(X, f)
    pickle.dump(y, f)

In [262]:
# Reload the cached arrays in the order they were dumped (X first, then y).
# The context manager closes the file handle, which a bare open() left dangling.
# NOTE: pickle.load can execute arbitrary code; only load cache files that this
# notebook wrote itself.
with open(store_file_path, 'rb') as f:
    X = pickle.load(f).astype(np.float32)
    y = pickle.load(f)
# One-hot encode the finger labels. No num_classes is passed, so the width of
# the encoding is derived from the labels present in y.
y = to_categorical(y)

print(X.shape, y.shape)

(23929, 50, 2) (23929, 50, 6)


In [263]:
# Hold out test_proportion of the samples for the final evaluation; the fixed
# random_state keeps the split identical between runs on the same X and y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_proportion, random_state=42)
print(X_train.shape, y_train.shape)

(21536, 50, 2) (21536, 50, 6)


In [264]:
# Sequence tagger: a bidirectional LSTM reads the 2 features of every time step
# and a per-time-step softmax picks one of 6 label classes.
model = Sequential()
# input_shape belongs on the Bidirectional wrapper (the layer that is actually
# added to the model). Given to the inner LSTM it is not seen by Sequential,
# which leaves the model unbuilt until the first call to fit().
model.add(Bidirectional(LSTM(ModelConfig.lstm_units, return_sequences=True),
                        input_shape=(ModelConfig.max_sequence_length, 2)))
# return_sequences=True above keeps one output per time step for this layer.
model.add(TimeDistributed(Dense(6, activation='softmax')))


In [280]:
# Attach the optimizer, loss and metrics defined in ModelConfig.
# NOTE(review): ModelConfig.optimizer is a single Adam instance created when the
# class body runs — confirm it is not shared with another compiled model.
model.compile(optimizer=ModelConfig.optimizer,
             loss=ModelConfig.loss_function,
             metrics=ModelConfig.metrics)

In [276]:
# Train on the training split only; fit() is asked to set aside
# ModelConfig.validation_split of these arrays for validation.
# NOTE(review): no mask or sample weights are passed, so the padded time steps
# are trained on like real notes — confirm this is intended.
history = model.fit(X_train,
                   y_train,
                   batch_size=ModelConfig.batch_size,
                   epochs=ModelConfig.epochs,
                   verbose=ModelConfig.verbosity,
                   validation_split=ModelConfig.validation_split,)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [281]:
# Score the trained model on the held-out test split.
test_results = model.evaluate(X_test, y_test)



In [282]:
# Entry 1 is presumably the 'accuracy' metric from ModelConfig.metrics (entry 0
# being the loss) — verify against the evaluate() return order.
# NOTE(review): sequences are padded with label 0 and no mask is applied, so
# this figure also counts the padded time steps.
print(test_results[1])

0.9507814645767212
