In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Bidirectional, Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt

In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Collect every file under /kaggle/input, grouped per directory: `files` gets
# one list of full paths for each directory that contains at least one file.
# NOTE: `dirname`, `filenames` and `filename` stay defined in the notebook
# namespace after this loop finishes.
files = []
for dirname, _, filenames in os.walk('/kaggle/input'):
    if filenames:
        paths = []
        for filename in filenames:
            paths.append(os.path.join(dirname, filename))
        files.append(paths)

# Uncomment to list every discovered path:
# for fs in files:
#     for f in fs:
#         print(f)

# Directory prefixes; the loaders concatenate them directly with a file name,
# so the trailing slash is required.
chroma_path = '/kaggle/input/mir-chroma/'
chord_path = '/kaggle/input/mir-label/'
# Sorted base names of the files in the second non-empty directory found above.
# NOTE(review): os.walk does not promise a directory order, so which dataset
# ends up in files[1] is an assumption -- confirm on the target environment.
fs = sorted([name.split('/')[-1] for name in files[1]])
# Sanity check: prints True only when 72 names were found and the first two
# non-empty directories hold the same number of files.
print('Successful Import: ', len(fs) == 72 and len(files[0]) == len(files[1]))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [33]:
# Train/test separation
# The training list is derived from the sorted piece list `fs`. Relying on the
# `filenames` variable left over from the os.walk loop would make the split
# depend on whichever directory happened to be visited last.
tests = ["Chopin_F._Nocturne_in_F_Minor_Op.55_No.1.csv",
         "Chopin_F._Prelude_in_D-Flat_Major_Op.28_No.15.csv",
         "Beethoven_L.V._Sonata_in_A-Flat_Major_(Op.110_No.31)_2nd_Movement.csv",
         "anonymous_Twinkle_Twinkle.csv",
         "Chopin_F._Etude_in_F_Minor_Op.10_No.9.csv",
         "Mendelsshon_F._Songs_Without_Words_(Op._19_No._6).csv",
         "Schubert_D911-16.csv",
         "Schubert_D911-24.csv"]

# Fail early, and by name, when a held-out piece is not part of the dataset
# (the previous list.remove() call raised ValueError without saying which one).
missing = [name for name in tests if name not in fs]
if missing:
    raise ValueError(f'Held-out pieces not found in the dataset: {missing}')

# Everything that is not held out is used for training; re-running this cell
# always produces the same split.
held_out = set(tests)
trains = [name for name in fs if name not in held_out]

print(f'#{len(fs)} files')
print(f'#{len(trains)} train files')
print(f'#{len(tests)}  test files')

In [72]:
def fraction(s):
    """Convert a 'numerator/denominator' string into a float.

    Raises ValueError when `s` does not split into exactly two numeric parts
    around '/', and ZeroDivisionError when the denominator is zero.
    """
    top, bottom = map(float, s.split('/'))
    return top / bottom

# Build training windows for one piece by exact offset matching
def build_train_ts(f):
    """Pair chroma windows with chord labels wherever the two offsets coincide.

    Reads the chroma table and the label table named `f` (from `chroma_path`
    and `chord_path`), drops the chroma 'total' column, and walks both tables
    in parallel using their first column as the offset. Whenever the offsets
    are equal, the next `timesteps` chroma rows (offset column excluded) form
    one float32 window and the label row `timesteps - 1` positions ahead
    (offset column excluded) is its float32 target.

    Returns a pair of Python lists: (windows, targets).
    """
    def as_number(raw):
        # Offsets written as 'a/b' go through fraction(); anything else is
        # handed to float() unchanged.
        if type(raw) == str and '/' in raw:
            return fraction(raw)
        return float(raw)

    chroma = pd.read_csv(chroma_path + f)
    chords = pd.read_csv(chord_path + f)
    chroma = chroma.drop(['total'], axis=1).to_numpy()
    chords = chords.to_numpy()

    windows, targets = [], []
    row, lab = 0, 0
    # Stop as soon as either table can no longer supply a full window.
    while row < chroma.shape[0] - timesteps + 1 and lab < chords.shape[0] - timesteps + 1:
        chroma_offset = as_number(chroma[row][0])
        chord_offset = as_number(chords[lab][0])

        # Advance whichever side is behind until the offsets line up.
        if chroma_offset < chord_offset:
            row += 1
            continue
        if chroma_offset > chord_offset:
            lab += 1
            continue

        window = np.asarray([chroma[k][1:] for k in range(row, row + timesteps)])
        windows.append(window.astype(np.float32))
        targets.append(np.asarray(chords[lab + timesteps - 1, 1:]).astype(np.float32))
        row += 1
        lab += 1

    return windows, targets

def _offset_as_float(value):
    """Return an offset cell as a float, accepting 'a/b' fraction strings."""
    if isinstance(value, str) and '/' in value:
        numerator, denominator = value.split('/')
        return float(numerator) / float(denominator)
    return float(value)


def data_clean(f):
    """Align the chroma and label tables of one piece and L2-normalise chroma.

    Reads the chroma CSV and the label CSV named `f` (from `chroma_path` and
    `chord_path`), drops the chroma 'total' column and treats the first column
    of each table as the offset. Both tables are walked in parallel: every
    step emits the current chroma row (unit-normalised unless it is all zeros)
    together with the current label row, then advances whichever table has the
    smaller offset, or both when the offsets are equal.

    Offsets are converted to floats before they are compared, so fraction
    strings such as '1/3' order numerically, as in build_train_ts.

    NOTE(review): the loop stops at len - 1 on both tables, so the last row of
    each table is never emitted; confirm whether that is intended.

    Returns:
        (x, y): two numpy arrays with one float32 row per emitted step.
    """
    x_train, y_train = [], []

    train = pd.read_csv(chroma_path + f)
    label = pd.read_csv(chord_path + f)
    train = train.drop(['total'], axis=1)

    train = train.to_numpy()
    label = label.to_numpy()

    xi, yi = 0, 0
    while xi < len(train) - 1 and yi < len(label) - 1:
        chroma = np.asarray(train[xi][1:]).astype(np.float32)
        norm = np.linalg.norm(chroma)
        if norm > 0:
            # Reuse the norm computed above; all-zero frames stay untouched.
            chroma = chroma / norm

        x_train.append(chroma)
        y_train.append(np.asarray(label[yi][1:]).astype(np.float32))

        # Compare numeric offsets, never the raw (possibly string) cells.
        xo = _offset_as_float(train[xi][0])
        yo = _offset_as_float(label[yi][0])

        if xo < yo:
            xi += 1
        elif xo > yo:
            yi += 1
        else:
            xi += 1
            yi += 1

    return np.array(x_train), np.array(y_train)

def build_train(f):
    """Cut the cleaned data of piece `f` into sliding windows.

    Every run of `timesteps` consecutive rows returned by data_clean(f)
    becomes one sample; its target is the label row aligned with the last row
    of the window. `timesteps` is read from the notebook namespace.

    Returns:
        (x, y): numpy arrays holding the stacked windows and their targets.
    """
    frames, chords = data_clean(f)
    starts = range(frames.shape[0] - timesteps + 1)

    windows = [
        np.array([frames[k] for k in range(start, start + timesteps)])
        for start in starts
    ]
    targets = [chords[start + timesteps - 1] for start in starts]

    return np.array(windows), np.array(targets)

# Helper that stacks the windows of several pieces into one dataset
def build_all_train(fs):
    """Run build_train on every name in `fs` and concatenate the results.

    Samples keep the order of `fs`, and within each piece the order returned
    by build_train.

    Returns:
        (x, y): numpy arrays with all windows and all targets.
    """
    per_piece = [build_train(name) for name in fs]
    windows = [window for piece_x, _ in per_piece for window in piece_x]
    targets = [target for _, piece_y in per_piece for target in piece_y]
    return np.array(windows), np.array(targets)

def train_generator(fs):
    """Lazily yield build_train(name) for each name in `fs`, in order."""
    yield from (build_train(name) for name in fs)

In [73]:
# Quick sanity check: clean only the first piece in `fs` and show the shapes
# of its aligned chroma / label arrays.
for f in fs:
    x, y = data_clean(f)
    print(x.shape, y.shape)
    break

# x, y = build_train(trains[0])

# Window every piece and stack the results.
# NOTE: build_all_train reads the global `timesteps`, which this notebook only
# assigns in the model cells further down; on a fresh kernel one of those
# cells has to run before this one.
x, y = build_all_train(fs)
print(x.shape, y.shape)
# NOTE(review): an earlier note expected (72 pieces, sequence length, 12
# features) here. build_all_train stacks per-window samples, so the leading
# axis counts windows across all pieces and the second axis is `timesteps`.
# shape should be in (# of batches: 72, music sequence length**, # of features: 12)

In [184]:
# Shapes of the stacked training windows and targets (rebinds the globals x, y).
x, y = build_all_train(trains)
print(x.shape, y.shape)

In [183]:
# Shapes of the stacked held-out windows and targets (rebinds the globals x, y).
x, y = build_all_train(tests)
print(x.shape, y.shape)

In [None]:
# First model variant: one 24-unit LSTM over windows of `timesteps` frames
# with 12 features each, followed by a 12-unit sigmoid output layer.
# NOTE: the following cell clears the session again and rebuilds `model` with
# timesteps = 5, so running both cells leaves only the later model.
keras.backend.clear_session()
timesteps = 3  # window length; also read as a global by the build_train helpers

model = Sequential()
# Disabled alternative: wider first layer that returns the full sequence.
# model.add(LSTM(128, activation='relu', input_shape=(timesteps, 12), return_sequences=True))
model.add(LSTM(24, input_shape=(timesteps, 12)))
# model.add(Dropout(0.2))

# Disabled alternative: second stacked LSTM layer.
# model.add(LSTM(64, activation='relu'))
# model.add(Dropout(0.2))

model.add(Dense(12, activation='sigmoid'))

# Mean-squared-error loss with Adam; accuracy is tracked as an extra metric.
model.compile(loss="mse", optimizer='adam', metrics= ['accuracy'])
# model.summary()

In [176]:
# Model used for training: LSTM(24, relu) -> Dropout(0.2) -> Dense(12, sigmoid)
# over windows of `timesteps` frames with 12 features each.
keras.backend.clear_session()
timesteps = 5

model = Sequential([
    LSTM(24, activation='relu', input_shape=(timesteps, 12)),
    Dropout(0.2),
    Dense(12, activation='sigmoid'),
])
model.compile(optimizer='adam', loss="mse")
model.summary()

In [177]:
# Build the training and held-out sets with the current global `timesteps`.
x_train, y_train = build_all_train(trains)
x_test, y_test = build_all_train(tests)

# Disabled checkpointing experiment.
# NOTE(review): it monitors 'val_accuracy', which is presumably only reported
# when the model was compiled with the accuracy metric -- confirm before
# re-enabling.
# checkpoint = ModelCheckpoint("NS_best", monitor='val_accuracy', verbose=1, save_best_only=True, mode='auto')
# history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), callbacks=[checkpoint])
# NOTE: the held-out test pieces are passed as validation data, so val_loss is
# measured on the same data the evaluation cells use later.
history = model.fit(x_train, y_train, epochs=50, validation_data=(x_test, y_test))

In [182]:
# Training vs. validation loss. Keras stores one value per epoch in
# history.history, so the x axis is labelled in epochs (it previously said
# 'Iterations').
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Train')
ax.plot(history.history['val_loss'], label='Validation')
ax.set_title('Train loss vs Validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend(loc='upper right')
fig.savefig('loss_key.png')

In [179]:
# Element-wise accuracy of the binarised predictions at a fixed threshold.
thres = 0.5

# Binarise in one vectorised step: outputs strictly above the threshold become
# 1.0, everything else 0.0. The same strict `>` rule is used for the per-piece
# pass below (it previously used `>=`, so the two reports disagreed on ties).
predict = (model.predict(x_test) > thres).astype(np.float32)

print(np.mean(np.equal(predict, y_test)))

# Same metric, reported separately for every held-out piece.
for test in tests:
    x, y = build_train(test)

    predict = (model.predict(x) > thres).astype(np.float32)

    # test[:-4] strips the '.csv' suffix from the piece name.
    print(test[:-4], '\n', np.mean(np.equal(predict, y)))

In [188]:
# Hit rate on active targets (csr) and on inactive targets (rcsr) at a lower
# threshold. Both helpers are defined in cells further down the notebook, so
# those cells have to run before this one.
thres = 0.25

# Binarise in one vectorised step: outputs strictly above the threshold become
# 1.0, everything else 0.0. The same strict `>` rule is used for the per-piece
# pass below (it previously used `>=`, so the two reports disagreed on ties).
predict = (model.predict(x_test) > thres).astype(np.float32)

print(csr(predict, y_test), rcsr(predict, y_test))

# Same two rates, reported separately for every held-out piece.
for test in tests:
    x, y = build_train(test)

    predict = (model.predict(x) > thres).astype(np.float32)

    print(round(csr(predict, y),4), round(rcsr(predict, y),4))

In [204]:
# Accuracy, precision and recall (the three values returned by tpp) at
# threshold 0.4. tpp is defined in a cell further down the notebook, so that
# cell has to run before this one.
thres = 0.4

# Binarise in one vectorised step: outputs strictly above the threshold become
# 1.0, everything else 0.0. The same strict `>` rule is used for the per-piece
# pass below (it previously used `>=`, so the two reports disagreed on ties).
predict = (model.predict(x_test) > thres).astype(np.float32)

a,b,c =tpp(predict, y_test)
print(round(a,4), round(b,4), round(c,4))

# Same three values, reported separately for every held-out piece.
for test in tests:
    x, y = build_train(test)

    predict = (model.predict(x) > thres).astype(np.float32)

    a,b,c = tpp(predict, y)
    print(round(a,4), round(b,4), round(c,4))

In [124]:
def csr_row(predict, target):
    """Count the active targets of one row and how many of them were predicted.

    Works for rows of any length (the row length used to be hard-coded to 12).

    Args:
        predict: sequence of binarised predictions (0/1).
        target: sequence of ground-truth values (0/1) of the same length.

    Returns:
        (hits, positives): `positives` is the number of entries with
        target == 1, `hits` the number of those where predict == 1 as well.
    """
    hits, positives = 0, 0
    for i in range(len(target)):
        if target[i] == 1:
            positives += 1
            if predict[i] == 1:
                hits += 1
    return hits, positives

def csr(predict, target):
    """Fraction of active targets (target == 1) that were predicted as 1.

    Aggregates the per-row counts from csr_row over all rows.

    Returns:
        hits / positives as a float, or 0.0 when there is no active target at
        all (this case used to raise ZeroDivisionError).
    """
    hits, positives = 0, 0
    for i in range(len(predict)):
        row_hits, row_positives = csr_row(predict[i], target[i])
        hits += row_hits
        positives += row_positives
    if positives == 0:
        return 0.0
    return hits / positives

In [190]:
def rcsr_row(predict, target):
    """Count the inactive targets of one row and how many were predicted as 0.

    Works for rows of any length (the row length used to be hard-coded to 12).

    Args:
        predict: sequence of binarised predictions (0/1).
        target: sequence of ground-truth values (0/1) of the same length.

    Returns:
        (hits, negatives): `negatives` is the number of entries with
        target == 0, `hits` the number of those where predict == 0 as well.
    """
    hits, negatives = 0, 0
    for i in range(len(target)):
        if target[i] == 0:
            negatives += 1
            if predict[i] == 0:
                hits += 1
    return hits, negatives

def rcsr(predict, target):
    """Fraction of inactive targets (target == 0) that were predicted as 0.

    Aggregates the per-row counts from rcsr_row over all rows.

    Returns:
        hits / negatives as a float, or 0.0 when there is no inactive target
        at all (this case used to raise ZeroDivisionError).
    """
    hits, negatives = 0, 0
    for i in range(len(predict)):
        row_hits, row_negatives = rcsr_row(predict[i], target[i])
        hits += row_hits
        negatives += row_negatives
    if negatives == 0:
        return 0.0
    return hits / negatives

In [193]:
def trow(predict, target):
    """Confusion counts for one row of binarised predictions.

    Works for rows of any length (the row length used to be hard-coded to 12).
    Entries whose target or prediction is neither 0 nor 1 are not counted.

    Returns:
        (tp, tn, fp, fn) -- note the order: true negatives come second.
    """
    tp, tn, fp, fn = 0, 0, 0, 0
    for i in range(len(target)):
        if target[i] == 1 and predict[i] == 1:
            tp += 1
        elif target[i] == 1 and predict[i] == 0:
            fn += 1
        elif target[i] == 0 and predict[i] == 1:
            fp += 1
        elif target[i] == 0 and predict[i] == 0:
            tn += 1
    return tp, tn, fp, fn

def tpp(predict, target):
    """Accuracy, precision and recall over all rows of binarised predictions.

    Sums the per-row confusion counts from trow, then derives:
        accuracy  = (tp + tn) / (tp + tn + fp + fn)
        precision = tp / (tp + fp)
        recall    = tp / (tp + fn)

    A ratio whose denominator is zero is reported as 0.0. This happens, for
    example, when a high threshold leaves no positive prediction, and used to
    raise ZeroDivisionError.

    Returns:
        (accuracy, precision, recall) as floats.
    """
    def ratio(numerator, denominator):
        # Guarded division shared by the three metrics.
        return numerator / denominator if denominator else 0.0

    tp, tn, fp, fn = 0, 0, 0, 0
    for i in range(len(predict)):
        row_tp, row_tn, row_fp, row_fn = trow(predict[i], target[i])
        tp += row_tp
        tn += row_tn
        fp += row_fp
        fn += row_fn

    return ratio(tp + tn, tp + tn + fp + fn), ratio(tp, tp + fp), ratio(tp, tp + fn)