In [1]:
import numpy as np
import keras
from keras import callbacks
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from matplotlib import pyplot as plt
from IPython.display import clear_output
from sklearn.metrics import confusion_matrix
import gc

Using TensorFlow backend.


In [2]:
# Explicitly switch on automatic garbage collection; gc.collect() is also
# called by hand further down while the CSV database is being loaded.
# NOTE(review): the collector is normally enabled by default, so this call is
# presumably just a safeguard -- confirm it is still needed.
gc.enable()

In [3]:
def parse_row(csv_row):  # same as generate_database.ipynb
    """Split one CSV row into its filename, onset times and note indices.

    The row layout is ``filename,time,note,time,note,...``: the first field is
    the filename and the remaining fields alternate between a time (parsed as
    ``float``) and a note (parsed as ``int``).

    Returns:
        ``(filename, time_array, note_array)`` where the two lists have the
        same length.

    Raises:
        ValueError: if a time or note field cannot be converted.
        AssertionError: if the row holds a time without a matching note.
    """
    fields = csv_row.split(",")
    filename, values = fields[0], fields[1:]

    # Even positions after the filename are times, odd positions are notes.
    time_array = list(map(float, values[0::2]))
    note_array = list(map(int, values[1::2]))

    assert len(time_array) == len(note_array)

    return filename, time_array, note_array

In [4]:
def parse_csv(csv_file):
    """Parse every non-blank row of a notes CSV file with ``parse_row``.

    Blank (empty or whitespace-only) lines are skipped. Previously a file that
    ended with a newline produced an extra ``("", [], [])`` entry, which would
    later be treated as a real file named ``""``.

    Args:
        csv_file: path of the CSV file to read.

    Returns:
        ``(filenames, time_arrays, note_arrays)``: three parallel lists with
        one entry per parsed row.

    Raises:
        OSError: if the file cannot be opened.
        Whatever ``parse_row`` raises for a malformed row.
    """
    filenames = []
    time_arrays = []
    note_arrays = []

    with open(csv_file, 'r') as csvfile:
        # Stream the file line by line instead of reading it all at once.
        for line in csvfile:
            csv_row = line.rstrip("\n")
            if not csv_row.strip():
                continue  # skip blank lines, including the one after a final "\n"

            filename, time_array, note_array = parse_row(csv_row)

            filenames.append(filename)
            time_arrays.append(time_array)
            note_arrays.append(note_array)

    return filenames, time_arrays, note_arrays

In [5]:
# Load the note annotations from notes.csv. The three lists are parallel:
# entry k of each one comes from the same CSV row.
filenames,time_arrays,note_arrays = parse_csv('notes.csv')

In [7]:
# Build one training example per file: a square spectrogram crop taken at a
# random start frame, plus N_WINDOWS multi-hot note vectors for the label
# windows that precede the crop's reference point.
#
# NOTE: no random seed is set here, so the sampled start frames differ
# between runs.

SPEC_SIZE = 108             # the crop is SPEC_SIZE x SPEC_SIZE
WINDOW_FRAMES = 27          # length of one label window, in spectrogram frames
LABEL_END_OFFSET = 54 - 27  # frames after start_pt at which the newest window ends
N_WINDOWS = 10              # label windows per example (10 LSTM layers)
N_NOTES = 24                # length of each multi-hot note vector
HOP_LENGTH = 2**7           # 2**7 is hop length
SAMPLE_RATE = 44100         # 44100 is sampling freq
BEATS_PER_SECOND = 2        # 2 as bpm is 120 default
START_MIN, START_MAX = 290, 3446-200  # range the random start frame is drawn from

# Currently every file is used. NOTE(review): the original comment said
# "select at random"; if a subset is ever selected, time_arrays/note_arrays
# must be subset the same way, because they are indexed by position below.
filenames_to_train = filenames

# Preallocate the outputs instead of growing them with np.append, which
# copies the whole array on every call.
n_examples = len(filenames_to_train)
spec_array_to_train = np.zeros((n_examples, SPEC_SIZE, SPEC_SIZE))
notes_arrays_to_train = np.zeros((n_examples, N_WINDOWS, N_NOTES))

for i,filename in enumerate(filenames_to_train):
    start_pt = np.random.randint(START_MIN, START_MAX)

    # A note counts as detected if its time falls in (time_start, time_end].
    # Frame indices are converted to the time unit of the CSV by
    # frames * hop_length / sampling_freq, scaled by BEATS_PER_SECOND.
    time_end = BEATS_PER_SECOND*(start_pt+LABEL_END_OFFSET)*HOP_LENGTH/SAMPLE_RATE
    interval = BEATS_PER_SECOND*WINDOW_FRAMES*HOP_LENGTH/SAMPLE_RATE

    # This is how it overlaps
    # ====
    #   ====
    #     ====
    # NOTE(review): the label windows computed below are back-to-back (each
    # one ends where the next begins); the sketch presumably refers to
    # something else, e.g. the spectrogram crops -- confirm.

    # Note that the order is reversed: window 0 is the latest one and each
    # following window lies immediately before it.
    for window in range(N_WINDOWS):
        time_start = time_end - interval

        for t, note in zip(time_arrays[i], note_arrays[i]):
            if t > time_end:
                break  # stops at the first later time, so times must be ascending
            if t > time_start:
                notes_arrays_to_train[i, window, note] = 1

        time_end = time_start

    spectrogram = np.load("./spectrograms/{}.npy".format(filename))[:,start_pt:start_pt+SPEC_SIZE]
    if spectrogram.shape != (SPEC_SIZE, SPEC_SIZE):
        # np.append used to fail on a mismatching crop; keep that failure
        # explicit rather than letting the assignment below broadcast.
        raise ValueError(
            "spectrogram crop for {!r} has shape {}, expected {}".format(
                filename, spectrogram.shape, (SPEC_SIZE, SPEC_SIZE)))
    # Stored as float64, as before. NOTE(review): assumes the saved
    # spectrograms are real-valued -- confirm.
    spec_array_to_train[i] = spectrogram

print(np.shape(spec_array_to_train))
print(np.shape(notes_arrays_to_train))

[]
[]
[]
[]
[]
[]
[19, 8, 20, 13]
[]
[]
[]
[]
[]
[]
[13, 9, 8]
[]
[]
[]
[]
[]
[2, 18, 0]
[]
[13, 12, 20]
[]
[]
[]
[]
[1, 5]
[]
[]
[]
[7, 12, 23, 19]
[]
[]
[]
[5, 7, 9, 19]
[]
[]
[]
[16, 5, 23]
[]
[17, 2, 9, 7]
[]
[]
[]
[]
[]
[]
[21, 8]
[]
[]
(5, 108, 108)
(5, 10, 24)


In [None]:
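# Legacy code below, kept from an earlier version of the pipeline.
# These cells have no execution count (they were not run in this session) and
# use names such as `path`, `img_row` and `img_col` that are not defined in
# any of the cells above.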

In [None]:
import random
import pandas as pd

N_FILES = 516            # database_0.csv ... database_515.csv
ROWS_PER_FILE = 100      # row indices 0..99 are candidates in every file
N_TRAIN_PER_FILE = 50    # sampled rows per file that go to the training set
N_TEST_PER_FILE = 10     # sampled rows per file that go to the test set
FEATURE_END = 11665      # columns 1..11664 hold the features (11664 = 108 * 108)


def _row_to_features(sample):
    """Convert the feature columns of one database row to a list of floats.

    Column 0 (the sample name) is skipped. Strings are parsed with ``float``
    and floats are kept; a value of any other type is not added to the
    result -- only its type is printed, as before.
    """
    features = []
    for value in sample[1:FEATURE_END]:
        if isinstance(value, (str, float)):
            features.append(float(value))
        else:
            print(type(value))
    return features


x_train_ori = []
y_train_ori = []
x_test_ori = []
y_test_ori = []

# NOTE(review): `path` must already be defined when this cell runs; it is not
# set in any cell visible in this notebook.
for i in range(N_FILES):
    print(i)
    file = pd.read_csv(path + "database_{}.csv".format(i), dtype=str)
    array = list(file.values)

    # Draw 60 distinct rows per file: the first 50 are used for training and
    # the remaining 10 for testing.
    train_test = random.sample(range(ROWS_PER_FILE), N_TRAIN_PER_FILE + N_TEST_PER_FILE)

    for j in train_test[:N_TRAIN_PER_FILE]:
        sample = list(array[j])
        x_train_ori.append(_row_to_features(sample))
        y_train_ori.append(sample[0])  # column 0 is the sample name (label source)

    for j in train_test[N_TRAIN_PER_FILE:]:
        sample = list(array[j])
        x_test_ori.append(_row_to_features(sample))
        y_test_ori.append(sample[0])

    # Release the per-file objects before the next file is read.
    gc.collect()

In [None]:
# Print the shape of every split: train/test features first, then train/test labels.
for split in (x_train_ori, x_test_ori, y_train_ori, y_test_ori):
    print(np.shape(split))

In [None]:
def y_convert(array, flag=0):
    """Turn sample names into 24-element multi-hot note vectors.

    Each name is split on ``"_"`` and fields 1 to 4 are read as note slots.
    A slot equal to ``"x"`` is ignored; any other slot is parsed as an integer
    note number, and note ``n`` sets position ``n - 60`` of the vector to 1
    (so valid note numbers are 60..83).

    Args:
        array: iterable of sample-name strings.
        flag: only ``0`` is implemented; for any other value nothing is
            appended and an empty list is returned.

    Returns:
        A list with one entry per name, each a list of 24 numpy integers
        (0 or 1).

    Raises:
        ValueError: if a slot is neither ``"x"`` nor an integer.
        IndexError: if a note number is outside 60..83. Numbers below 60 used
            to wrap around silently through negative indexing and mark the
            wrong note.
    """
    y = []
    for sample_name in array:
        slots = sample_name.split("_")[1:5]
        if flag == 0:
            # category of appearance of a note
            category = np.zeros(24, dtype=int)
            for slot in slots:
                if slot != "x":
                    index = int(slot) - 60
                    if not 0 <= index < 24:
                        raise IndexError(
                            "note {} in {!r} is outside the supported range 60..83".format(
                                slot, sample_name))
                    category[index] = 1
            y.append(list(category))
    return y

In [None]:
# Remove every whitespace character from the raw sample names, then turn the
# cleaned names into multi-hot label vectors.
# NOTE: this cell rebinds y_train_ori / y_test_ori, so it cannot be run twice
# in a row (the second run would see lists of vectors, not strings).
lst_1 = ["".join(raw_name.split()) for raw_name in y_train_ori]
lst_2 = ["".join(raw_name.split()) for raw_name in y_test_ori]

y_train_ori = y_convert(lst_1)
y_test_ori = y_convert(lst_2)

In [None]:
# Convert the label and feature lists into numpy arrays, rebinding the same
# names one statement at a time.
y_train_ori = np.array(y_train_ori)
y_test_ori = np.array(y_test_ori)
x_train_ori = np.array(x_train_ori)
x_test_ori = np.array(x_test_ori)

In [None]:
# Height and width of one sample image. 108 * 108 = 11664 is the number of
# feature columns read per row, and (108, 108, 1) is the input_shape the
# model is built with. These names were not defined in any visible cell.
img_row, img_col = 108, 108

# Reshape to (samples, rows, cols, channels) with a single channel.
# -1 lets numpy infer the number of samples instead of hard-coding
# 25800 / 5160, and reshaping directly avoids the extra full copy that
# flatten() made.
x_train_ori = x_train_ori.reshape(-1, img_row, img_col, 1)
x_test_ori = x_test_ori.reshape(-1, img_row, img_col, 1)

In [None]:
# Show the shape of the reshaped training inputs.
print(x_train_ori.shape)
# Shape of one input sample; build_model_E reads this global for its first
# Conv2D layer.
input_shape = (108, 108, 1)

In [None]:
def _training_callbacks_E():
    """Return the early-stopping and checkpoint callbacks set up for model E.

    build_model_E used to create these as local variables that were never
    returned or used. They are kept here so they can be passed explicitly,
    e.g. ``model.fit(..., callbacks=_training_callbacks_E())``.
    """
    # Stop when val_loss has not improved for 3 consecutive epochs.
    early_stop = callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=3, verbose=1, mode="min")

    # Save the weights whenever val_loss reaches a new minimum.
    filepath = "Best-weights-my_model-{epoch:03d}-{loss:4f}-{acc:4f}.hdf5"
    checkpoint = callbacks.ModelCheckpoint(filepath, monitor="val_loss", verbose=1, save_best_only=True, mode="min")

    return [early_stop, checkpoint]


def build_model_E():
    """Build and compile the CNN that predicts a 24-element note vector.

    Architecture: five blocks of Conv2D (3x3, "same" padding, ReLU) ->
    BatchNormalization -> 2x2 max pooling with 32, 32, 64, 64 and 64 filters,
    then Dropout(0.3), two dense layers (64 and 256 units) with batch
    normalization, Dropout(0.2) and a 24-unit sigmoid output.

    The input shape is read from the global ``input_shape``.

    Returns:
        The compiled ``Sequential`` model (mean squared error loss, Adam
        optimizer, accuracy metric).
    """
    # build the model
    model = Sequential()

    # Convolutional blocks. Every Conv2D uses the Keras 2 keyword ``padding``;
    # layers two to five previously used the Keras 1 spelling ``border_mode``.
    for block_index, n_filters in enumerate((32, 32, 64, 64, 64)):
        # Only the first layer of a Sequential model is given the input shape.
        first_layer_kwargs = {"input_shape": input_shape} if block_index == 0 else {}
        model.add(Conv2D(n_filters, (3,3), padding="same", activation="relu", **first_layer_kwargs))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.3))

    # first FC layer
    model.add(Flatten())
    model.add(Dense(64, activation="relu", kernel_initializer="uniform"))
    model.add(BatchNormalization())

    # second FC layer
    model.add(Dense(256, activation="relu", kernel_initializer="uniform"))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    # output layer: one independent sigmoid unit per note
    model.add(Dense(24, activation="sigmoid", kernel_initializer="uniform"))

    # NOTE(review): the loss is mean squared error on sigmoid outputs;
    # confirm this is intended rather than binary cross-entropy.
    model.compile(loss="mean_squared_error", optimizer="adam", metrics=["accuracy"])
    return model

In [None]:
# Build model E and train it, using the test split as validation data.
# `epochs` is the Keras 2 name of this argument (`nb_epoch` was the Keras 1
# spelling). No callbacks are passed, so training always runs all 200 epochs.
classifier_E = build_model_E()
hist_E = classifier_E.fit(x_train_ori, y_train_ori, batch_size=32, epochs=200, verbose=1,
                          validation_data=(x_test_ori, y_test_ori))