In [2]:
# This notebook reads the note index CSV files and the spectrogram arrays, then generates and saves the NumPy arrays that are the inputs to the model.

In [3]:
import numpy as np
import glob

In [4]:
# Directory prefix of the spectrogram .npy files; prepare_inputs reads this name.
spectrums = 'E:/notes_database/spectrograms_1/'
# glob returns matches in a file-system-dependent order, so sort them to make
# the grouping of index files into saved chunks reproducible between runs.
files = sorted(glob.glob('E:/notes_database/index/*'))

In [5]:
def parse_row(csv_row):  # same as generate_database.ipynb
    """Split one index row of the form ``name,t0,n0,t1,n1,...`` into its parts.

    Returns ``(filename, time_array, note_array)`` where ``filename`` is the
    first comma-separated field, ``time_array`` holds the odd-positioned
    fields converted to ``float`` and ``note_array`` the even-positioned
    fields converted to ``int``.

    Raises ``ValueError`` if a field cannot be converted and
    ``AssertionError`` if the row has more times than notes.
    """
    fields = csv_row.split(",")
    filename, events = fields[0], fields[1:]

    # After the filename the fields alternate: time, note, time, note, ...
    time_array = list(map(float, events[0::2]))
    note_array = list(map(int, events[1::2]))

    # Every time must be paired with exactly one note.
    assert len(time_array) == len(note_array)

    return filename, time_array, note_array

In [6]:
def parse_csv(csv_file):
    """Read an index csv and parse every non-blank row with ``parse_row``.

    Parameters
    ----------
    csv_file : str
        Path of the csv file to read.

    Returns
    -------
    tuple of list
        ``(filenames, time_arrays, note_arrays)``: three parallel Python
        lists with one entry per parsed row (plain lists, not NumPy arrays).
    """
    with open(csv_file, 'r') as csvfile:
        csv_rows = csvfile.read().split("\n")

    filenames = []
    time_arrays = []
    note_arrays = []

    for csv_row in csv_rows:
        # A trailing newline (or any blank line) produces an empty row; it
        # would parse to an empty filename with no events, so skip it.
        if not csv_row.strip():
            continue

        filename, time_array, note_array = parse_row(csv_row)

        filenames.append(filename)
        time_arrays.append(time_array)
        note_arrays.append(note_array)

    return filenames, time_arrays, note_arrays

In [7]:
def prepare_inputs(csv_name, spectrum_dir=None):
    """Build the LSTM inputs, CNN inputs and targets for one index csv.

    Each csv row names a spectrogram file and lists (time, note) events.
    Consecutive events with the same time form one onset group.  When an
    event with a new time is reached, one sample is emitted for the group
    that just ended:

    * target: a 24-element multi-hot row of the group's note indices;
    * CNN input: the spectrogram columns from 20 before to 88 after the
      onset column (108 columns);
    * LSTM input: a (5, 24) array filled with the targets of earlier onset
      groups of the same file, bucketed into 5 steps of 81 columns.

    A time value is converted to a spectrogram column by dividing by 0.006
    (per the original author: one column is about 0.003 s and one time unit
    is 0.5 s).

    Parameters
    ----------
    csv_name : str
        Path of the index csv, read through ``parse_csv``.
    spectrum_dir : str, optional
        Directory prefix of the ``.npy`` spectrograms.  Defaults to the
        module-level ``spectrums`` value at call time.

    Returns
    -------
    tuple of numpy.ndarray
        ``(lstm_all_inputs, cnn_all_inputs, y_all_train)`` with shapes
        ``(n, 5, 24)``, ``(n, 108, 108, 1)`` and ``(n, 24)``.

    Notes
    -----
    A sample whose CNN window is not exactly 108 x 108 (window starting
    before column 0 or running past the end of the spectrogram) is left out
    of all three outputs.
    """
    n_notes = 24         # length of one multi-hot target row
    n_steps = 5          # rows of one LSTM input
    step_width = 81      # spectrogram columns covered by one LSTM step
    pre_onset = 20       # columns kept before the onset in the CNN window
    post_onset = 88      # columns kept after the onset in the CNN window
    unit = 0.006         # time_array span of one spectrogram column
    history_span = 2.43  # 5 steps * 81 columns * 0.006
    cnn_shape = (108, 108)

    if spectrum_dir is None:
        spectrum_dir = spectrums

    filenames, time, note = parse_csv(csv_name)
    lstm_all_inputs = []
    cnn_all_inputs = []
    y_all_train = []

    for file in range(len(filenames)):
        times = time[file]
        notes = note[file]
        if not times:
            # Row without events (e.g. a blank csv line): nothing to emit.
            continue

        spectrum = np.load(spectrum_dir + filenames[file] + '.npy')
        previous_time = [times[0]]
        y_train = []
        # Start every file from an empty row so notes of the previous file's
        # last onset group cannot leak into this file's first target.
        one_row = np.zeros(n_notes)

        for i in range(len(times)):
            if times[i] == previous_time[-1]:
                # Same onset: add the note to the group being collected.
                one_row[notes[i]] = 1
                continue

            onset = previous_time[-1]

            # First earlier onset that is still inside the LSTM history span.
            # The last element always qualifies, so checkpoint is always set.
            for num in range(len(previous_time)):
                if onset - previous_time[num] < history_span:
                    checkpoint = num
                    break

            start_time = onset - (step_width * n_steps + pre_onset) * unit
            if start_time < 0:
                start_time = 0

            lstm_input = np.zeros((n_steps, n_notes))
            for num in range(checkpoint, len(y_train)):
                position = int(((previous_time[num] - start_time) // unit) // step_width)
                # An earlier onset closer than pre_onset columns maps to
                # step 5, which does not exist; ignore it instead of crashing.
                if 0 <= position < n_steps:
                    lstm_input[position] = y_train[num]

            # The per-file history is indexed in parallel with previous_time,
            # so it is extended even when the sample itself is rejected below.
            y_train.append(one_row)

            onset_column = int(onset // unit)
            left = onset_column - pre_onset
            right = onset_column + post_onset
            # A negative start would wrap around to the end of the array, so
            # reject it explicitly; then check every window, not just the last.
            if left >= 0:
                cnn_input = spectrum[:, left:right]
                if cnn_input.shape == cnn_shape:
                    lstm_all_inputs.append(lstm_input)
                    cnn_all_inputs.append(cnn_input)
                    y_all_train.append(one_row)

            # Open the next onset group with the current event.
            one_row = np.zeros(n_notes)
            one_row[notes[i]] = 1
            previous_time.append(times[i])

        # NOTE(review): the final onset group of each file is never emitted,
        # because samples are only produced when a later onset is seen.
        # Kept as in the original; confirm whether that is intended.

    n_samples = len(cnn_all_inputs)
    # Explicit reshapes keep the trailing dimensions even when n_samples is 0.
    lstm_all_inputs = np.array(lstm_all_inputs).reshape(n_samples, n_steps, n_notes)
    cnn_all_inputs = np.array(cnn_all_inputs).reshape(n_samples, 108, 108, 1)
    y_all_train = np.array(y_all_train).reshape(n_samples, n_notes)
    return lstm_all_inputs, cnn_all_inputs, y_all_train

In [8]:
# Root output directory; the export cells below write their .npy chunks into sub-folders of it.
save = 'E:/notes_database/'

In [9]:
# Export the first spectrogram set: the samples of every `files_per_chunk`
# index csv files are merged and written as one numbered .npy chunk per
# output kind (LSTM inputs, CNN inputs, targets), stored as float16.
files_per_chunk = 5

if not files:
    # Fail loudly instead of silently writing nothing.
    raise FileNotFoundError('no index csv files were found')

# Uniform chunks of `files_per_chunk` files. The previous counter started at
# 0 with one file already loaded, so chunk 0 held one file more than the rest.
for count, first in enumerate(range(0, len(files), files_per_chunk)):
    chunk_parts = [prepare_inputs(csv_name)
                   for csv_name in files[first:first + files_per_chunk]]
    lstm_parts, cnn_parts, y_parts = zip(*chunk_parts)

    # Concatenate once per chunk instead of re-copying the growing arrays
    # with np.append after every file.
    lstm_save = np.concatenate(lstm_parts, axis=0)
    cnn_save = np.concatenate(cnn_parts, axis=0)
    y_train_save = np.concatenate(y_parts, axis=0)

    np.save(save+'lstm_inputs_1/{}.npy'.format(count), lstm_save.astype(np.float16))
    np.save(save+'cnn_inputs_1/{}.npy'.format(count), cnn_save.astype(np.float16))
    np.save(save+'y_train_1/{}.npy'.format(count), y_train_save.astype(np.float16))

In [10]:
# Switch the module-level spectrogram directory to the second spectrogram set.
# prepare_inputs reads this global, so this cell must run before the export cell that follows.
spectrums = 'E:/notes_database/spectrograms_2/'

In [11]:
# Export the second spectrogram set: the samples of every `files_per_chunk`
# index csv files are merged and written as one numbered .npy chunk per
# output kind (LSTM inputs, CNN inputs, targets), stored as float16.
files_per_chunk = 5

if not files:
    # Fail loudly instead of silently writing nothing.
    raise FileNotFoundError('no index csv files were found')

# Uniform chunks of `files_per_chunk` files. The previous counter started at
# 0 with one file already loaded, so chunk 0 held one file more than the rest.
for count, first in enumerate(range(0, len(files), files_per_chunk)):
    chunk_parts = [prepare_inputs(csv_name)
                   for csv_name in files[first:first + files_per_chunk]]
    lstm_parts, cnn_parts, y_parts = zip(*chunk_parts)

    # Concatenate once per chunk instead of re-copying the growing arrays
    # with np.append after every file.
    lstm_save = np.concatenate(lstm_parts, axis=0)
    cnn_save = np.concatenate(cnn_parts, axis=0)
    y_train_save = np.concatenate(y_parts, axis=0)

    np.save(save+'lstm_inputs_2/{}.npy'.format(count), lstm_save.astype(np.float16))
    np.save(save+'cnn_inputs_2/{}.npy'.format(count), cnn_save.astype(np.float16))
    np.save(save+'y_train_2/{}.npy'.format(count), y_train_save.astype(np.float16))