In [1]:
import import_ipynb

from Midi_processing import *

importing Jupyter notebook from Midi_processing.ipynb


## Dataset creation and data transformations

In this section we first write a few helper functions that are used by the dataset creation function (`create_dataset`).

#### Function that converts ticks to seconds

In [3]:
def ticks_to_seconds(midi_file, ticks):
    """Convert a duration expressed in MIDI ticks into seconds.

    Arguments:
        midi_file: object exposing `ticks_per_beat` and `tracks` (a sequence of
                   tracks, each a sequence of messages with a `dict()` method),
                   such as a Mido MidiFile.
        ticks: number of ticks to convert.

    Returns the duration in seconds.

    The tempo (microseconds per beat) is read from the first message of the
    first track that carries a 'tempo' field. If the first track is missing or
    holds no such message, the MIDI default of 500000 microseconds per beat
    (120 BPM) is used instead of failing.
    """
    default_tempo = 500000  # microseconds per beat, i.e. 120 BPM

    # Only the first track is searched: that is where the tempo was expected
    # originally, and it keeps the lookup cheap when this is called per event.
    first_track = midi_file.tracks[0] if midi_file.tracks else []
    tempo = default_tempo
    for msg in first_track:
        fields = msg.dict()
        if 'tempo' in fields:
            tempo = fields['tempo']
            break

    ticks_per_beat = midi_file.ticks_per_beat  # this is a built-in attribute of midi files in Mido
    secs_per_beat = tempo / 1000000
    secs_per_tick = secs_per_beat / ticks_per_beat

    return secs_per_tick * ticks

#### One Hot Encode function

In [4]:
def one_hot_encode(element, length):
    """Return a one-hot vector of size `length` with a 1 at index `element`.

    Arguments:
        element: integer index of the active position, 0 <= element < length.
        length: size of the returned vector.

    Returns a 1-D np.array of zeros with a single 1 at position `element`.

    Raises IndexError if `element` is outside [0, length). Negative values are
    rejected explicitly so they cannot wrap around to the end of the vector.
    """
    if not 0 <= element < length:
        raise IndexError(f'element {element} is out of range for a one-hot vector of length {length}')

    one_hot = np.zeros(length)  # honour the requested length
    one_hot[element] = 1

    return one_hot

#### Dataset creation function

In [5]:
def create_dataset(midi_files=None, batch_size=None):

    '''
    Build a flat list of events from the second track of every song.

    Arguments:
        midi_files: list of songs (midi files). Each song must expose `tracks`;
                    only `tracks[1]` is read. `None` is treated as an empty list.
        batch_size: integer, the number of all-zero padding events appended each
                    time a message that is not a 'note_on' is met in the track.
                    The padding is meant to separate different songs.

    Returns a list of midi events.
    Each midi event is a list that contains three elements:
        - the note, one_hot_encoded into a 128-length np.array
        - velocity, as an integer
        - time, in seconds, as returned by ticks_to_seconds
    Padding events are [np.zeros(128), 0, 0].
    '''

    if midi_files is None:
        return []

    dataset = []

    for song in midi_files:
        for msg in song.tracks[1]:
            fields = msg.dict()  # build the message dict once instead of once per field

            if fields['type'] == 'note_on':
                one_hot_note = one_hot_encode(fields['note'], 128)
                dataset.append([one_hot_note, fields['velocity'], ticks_to_seconds(song, fields['time'])])
            else:
                # NOTE(review): padding is emitted for EVERY non-'note_on' message, so there
                # is exactly one separator per song only if each track holds a single such
                # message - confirm against the preprocessing step.
                dataset.extend([np.zeros(128), 0, 0] for _ in range(batch_size))

    return dataset

In [6]:
# Number of events per sequence; also the amount of zero padding that
# create_dataset inserts.
batch_size = 8

# Build the flat event list from the songs
DATA = create_dataset(processed_songs, batch_size)

In [7]:
# Convert it into a numpy array.
# Each row mixes a 128-length array with two scalars, so the rows are ragged:
# dtype=object must be requested explicitly, otherwise recent NumPy versions
# raise instead of building the (n_events, 3) array of objects.
DATA = np.array(DATA, dtype=object)

# print(f'Our dataset consists of {DATA.shape[0]} events.\nEach event consists of {DATA.shape[1]} elements.')
# print (f'\nIn other words our dataset consists of {DATA.shape[0]} rows and {DATA.shape[1]} columns.')

Our dataset consists of 1631120 events.
Each event consists of 3 elements.

In other words our dataset consists of 1631120 rows and 3 columns.


#### Isolate _note_ , _velocity_  and _time_ into different arrays

In [8]:
# Split the event table column by column, one sequence per attribute
note_dataset = DATA[:, 0]       # note sequence
velocity_dataset = DATA[:, 1]   # velocity sequence
time_dataset = DATA[:, 2]       # time sequence

#### Create inputs and targets for each one of the isolated attributes

In [9]:
def _next_step_pairs(sequence):
    """Return (inputs, targets) lists where targets[i] is the element that follows inputs[i]."""
    return list(sequence[:-1]), list(sequence[1:])


# Name -> sequence lookup of the three isolated attributes
datasets = {'note_dataset':note_dataset, 'velocity_dataset':velocity_dataset, 'time_dataset':time_dataset}

# Every attribute is paired with itself shifted by one event, so each input is
# matched with the event that comes right after it.
note_inputs, note_targets = _next_step_pairs(note_dataset)
velocity_inputs, velocity_targets = _next_step_pairs(velocity_dataset)
time_inputs, time_targets = _next_step_pairs(time_dataset)

__Add as many zero elements as needed at the end of the input and target lists, so that their length is divisible by our given batch_size.__

In [10]:
# Number of zero events still needed to reach the next multiple of batch_size
n_padding = -len(note_inputs) % batch_size

for _ in range(n_padding):
    # notes are padded with an all-zero 128-length vector ...
    note_inputs.append(np.zeros(128))
    note_targets.append(np.zeros(128))
    # ... velocity and time with a plain 0
    velocity_inputs.append(0)
    velocity_targets.append(0)
    time_inputs.append(0)
    time_targets.append(0)
# print (f'The new size of our input lists will be {len(note_inputs)}.')

The new size of our input lists will be 1631120.


In [11]:
# Turn every input/target list into a numpy array

# notes: one row per event
note_inputs = np.array(note_inputs)
note_targets = np.array(note_targets)

# velocity and time: reshaped into a single column
velocity_inputs = np.array(velocity_inputs).reshape(-1, 1)
velocity_targets = np.array(velocity_targets).reshape(-1, 1)

time_inputs = np.array(time_inputs).reshape(-1, 1)
time_targets = np.array(time_targets).reshape(-1, 1)

#### Transform the _velocity_ and _time_ attributes so that they are on the same scale.

In [12]:
# One scaler per array. fit() learns the mean and std of the data and returns
# the scaler itself, so creation and fitting are chained; transform() then
# applies the learned scaling.

velocity_inputs_scaler = StandardScaler().fit(velocity_inputs)
velocity_inputs_transformed = velocity_inputs_scaler.transform(velocity_inputs)

velocity_targets_scaler = StandardScaler().fit(velocity_targets)
velocity_targets_transformed = velocity_targets_scaler.transform(velocity_targets)

time_inputs_scaler = StandardScaler().fit(time_inputs)
time_inputs_transformed = time_inputs_scaler.transform(time_inputs)

time_targets_scaler = StandardScaler().fit(time_targets)
time_targets_transformed = time_targets_scaler.transform(time_targets)

#### Organize note, velocity and time datasets into batches, or sequences for the LSTM inputs

In [14]:
# Target shapes: (number of sequences, events per sequence, features per event)
note_shape = (-1, batch_size, 128)
scalar_shape = (-1, batch_size, 1)

# notes
note_inputs_batched = note_inputs.reshape(note_shape)
note_targets_batched = note_targets.reshape(note_shape)

# velocity
velocity_inputs_transformed_batched = velocity_inputs_transformed.reshape(scalar_shape)
velocity_targets_transformed_batched = velocity_targets_transformed.reshape(scalar_shape)

# time
time_inputs_transformed_batched = time_inputs_transformed.reshape(scalar_shape)
time_targets_transformed_batched = time_targets_transformed.reshape(scalar_shape)

In [39]:
# Sanity check of the batched note inputs: (number of sequences, batch_size, 128)
note_inputs_batched.shape

(203890, 8, 128)