In [53]:
# Number of parallel tracks encoded into every vector.
NUM_TRACKS = 4

# Number of distinct timeshift slots at the end of every vector.
NUM_TIMESHIFTS = 100

# Length of a single timeshift step (1 / NUM_TIMESHIFTS).
TIMESHIFT_LENGTH = 1.0 / NUM_TIMESHIFTS

In [54]:
# Example input: four tracks, each holding one note_on / note_off pair.
# "time" is the delay before the event, relative to the previous event
# of the same track.
track1 = [
    {"type": "note_on", "note": 72, "time": 0.2},
    {"type": "note_off", "note": 72, "time": 0.04},
]
track2 = [
    {"type": "note_on", "note": 65, "time": 0},
    {"type": "note_off", "note": 65, "time": 0.21},
]
track3 = [
    {"type": "note_on", "note": 63, "time": 0},
    {"type": "note_off", "note": 63, "time": 0.2},
]
track4 = [
    {"type": "note_on", "note": 61, "time": 0},
    {"type": "note_off", "note": 61, "time": 0.3},
]

# Track order matters: the position in this list is the track index.
tracks = [track1, track2, track3, track4]
tracks

[[{'note': 72, 'time': 0.2, 'type': 'note_on'},
  {'note': 72, 'time': 0.04, 'type': 'note_off'}],
 [{'note': 65, 'time': 0, 'type': 'note_on'},
  {'note': 65, 'time': 0.21, 'type': 'note_off'}],
 [{'note': 63, 'time': 0, 'type': 'note_on'},
  {'note': 63, 'time': 0.2, 'type': 'note_off'}],
 [{'note': 61, 'time': 0, 'type': 'note_on'},
  {'note': 61, 'time': 0.3, 'type': 'note_off'}]]

In [55]:
def tracks_to_vector_sequence(tracks, num_tracks=None, num_timeshifts=None):
    """Encode per-track note events as a sequence of one-hot vectors.

    Every vector has ``128 * 2 * num_tracks + num_timeshifts`` entries:

    * ``[0, 128 * num_tracks)``: note_on events, 128 slots per track;
    * ``[128 * num_tracks, 256 * num_tracks)``: note_off events, 128 slots
      per track;
    * the last ``num_timeshifts`` slots: timeshift events, where slot ``k``
      stands for a pause of ``k + 1`` timeshift steps.

    Events of all tracks are merged and ordered by absolute start time
    (ties keep track order). A pause between two consecutive events is
    emitted as a timeshift vector before the later event; pauses longer
    than ``num_timeshifts`` steps are split into as many maximum-length
    timeshift vectors as needed plus one vector for the remainder.

    Args:
        tracks: List of tracks. Each track is a list of dictionaries with
            the keys "type" ("note_on"; any other value is encoded as a
            note_off), "note" (slot within the track's 128 note slots,
            presumably a MIDI note number) and "time" (delay before the
            event, relative to the previous event of the same track).
        num_tracks: Expected number of tracks. Defaults to the module-level
            NUM_TRACKS.
        num_timeshifts: Number of timeshift slots. Defaults to the
            module-level NUM_TIMESHIFTS, in which case one step lasts
            TIMESHIFT_LENGTH; when passed explicitly one step lasts
            ``1.0 / num_timeshifts``.

    Returns:
        List of one-hot vectors (lists of 0/1 ints), one per timeshift and
        one per note event, in playback order.

    Raises:
        AssertionError: If ``len(tracks)`` differs from ``num_tracks``.
    """
    if num_timeshifts is None:
        num_timeshifts = NUM_TIMESHIFTS
        timeshift_length = TIMESHIFT_LENGTH
    else:
        timeshift_length = 1.0 / num_timeshifts
    if num_tracks is None:
        num_tracks = NUM_TRACKS

    assert len(tracks) == num_tracks, (
        "expected %d tracks, got %d" % (num_tracks, len(tracks)))

    note_off_offset = 128 * num_tracks
    timeshift_offset = 2 * note_off_offset
    vector_size = timeshift_offset + num_timeshifts

    def one_hot(index):
        vector = [0] * vector_size
        vector[index] = 1
        return vector

    # Turn the per-track relative delays into absolute start times.
    events = []
    for track_index, sequence in enumerate(tracks):
        start_time = 0
        for event in sequence:
            start_time += event["time"]
            events.append(
                (start_time, event["type"], track_index, event["note"]))
    # list.sort is stable, so simultaneous events keep their track order.
    events.sort(key=lambda item: item[0])

    final_sequence = []
    prev_start_time = 0
    for start_time, event_type, track_index, note in events:
        gap = start_time - prev_start_time
        prev_start_time = start_time

        # Quantize the pause to whole timeshift steps, rounding up. The gap
        # comes from subtracting accumulated floats, so it carries noise
        # (e.g. 0.24000000000000002 - 0.21 yields 3.0000000000000027
        # steps); rounding first keeps that noise from bumping the pause
        # into the next slot or from turning simultaneous events into a
        # spurious one-step pause.
        steps = int(np.ceil(round(gap / timeshift_length, 6)))

        # Pauses beyond the largest slot are split into maximum-length
        # timeshift vectors.
        while steps > num_timeshifts:
            final_sequence.append(one_hot(vector_size - 1))
            steps -= num_timeshifts
        if steps > 0:
            final_sequence.append(one_hot(timeshift_offset + steps - 1))

        # Set correct note.
        note_index = 128 * track_index + note
        if event_type != 'note_on':
            note_index += note_off_offset
        final_sequence.append(one_hot(note_index))

    return final_sequence
        

In [56]:
# Encode the example tracks; the notebook displays the returned list of vectors.
tracks_to_vector_sequence(tracks)

[[0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
