In [1]:
import pretty_midi

from tqdm.auto import tqdm

from pathlib import Path

import torch

max_files = 1000

In [2]:
min_pitch = 21
max_pitch = 108
n_pitches = max_pitch - min_pitch + 1
sequence_length = 128

In [3]:
midi_files = list(Path(r"C:\Users\aniru\Desktop\projects\AI Music Generator\maestro-v3.0.0").rglob("*.midi"))

In [4]:
def create_roll(midi_file):
  ex = pretty_midi.PrettyMIDI(str(midi_file))
  instrument = ex.instruments[0]
  instrument.notes.sort(key = lambda note:note.start)
  roll = torch.zeros((len(instrument.notes), 4), dtype=torch.float32)
  prev_start_time = 0
  idx = 0
  for note in instrument.notes:
    # pitch: int32, velocity: int32, duration: float32, step: float32
    roll[idx][0] = note.pitch - min_pitch
    roll[idx][1] = note.velocity
    roll[idx][2] = note.end - note.start
    roll[idx][3] = note.start - prev_start_time
    prev_start_time = note.start
    idx += 1

  return roll

In [5]:
def create_sequence(roll, sequence_length):
  sequences = torch.zeros((len(roll) - sequence_length - 1, sequence_length, 4), dtype=torch.float32)
  targets = torch.zeros((len(roll) - sequence_length - 1, 4), dtype=torch.float32)
  
  for i in range(0, len(roll) - sequence_length - 1):
    sequences[i] = roll[i:i + sequence_length]
    targets[i] = roll[i + sequence_length]

  return sequences, targets

In [6]:
sequences = []
targets = []

count = 0
bar = tqdm(total=max_files)
for midi_file in midi_files:
  if (count == max_files):
    break
  count += 1
  roll = create_roll(midi_file)
  file_sequences, file_targets = create_sequence(roll, sequence_length)
  sequences.append(file_sequences)
  targets.append(file_targets)
  bar.update(1)
bar.close()

  0%|          | 0/1000 [00:00<?, ?it/s]

In [7]:
sequences_tensor = torch.cat(sequences, dim=0)
targets_tensor = torch.cat(targets, dim=0)

torch.save({"sequences": sequences_tensor, "targets": targets_tensor}, f"dataset/maestro-{max_files}.pth")

In [8]:
print(sequences_tensor.shape)

torch.Size([5195653, 128, 4])
