In [34]:
# !pip install miditoolkit 
# !pip install magenta

In [24]:
import os
import random
from miditoolkit import MidiFile
import matplotlib.pyplot as plt
from collections import Counter
import subprocess
import datetime
from miditok import REMI, TokenizerConfig
from miditok.pytorch_data import DatasetMIDI, DataCollator
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader


### Data Exploration

In [19]:
midi_dirpath = 'nesmdb_midi/'
midi_train_dirpath = os.path.join(midi_dirpath, 'train')
midi_test_dirpath = os.path.join(midi_dirpath, 'test')
midi_train_filesnames = os.listdir(midi_train_dirpath)
midi_test_filesnames = os.listdir(midi_test_dirpath)

midi_train_filepaths = [os.path.join(midi_train_dirpath, filename) for filename in midi_train_filesnames]
midi_test_filepaths = [os.path.join(midi_test_dirpath, filename) for filename in midi_test_filesnames]

In [15]:
def print_progress_bar(iteration, total, prefix='', length=50):
    percent = ("{0:.1f}").format(100 * (iteration / float(total)))
    filled_length = int(length * iteration // total)
    bar = '█' * filled_length + '-' * (length - filled_length)
    print(f'\r{prefix} |{bar}| {percent}% Complete', end='\r', flush=True)
    if iteration == total:
        print()

In [14]:
def to_pretty_midi(filepaths: list, num_samples: int = 100) -> list: 
    sampled_filepaths = random.sample(filepaths, num_samples)
    midis = []
    for i, filepath in enumerate(sampled_filepaths):
        print_progress_bar(i + 1, num_samples, prefix='Converting MIDI files to PrettyMIDI')
        try:
            midi = MidiFile(filepath)
            midis.append(midi)
        except Exception as e:
            print(f"Error processing {filepath}: {e}")
        
    return midis

In [16]:
midis = to_pretty_midi(midi_train_filepaths, num_samples=len(midi_train_filepaths))


Converting MIDI files to PrettyMIDI |██████████████████████████████████████████████████| 100.0% Complete


In [38]:
print(midis[0].ticks_per_beat)

22050


In [None]:
instrument_count_distribution = Counter(len(midi.instruments) for midi in midis)
unique_instruments = set([int(instrument.program) for midi in midis for instrument in midi.instruments])
unique_instruments_distribution = Counter([int(instrument.program) for midi in midis for instrument in midi.instruments])
instrument_sets_distribution = Counter(tuple(sorted([int(instrument.program) for instrument in midi.instruments])) for midi in midis)
print("Instrument count distribution:", dict(instrument_count_distribution))
print("Unique instruments:", unique_instruments)
print("Unique instruments distribution:", dict(unique_instruments_distribution))
print("Instrument set distribution:", dict(instrument_sets_distribution))

Instrument count distribution: {3: 1509, 4: 2439, 2: 450, 1: 102, 0: 2}
Unique instruments: {80, 81, 38, 121}
Unique instruments distribution: {80: 4334, 81: 4259, 38: 4045, 121: 2647}
Instrument set distribution: {(38, 80, 81): 1327, (38, 80, 81, 121): 2439, (80, 81): 277, (38, 81): 58, (38, 80, 121): 50, (38, 80): 97, (80, 81, 121): 95, (38,): 29, (38, 121): 8, (80,): 41, (38, 81, 121): 37, (81,): 24, (80, 121): 8, (121,): 8, (): 2, (81, 121): 2}


### Custom LSTM

In [10]:
TOKENIZER_SAMPLE_SIZE = 1000

In [18]:
config = TokenizerConfig(
    use_programs=True,
    programs=list(unique_instruments),
    num_velocities=1,
    use_chords=False,
    use_tempos=False,
    use_time_signatures=False
)
tokenizer = REMI(config)
tokenizer_train_files = random.sample(midi_train_filepaths, min(TOKENIZER_SAMPLE_SIZE, len(midi_train_filepaths)))
tokenizer.train(vocab_size=1000, files_paths=tokenizer_train_files, )
tokenizer.save("custom_lstm_tokenizer.json")

  super().__init__(tokenizer_config, params)







In [None]:
class CustomMIDITokenizer( ):
    def __init__(self, time_divisions=32):
        self.time_divisions = time_divisions  # e.g., 32 levels of time shift
        self.special_tokens = {
            "PAD": 0,
            "BOS": 1,
            "EOS": 2,
        }

        self.note_on_offset = 3
        self.note_off_offset = self.note_on_offset + 128
        self.time_offset = self.note_off_offset + 128
        self.vocab_size = self.time_offset + self.time_divisions + 1


    
    def encode(self, midi):
        return self.tokenizer.encode(midi)
    
    def decode(self, tokens):
        return self.tokenizer.decode(tokens)

In [None]:

MAX_SEQUENCE_LENGTH = 512

tokenizer = REMI()  # using defaults parameters (constants.py)
train_dataset = DatasetMIDI(
    files_paths=midi_train_filepaths,
    tokenizer=tokenizer,
    max_seq_len=MAX_SEQUENCE_LENGTH,
    bos_token_id=tokenizer["BOS_None"],
    eos_token_id=tokenizer["EOS_None"],
)

test_dataset = DatasetMIDI(
    files_paths=midi_train_filepaths,
    tokenizer=tokenizer,
    max_seq_len=MAX_SEQUENCE_LENGTH,
    bos_token_id=tokenizer["BOS_None"],
    eos_token_id=tokenizer["EOS_None"],
)
collator = DataCollator(tokenizer.pad_token_id)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collator)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collator)

511


In [25]:
class MusicRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super(MusicRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True
        )
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):
        # x: (batch_size, seq_length)
        x = self.embedding(x)  # (batch_size, seq_length, embedding_dim)
        out, hidden = self.rnn(x, hidden)  # out: (batch_size, seq_length, hidden_dim)
        out = self.fc(out)  # (batch_size, seq_length, vocab_size)
        return out, hidden

In [27]:
def train(model, train_loader, val_loader, vocab_size, num_epochs=20, lr=0.001, device='cuda'):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        # --------- Training ---------
        model.train()
        total_train_loss = 0

        for batch in train_loader:
            batch = batch['input_ids'].to(device)  # (batch_size, seq_length)

            inputs = batch[:, :-1]
            targets = batch[:, 1:]

            optimizer.zero_grad()
            outputs, _ = model(inputs)
            outputs = outputs.reshape(-1, vocab_size)
            targets = targets.reshape(-1)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)

        # --------- Validation ---------
        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                batch = batch['input_ids'].to(device)

                inputs = batch[:, :-1]
                targets = batch[:, 1:]

                outputs, _ = model(inputs)
                outputs = outputs.reshape(-1, vocab_size)
                targets = targets.reshape(-1)

                loss = criterion(outputs, targets)
                total_val_loss += loss.item()

        avg_val_loss = total_val_loss / len(val_loader)

        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")


# Example usage
if __name__ == "__main__":
    vocab_size = tokenizer.vocab_size
    embedding_dim = 256
    hidden_dim = 512
    num_layers = 2

    model = MusicRNN(vocab_size, embedding_dim, hidden_dim, num_layers)
    train(model, train_loader, test_loader, vocab_size, device='cpu')

Epoch 1/20 | Train Loss: 1.3571 | Val Loss: 0.9967
Epoch 2/20 | Train Loss: 0.9442 | Val Loss: 0.8875


KeyboardInterrupt: 

### Magenta LSTM Model (UNSUCCESSFUL)

In [None]:
# filter the MIDI files to only include instruments with program number 81
# This is used as a starting point to train the performance-rnn model (as it is best suited for single instrument MIDI files)
def filter_program_81(input_root, output_root):
    for split in ['train', 'valid', 'test']:
        input_dir = os.path.join(input_root, split)
        output_dir = os.path.join(output_root, split)
        os.makedirs(output_dir, exist_ok=True)
        
        for i, fname in enumerate(os.listdir(input_dir)):
            print_progress_bar(i+1, len(os.listdir(input_dir)), prefix=f'Processing {split} MIDI files')
            if not fname.lower().endswith('.mid'):
                continue
            path = os.path.join(input_dir, fname)
            midi = MidiFile(path)
            filtered_instr = [inst for inst in midi.instruments if inst.program == 81]
            
            if filtered_instr:
                midi.instruments = filtered_instr
                midi.dump(os.path.join(output_dir, fname))

# filter_program_81(midi_dirpath, 'nesmdb_midi_program_81') 

Processing train MIDI files |██████████████████████████████████████████████████| 100.0% Complete
Processing valid MIDI files |██████████████████████████████████████████████████| 100.0% Complete
Processing test MIDI files |██████████████████████████████████████████████████| 100.0% Complete


In [37]:
def midi_to_note_sequence(midi_dir):
    subprocess.run([
        'convert_dir_to_note_sequences',
        '--input_dir=nesmdb_81_midi/train',
        '--output_file=nesmdb_81_notesequences.tfrecord',
        '--recursive'
    ], check=True)

midi_to_note_sequence('nesmdb_81_midi/train')

Traceback (most recent call last):
  File "/Users/leofriedman/Desktop/ucsd/cse_253/cse153-group-project/.env/bin/convert_dir_to_note_sequences", line 5, in <module>
    from magenta.scripts.convert_dir_to_note_sequences import console_entry_point
  File "/Users/leofriedman/Desktop/ucsd/cse_253/cse153-group-project/.env/lib/python3.10/site-packages/magenta/__init__.py", line 17, in <module>
    import magenta.common.beam_search
  File "/Users/leofriedman/Desktop/ucsd/cse_253/cse153-group-project/.env/lib/python3.10/site-packages/magenta/common/__init__.py", line 20, in <module>
    from .nade import Nade
  File "/Users/leofriedman/Desktop/ucsd/cse_253/cse153-group-project/.env/lib/python3.10/site-packages/magenta/common/nade.py", line 24, in <module>
    import tensorflow_probability as tfp
ModuleNotFoundError: No module named 'tensorflow_probability'


CalledProcessError: Command '['convert_dir_to_note_sequences', '--input_dir=nesmdb_81_midi/train', '--output_file=nesmdb_81_notesequences.tfrecord', '--recursive']' returned non-zero exit status 1.

In [None]:
def train_performance_rnn():
    current_time = datetime.datetime.now()
    subprocess.run([
        "performance_rnn_train",
        f"--run_dir=logdir/performance_rnn_finetune/{current_time}",
        "--sequence_example_file=nesmdb_81_notesequences.tfrecord",
        '--hparams=batch_size=64,rnn_layer_sizes=[256,256]',
        "--num_training_steps=5000",
        "--bundle_file=performance_with_dynamics.mag",
        "--save_checkpoints_steps=100",
        "--alsologtostderr"
    ], check=True)

# train_performance_rnn()

In [None]:
def generate_midi_from_performance_rnn():
    current_time = datetime.datetime.now()
    subprocess.run([
        "performance_rnn_generate",
        "--bundle_file=performance_with_dynamics.mag",
        f"--output_dir=performance_rnn_generated/{current_time}",
        "--num_outputs=1",
        "--num_steps=4000",  # ~1 minute at 100 qpm
        "--hparams=batch_size=64,rnn_layer_sizes=[256,256]",
        "--condition_on_primer=false",
        "--inject_primer_during_generation=false",
        "--instrument=81",
        "--alsologtostderr"
    ], check=True)