In [1]:
import os
import ast
import random
import time

from music21 import stream, note, chord, midi, converter
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from sklearn.model_selection import KFold

root = 'data_processed/'

# Generation with Recurrent Neural Networks

To establish a baseline for music generation that we can improve on, we use Recurrent Neural Networks. We formulate the task as a next-note prediction problem. This method is quite similar to the recurrence-based language models used in NLP.

The input is sequential, but unlike words in NLP, timing and dynamics (duration, velocity, offset) matter a lot in music. To be able to predict notes/chords + durations + offsets + velocities we might need multi-output heads (e.g., softmax for notes/chords/velocities, regression for durations/offsets).

## Import Dataset and Definition of Useful functions

### Import Dataset

In [6]:
def safe_parse_all_columns_df(df):
    """
    Turn the stringified list columns of `df` back into Python objects.

    Every cell of the columns 'notes', 'chords', 'velocities', 'durations',
    'offsets' and 'ordered_events' is passed through `ast.literal_eval` when
    it is a string; cells of any other type are kept as they are. The columns
    are overwritten on `df` itself, and that same object is returned.
    """
    def _literal_or_same(cell):
        # Only strings need decoding; already-parsed values pass through.
        return ast.literal_eval(cell) if isinstance(cell, str) else cell

    for column in ('notes', 'chords', 'velocities', 'durations', 'offsets', 'ordered_events'):
        df[column] = df[column].apply(_literal_or_same)
    return df

def load_dataframe_from_two_csvs(file1, file2):
    """
    Read two CSV files, stack them into one DataFrame and parse its list columns.

    The rows of `file1` come first, followed by the rows of `file2`, with a
    fresh 0..n-1 index. The combined frame is then handed to
    `safe_parse_all_columns_df` and the result is returned.
    """
    parts = [pd.read_csv(path) for path in (file1, file2)]
    combined = pd.concat(parts, ignore_index=True)
    return safe_parse_all_columns_df(combined)

def save_dataframe_to_two_csvs(df, file1, file2):
    """
    Write the first half of `df` to `file1` and the remainder to `file2`.

    The split point is `len(df) // 2`, so for an odd number of rows the
    second file receives the extra row. The index is not written.
    """
    split_at = len(df) // 2
    first_half, second_half = df.iloc[:split_at], df.iloc[split_at:]
    first_half.to_csv(file1, index=False)
    second_half.to_csv(file2, index=False)

def load_dataframe_from_one_csv(file):
    """
    Read `file` with `pd.read_csv` and return the resulting DataFrame unchanged.
    """
    return pd.read_csv(file)

def save_dataframe_to_one_csv(df, file):
    """
    Write `df` to `file` as CSV, including the index as the first column.
    """
    df.to_csv(path_or_buf=file, index=True)

def load_reconstructed_events(file):
    """
    Read the reconstructed-events CSV and decode its 'sequence' column.

    Each cell of 'sequence' is evaluated with `ast.literal_eval` and must be
    a list. Inside that list, nested lists (chords) become lists of `int`
    and every other element (a single note) becomes an `int`.

    If a cell cannot be decoded, the offending string is printed and the
    underlying exception is re-raised.
    """
    def _normalize(element):
        # Chords are stored as nested lists, single notes as scalars.
        if isinstance(element, list):
            return [int(pitch) for pitch in element]
        return int(element)

    def safe_parse(seq_str):
        try:
            events = ast.literal_eval(seq_str)
            if not isinstance(events, list):
                raise ValueError("Parsed sequence is not a list")
            return [_normalize(element) for element in events]
        except Exception as e:
            print(f"Error parsing sequence: {seq_str}")
            raise e

    frame = pd.read_csv(file)
    frame['sequence'] = frame['sequence'].apply(safe_parse)
    return frame

In [7]:
# The processed dataset is stored as two CSV parts under `root`.
file1 = root + 'data_part1.csv'
file2 = root + 'data_part2.csv'

# Concatenate both parts and decode the stringified list columns.
df = load_dataframe_from_two_csvs(file1, file2)

### Useful functions

In [8]:
def parse_chord_to_list(chord):
    """
    Convert a comma-separated chord string to a list of integers.

    Args:
        chord: A string such as "60,64,67". Whitespace around each token is
            ignored, and tokens that are not purely digits (including empty
            ones) are skipped.

    Returns:
        The list of parsed integers, or an empty list when `chord` is not a
        string.
    """
    if not isinstance(chord, str):
        return []
    # Strip before testing: " 64".isdigit() is False, which would silently
    # drop notes from strings written with spaces after the commas.
    tokens = (token.strip() for token in chord.split(','))
    return [int(token) for token in tokens if token.isdigit()]

def reconstruct_ordered_events(df):
    """
    Reconstruct the ordered list of events (notes and chords) for each song.

    For every row, 'ordered_events' is walked in order: an 'n' marker takes
    the next entry from that row's 'notes', a 'c' marker takes the next entry
    from that row's 'chords'. Entries are copied as they are.

    Args:
        df: DataFrame with the columns 'notes', 'chords' and 'ordered_events',
            each holding one list per row.

    Returns:
        A DataFrame with a single 'sequence' column (one list of events per
        input row, in row order) and an index named 'index'.

    Raises:
        ValueError: If 'ordered_events' contains a marker other than 'n' or 'c'.
        IndexError: If a row has more markers of a kind than entries of that kind.
    """
    sequences = []

    # Iterate the columns positionally so the function also works on frames
    # whose index is not 0..n-1 (e.g. after filtering).
    for notes, chords, ordered_events in zip(df['notes'], df['chords'], df['ordered_events']):
        idx_note = 0
        idx_chord = 0
        reconstructed = []

        for element in ordered_events:
            if element == 'n':
                reconstructed.append(notes[idx_note])
                idx_note += 1
            elif element == 'c':
                reconstructed.append(chords[idx_chord])
                idx_chord += 1
            else:
                raise ValueError(f"Unknown event type: {element}")

        sequences.append(reconstructed)

    reconstructed_dataset = pd.DataFrame({'sequence': sequences})
    reconstructed_dataset.index.name = 'index'

    return reconstructed_dataset

In [None]:
# Rebuild the per-song event sequences and write them to a CSV under `root`.
save_dataframe_to_one_csv(reconstruct_ordered_events(df), root + 'reconstructed_ordered_events.csv')

## Predict only Events (Notes and Chords)

### Creating the data: Fixed number of events 

Idea for creating the input sequences:
- we take subsets of the list of events representing each song 
- we take the next event of each subset as corresponding training output sequences

This is easy to implement and gives us a consistent sequence length for batching, but it ignores the timing aspect.

In [9]:
class Vocabulary:
    """
    Vocabulary of single notes, used to encode events as multi-hot vectors.

    A single note sets one position of the vector; a chord (a list of notes)
    sets one position per note it contains.
    """

    def __init__(self, reconstructed_df):
        """
        Build vocabulary of unique single notes only.

        Args:
            reconstructed_df: DataFrame with a 'sequence' column; each cell is
                a list of events, where an event is a note or a list of notes.
        """
        unique_pitches = set()
        # Iterate the column directly rather than by label so that frames
        # with a non-default index are handled as well.
        for sequence in reconstructed_df['sequence']:
            for event in sequence:
                if isinstance(event, list):
                    unique_pitches.update(event)
                else:
                    unique_pitches.add(event)

        self.notes = sorted(unique_pitches)
        self.note_to_idx = {pitch: idx for idx, pitch in enumerate(self.notes)}
        self.idx_to_note = {idx: pitch for idx, pitch in enumerate(self.notes)}
        self.vocab_size = len(self.notes)

    def encode_event(self, event):
        """
        Encode an event as a multi-hot vector over single notes.

        Returns:
            A float32 numpy array of length `vocab_size`.

        Raises:
            KeyError: If the event contains a note that is not in the vocabulary.
        """
        vec = np.zeros(self.vocab_size, dtype=np.float32)
        pitches = event if isinstance(event, list) else [event]
        for pitch in pitches:
            vec[self.note_to_idx[pitch]] = 1.0
        return vec

    def decode_event(self, vec, threshold=0.5):
        """
        Decode multi-hot vector to list of notes.

        Positions whose value is >= `threshold` are considered active.

        Returns:
            The single note itself when exactly one position is active,
            otherwise a list of notes (empty when nothing is active).
        """
        indices = np.where(vec >= threshold)[0]
        active = [self.idx_to_note[idx] for idx in indices]
        if len(active) == 1:
            return active[0]
        return active

    def __len__(self):
        """Return the number of distinct notes in the vocabulary."""
        return self.vocab_size


Create Dataset object

In [10]:
class MusicEventDataset(Dataset):
    """
    Sliding-window dataset of (input sequence, next event) pairs.

    Every window of `seq_length` consecutive events in a song is one input,
    and the event right after the window is its target. Events are encoded
    lazily in `__getitem__` using the supplied vocabulary.
    """

    def __init__(self, reconstructed_df, vocab, seq_length=50):
        """
        Constructs all valid (input_seq, target_event) pairs from each song in the dataset.

        Songs with `seq_length` events or fewer yield no samples.

        Args:
            reconstructed_df: DataFrame with 'sequence' column where each entry is a list of events
            vocab: Vocabulary object to encode events
            seq_length: Length of each training input sequence (target is the next event)
        """
        self.samples = []
        self.seq_length = seq_length
        self.vocab = vocab

        # Iterate the column positionally; label-based lookups by 0..n-1
        # would fail on frames whose index is not the default one.
        for sequence in reconstructed_df['sequence']:
            n_events = len(sequence)

            if n_events <= seq_length:
                continue

            for start in range(n_events - seq_length):
                input_seq = sequence[start : start + seq_length]
                target_event = sequence[start + seq_length]
                self.samples.append((input_seq, target_event))

    def __len__(self):
        """Return the number of (input sequence, target event) pairs."""
        return len(self.samples)

    def __getitem__(self, idx):
        """
        Return the encoded pair at position `idx`.

        Returns:
            A tuple `(input_tensor, target_tensor)` of float32 tensors: the
            input stacks one encoded vector per event of the window, the
            target is the encoded vector of the following event.
        """
        input_seq, target_event = self.samples[idx]

        input_encoded = np.array([self.vocab.encode_event(event) for event in input_seq], dtype=np.float32)
        input_tensor = torch.tensor(input_encoded)

        target_encoded = self.vocab.encode_event(target_event)
        target_tensor = torch.tensor(target_encoded, dtype=torch.float32)

        return input_tensor, target_tensor


In [10]:
# Load the event sequences from the CSV under `root`.
reconstructed_dataset = load_reconstructed_events(root + 'reconstructed_ordered_events.csv')

# Vocabulary over every single note that appears in any sequence.
vocab = Vocabulary(reconstructed_dataset)

# Windows of 16 events, each paired with the event that follows it.
dataset = MusicEventDataset(reconstructed_dataset, vocab=vocab, seq_length=16)

# Inspect the first (input, target) pair as a sanity check.
x, y = dataset[0]

print("Input sequence shape:", x.shape)
print("Next event shape:", y.shape)
print("Input sequence (multi-hot vectors):", x)
print("Next event (multi-hot vector):", y)

Input sequence shape: torch.Size([16, 88])
Next event shape: torch.Size([88])
Input sequence (multi-hot vectors): tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
Next event (multi-hot vector): tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


### Model, Metrics and Cross-validation over sequence length

Model

In [11]:
class MusicEventRNN(nn.Module):
    """
    LSTM followed by a linear layer that scores the next event.

    The network consumes a batch-first sequence of event vectors and emits
    one vector of raw scores (logits) per sequence, computed from the LSTM
    output at the last time step.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        """
        Args:
            input_size: Size of each input event vector.
            hidden_size: Size of the LSTM hidden state.
            output_size: Size of the output score vector.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """
        Run the LSTM over `x` and project its last time step.

        Args:
            x: Batch-first input sequence (batch, time, input_size).
            hidden: Optional LSTM state to start from.

        Returns:
            A tuple `(scores, hidden)`: the linear projection of the last
            time step's LSTM output, and the LSTM state after the sequence.
        """
        sequence_out, hidden = self.rnn(x, hidden)
        # Only the final time step is used to predict the next event.
        scores = self.fc(sequence_out[:, -1, :])
        return scores, hidden

Cross-validation Loop on Sequence Length

In [12]:
# Load the event sequences and build the vocabulary used by the cells below.
reconstructed_dataset = load_reconstructed_events(root + 'reconstructed_ordered_events.csv')
vocab = Vocabulary(reconstructed_dataset)

In [13]:
def train_model(model, dataset, epochs=10, batch_size=32, seq_length=16, val_split=0.1, print_every=5, checkpoint_path="best_model.pth"):
    """
    Train `model` on `dataset` with Adam and a binary cross-entropy-with-logits loss.

    The model is moved to CUDA when available (CPU otherwise) and trained on
    shuffled mini-batches. The running average loss of the current epoch is
    printed every `print_every` batches and after the last batch of the epoch.

    Args:
        model: Module whose forward returns a `(outputs, hidden)` tuple.
        dataset: Dataset yielding `(input, target)` pairs.
        epochs: Number of passes over `dataset`.
        batch_size: Mini-batch size.
        seq_length: Accepted but not used by this function.
        val_split: Accepted but not used by this function.
        print_every: Progress is printed every this many batches.
        checkpoint_path: Accepted but not used by this function; nothing is saved.

    Returns:
        None. The model is updated in place.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    model = model.to(device)

    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    n_batches = len(train_loader)

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    for epoch in range(1, epochs + 1):
        model.train()
        loss_sum = 0.0

        for batch_idx, (inputs, targets) in enumerate(train_loader, 1):
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            predictions, _ = model(inputs)
            batch_loss = criterion(predictions, targets)
            batch_loss.backward()
            optimizer.step()

            loss_sum += batch_loss.item()

            # Report periodically, and always once at the end of the epoch.
            if batch_idx % print_every == 0 or batch_idx == n_batches:
                avg_loss = loss_sum / batch_idx
                print(f"Epoch {epoch}, Batch {batch_idx}/{n_batches}, Avg Loss: {avg_loss:.4f}")

    print("Training done.")


In [14]:
def evaluate_model(model, val_dataset, batch_size=128):
    """
    Compute the mean binary cross-entropy-with-logits loss of `model` on `val_dataset`.

    The model is moved to CUDA when available (CPU otherwise), switched to
    eval mode and run without gradient tracking.

    Args:
        model: Module whose forward returns a `(outputs, hidden)` tuple.
        val_dataset: Dataset yielding `(input, target)` pairs.
        batch_size: Mini-batch size used for evaluation.

    Returns:
        The loss averaged over samples, as a Python float. Each batch's mean
        loss is weighted by its size, so a short final batch does not count
        as much as a full one.

    Raises:
        ValueError: If `val_dataset` is empty.
    """
    if len(val_dataset) == 0:
        raise ValueError("val_dataset is empty; cannot compute a validation loss")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    criterion = nn.BCEWithLogitsLoss()
    weighted_loss = 0.0
    n_samples = 0

    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(device), y_val.to(device)
            outputs, _ = model(x_val)
            batch_n = x_val.size(0)
            # criterion returns the batch mean; re-weight it by the batch size.
            weighted_loss += criterion(outputs, y_val).item() * batch_n
            n_samples += batch_n

    return weighted_loss / n_samples


In [20]:
# Hyperparameter grid: every (sequence length, hidden size, layer count)
# combination is scored with k-fold cross-validation.
sequence_lengths = [8, 16, 32]
hidden_sizes = [64, 128, 256]
num_layers_options = [1]
k_folds = 5
# Maps (seq_len, hidden_size, num_layers) -> mean validation loss over folds.
results = {}

# Seeded generator for the train/test split; the same generator object is
# reused for each sequence length.
generator = torch.Generator().manual_seed(42)

for seq_len in sequence_lengths:
    print(f"\n=== Tuning for sequence length: {seq_len} ===")
    # The windows depend on seq_len, so the dataset is rebuilt for each value.
    dataset = MusicEventDataset(reconstructed_dataset, vocab=vocab, seq_length=seq_len)

    # Hold out 10% of the windows; `test_dataset` is not used further in this cell.
    # NOTE(review): the split is over individual windows, and windows of the same
    # song overlap, so held-out windows can share events with training windows.
    total_size = len(dataset)
    test_size = int(0.1 * total_size)
    train_size = total_size - test_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=generator)

    for hidden_size in hidden_sizes:
        for num_layers in num_layers_options:
            fold_losses = []
            print(f"\n-- hidden_size={hidden_size}, num_layers={num_layers} --")

            kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

            for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
                print(f"  Fold {fold+1}/{k_folds}")
                train_subset = Subset(train_dataset, train_idx)
                val_subset = Subset(train_dataset, val_idx)

                # A fresh model per fold so no weights carry over between folds.
                model = MusicEventRNN(
                    input_size=len(vocab),
                    hidden_size=hidden_size,
                    output_size=len(vocab),
                    num_layers=num_layers
                )

                train_model(
                    model=model,
                    dataset=train_subset,
                    epochs=3,
                    batch_size=128,
                    seq_length=seq_len,
                    print_every=10000,
                    checkpoint_path=None
                )

                val_loss = evaluate_model(model, val_subset, batch_size=128)
                print(f"    -> Validation Loss: {val_loss:.4f}")
                fold_losses.append(val_loss)

            # Score of this configuration: mean validation loss across folds.
            avg_loss = sum(fold_losses) / len(fold_losses)
            results[(seq_len, hidden_size, num_layers)] = avg_loss

print("\n=== K-Fold Tuning Results ===")
for (seq_len, hidden_size, num_layers), val_loss in results.items():
    print(f"Seq Len: {seq_len}, Hidden: {hidden_size}, Layers: {num_layers} => Avg Val Loss: {val_loss:.4f}")



=== Tuning for sequence length: 8 ===

-- hidden_size=64, num_layers=1 --
  Fold 1/5
Using device: cpu
Epoch 1, Batch 10000/25282, Avg Loss: 0.0830
Epoch 1, Batch 20000/25282, Avg Loss: 0.0800
Epoch 1, Batch 25282/25282, Avg Loss: 0.0792
Epoch 2, Batch 10000/25282, Avg Loss: 0.0762
Epoch 2, Batch 20000/25282, Avg Loss: 0.0762
Epoch 2, Batch 25282/25282, Avg Loss: 0.0761
Epoch 3, Batch 10000/25282, Avg Loss: 0.0759
Epoch 3, Batch 20000/25282, Avg Loss: 0.0759
Epoch 3, Batch 25282/25282, Avg Loss: 0.0759
Training done.
    -> Validation Loss: 0.0758
  Fold 2/5
Using device: cpu
Epoch 1, Batch 10000/25282, Avg Loss: 0.0830
Epoch 1, Batch 20000/25282, Avg Loss: 0.0799
Epoch 1, Batch 25282/25282, Avg Loss: 0.0792
Epoch 2, Batch 10000/25282, Avg Loss: 0.0762
Epoch 2, Batch 20000/25282, Avg Loss: 0.0761
Epoch 2, Batch 25282/25282, Avg Loss: 0.0761
Epoch 3, Batch 10000/25282, Avg Loss: 0.0759
Epoch 3, Batch 20000/25282, Avg Loss: 0.0759
Epoch 3, Batch 25282/25282, Avg Loss: 0.0758
Training do

KeyboardInterrupt: 

The best model we found is the one with sequence length 32 and hidden layer size 256.

### Training and Music Generation

In [15]:
# Configuration of the final model and of this training run.
seq_len = 32
hidden_size = 256
num_layers = 1
batch_size = 128
epochs = 5
learning_rate = 0.001
checkpoint_path = "music_gen_seq32_hidden256.pth"

# Same seeded 90/10 window split as in the tuning cell; only the training
# part is used here.
dataset = MusicEventDataset(reconstructed_dataset, vocab=vocab, seq_length=seq_len)
generator = torch.Generator().manual_seed(42)
total_size = len(dataset)
test_size = int(0.1 * total_size)
train_size = total_size - test_size
train_dataset, _ = random_split(dataset, [train_size, test_size], generator=generator)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

model = MusicEventRNN(
    input_size=len(vocab),
    hidden_size=hidden_size,
    output_size=len(vocab),
    num_layers=num_layers
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Resume from the checkpoint when it exists. On a fresh run there is no file
# yet, so loading unconditionally would abort the cell; train from scratch instead.
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    print(f"Continuing training for {epochs} more epochs with seq_len={seq_len}, hidden_size={hidden_size}...")
else:
    print(f"No checkpoint at '{checkpoint_path}'; training from scratch for {epochs} epochs with seq_len={seq_len}, hidden_size={hidden_size}...")

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.BCEWithLogitsLoss()

for epoch in range(1, epochs + 1):
    model.train()
    total_loss = 0

    for batch_idx, (x_batch, y_batch) in enumerate(train_loader, start=1):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs, _ = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Periodic progress report: running mean of the loss within this epoch.
        if batch_idx % 10000 == 0:
            avg_loss_so_far = total_loss / batch_idx
            print(f"Epoch {epoch}, Batch {batch_idx}/{len(train_loader)} - Avg Loss So Far: {avg_loss_so_far:.4f}")

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch}/{epochs} - Avg Loss: {avg_loss:.4f}")

# Overwrite the checkpoint with the updated weights.
torch.save(model.state_dict(), checkpoint_path)
print(f"Model saved as '{checkpoint_path}'")

Continuing training for 5 more epochs with seq_len=32, hidden_size=256...
Epoch 1, Batch 10000/31134 - Avg Loss So Far: 0.0727
Epoch 1, Batch 20000/31134 - Avg Loss So Far: 0.0728
Epoch 1, Batch 30000/31134 - Avg Loss So Far: 0.0728
Epoch 1/5 - Avg Loss: 0.0728
Epoch 2, Batch 10000/31134 - Avg Loss So Far: 0.0726
Epoch 2, Batch 20000/31134 - Avg Loss So Far: 0.0726
Epoch 2, Batch 30000/31134 - Avg Loss So Far: 0.0726
Epoch 2/5 - Avg Loss: 0.0726
Epoch 3, Batch 10000/31134 - Avg Loss So Far: 0.0725
Epoch 3, Batch 20000/31134 - Avg Loss So Far: 0.0725
Epoch 3, Batch 30000/31134 - Avg Loss So Far: 0.0725
Epoch 3/5 - Avg Loss: 0.0725
Epoch 4, Batch 10000/31134 - Avg Loss So Far: 0.0724
Epoch 4, Batch 20000/31134 - Avg Loss So Far: 0.0724
Epoch 4, Batch 30000/31134 - Avg Loss So Far: 0.0724
Epoch 4/5 - Avg Loss: 0.0725
Epoch 5, Batch 10000/31134 - Avg Loss So Far: 0.0723
Epoch 5, Batch 20000/31134 - Avg Loss So Far: 0.0723
Epoch 5, Batch 30000/31134 - Avg Loss So Far: 0.0724
Epoch 5/5 - Avg

Generate Music

In [None]:
# Architecture settings; they are passed to MusicEventRNN below and must
# match the saved checkpoint for load_state_dict to succeed.
seq_len = 32
hidden_size = 256
num_layers = 1

dataset = MusicEventDataset(reconstructed_dataset, vocab=vocab, seq_length=seq_len)

# Seeded 90/10 split of the windows into train and test parts.
generator = torch.Generator().manual_seed(42)
total_size = len(dataset)
test_size = int(0.1 * total_size)
train_size = total_size - test_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=generator)

model = MusicEventRNN(
    input_size=len(vocab),
    hidden_size=hidden_size,
    output_size=len(vocab),
    num_layers=num_layers
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the saved weights, mapping them onto the selected device.
model.load_state_dict(torch.load("music_gen_seq32_hidden256.pth", map_location=device))
model = model.to(device)

In [None]:
def generate_music_multihot(model, vocab, start_sequence, max_length=100, temperature=1.0, device='cpu', threshold=0.3):
    """
    Generate music with a model that outputs multi-hot note vectors.

    The model is first fed the whole `start_sequence`; afterwards each newly
    generated event is fed back in, together with the recurrent state, to
    produce the next one.

    The model's outputs are treated as independent per-note logits (which is
    what a binary cross-entropy-with-logits objective trains), so each note's
    probability is `sigmoid(logit / temperature)` and notes are sampled
    independently. If no note is sampled, one to three of the most probable
    notes are used instead.

    Args:
        model: Module whose forward takes `(input, hidden)` and returns
            `(logits, hidden)`.
        vocab: Object providing `encode_event(event)` and an `idx_to_note` mapping.
        start_sequence: Non-empty sequence of events used to prime the model.
        max_length: Number of events to generate.
        temperature: Positive scaling of the logits; lower values make the
            sampling more deterministic.
        device: Device on which the input tensors are created. It has to be
            the device the model lives on.
        threshold: Not used by the sampling procedure; kept so that existing
            calls that pass it keep working.

    Returns:
        A list with the `max_length` generated events (the primer is not
        included). An event is a single note, or a list of notes for a chord.

    Raises:
        ValueError: If `temperature` is not positive or `start_sequence` is empty.
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be positive, got {temperature}")
    if len(start_sequence) == 0:
        raise ValueError("start_sequence must contain at least one event")

    model.eval()

    # Stack into one array first: building a tensor from a list of arrays is slow.
    primer = np.stack([vocab.encode_event(ev) for ev in start_sequence])
    input_seq = torch.as_tensor(primer, dtype=torch.float32, device=device).unsqueeze(0)
    hidden = None
    generated = []

    with torch.no_grad():
        for _ in range(max_length):
            output, hidden = model(input_seq, hidden)
            logits = output.squeeze(0)
            # Independent per-note probabilities; a softmax would force the
            # notes to compete and sum to one, which is not what the model learned.
            probs = torch.sigmoid(logits / temperature)

            sampled = torch.bernoulli(probs).nonzero(as_tuple=True)[0].tolist()

            if not sampled:
                # Nothing was drawn: fall back to a random-sized prefix of the
                # most probable notes (at most three, fewer for tiny vocabularies).
                top = probs.topk(min(3, probs.numel())).indices.tolist()
                sampled = top[:random.randint(1, len(top))]

            decoded_event = [vocab.idx_to_note[idx] for idx in sampled]
            if len(decoded_event) == 1:
                decoded_event = decoded_event[0]

            generated.append(decoded_event)

            # Next step: feed only the new event; the context lives in `hidden`.
            next_vec = np.asarray(vocab.encode_event(decoded_event))
            input_seq = torch.as_tensor(next_vec, dtype=torch.float32, device=device).view(1, 1, -1)

    return generated

Trial for a single sequence

In [100]:
# Prime the model with the first 32 events of the song at index 1.
start_sequence = reconstructed_dataset['sequence'][1][:32]
# NOTE(review): `threshold` is forwarded below, but the generation function in
# this notebook does not appear to read it - confirm whether it has any effect.
threshold = 0.7

generated_events = generate_music_multihot(model, vocab, start_sequence, max_length=50, temperature=1, device=device, threshold=threshold)

print("Generated sequence of notes/chords:")
print(generated_events)

Generated sequence of notes/chords:
[[61, 65], 68, [61, 65], [49, 56], 61, [44, 65], 65, 56, [46, 56], 61, 62, [61, 65], 62, 55, 61, [56, 69], 73, 61, 61, 68, [61, 64, 70], [61, 70, 72], [37, 53], 73, 61, [61, 63, 58], 80, 80, [49, 61, 73, 84], 56, [61, 63], 65, [68, 70, 73], [75, 73], 44, [63, 75, 80], 87, [39, 75, 77], 54, 56, 61, [61, 63], [61, 63], 75, 68, 90, [87, 75], [75, 87, 61], 87, 87]


Multiple generations

In [None]:
def save_to_midi(events, filename="generated_from_test.mid", default_duration=0.5):
    """
    Write a list of events to a MIDI file, one event after the other.

    Every event gets the same duration and starts where the previous one
    ends, so the result is an evenly spaced sequence.

    Args:
        events: Iterable of events; a list is written as a chord, anything
            else as a single note.
        filename: Path of the MIDI file to write. Missing parent directories
            are created.
        default_duration: Duration given to every event and used as the step
            between consecutive offsets.
    """
    from music21 import stream, note, chord, duration, midi

    # Make sure the target folder exists before the file is opened for writing.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    part = stream.Part()
    current_offset = 0.0

    for ev in events:
        if isinstance(ev, list):
            m21_event = chord.Chord(ev)
        else:
            m21_event = note.Note(ev)

        m21_event.duration = duration.Duration(default_duration)
        part.insert(current_offset, m21_event)

        current_offset += default_duration

    score = stream.Score()
    score.insert(0, part)

    mf = midi.translate.music21ObjectToMidiFile(score)
    mf.open(filename, 'wb')
    try:
        mf.write()
    finally:
        # Release the file handle even if writing fails.
        mf.close()


In [None]:
# Rebuild the vocabulary and the 32-event window dataset.
seq_len = 32
vocab = Vocabulary(reconstructed_dataset)
dataset = MusicEventDataset(reconstructed_dataset, vocab, seq_length=seq_len)

# Seeded 90/10 split of the windows into train and test parts.
generator = torch.Generator().manual_seed(42)
test_size = int(0.1 * len(dataset))
train_size = len(dataset) - test_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=generator)

In [104]:
# Recreate the network and load the saved weights onto the CPU.
model = MusicEventRNN(input_size=vocab.vocab_size, hidden_size=256, output_size=vocab.vocab_size)
model.load_state_dict(torch.load("music_gen_seq32_hidden256.pth", map_location='cpu'))
# Switch to inference mode.
model.eval()

MusicEventRNN(
  (rnn): LSTM(88, 256, batch_first=True)
  (fc): Linear(in_features=256, out_features=88, bias=True)
)

In [None]:
# Generate 100 samples, each primed with a randomly chosen test window.
# The random choice is not seeded in this cell.
root_folder = "generated_music_rnn/"
for i in range(100):
    x, _ = test_dataset[random.randint(0, len(test_dataset)-1)]
    # Turn the encoded window back into a list of events to use as primer.
    start_seq = [vocab.decode_event(vec) for vec in x]
    generated_events = generate_music_multihot(model, vocab, start_seq, max_length=50, temperature=1)

    save_to_midi(generated_events, root_folder + f"dataset/sample_{i}.mid")

### Generate from popular songs

In [None]:
def parse_midi_to_event_sequence(midi_path):
    """
    Parses a MIDI file into a list of notes and chords.

    Args:
        midi_path: Path of the MIDI file to read.

    Returns:
        A list of events in the order music21 yields them from the flattened
        score: a single note becomes its MIDI pitch number, a chord becomes
        the list of MIDI pitch numbers of its pitches. Other elements are
        skipped.
    """
    score = converter.parse(midi_path)
    # `Stream.flat` is deprecated in newer music21 releases in favour of
    # `flatten()`; fall back to the property on versions without the method.
    flat_stream = score.flatten() if hasattr(score, 'flatten') else score.flat
    events = []

    for el in flat_stream.notes:
        if isinstance(el, note.Note):
            events.append(el.pitch.midi)
        elif isinstance(el, chord.Chord):
            events.append([p.midi for p in el.pitches])

    return events

def prime_and_generate_from_midi(midi_path, model, vocab, max_length=50, temperature=1.0, seq_len=32, output_file="output.mid", def_duration=0.5, start_offset=60):
    """
    Loads a MIDI file, extracts a sequence of notes/chords, and uses it to generate new music.

    The primer is the window of `seq_len` events that starts `start_offset`
    events into the file. Only the newly generated events are written to
    `output_file`.

    Args:
        midi_path: Path of the MIDI file used as primer.
        model: Trained model handed to `generate_music_multihot`.
        vocab: Vocabulary handed to `generate_music_multihot`.
        max_length: Number of events to generate.
        temperature: Sampling temperature.
        seq_len: Number of primer events.
        output_file: Path of the MIDI file to write.
        def_duration: Duration given to every generated event.
        start_offset: Index of the first primer event within the parsed file.

    Returns:
        `output_file`.

    Raises:
        ValueError: If the file has fewer than `start_offset + seq_len` events,
            i.e. the primer window would be incomplete.
    """
    original_sequence = parse_midi_to_event_sequence(midi_path)

    # The window ends at start_offset + seq_len, so that is the length needed;
    # checking against seq_len alone would let a truncated or empty primer through.
    required = start_offset + seq_len
    if len(original_sequence) < required:
        raise ValueError(f"Sequence too short: {len(original_sequence)} events (need at least {required})")

    start_sequence = original_sequence[start_offset:required]

    generated = generate_music_multihot(model, vocab, start_sequence, max_length=max_length, temperature=temperature)

    save_to_midi(generated, output_file, default_duration=def_duration)
    return output_file


In [262]:
# Prime the model with a popular song and write the continuation to a MIDI
# file of the same name in the output folder.
from_folder = "popular_songs/"
name_of_song = "HipsDontLie.mid"
to_folder = "generated_music_rnn/popular/"
prime_and_generate_from_midi(from_folder + name_of_song, model, vocab, temperature=0.7, output_file=to_folder + name_of_song, def_duration=0.6)

'generated_music_rnn/popular/HipsDontLie.mid'