In [2]:
!pip install tensorflow
!pip install miditok
!pip install pretty_midi

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting miditok
  Using cached miditok-3.0.5.post1-py3-none-any.whl.metadata (10 kB)
Collecting huggingface-hub>=0.16.4 (from miditok)
  Downloading huggingface_hub-0.32.3-py3-none-any.whl.metadata (14 kB)
Collecting symusic>=0.5.0 (from miditok)
  Downloading symusic-0.5.8-cp312-cp312-win_amd64.whl.metadata (9.0 kB)
Collecting tokenizers>=0.13.0 (from miditok)
  Using cached tokenizers-0.21.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting tqdm (from miditok)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting filelock (from huggingface-hub>=0.16.4->miditok)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub>=0.16.4->miditok)
  Downloading fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)
Collecting pyyaml>=5.1 (from huggingface-hub>=0.16.4



Defaulting to user installation because normal site-packages is not writeable
Collecting pretty_midi
  Using cached pretty_midi-0.2.10.tar.gz (5.6 MB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting mido>=1.1.16 (from pretty_midi)
  Using cached mido-1.3.3-py3-none-any.whl.metadata (6.4 kB)
Using cached mido-1.3.3-py3-none-any.whl (54 kB)
Building wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py): started
  Building wheel for pretty_midi (setup.py): finished with status 'done'
  Created wheel for pretty_midi: filename=pretty_midi-0.2.10-py3-none-any.whl size=5592356 sha256=de2a1ab376c5fe2620ef97c9a478d3db915f807fe8549d27e84689db282dd68e
  Stored in directory: c:\users\jc\appdata\local\pip\cache\wheels\a4\f9\9e\08350c27e386558df0ae234e28a8facd145ba45506ddd1b989
Successfully built pretty_midi
Installing collected packages: mido, pretty_midi

   ---------------------------------------- 0/

  DEPRECATION: Building 'pretty_midi' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'pretty_midi'. Discussion can be found at https://github.com/pypa/pip/issues/6334


In [None]:
import os
import miditok
import pretty_midi
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

POP909_DATA_FILE_PATH = "POP909-Dataset-master/POP909"

In [16]:
def extract_notes_from_midi(file_path):
    midi = pretty_midi.PrettyMIDI(file_path)
    notes = []
    for instrument in midi.instruments:
        if not instrument.is_drum:
            for note in instrument.notes:
                pitch = note.pitch
                start = note.start
                duration = round(note.end - start, 3)
                notes.append((start, pitch, duration))
    notes.sort()           # sort by start-time
    return notes

def read_notes_from_pop909_dataset():
    all_notes = []
    for i in range(1, 910):
        mid_path = POP909_DATA_FILE_PATH + f"/{i:03}/{i:03}.mid"
        all_notes += extract_notes_from_midi(mid_path)

    return all_notes

def quantize(value, step=0.25):
    return round(value / step) * step

def notes_to_token_sequence(notes, time_step=0.25):
    tokens = []
    prev_start = 0.0

    for start, pitch, duration in notes:
        time_shift = quantize(start - prev_start, time_step)
        token = f"TS_{time_shift:.2f}_P_{pitch}_D_{duration:.2f}"
        tokens.append(token)
        prev_start = start

    return tokens

def token_sequence_to_notes(token_sequence):
    notes = []
    current_time = 0.0

    for token in token_sequence:
        try:
            parts = token.split('_')
            step = float(parts[1])
            pitch = int(parts[3])
            duration = float(parts[5])
        except:
            continue  # skip malformed tokens

        start_time = current_time + step
        end_time = start_time + duration
        notes.append((pitch, start_time, end_time))
        current_time = start_time  # update time based on step

    return notes

def notes_to_midi_file(notes, output_file='generated.mid'):
    midi = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=0)

    for pitch, start, end in notes:
        note = pretty_midi.Note(velocity=100, pitch=pitch, start=start, end=end)
        instrument.notes.append(note)

    midi.instruments.append(instrument)
    midi.write(output_file)

In [17]:
notes = read_notes_from_pop909_dataset()
tokens = notes_to_token_sequence(notes)

In [18]:
print(len(notes))

1533438


In [19]:
print(len(tokens))
print(len(set(tokens)))

1533438
59424


In [6]:
unique_tokens = sorted(set(tokens))
token_to_id = {tok: i for i, tok in enumerate(unique_tokens)}
id_to_token = {i: tok for tok, i in token_to_id.items()}

encoded_sequence = [token_to_id[tok] for tok in tokens]

seq_length = 20  # how many tokens in input sequence

inputs = []
targets = []

for i in range(len(encoded_sequence) - seq_length):
    inputs.append(encoded_sequence[i:i+seq_length])
    targets.append(encoded_sequence[i+1:i+seq_length+1])

inputs = np.array(inputs)
targets = np.array(targets)

vocab_size = len(unique_tokens)
embedding_dim = 64
rnn_units = 128

model = tf.keras.Sequential([
    layers.Embedding(vocab_size, embedding_dim, input_length=seq_length),
    layers.LSTM(rnn_units, return_sequences=True),
    layers.Dense(vocab_size, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

model.fit(inputs, targets, epochs=5, batch_size=64)

Epoch 1/5




[1m2564/2564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m664s[0m 258ms/step - loss: 8.9053
Epoch 2/5
[1m2564/2564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m619s[0m 242ms/step - loss: 6.2180
Epoch 3/5
[1m2564/2564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m619s[0m 241ms/step - loss: 3.7503
Epoch 4/5
[1m2564/2564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m628s[0m 245ms/step - loss: 2.1953
Epoch 5/5
[1m2564/2564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m631s[0m 246ms/step - loss: 1.3797


<keras.src.callbacks.history.History at 0x255884fb110>

In [15]:
train_loss = model.evaluate(inputs, targets, verbose=0)
train_perplexity = np.exp(train_loss)

print(f"Training Loss: {train_loss:.4f}")
print(f"Training Perplexity: {train_perplexity:.4f}")

Training Loss: 0.9961
Training Perplexity: 2.7076


In [9]:
def generate_tokens(model, seed_sequence, gen_length=1000, temperature=1.0):
    generated = list(seed_sequence)
    for _ in range(gen_length):
        input_seq = np.array(generated[-seq_length:])[None, :]  # batch size 1
        preds = model.predict(input_seq)[0, -1]
        preds = np.log(preds) / temperature
        exp_preds = np.exp(preds)
        preds = exp_preds / np.sum(exp_preds)
        next_id = np.random.choice(len(preds), p=preds)
        generated.append(next_id)
    return generated

# Start generation with the first sequence as seed
seed_seq = encoded_sequence[:1]
generated_ids = generate_tokens(model, seed_seq)

generated_tokens = [id_to_token[i] for i in generated_ids]
print(generated_tokens)

def token_sequence_to_notes(token_sequence):
    notes = []
    current_time = 0.0

    for token in token_sequence:
        try:
            parts = token.split('_')
            step = float(parts[1])
            pitch = int(parts[3])
            duration = float(parts[5])
        except:
            continue  # skip malformed tokens

        start_time = current_time + step
        end_time = start_time + duration
        notes.append((pitch, start_time, end_time))
        current_time = start_time  # update time based on step

    return notes

def notes_to_midi_file(notes, output_file='generated.mid'):
    midi = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=0)

    for pitch, start, end in notes:
        note = pretty_midi.Note(velocity=100, pitch=pitch, start=start, end=end)
        instrument.notes.append(note)

    midi.instruments.append(instrument)
    midi.write(output_file)
    print(f"Generated notes saved as {output_file}")

notes = token_sequence_to_notes(generated_tokens)
notes_to_midi_file(notes, 'generated_music.mid')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [13]:
# Baseline Stuff
import random
from glob import glob
from collections import defaultdict
from collections import Counter

import numpy as np
from numpy.random import choice

from symusic import Score
from miditok import REMI, TokenizerConfig
from midiutil import MIDIFile

random.seed(42)

duration2length = {
    '0.2.8': 2,  # sixteenth note, 0.25 beat in 4/4 time signature
    '0.4.8': 4,  # eighth note, 0.5 beat in 4/4 time signature
    '1.0.8': 8,  # quarter note, 1 beat in 4/4 time signature
    '2.0.8': 16, # half note, 2 beats in 4/4 time signature
    '4.0.4': 32, # whole note, 4 beats in 4/4 time signature
}

midi_files = []
for i in range(1, 910):
    midi_files.append(POP909_DATA_FILE_PATH + f"/{i:03}/{i:03}.mid")

config = TokenizerConfig(num_velocities=1, use_chords=False, use_programs=False)
tokenizer = REMI(config)
tokenizer.train(vocab_size=1000, files_paths=midi_files)

def note_extraction(midi_file):
    # Q1a: Your code goes here
    tokens = tokenizer(Score(midi_file))[0].tokens
    notes = []
    for str in tokens:
        if 'Pitch' in str:
            notes.append(int(str.split("_")[1]))
            
    return notes

def note_frequency(midi_files):
    # Q1b: Your code goes here
    count = defaultdict(int)
    for file in midi_files:
        notes = note_extraction(file)
        for note in notes:
            count[note] += 1
    
    return count

def note_unigram_probability(midi_files):
    note_counts = note_frequency(midi_files)
    unigramProbabilities = {}

    # Q2: Your code goes here
    # ...
    total = sum(note_counts.values())
    for key in note_counts:
        unigramProbabilities[key] = note_counts[key]/total

    return unigramProbabilities

def note_bigram_probability(midi_files):
    bigramTransitions = defaultdict(list)
    bigramTransitionProbabilities = defaultdict(list)

    # Q3a: Your code goes here
    # ...
    for file in midi_files:
        notes = note_extraction(file)
        for i in range(1,len(notes)):
            prev_note = notes[i-1]
            cur_note = notes[i]
            bigramTransitions[prev_note].append(cur_note)

    for prev_note in bigramTransitions:
        count = len(bigramTransitions[prev_note])
        all_notes = Counter(bigramTransitions[prev_note])
        bigramTransitions[prev_note] = list(set(bigramTransitions[prev_note]))
        for note in bigramTransitions[prev_note]:
            bigramTransitionProbabilities[prev_note].append(all_notes[note]/count)

    return bigramTransitions, bigramTransitionProbabilities

def note_trigram_probability(midi_files):
    trigramTransitions = defaultdict(list)
    trigramTransitionProbabilities = defaultdict(list)

    # Q5a: Your code goes here
    # ...
    for file in midi_files:
        notes = note_extraction(file)
        for i in range(2,len(notes)):
            next_prev_note = notes[i-2]
            prev_note = notes[i-1]
            cur_note = notes[i]
            trigramTransitions[(next_prev_note, prev_note)].append(cur_note)

    for prev_notes in trigramTransitions:
        count = len(trigramTransitions[prev_notes])
        all_notes = Counter(trigramTransitions[prev_notes])
        trigramTransitions[prev_notes] = list(set(trigramTransitions[prev_notes]))
        for notes in trigramTransitions[prev_notes]:
            trigramTransitionProbabilities[prev_notes].append(all_notes[notes]/count)

    return trigramTransitions, trigramTransitionProbabilities

def beat_extraction(midi_file):
    # Q6: Your code goes here
    tokens = tokenizer(Score(midi_file))[0].tokens
    beats = []
    for i in range(len(tokens)):
        if 'Position' in tokens[i]:
            position = int(tokens[i].split("_")[1])
            dur = tokens[i+3].split("_")[1]
            if dur not in duration2length:
                continue
            duration = duration2length[dur]
            beats.append((position,duration))

    return beats

def beat_bigram_probability(midi_files):
    bigramBeatTransitions = defaultdict(list)
    bigramBeatTransitionProbabilities = defaultdict(list)

    # Q7: Your code goes here
    for file in midi_files:
        beats = beat_extraction(file)
        for i in range(1, len(beats)):
            prev_beat = beats[i-1][1]
            cur_beat = beats[i][1]
            bigramBeatTransitions[prev_beat].append(cur_beat)
            
    for prev_beat in bigramBeatTransitions:
        count = len(bigramBeatTransitions[prev_beat])
        all_beats = Counter(bigramBeatTransitions[prev_beat])
        bigramBeatTransitions[prev_beat] = list(set(bigramBeatTransitions[prev_beat]))
        for beat in bigramBeatTransitions[prev_beat]:
            bigramBeatTransitionProbabilities[prev_beat].append(all_beats[beat]/count)
            
    return bigramBeatTransitions, bigramBeatTransitionProbabilities

def beat_pos_bigram_probability(midi_files):
    bigramBeatPosTransitions = defaultdict(list)
    bigramBeatPosTransitionProbabilities = defaultdict(list)

    # Q8a: Your code goes here
    for file in midi_files:
        beats = beat_extraction(file)
        for beat_pos, beat_len in beats:
            bigramBeatPosTransitions[beat_pos].append(beat_len)
            
    for beat_pos in bigramBeatPosTransitions:
        count = len(bigramBeatPosTransitions[beat_pos])
        all_beats = Counter(bigramBeatPosTransitions[beat_pos])
        bigramBeatPosTransitions[beat_pos] = list(set(bigramBeatPosTransitions[beat_pos]))
        for beat in bigramBeatPosTransitions[beat_pos]:
            bigramBeatPosTransitionProbabilities[beat_pos].append(all_beats[beat]/count)

    return bigramBeatPosTransitions, bigramBeatPosTransitionProbabilities

def beat_unigram_probability(midi_files):
    beatProbabilties = {}
    beatCounts = defaultdict(int)

    for file in midi_files:
        beats = beat_extraction(file)
        for _, beat_length in beats:
            beatCounts[beat_length] += 1
    
    total = sum(beatCounts.values())
    for beat_len in beatCounts.keys():
        beatProbabilties[beat_len] = beatCounts[beat_len]/total

    return beatProbabilties

def beat_bigram_perplexity(midi_file):
    unigramBeat = beat_unigram_probability(midi_files)
    bigramBeatTransitions, bigramBeatTransitionProbabilities = beat_bigram_probability(midi_files)
    bigramBeatPosTransitions, bigramBeatPosTransitionProbabilities = beat_pos_bigram_probability(midi_files)
    # Q8b: Your code goes here
    # Hint: one more probability function needs to be computed

    beats = beat_extraction(midi_file)
    # perplexity for Q7
    perplexity_Q7 = np.log(unigramBeat[beats[0][1]])
    for i in range(1, len(beats)):
        prev_beat_len = beats[i-1][1]
        cur_beat_len = beats[i][1]
        prob = 0
        if cur_beat_len in bigramBeatTransitions[prev_beat_len]:
            index = bigramBeatTransitions[prev_beat_len].index(cur_beat_len)
            prob = bigramBeatTransitionProbabilities[prev_beat_len][index]
        perplexity_Q7 += np.log(prob)

    perplexity_Q7 *= -1/len(beats)
    perplexity_Q7 = np.exp(perplexity_Q7)

    # perplexity for Q8
    perplexity_Q8 = 0
    for beat_pos, beat_len in beats:
        prob = 0
        if beat_len in bigramBeatPosTransitions[beat_pos]:
            index = bigramBeatPosTransitions[beat_pos].index(beat_len)
            prob = bigramBeatPosTransitionProbabilities[beat_pos][index]
        perplexity_Q8 += np.log(prob)

    perplexity_Q8 *= -1/len(beats)
    perplexity_Q8 = np.exp(perplexity_Q8)

    return perplexity_Q7, perplexity_Q8

def beat_trigram_probability(midi_files):
    trigramBeatTransitions = defaultdict(list)
    trigramBeatTransitionProbabilities = defaultdict(list)

    # Q9a: Your code goes here
    for file in midi_files:
        beats = beat_extraction(file)
        prev_beat_len = beats[0][1]
        for beat_pos, beat_len in beats[1:]:
            trigramBeatTransitions[(prev_beat_len, beat_pos)].append(beat_len)
            prev_beat_len = beat_len
    
    for prev_beat_len, beat_pos in trigramBeatTransitions.keys():
        count = len(trigramBeatTransitions[(prev_beat_len, beat_pos)])
        all_beats = Counter(trigramBeatTransitions[(prev_beat_len, beat_pos)])
        trigramBeatTransitions[(prev_beat_len, beat_pos)] = list(set(trigramBeatTransitions[(prev_beat_len, beat_pos)]))
        for beat in trigramBeatTransitions[(prev_beat_len, beat_pos)]:
            trigramBeatTransitionProbabilities[(prev_beat_len, beat_pos)].append(all_beats[beat]/count)

    return trigramBeatTransitions, trigramBeatTransitionProbabilities

def beat_trigram_perplexity(midi_file):
    bigramBeatPosTransitions, bigramBeatPosTransitionProbabilities = beat_pos_bigram_probability(midi_files)
    trigramBeatTransitions, trigramBeatTransitionProbabilities = beat_trigram_probability(midi_files)
    # Q9b: Your code goes here

    beats = beat_extraction(midi_file)
    index = bigramBeatPosTransitions[beats[0][0]].index(beats[0][1])
    perplexity = np.log(bigramBeatPosTransitionProbabilities[beats[0][0]][index])
    prev_beat_len = beats[0][1]
    for beat_pos, beat_len in beats[1:]:
        prob = 0
        if beat_len in trigramBeatTransitions[(prev_beat_len, beat_pos)]:
            index = trigramBeatTransitions[(prev_beat_len, beat_pos)].index(beat_len)
            prob = trigramBeatTransitionProbabilities[(prev_beat_len, beat_pos)][index]
        perplexity += np.log(prob)
        prev_beat_len = beat_len

    perplexity *= -1/len(beats)
    perplexity = np.exp(perplexity)

    return perplexity

def sample_note(unigramProbabilities):
    note_choices = list(unigramProbabilities.keys())
    notes_prob = [unigramProbabilities[note] for note in note_choices]
    rand_num = random.random()
    total_probability = 0
    for i in range(len(note_choices)):
        total_probability += notes_prob[i]
        if total_probability > rand_num:
            return note_choices[i]
    return note_choices[-1]

def sample_next_note1(note, transitions, probabilites):
    note_choices = transitions[note]
    note_probs = probabilites[note]
    rand_num = random.random()
    total_probability = 0
    for i in range(len(note_choices)):
        total_probability += note_probs[i]
        if total_probability > rand_num:
            return note_choices[i]
        
    return note_choices[-1]

def sample_next_note2(next_prev_note, prev_note, trigramTransitions, trigramTransitionProbabilities):
    note_choices = trigramTransitions[(next_prev_note, prev_note)]
    notes_prob = trigramTransitionProbabilities[(next_prev_note, prev_note)]
    rand_num = random.random()
    total_probability = 0
    for i in range(len(note_choices)):
        total_probability += notes_prob[i]
        if total_probability > rand_num:
            return note_choices[i]
    return note_choices[-1]

def sample_beat(beat_position, bigramBeatPosTransitions, bigramBeatPosTransitionProbabilities):
    beat_choices = bigramBeatPosTransitions[beat_position]
    beat_prob = bigramBeatPosTransitionProbabilities[beat_position]
    rand_num = random.random()
    total_probability = 0
    for i in range(len(beat_choices)):
        total_probability += beat_prob[i]
        if total_probability > rand_num:
            return beat_choices[i]
    return beat_choices[-1]

def music_generate(length):
    # sample notes
    unigramProbabilities = note_unigram_probability(midi_files)
    bigramTransitions, bigramTransitionProbabilities = note_bigram_probability(midi_files)
    trigramTransitions, trigramTransitionProbabilities = note_trigram_probability(midi_files)
    bigramBeatPosTransitions, bigramBeatPosTransitionProbabilities = beat_pos_bigram_probability(midi_files)


    # Q10: Your code goes here ...
    sampled_notes = []
    sampled_notes.append(sample_note(unigramProbabilities))
    sampled_notes.append(sample_next_note1(sampled_notes[-1], bigramTransitions, bigramTransitionProbabilities))
    for _ in range(length-2):
        sampled_notes.append(sample_next_note2(sampled_notes[-2], sampled_notes[-1], trigramTransitions, trigramTransitionProbabilities))

    # sample beats
    position = 0
    sampled_beats = []
    for _ in range(length):
        beat_length = sample_beat(position % 32, bigramBeatPosTransitions, bigramBeatPosTransitionProbabilities)
        position += beat_length
        sampled_beats.append(beat_length)

    # save the generated music as a midi file
    midi = MIDIFile(1)
    track = 0 # Set track number
    time = 0 # Where is the event placed (at the beginning)
    tempo = 120 # The tempo (beats per minute)
    midi.addTempo(track, time, tempo) # Add tempo information

    # print(sampled_beats)
    current_time = 0
    for pitch, duration in zip(sampled_notes, sampled_beats):
        midi.addNote(track, 0, pitch, current_time, duration/8, 100)
        current_time += duration/8

    with open("q10.mid", "wb") as f:
        midi.writeFile(f) # write MIDI file

In [14]:
music_generate(500)