Imports

In [None]:
%pip install pretty_midi
%pip install --upgrade pyfluidsynth





[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
import collections
import fluidsynth
import glob
import pretty_midi
from IPython import display
from typing import Dict, List, Optional, Sequence, Tuple


Work with midi

In [None]:
sample_rate = 44100

def play_audio(midi_object, duration=30):
    audio_waveform = midi_object.fluidsynth(fs=sample_rate)
    trimmed_waveform = audio_waveform[:duration * sample_rate]
    return display.Audio(trimmed_waveform, rate=sample_rate)

midi_object = pretty_midi.PrettyMIDI()

piano = pretty_midi.Instrument(program=0, is_drum=False, name="Acoustic Grand Piano")
midi_object.instruments.append(piano)

In [None]:
def extract_midi_notes(midi_path):
    midi_data = pretty_midi.PrettyMIDI(midi_path)

    if not midi_data.instruments:
        print(f"Warning: No instruments found in {midi_path}!")
        return pd.DataFrame()  
    main_instrument = midi_data.instruments[0]  
    if not main_instrument.notes:
        print(f"Warning: No notes found in {midi_path}!")
        return pd.DataFrame()

    note_info = collections.defaultdict(list)
    ordered_notes = sorted(main_instrument.notes, key=lambda n: n.start)
    previous_start = ordered_notes[0].start if ordered_notes else 0

    for n in ordered_notes:
        start_time = n.start
        end_time = n.end
        note_info["pitch"].append(n.pitch)
        note_info["start"].append(start_time)
        note_info["end"].append(end_time)
        note_info["step"].append(start_time - previous_start)
        note_info["duration"].append(end_time - start_time)
        previous_start = start_time

    return pd.DataFrame({key: np.array(value) for key, value in note_info.items()})

Load data

In [6]:
filenames = glob.glob('music-midi-dataset/midi_dataset/midi_dataset/*.mid') 

In [None]:
num_files = 50 
all_notes = []

for f in filenames[:num_files]:
    print(f"Processing: {f}")
    try:
        notes = extract_midi_notes(f)
        if notes.empty:
            print(f"Warning: {f} produced an empty DataFrame!")
        else:
            all_notes.append(notes)
    except Exception as e:
        print(f"Error processing {f}: {e}")

Processing: music-midi-dataset/midi_dataset/midi_dataset\x (1).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (10).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (11).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (12).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (13).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (14).mid




Processing: music-midi-dataset/midi_dataset/midi_dataset\x (15).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (16).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (17).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (18).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (19).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (2).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (20).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (21).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (22).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (23).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (24).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (25).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (26).mid
Processing: music-midi-dataset/midi_dataset/midi_dataset\x (27).mid
Processing: music-midi-dataset/midi_dataset/midi_

In [None]:

if all_notes:
    all_notes = pd.concat(all_notes, ignore_index=True)
    print("Final Processed Notes DataFrame:")
    print(all_notes.head()) 
else:
    print("No valid MIDI files processed!")

if not all_notes.empty:
    note_names = np.vectorize(pretty_midi.note_number_to_name)
    sample_note_names = note_names(all_notes["pitch"])
    print("Sample Note Names:", sample_note_names[:10])


Final Processed Notes DataFrame:
   pitch  start  end  step  duration
0     67    0.0  2.0   0.0       2.0
1     64    0.0  2.0   0.0       2.0
2     59    0.0  2.0   0.0       2.0
3     71    2.0  4.0   2.0       2.0
4     55    2.0  4.0   0.0       2.0
Sample Note Names: ['G4' 'E4' 'B3' 'B4' 'G3' 'G4' 'G4' 'E4' 'C4' 'G3']


Generate music

In [19]:
def notes_to_midi(
    notes: pd.DataFrame,
    out_file: str,
    instrument_name: str,
    velocity: int = 100, 
) -> pretty_midi.PrettyMIDI:
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(instrument_name))

    prev_start = 0
    for _, note in notes.iterrows():
        start = float(prev_start + note['step'])
        end = float(start + note['duration'])
        midi_note = pretty_midi.Note(
            velocity=velocity,
            pitch=int(note['pitch']),
            start=start,
            end=end,
        )
        instrument.notes.append(midi_note)
        prev_start = start

    pm.instruments.append(instrument)
    pm.write(out_file)
    return pm

if not all_notes.empty:
    output_midi = "generated_music.mid"
    notes_to_midi(all_notes, output_midi, "Acoustic Grand Piano")

if not all_notes.empty:
    key_order = ["pitch", "step", "duration"]
    train_notes = np.stack([all_notes[key] for key in key_order], axis=1)

    notes_ds = tf.data.Dataset.from_tensor_slices(train_notes)


Sequences

In [20]:
seq_length = 20
vocab_size = 128

def create_sequences(dataset, seq_length, vocab_size=128):
    sequences = []
    targets = []
    num_seq = dataset.shape[0] - seq_length 

    for i in range(num_seq):
        sequence = dataset[i:i + seq_length, :] / [vocab_size, 1, 1]  
        target = dataset[i + seq_length, :] / [vocab_size, 1, 1]  
        sequences.append(sequence)
        targets.append(target)

    sequences = np.array(sequences)
    targets = np.array(targets)
    
    print(sequences.shape, targets.shape) 

    tf_dataset = tf.data.Dataset.from_tensor_slices(
        (sequences, {"pitch": targets[:, 0], "step": targets[:, 1], "duration": targets[:, 2]})
    )
    
    return tf_dataset

seq_ds = create_sequences(train_notes, seq_length, vocab_size)

batch_size = 64
buffer_size = 5000

train_ds = seq_ds.shuffle(buffer_size).batch(batch_size)

print(train_ds.element_spec)  


(6938, 20, 3) (6938, 3)
(TensorSpec(shape=(None, 20, 3), dtype=tf.float64, name=None), {'pitch': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'step': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'duration': TensorSpec(shape=(None,), dtype=tf.float64, name=None)})


LSTM model

In [None]:
layer = tf.keras.layers
learning_rate = 0.005
input_data = tf.keras.Input(shape=(seq_length , 3))
x= layer.LSTM(128)(input_data)
outputs = {
    "pitch":tf.keras.layers.Dense(64 , name = "pitch")(x),
    "step":tf.keras.layers.Dense(1 , name = "step")(x),
    "duration":tf.keras.layers.Dense(1 , name = "duration")(x),
}
model = tf.keras.Model(input_data , outputs)

loss  ={
    "pitch" : tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    "step": tf.keras.losses.MeanSquaredError(),
    "duration":tf.keras.losses.MeanSquaredError(),
}
optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
model.compile(loss=loss ,    loss_weights={
        'pitch': 0.05,
        'step': 1.0,
        'duration':1.0,
    }, optimizer = optimizer)

model.summary()


In [13]:
model.fit(train_ds , epochs = 50)

hist = model.predict(train_ds)
print(hist["duration"].shape)


Epoch 1/50
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - duration_loss: 0.0273 - loss: 1.5467 - pitch_loss: 0.0270 - step_loss: 1.5177
Epoch 2/50
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - duration_loss: 0.0247 - loss: 3.3053 - pitch_loss: 0.0290 - step_loss: 3.2788
Epoch 3/50
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - duration_loss: 0.0261 - loss: 1.5524 - pitch_loss: 0.0112 - step_loss: 1.5254
Epoch 4/50
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - duration_loss: 0.0243 - loss: 2.0444 - pitch_loss: 0.0125 - step_loss: 2.0191
Epoch 5/50
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - duration_loss: 0.0286 - loss: 2.0386 - pitch_loss: 0.0040 - step_loss: 2.0094
Epoch 6/50
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - duration_loss: 0.0264 - loss: 8.5531 - pitch_loss: 0.0019 - step_loss: 8.5263
Epoc

Generate notes

In [None]:
def predict_next_note(notes, keras_model, temperature):
    assert temperature > 0, "Temperature must be greater than zero."
    
    inputs = np.expand_dims(notes, 0)
    predictions = keras_model.predict(inputs)

    pitch_logits = predictions["pitch"] 
    step = predictions["step"] 
    duration = predictions["duration"]  

    pitch_logits /= temperature
    pitch = tf.random.categorical(pitch_logits, num_samples=1)
    pitch = tf.squeeze(pitch, axis=-1)
    duration = tf.squeeze(duration, axis=-1)
    step = tf.squeeze(step, axis=-1)

    step = tf.maximum(0, step)
    duration = tf.maximum(0, duration)

    return int(pitch), float(step), float(duration)

temperature = 2.0
num_predictions = int(10 / 0.25)

sample_notes = np.stack([all_notes[key] for key in key_order], axis=1)
input_notes = sample_notes[:seq_length] / np.array([vocab_size, 1, 1])

generated_notes = []
prev_start = 0

for _ in range(num_predictions):
    pitch, step, duration = predict_next_note(input_notes, model, temperature)
    
    start = prev_start + step
    end = start + duration
    input_note = (pitch, step, duration)
    
    generated_notes.append((*input_note, start, end))
    
    input_notes = np.delete(input_notes, 0, axis=0)
    input_notes = np.append(input_notes, np.expand_dims(input_note, 0), axis=0)
    
    prev_start = start

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, "start", "end")) 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

See music

In [None]:
out_file = 'generated.mid'  

instrument_name = "Acoustic Grand Piano"  

out_pm = notes_to_midi(
    generated_notes, out_file=out_file, instrument_name=instrument_name)

print(f"Generated MIDI file saved as: {out_file}")

play_audio(out_pm, duration=10)  


Generated MIDI file saved as: generated.mid
