In [1]:
!pip install pyfluidsynth # Install the Python bindings for FluidSynth
!apt-get install fluidsynth # Install the FluidSynth library on Linux
!apt-get install fluid-soundfont-gm # Install a General MIDI soundfont
!pip install pretty_midi # Install the pretty_midi library

import numpy as np
import tensorflow as tf
import pandas as pd
import collections
import fluidsynth # Import fluidsynth after installing the library
import glob
import pretty_midi
from IPython import display
from typing import Dict, List, Optional, Sequence, Tuple

Collecting pyfluidsynth
  Downloading pyfluidsynth-1.3.4-py3-none-any.whl.metadata (7.5 kB)
Downloading pyfluidsynth-1.3.4-py3-none-any.whl (22 kB)
Installing collected packages: pyfluidsynth
Successfully installed pyfluidsynth-1.3.4
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  fluid-soundfont-gm libevdev2 libfluidsynth3 libgudev-1.0-0 libinput-bin libinput10
  libinstpatch-1.0-2 libmd4c0 libmtdev1 libqt5core5a libqt5dbus5 libqt5gui5 libqt5network5
  libqt5svg5 libqt5widgets5 libwacom-bin libwacom-common libwacom9 libxcb-icccm4 libxcb-image0
  libxcb-keysyms1 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xinput0 libxcb-xkb1
  libxkbcommon-x11-0 qsynth qt5-gtk-platformtheme qttranslations5-l10n timgm6mb-soundfont
Suggested packages:
  fluid-soundfont-gs qt5-image-formats-plugins qtwayland5 jackd
The following NEW packages will be installed:
  fluid-soundfont-gm fluidsynt

In [2]:
!mkdir music-midi-dataset    # create a directory to extract the dataset
!unzip archive(7).zip  # replace the zip file name as per your file name

/bin/bash: -c: line 1: syntax error near unexpected token `('
/bin/bash: -c: line 1: `unzip archive(7).zip  # replace the zip file name as per your file name'


In [3]:
sampling_rate = 44100

def display_audio(pm, seconds=30):
	waveform = pm.fluidsynth(fs=sampling_rate)
  # Take a sample of the generated waveform to mitigate kernel resets
	waveform_short = waveform[:seconds*sampling_rate]
	return display.Audio(waveform_short, rate=sampling_rate)

pm = pretty_midi.PrettyMIDI()
# Create an instrument instance and add it to the PrettyMIDI object
instrument = pretty_midi.Instrument(program=0, is_drum=False, name='acoustic grand piano')
pm.instruments.append(instrument)
print(pm.instruments)
instrument = pm.instruments[0]

# This code is modified by Susobhan Akhuli


[Instrument(program=0, is_drum=False, name="acoustic grand piano")]


In [6]:
def midi_to_notes(midi_file):
	pm = pretty_midi.PrettyMIDI(midi_file)
	instrument = pm.instruments[0]
	notes = collections.defaultdict(list)
	sorted_notes = sorted(instrument.notes , key=lambda note:note.start)
	prev_start = sorted_notes[0].start

	for note in sorted_notes:
		start = note.start
		end = note.end
		notes["pitch"].append(note.pitch)
		notes["start"].append(start)
		notes["end"].append(end)
		notes["step"].append(start - prev_start)
		notes["duration"].append(end - start)
		prev_start = start
	return pd.DataFrame({name:np.array(value) for name,value in notes.items()})

raw_notes = midi_to_notes('/content/x (5).mid')
note_names = np.vectorize(pretty_midi.note_number_to_name)
sample_note_names = note_names(raw_notes["pitch"])

# This code is modified by Susobhan Akhuli



In [7]:
def notes_to_midi(
  notes: pd.DataFrame,
  out_file: str,
  instrument_name: str,
  velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:

  pm = pretty_midi.PrettyMIDI()
  instrument = pretty_midi.Instrument(
      program=pretty_midi.instrument_name_to_program(
          instrument_name))

  prev_start = 0
  for i, note in notes.iterrows():
    start = float(prev_start + note['step'])
    end = float(start + note['duration'])
    note = pretty_midi.Note(
        velocity=velocity,
        pitch=int(note['pitch']),
        start=start,
        end=end,
    )
    instrument.notes.append(note)
    prev_start = start

  pm.instruments.append(instrument)
  pm.write(out_file)
  return pm


In [8]:
num_files = 5
all_notes = []
filenames = glob.glob('*.mid') # Get a list of all MIDI files in the current directory
for f in filenames[:num_files] :
	notes = midi_to_notes(f)
	all_notes.append(notes)
all_notes = pd.concat(all_notes)
print(all_notes)
key_order = ["pitch" , "step" , "duration"]
train_notes = np.stack([all_notes[key] for key in key_order] , axis = 1)
notes_ds=tf.data.Dataset.from_tensor_slices(train_notes)
notes_ds.element_spec

# This code is modified by Susobhan Akhuli


    pitch   start     end    step  duration
0      69   1.500   1.625   0.000     0.125
1      69   1.875   2.000   0.375     0.125
2      66   2.125   2.250   0.250     0.125
3      64   2.500   2.750   0.375     0.250
4      62   2.750   2.875   0.250     0.125
5      64   3.000   3.250   0.250     0.250
6      62   3.250   3.375   0.250     0.125
7      69   3.500   3.625   0.250     0.125
8      69   3.875   4.000   0.375     0.125
9      66   4.125   4.250   0.250     0.125
10     64   4.500   4.750   0.375     0.250
11     62   4.750   4.875   0.250     0.125
12     64   5.000   5.250   0.250     0.250
13     62   5.250   5.375   0.250     0.125
14     69   5.500   5.625   0.250     0.125
15     69   5.875   6.000   0.375     0.125
16     66   6.125   6.250   0.250     0.125
17     64   6.500   6.750   0.375     0.250
18     62   6.750   6.875   0.250     0.125
19     64   7.000   7.250   0.250     0.250
20     62   7.250   7.375   0.250     0.125
21     69   7.500   7.625   0.25

TensorSpec(shape=(3,), dtype=tf.float64, name=None)

In [9]:
seq_length = 20
vocab_size = 128
def create_sequences(dataset,seq_length,vocab_size=128):
    sequences = []
    targets = []
    num_seq = train_notes.shape[0] - seq_length
    for i in range(num_seq):
        sequence = train_notes[i:i+seq_length - 1,:] / [vocab_size, 1 ,1]
        target =  train_notes[i+seq_length] / vocab_size
        sequences.append(sequence)
        targets.append(target)
    sequences = np.array(sequences)

seq_length = 20
vocab_size = 128
def create_sequences(dataset,seq_length,vocab_size=128):
    sequences = []
    targets = []
    num_seq = train_notes.shape[0] - seq_length
    for i in range(num_seq):
        sequence = train_notes[i:i+seq_length - 1,:] / [vocab_size, 1 ,1]
        target =  train_notes[i+seq_length] / vocab_size
        sequences.append(sequence)
        targets.append(target)
    sequences = np.array(sequences)
    targets = np.array(targets)
    print(sequences.shape , targets.shape)
    dataset = tf.data.Dataset.from_tensor_slices((sequences,{"pitch":targets[:,0] , "step":targets[:,1] ,"duration" :targets[:,2]}))
    return dataset
seq_ds = create_sequences(notes_ds, 21, vocab_size)
batch_size =64
buffer_size = 5000
train_ds = seq_ds.shuffle(buffer_size).batch(batch_size)
train_ds.element_spec
batch_size =64
buffer_size = 5000
train_ds = seq_ds.shuffle(buffer_size).batch(batch_size)
train_ds.element_spec


(31, 20, 3) (31, 3)


(TensorSpec(shape=(None, 20, 3), dtype=tf.float64, name=None),
 {'pitch': TensorSpec(shape=(None,), dtype=tf.float64, name=None),
  'step': TensorSpec(shape=(None,), dtype=tf.float64, name=None),
  'duration': TensorSpec(shape=(None,), dtype=tf.float64, name=None)})

In [10]:
layer = tf.keras.layers
learning_rate = 0.005
input_data = tf.keras.Input(shape=(seq_length , 3))
layer = tf.keras.layers
learning_rate = 0.005
input_data = tf.keras.Input(shape=(seq_length , 3))
x= layer.LSTM(128)(input_data)
outputs = {
    "pitch":tf.keras.layers.Dense(64 , name = "pitch")(x),
    "step":tf.keras.layers.Dense(1 , name = "step")(x),
    "duration":tf.keras.layers.Dense(1 , name = "duration")(x),
}
model = tf.keras.Model(input_data , outputs)

loss  ={
    "pitch" : tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    "step": tf.keras.losses.MeanSquaredError(),
    "duration":tf.keras.losses.MeanSquaredError(),
}
optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
model.compile(loss=loss ,    loss_weights={
        'pitch': 0.05,
        'step': 1.0,
        'duration':1.0,
    }, optimizer = optimizer)

model.summary()

model = tf.keras.Model(input_data , outputs)
layer = tf.keras.layers
learning_rate = 0.005
input_data = tf.keras.Input(shape=(seq_length , 3))
layer = tf.keras.layers
learning_rate = 0.005
input_data = tf.keras.Input(shape=(seq_length , 3))
x= layer.LSTM(128)(input_data)
outputs = {
    "pitch":tf.keras.layers.Dense(64 , name = "pitch")(x),
    "step":tf.keras.layers.Dense(1 , name = "step")(x),
    "duration":tf.keras.layers.Dense(1 , name = "duration")(x),
}
model = tf.keras.Model(input_data , outputs)

loss  ={
    "pitch" : tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    "step": tf.keras.losses.MeanSquaredError(),
    "duration":tf.keras.losses.MeanSquaredError(),
}
optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
model.compile(loss=loss ,    loss_weights={
        'pitch': 0.05,
        'step': 1.0,
        'duration':1.0,
    }, optimizer = optimizer)

model.summary()

model = tf.keras.Model(input_data , outputs)

loss  ={
    "pitch" : tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    "step": tf.keras.losses.MeanSquaredError(),
    "duration":tf.keras.losses.MeanSquaredError(),
}
optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
model.compile(loss=loss ,    loss_weights={
        'pitch': 0.05,
        'step': 1.0,
        'duration':1.0,
    }, optimizer = optimizer)

model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
model.compile(loss=loss ,    loss_weights={
        'pitch': 0.05,
        'step': 1.0,
        'duration':1.0,
    }, optimizer = optimizer)

model.summary()


In [11]:
model.fit(train_ds , epochs = 10)
model.fit(train_ds , epochs = 10)

hist = model.predict(train_ds)
print(hist["duration"].shape)

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - duration_loss: 0.0099 - loss: 0.2468 - pitch_loss: 4.2144 - step_loss: 0.0262
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - duration_loss: 0.1215 - loss: 0.5402 - pitch_loss: 4.0541 - step_loss: 0.2160
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - duration_loss: 0.0031 - loss: 0.2330 - pitch_loss: 3.9672 - step_loss: 0.0315
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 597ms/step - duration_loss: 0.0049 - loss: 0.2118 - pitch_loss: 3.8823 - step_loss: 0.0128
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - duration_loss: 0.0068 - loss: 0.2374 - pitch_loss: 3.8098 - step_loss: 0.0401
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - duration_loss: 0.0058 - loss: 0.2285 - pitch_loss: 3.7634 - step_loss: 0.0345
Epoch 7/10
[1m1/1[0m 

In [12]:
def predict_next_note(
notes , keras_model , temperature):

    assert temperature &gt; 0
    inputs = np.expand_dims(notes , 0)
def predict_next_note(
    notes , keras_model , temperature):

    assert temperature > 0
    inputs = np.expand_dims(notes , 0)
    predictions = model.predict(inputs)
    pitch_logits = predictions['pitch']
    step = predictions["step"]
    duration = predictions["duration"]
    pitch_logits /= temperature
    pitch = tf.random.categorical(pitch_logits , num_samples = 1)
    pitch = tf.squeeze(pitch , axis = -1)
    duration = tf.squeeze(duration , axis =-1)
    step = tf.squeeze(step,axis = -1)
    step = tf.maximum(0,step)
    duration = tf.maximum(0 , duration)
    return int(pitch) , float(step) , float(duration)

temperature = 2.0
num_predictions = 1200

sample_notes = np.stack([raw_notes[key] for key in key_order], axis=1)

# The initial sequence of notes and the pitch is normalized similar to training sequences
input_notes = (
    sample_notes[:seq_length] / np.array([vocab_size, 1, 1]))

generated_notes = []
prev_start = 0
for _ in range(num_predictions):
      pitch, step, duration = predict_next_note(input_notes, model, temperature)
      start = prev_start + step
      end = start + duration
      input_note = (pitch, step, duration)
      generated_notes.append((*input_note, start, end))
      input_notes = np.delete(input_notes, 0, axis=0)
      input_notes = np.append(input_notes, np.expand_dims(input_note, 0), axis=0)
      prev_start = start

temperature = 2.0
num_predictions = 1200

sample_notes = np.stack([raw_notes[key] for key in key_order], axis=1)

# The initial sequence of notes and the pitch is normalized similar to training sequences
input_notes = (
    sample_notes[:seq_length] / np.array([vocab_size, 1, 1]))

generated_notes = []
prev_start = 0
for _ in range(num_predictions):
    pitch, step, duration = predict_next_note(input_notes, model, temperature)
    start = prev_start + step
    end = start + duration
    input_note = (pitch, step, duration)
    generated_notes.append((*input_note, start, end))
    input_notes = np.delete(input_notes, 0, axis=0)
    input_notes = np.append(input_notes, np.expand_dims(input_note, 0), axis=0)
    prev_start = start

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, 'start', 'end'))

sample_notes = np.stack([raw_notes[key] for key in key_order], axis=1)

# The initial sequence of notes and the pitch is normalized similar to training sequences
input_notes = (
    sample_notes[:seq_length] / np.array([vocab_size, 1, 1]))

generated_notes = []
prev_start = 0
for _ in range(num_predictions):
      pitch, step, duration = predict_next_note(input_notes, model, temperature)
      start = prev_start + step
      end = start + duration
      input_note = (pitch, step, duration)
      generated_notes.append((*input_note, start, end))
      input_notes = np.delete(input_notes, 0, axis=0)
      input_notes = np.append(input_notes, np.expand_dims(input_note, 0), axis=0)
      prev_start = start

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, 'start', 'end'))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

In [13]:
out_file = 'gfgmusicgnerate.mid'
instrument_name= pretty_midi.program_to_instrument_name(instrument.program)
out_pm = notes_to_midi(
    generated_notes, out_file=out_file, instrument_name=instrument_name)
display_audio(out_pm , 500)
