<a href="https://colab.research.google.com/github/nicovaras/deep-learning-projects/blob/main/Music_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!sudo apt install -y fluidsynth
!pip install --upgrade pyfluidsynth
!pip install --upgrade pretty_midi

Reading package lists... Done
Building dependency tree       
Reading state information... Done
fluidsynth is already the newest version (2.1.1-2).
0 upgraded, 0 newly installed, 0 to remove and 23 not upgraded.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import os
import numpy as np
import pretty_midi
import tensorflow as tf
from keras import backend as K
from sklearn.metrics import f1_score
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Dropout, GRU
from keras.callbacks import LambdaCallback
from keras.callbacks import EarlyStopping
from IPython import display
import tensorflow as tf
import keras

inputs_raw = []

# Path to a folder with midi tracks of an individual instrument
folder_path = "/content/drive/MyDrive/music_gen/midi_25"
files = os.listdir(folder_path)

## Helper methods

In [3]:
def piano_roll_to_midi(piano_roll, fs=48, program=0):
    notes, frames = piano_roll.shape
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=program, is_drum=False)

    # pad 1 column of zeros so we can acknowledge inital and ending events
    piano_roll = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

    # use changes in velocities to find note on / note off events
    velocity_changes = np.nonzero(np.diff(piano_roll).T)

    # keep track on velocities and note on times
    prev_velocities = np.zeros(notes, dtype=int)
    note_on_time = np.zeros(notes)

    for time, note in zip(*velocity_changes):
        # use time + 1 because of padding above
        velocity = piano_roll[note, time + 1]
        time = time / fs
        if velocity > 0:
            if prev_velocities[note] == 0:
                note_on_time[note] = time
                prev_velocities[note] = velocity
        else:
            pm_note = pretty_midi.Note(
                velocity=prev_velocities[note],
                pitch=note,
                start=note_on_time[note],
                end=time)
            instrument.notes.append(pm_note)
            prev_velocities[note] = 0
    pm.instruments.append(instrument)
    return pm

def midi_to_piano_roll(midi_file, fs=16):
    midi_data = pretty_midi.PrettyMIDI(midi_file)
    # midi_data.instruments[0].program = 0
    # midi_data.instruments[0].is_drum = False
    piano_roll = midi_data.get_piano_roll(fs=fs)
    return piano_roll

def binarize_piano_roll(piano_roll, threshold=0):
    return (piano_roll > threshold).astype(np.float32)

def split_piano_roll(piano_roll, sequence_length):
    num_sequences = piano_roll.shape[1] // sequence_length
    return np.array_split(piano_roll[:, :num_sequences * sequence_length], num_sequences, axis=1)

_SAMPLING_RATE = 48000
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=30):
  waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
  waveform_short = waveform[:seconds*_SAMPLING_RATE]
  return display.Audio(waveform_short, rate=_SAMPLING_RATE)

def generate_sequence(model, seed_sequence, output_length):
    generated_sequence = seed_sequence
    
    for _ in range(output_length):
        input_sequence = np.array([generated_sequence[-seed_sequence.shape[0]:]])
        # input_sequence = tf.convert_to_tensor(input_sequence, dtype=tf.float32)
        next_step_probs = model.predict(input_sequence)[0]

        next_step = (next_step_probs > 0.5).astype(np.float32)
        generated_sequence = np.vstack([generated_sequence, next_step])

    return np.array(generated_sequence)  

def create_input_target_sequences(piano_roll_sequences):
    X = np.array(piano_roll_sequences)
    Y = np.roll(X, shift=-1, axis=2)
    return X, Y    

## Preprocessing

Midi preproecssing stage. Transforms midis into piano roll arrays.


In [4]:
sequences = []
sequence_length = 256
fs = 48

for i, file in enumerate(files):
  print(i,len(files), file)
  midi_file = os.path.join(folder_path, file)
  piano_roll = midi_to_piano_roll(midi_file, fs=fs)
  piano_roll = binarize_piano_roll(piano_roll)
  if piano_roll.size == 0:
    continue
  try:
    sequences += split_piano_roll(piano_roll, sequence_length)
  except:
    print('err')
    continue
input_data = np.array(sequences).transpose(0, 2, 1)
input_data.shape


0 260 3.mid
1 260 4.mid
2 260 1.mid
3 260 2.mid
4 260 5 (copy 1).mid
5 260 7.mid
6 260 9 (copy 1).mid
7 260 10.mid
8 260 5.mid
9 260 8.mid
10 260 6.mid
11 260 9.mid
12 260 0.mid
13 260 3 (copy 1).mid
14 260 21.mid
15 260 18.mid
16 260 16.mid
17 260 12.mid
18 260 11.mid
19 260 12 (copy 1).mid
20 260 11 (copy 2).mid
21 260 20.mid
22 260 13.mid
23 260 24 (copy 1).mid
24 260 14 (copy 1).mid
25 260 23.mid
26 260 14.mid
27 260 18 (copy 1).mid
28 260 17.mid
29 260 25.mid
30 260 24.mid
31 260 19.mid
32 260 11 (copy 1).mid
33 260 18 (copy 2).mid
34 260 21 (copy 1).mid
35 260 22.mid
36 260 35.mid
37 260 26.mid
38 260 33.mid
39 260 33 (copy 1).mid
40 260 27.mid
41 260 29 (copy 1).mid
42 260 28.mid
43 260 31.mid
44 260 29.mid
45 260 34.mid
46 260 30.mid
47 260 32.mid
48 260 43.mid
49 260 39.mid
50 260 38 (copy 2).mid
51 260 36.mid
52 260 37.mid
53 260 41.mid
54 260 42.mid
55 260 40.mid
56 260 38.mid
57 260 39 (copy 1).mid
58 260 42 (copy 1).mid
59 260 38 (copy 1).mid
60 260 46 (copy 2).mid
61 260 

(10648, 256, 128)

Listen to a track

In [5]:
idx=600

generated_sequence = input_data[idx]
generated_sequence_int = (generated_sequence * 127).astype(np.int32)
print("Unique velocities:", np.unique(generated_sequence_int))

midi_data = piano_roll_to_midi(generated_sequence_int.T, program=30, fs=fs)
midi_data.write('output_midi_file.mid')
print(input_data[idx].argmax(axis=1))
pm = pretty_midi.PrettyMIDI("output_midi_file.mid")  
display_audio(pm)

Unique velocities: [  0 127]
[55 55 55 55 55 55 55 55 53 53 53 53 53 53 53 53 53 53 50 50 50 50 50 50
 50 50 50 50 50 55 55 55 55 55 55 55 55 55 55 55 49 49 49 49 49 49 49 49
 49 49 49 50 50 50 50 50 50 50 50 50 50 55 55 55 55 55 55 55 55 55 55 55
 50 50 50 50 50 50 50 50 50 50 50 55 55 55 55 55 55 55 55 55 55 55 46 46
 46 46 46 46 46 46 46 46 55 55 55 55 55 55 55 55 55 55 55 55 55 55 55 55
 55 55 55 55 55 55 50 50 50 50 50 50 50 50 50 50 50 55 55 55 55 55 55 55
 55 55 55 52 52 52 52 52 52 52 52 52 52 52 50 50 50 50 50 50 50 50 50 50
 50 55 55 55 55 55 55 55 55 55 55 55 58 58 58 58 58 58 58 58 58 58 50 50
 50 50 50 50 50 50 50 50 50 55 55 55 55 55 55 55 55 55 55 55 53 53 53 53
 53 53 53 53 53 53 53 50 50 50 50 50 50 50 50 50 50 55 55 55 55 55 55 55
 55 55 55 55 55 55 55 55 55 55 55 55 55 55 55 55]


## Model

In [6]:
# Print TensorFlow device list
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))
K.set_session(sess)

Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5



In [7]:
train_ratio = 0.8
num_train = int(train_ratio * input_data.shape[0])

X_train, X_val = input_data[:num_train], input_data[num_train:]

X_train, Y_train = create_input_target_sequences(X_train)
X_val, Y_val = create_input_target_sequences(X_val)
X_train.shape, X_val.shape

((8518, 256, 128), (2130, 256, 128))

In [8]:
# Build the model
input_shape = (sequence_length, 128)

model = Sequential()
model.add(GRU(units=1024, input_shape=input_shape, return_sequences=True))
model.add(Dropout(0.5))
model.add(TimeDistributed(Dense(1024, activation="sigmoid")))
model.add(Dropout(0.3))
model.add(TimeDistributed(Dense(128, activation="sigmoid")))
model.compile(loss="binary_crossentropy", optimizer="adam")

# Custom callback to compute F1-score on validation set
def on_epoch_end(epoch, logs):
    y_pred = np.argmax(model.predict(X_val) > 0.5, axis=2)
    y_true = np.argmax(Y_val > 0.5, axis=2)
    f1 = f1_score(y_true.ravel(), y_pred.ravel(), average='macro')
    print(f"\nEpoch {epoch + 1} - F1-score on validation set: {f1:.4f}")

f1_callback = LambdaCallback(on_epoch_end=on_epoch_end)
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)



In [9]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 256, 1024)         3545088   
                                                                 
 dropout (Dropout)           (None, 256, 1024)         0         
                                                                 
 time_distributed (TimeDistr  (None, 256, 1024)        1049600   
 ibuted)                                                         
                                                                 
 dropout_1 (Dropout)         (None, 256, 1024)         0         
                                                                 
 time_distributed_1 (TimeDis  (None, 256, 128)         131200    
 tributed)                                                       
                                                                 
Total params: 4,725,888
Trainable params: 4,725,888
Non-

In [None]:
model.fit(X_train, Y_train, validation_split=0.2, epochs=1000, batch_size=32, callbacks=[f1_callback, early_stopping], verbose=1)

Epoch 1/1000

Epoch 1 - F1-score on validation set: 0.0107
Epoch 2/1000

Epoch 2 - F1-score on validation set: 0.0108
Epoch 3/1000

Epoch 3 - F1-score on validation set: 0.2140
Epoch 4/1000

Epoch 4 - F1-score on validation set: 0.5609
Epoch 5/1000

Epoch 5 - F1-score on validation set: 0.7278
Epoch 6/1000


In [None]:
model.save('/content/drive/MyDrive/music_gen/model_0.h5')

In [None]:
model = keras.models.load_model('/content/drive/MyDrive/music_gen/model_0.h5')

## Generation

In [None]:
# Choose a random seed sequence from the input data
seed_idx = np.random.randint(len(input_data))
seed_sequence = input_data[seed_idx]

# Generate a new sequence of desired length
output_length = 8
generated_sequence = generate_sequence(model, seed_sequence, output_length)

In [None]:
generated_sequence.argmax(axis=1)

In [None]:
# generated_sequence = input_data[0]
generated_sequence_int = (generated_sequence * 127).astype(np.int32)
print("Unique velocities:", np.unique(generated_sequence_int))

midi_data = piano_roll_to_midi(generated_sequence_int.T, program=9, fs=48)
midi_data.write('output_midi_file.mid')

In [None]:
pm = pretty_midi.PrettyMIDI("output_midi_file.mid")  
display_audio(pm)