# Getting the dataset

In [None]:
#@title Get the full version of the Lakh MIDI Dataset v0.1
!wget http://hog.ee.columbia.edu/craffel/lmd/lmd_full.tar.gz
!tar xvf lmd_full.tar.gz
!rm lmd_full.tar.gz

dataset_path = "/content/lmd_full"

In [1]:
#@title Get a smaller version of the Lakh MIDI Dataset v0.1
!wget http://hog.ee.columbia.edu/craffel/lmd/clean_midi.tar.gz
!tar xvf clean_midi.tar.gz
!rm clean_midi.tar.gz

dataset_path = "/content/clean_midi"

[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
clean_midi/Camaleonti/
clean_midi/Camaleonti/Eternita.mid
clean_midi/Camaleonti/Applausi.1.mid
clean_midi/Camaleonti/L'ora dell'amore.1.mid
clean_midi/Camaleonti/L'ora dell'amore.2.mid
clean_midi/Camaleonti/L'ora dell'amore.3.mid
clean_midi/Camaleonti/Mamma mia.2.mid
clean_midi/Camaleonti/Viso d'angelo.mid
clean_midi/Camaleonti/Io per lei.mid
clean_midi/Camaleonti/Mamma mia.mid
clean_midi/Camaleonti/Perche ti amo.1.mid
clean_midi/Camaleonti/Perche ti amo.mid
clean_midi/Camaleonti/L'ora dell'amore.mid
clean_midi/Camaleonti/Viso d'angelo.1.mid
clean_midi/Camaleonti/Applausi.mid
clean_midi/Camaleonti/Mamma mia.1.mid
clean_midi/Camaleonti/Cuore di vetro.mid
clean_midi/Mike + The Mechanics/
clean_midi/Mike + The Mechanics/Silent Running.mid
clean_midi/Mike + The Mechanics/Another Cup of Coffee.2.mid
clean_midi/Mike + The Mechanics/Over My Schoulder.mid
clean_midi/Mike + The Mechanics/All I Need Is a Miracle.mid
clean_midi/Mike 

In [None]:
#@title Get the NESMDB dataset
!gdown 1gIli7G1wu0QWDLzRc-CPWB8C4Hu0XVn3
!unzip nesmdb_midi.zip
!rm nesmdb_midi.zip

# Tokenization

## Libraries to manage MIDI files and their tokenization

In [2]:
!pip install miditok
!pip install pretty_midi

Collecting miditok
  Downloading miditok-3.0.4-py3-none-any.whl.metadata (10 kB)
Collecting symusic>=0.5.0 (from miditok)
  Downloading symusic-0.5.4-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (10 kB)
Collecting pySmartDL (from symusic>=0.5.0->miditok)
  Downloading pySmartDL-1.3.4-py3-none-any.whl.metadata (2.8 kB)
Downloading miditok-3.0.4-py3-none-any.whl (157 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m157.2/157.2 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading symusic-0.5.4-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pySmartDL-1.3.4-py3-none-any.whl (20 kB)
Installing collected packages: pySmartDL, symusic, miditok
Successfully installed miditok-3.0.4 pySmartDL-1.3.4 symusic-0.5.4
Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[2K     

# Utility functions

In [5]:
def normalize_to_range(arr, range_min=0, range_max=1):
    # Get the minimum and maximum of the array
    arr_min = np.min(arr)
    arr_max = np.max(arr)

    # Normalize to [0, 1]
    normalized_arr = (arr - arr_min) / (arr_max - arr_min)

    # Scale to [range_min, range_max] -> [-1, 1]
    scaled_arr = normalized_arr * (range_max - range_min) + range_min

    return scaled_arr, arr_min, arr_max

def de_normalize(arr, original_min, original_max, range_min=0, range_max=1):
    # Scale to [0, 1]
    scaled_arr = (arr - range_min) / (range_max - range_min)

    # Normalize to [original_min, original_max]
    de_normalized_arr = scaled_arr * (original_max - original_min) + original_min

    return de_normalized_arr

# Data preparation (using miditok)

In [25]:
from random import shuffle
from miditok.pytorch_data import DatasetMIDI, DataCollator
from miditok.utils import split_files_for_training
from miditok import REMI
from pathlib import Path

# Paths to the files of the dataset
midi_paths = list(Path("clean_midi").resolve().glob("**/*.mid"))[:100]

# Tokenizer
tokenizer = REMI()
tokenizer.train(vocab_size=30000, files_paths=midi_paths)
tokenizer.save("tokenizer.json")

# Split MIDI paths in train/valid/test sets
total_num_files = len(midi_paths)
num_files_valid = round(total_num_files * 0.15)
num_files_test = round(total_num_files * 0.15)
shuffle(midi_paths)
midi_paths_valid = midi_paths[:num_files_valid]
midi_paths_test = midi_paths[num_files_valid:num_files_valid + num_files_test]
midi_paths_train = midi_paths[num_files_valid + num_files_test:]

# Chunk MIDIs and perform data augmentation on each subset independently
for files_paths, subset_name in (
    (midi_paths_train, "train"), (midi_paths_valid, "valid"), (midi_paths_test, "test")
):

    # Split the MIDIs into chunks of sizes approximately about 1024 tokens
    subset_chunks_dir = Path(f"Lakh_{subset_name}")
    split_files_for_training(
        files_paths=files_paths,
        tokenizer=tokenizer,
        save_dir=subset_chunks_dir,
        max_seq_len=1024,
        num_overlap_bars=2,
    )

    # Perform data augmentation
    #augment_dataset(
    #    subset_chunks_dir,
    #    pitch_offsets=[-12, 12],
    #    velocity_offsets=[-4, 4],
    #    duration_offsets=[-0.5, 0.5],
    #)

# Create Dataset and Collator for training
midi_paths_train = list(Path("Lakh_train").glob("**/*.mid"))
midi_paths_valid = list(Path("Lakh_valid").glob("**/*.mid"))
midi_paths_test = list(Path("Lakh_test").glob("**/*.mid"))
kwargs_dataset = {"max_seq_len": 1024, "tokenizer": tokenizer, "bos_token_id": tokenizer["BOS_None"], "eos_token_id": tokenizer["EOS_None"]}
dataset_train = DatasetMIDI(midi_paths_train, **kwargs_dataset)
dataset_valid = DatasetMIDI(midi_paths_valid, **kwargs_dataset)
dataset_test = DatasetMIDI(midi_paths_test, **kwargs_dataset)

Splitting music files (Lakh_train): 100%|██████████| 70/70 [00:01<00:00, 45.74it/s]
Splitting music files (Lakh_valid): 100%|██████████| 15/15 [00:00<00:00, 53.26it/s]
Splitting music files (Lakh_test): 100%|██████████| 15/15 [00:00<00:00, 48.80it/s]


# Data cleaning

## Show some information of the Dataset

In [34]:
print(tokenizer)
print(dataset_train[0])
tokens = tokenizer(Path("Lakh_train", "Asia", "Don't Cry_t0_0.mid"))
print(type(tokens))
tokenizer(tokens).dump_midi("test.mid")

30000 tokens with ('I', 'T') io format, trained with BPE
{'input_ids': tensor([16108, 22464, 12832,  1042, 12935,  5751,  4944, 12991, 22464, 23676,
        18382, 24354,  1961,   234,  9968, 26774,   198,  4827,  7559,   321,
        22752, 14435,  3624,  1042,  1346, 18824,  2272,  3773, 22463,  8254,
          763, 23676,  3624,   194,   231,  9968,  7340,   198,  4052,   200,
         7340,  4819, 18923, 12618,   206,  5353,   208, 12618,  1902,   332,
         5353,  9628,   332,  1242,  1792, 23760, 13057, 29933, 13057, 29933,
        13057, 13057, 23443,   321,   216,  1242, 18382,  1961,  1242, 18820,
         3601,  1407,  1656, 22752, 22464, 23760,  3044,   224,   332])}
<class 'list'>


## Padding and cleaning of DatasetMIDI

In [90]:
import numpy as np
import torch
import tensorflow as tf

def torch_tensor_to_padded_numpy(tensor: torch.Tensor,
                                 padded_max_length: int) -> np.ndarray:
  array = tensor.numpy()
  if array[0] != tokenizer["BOS_None"]:
    array = np.insert(array, 0, tokenizer["BOS_None"])
  if array[-1] != tokenizer["EOS_None"]:
    array = np.append(array, tokenizer["EOS_None"])

  array = np.pad(array, (0, padded_max_length - len(array)), 'constant')
  return array


count = 0
train_x = []
val_x = []
test_x = []

max_len_train = max(len(arr["input_ids"]) for arr in dataset_train)
max_len_val = max(len(arr["input_ids"]) for arr in dataset_valid)
max_len_test = max(len(arr["input_ids"]) for arr in dataset_test)
max_len = max(max_len_train, max_len_val, max_len_test)
print("Max length of sequence in train_x is: " + str(max_len_train))
print("Max length of sequence in val_x is: " + str(max_len_val))
print("Max length of sequence in test_x is: " + str(max_len_test))

print("Using max_length: " + str(max_len))

for (result, input) in \
[(train_x, dataset_train), (val_x, dataset_valid), (test_x, dataset_test)]:
  for i in input:
    ids = i['input_ids']
    array = torch_tensor_to_padded_numpy(ids, max_len)
    result.append(array)

train_x = np.array(train_x)
print("Shape of train_x is " + str(train_x.shape))
val_x = np.array(val_x)
print("Shape of val_x is " + str(val_x.shape))
test_x = np.array(test_x)
print("Shape of test_x is " + str(test_x.shape))

Max length of sequence in train_x is: 406
Max length of sequence in val_x is: 865
Max length of sequence in test_x is: 673
Shape of train_x is (5246, 406)
Shape of val_x is (2977, 865)
Shape of test_x is (4189, 673)


## Saving the generated train, valid and test arrays (if necessary)

In [91]:
np.savetxt('train_x.txt', train_x, fmt='%d')
np.savetxt('val_x.txt', val_x, fmt='%d')
np.savetxt('test_x.txt', test_x, fmt='%d')

## Loading previous train, valid and test arrays (if necessary)

In [92]:
train_x = np.loadtxt('train_x.txt', dtype=int)
val_x = np.loadtxt('val_x.txt', dtype=int)
test_x = np.loadtxt('test_x.txt', dtype=int)

## Normalization

In [None]:
assert not np.any(np.isnan(train_x))
assert not np.any(np.isnan(val_x))
assert not np.any(np.isnan(test_x))

normalized_train_x, original_min_train, original_max_train = normalize_to_range(train_x, 0, 1)
assert (np.max(normalized_train_x)) == 1
assert (np.min(normalized_train_x)) == 0
normalized_val_x, original_min_val, original_max_val = normalize_to_range(val_x, 0, 1)
assert (np.max(normalized_val_x)) == 1
assert (np.min(normalized_val_x)) == 0
normalized_test_x, original_min_test, original_max_test = normalize_to_range(test_x, 0, 1)
assert (np.max(normalized_test_x)) == 1
assert (np.min(normalized_test_x)) == 0


## Preparing labels

In [None]:
normalized_train_y = np.roll(normalized_train_x, shift=-1, axis=1)
normalized_val_y = np.roll(normalized_val_x, shift=-1, axis=1)
normalized_test_y = np.roll(normalized_test_x, shift=-1, axis=1)

# Model creation

## Install `keras_nlp`

In [None]:
!pip install keras_nlp

Collecting keras_nlp
  Downloading keras_nlp-0.17.0-py3-none-any.whl.metadata (1.2 kB)
Collecting keras-hub==0.17.0 (from keras_nlp)
  Downloading keras_hub-0.17.0-py3-none-any.whl.metadata (7.4 kB)
Collecting tensorflow-text (from keras-hub==0.17.0->keras_nlp)
  Downloading tensorflow_text-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Downloading keras_nlp-0.17.0-py3-none-any.whl (2.0 kB)
Downloading keras_hub-0.17.0-py3-none-any.whl (644 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.1/644.1 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorflow_text-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m81.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow-text, keras-hub, keras_nlp
Successfully installed keras-hub-0.17.0 keras_nlp-0.17.0 tensorflow-text-2.17.0


## Creating a transformer

In [None]:
def create_transformer(vocab_size, seq_len, embedding_dim, num_heads, dff, num_layers):
  # Input
    inputs = tf.keras.Input(shape=(seq_len,))

    # Embedding
    embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs)

    # Encoder
    encoder = nlp_layers.TransformerEncoder(num_heads=num_heads, intermediate_dim=dff)(embedding)

    # Decoder
    decoder = nlp_layers.TransformerDecoder(num_heads=num_heads, intermediate_dim=dff)(embedding, encoder)

    # Output
    outputs = tf.keras.layers.Dense(vocab_size, activation='softmax')(decoder)

    # Crea il modello
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    return model, encoder, decoder

In [None]:
import os
import pretty_midi

import keras_nlp.layers as nlp_layers


def generate_midi(autoencoder, original_min, original_max, num_files=1):
    for _ in range(num_files):
        # Genera un vettore latente casuale
        random_latent_vector = np.random.normal(size=(1, 12800))  # Dimensione latente

        generated_sequence = autoencoder.predict(random_latent_vector)
        print(generated_sequence)
        generated_sequence = de_normalize(generated_sequence, original_min, original_max)
        # generated_sequence = np.argmax(generated_sequence, axis=-1)
        generated_sequence = np.squeeze(generated_sequence)
        generated_sequence = generated_sequence.astype(int)
        print(generated_sequence)
        print(generated_sequence.shape)

        generated_midi = pretty_midi.PrettyMIDI()
        instrument = pretty_midi.Instrument(program=0)

        for time_step in range(generated_sequence.shape[0]):
            pitch = generated_sequence[time_step]
            if pitch > 0:  # Evita il token 0, se usato per il padding
                note = pretty_midi.Note(velocity=100, pitch=pitch, start=time_step * 0.1, end=(time_step + 1) * 0.1)
                instrument.notes.append(note)

        generated_midi.instruments.append(instrument)
        generated_midi.write(f'generated_midi_{_}.mid')

# Old model

In [None]:
# Passo 0: Imposta il tuo flusso completo
midi_directory = dataset_path
midi_files = []

for root, _, files in os.walk(midi_directory):
    for file in files:
        if file.endswith('.mid'):
            midi_files.append(os.path.join(root, file))

# tokenized_data = load_and_tokenize_midi(midi_files[:50])
tokenizer = tokenizer_remi(dataset_path)
max_sequence_length = max(len(seq) for seq in tokenized_data)  # Calcola la lunghezza massima
padded_data = pad_sequences(tokenized_data, maxlen=128)

input_data = np.array(padded_data)

import numpy as np

assert not np.any(np.isnan(input_data))
normalized_x, original_min, original_max = normalize_to_range(input_data, 0, 1)
assert (np.max(normalized_x)) == 1
assert (np.min(normalized_x)) == 0
print(normalized_x)


vocab_size = len(np.unique(normalized_x))

Tokenizing music files (content/tokens):   4%|▍         | 667/17256 [02:02<30:54,  8.94it/s]Exception ignored in: <function _xla_gc_callback at 0x7d0bb0271510>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/jax/_src/lib/__init__.py", line 96, in _xla_gc_callback
    def _xla_gc_callback(*args):
KeyboardInterrupt: 
Tokenizing music files (content/tokens):   6%|▋         | 1084/17256 [02:42<22:01, 12.24it/s]

In [None]:
vocab_size

55

In [None]:
model_transformer, encoder, decoder = create_transformer(vocab_size=vocab_size, seq_len=128, embedding_dim=256, num_heads=8, dff=1024, num_layers=6)

In [None]:
model_transformer.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
model_transformer.summary()

In [None]:
y = np.roll(normalized_x, shift=-1, axis=1)
reshaped_x = normalized_x.reshape(normalized_x.shape[0], normalized_x.shape[1], 1)
reshaped_y = y.reshape(y.shape[0], y.shape[1], 1)
print(reshaped_x)
print(reshaped_y)

[[[0.71666667]
  [0.73333333]
  [0.71666667]
  ...
  [0.43333333]
  [0.48333333]
  [0.4       ]]

 [[0.2       ]
  [0.2       ]
  [0.2       ]
  ...
  [0.13333333]
  [0.13333333]
  [0.13333333]]

 [[0.13333333]
  [0.35      ]
  [0.13333333]
  ...
  [0.13333333]
  [0.13333333]
  [0.13333333]]

 ...

 [[0.11666667]
  [0.48333333]
  [0.3       ]
  ...
  [0.3       ]
  [0.3       ]
  [0.13333333]]

 [[0.05      ]
  [0.05      ]
  [0.05      ]
  ...
  [0.3       ]
  [0.16666667]
  [0.21666667]]

 [[0.36666667]
  [0.51666667]
  [0.45      ]
  ...
  [0.6       ]
  [0.4       ]
  [0.45      ]]]
[[[0.73333333]
  [0.71666667]
  [0.71666667]
  ...
  [0.48333333]
  [0.4       ]
  [0.71666667]]

 [[0.2       ]
  [0.2       ]
  [0.16666667]
  ...
  [0.13333333]
  [0.13333333]
  [0.2       ]]

 [[0.35      ]
  [0.13333333]
  [0.38333333]
  ...
  [0.13333333]
  [0.13333333]
  [0.13333333]]

 ...

 [[0.48333333]
  [0.3       ]
  [0.11666667]
  ...
  [0.3       ]
  [0.13333333]
  [0.11666667]]

 [[0.05 

In [None]:
model_transformer.fit(reshaped_x,
                 reshaped_y,
                 epochs=10,
                 batch_size=32)

Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 7s/step - accuracy: 0.0000e+00 - loss: 3.5607
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 1.8989e-04 - loss: 0.0083
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 1.8989e-04 - loss: 0.0069
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 1.0851e-04 - loss: 0.0047
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 3.7977e-04 - loss: 0.0041
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 3.7977e-04 - loss: 0.0040
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 1.8989e-04 - loss: 0.0043
Epoch 8/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 1.0851e-04 - loss: 0.0044
Epoch 9/10
[1m2/2[0m [32m━━━━━

<keras.src.callbacks.history.History at 0x7d0b9769a290>

In [None]:
import random
def generate_seed_tokens(vocab_size, sequence_length=128):
  """Genera una sequenza di token casuali.
  Args:
      tokenizer: Il tokenizer da usare per ottenere la dimensione del vocabolario.
      sequence_length: La lunghezza della sequenza di token da generare.

  Returns:
      Una lista di token casuali.
  """
  seed_tokens = np.array([random.randint(0, vocab_size - 1) for _ in range(sequence_length)])
  return seed_tokens

In [None]:
temp_generated_tokens = generate_seed_tokens(vocab_size=vocab_size)
print(len(temp_generated_tokens))

# Genera token finché non viene raggiunta la lunghezza massima
while len(temp_generated_tokens) < 460:
    # Prevedi il prossimo token
    predictions = model_transformer.predict(temp_generated_tokens[-128:].reshape(128, 1))
    print(predictions.shape)
    # Seleziona il token con la probabilità più alta
    next_token = tf.argmax(predictions[0][-1]).numpy() # Select the last token from the prediction
    #print(next_token)


    # Aggiungi il token alla sequenza generata
    temp_generated_tokens = np.append(temp_generated_tokens, [next_token])

128
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
(128, 1, 55)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

KeyboardInterrupt: 

# Training of the model

In [None]:
# Passo 0: Imposta il tuo flusso completo
midi_directory = dataset_path
midi_files = []

for root, _, files in os.walk(midi_directory):
    for file in files:
        if file.endswith('.mid'):
            midi_files.append(os.path.join(root, file))

In [None]:
# 1
tokenized_data = load_and_tokenize_midi(midi_files[:50])

Errore nel caricamento del file /content/clean_midi/Scott McKenzie/Forrest Gump: San Francisco (Be Sure to Wear Some Flowers in Your Hair).1.mid: data byte must be in range 0..127
File /content/clean_midi/Scott McKenzie/Forrest Gump: San Francisco (Be Sure to Wear Some Flowers in Your Hair).2.mid tokenizzato con successo.




File /content/clean_midi/Scott McKenzie/Forrest Gump: San Francisco (Be Sure to Wear Some Flowers in Your Hair).mid tokenizzato con successo.
File /content/clean_midi/Malmsteen Yngwie/Leviathan.mid tokenizzato con successo.
File /content/clean_midi/Malmsteen Yngwie/Crying.mid tokenizzato con successo.
File /content/clean_midi/The Marshall Tucker Band/Heard It in a Love Song.mid tokenizzato con successo.
File /content/clean_midi/The Cranberries/Will You Remember?.mid tokenizzato con successo.
File /content/clean_midi/The Cranberries/Not Sorry.mid tokenizzato con successo.
Errore nel caricamento del file /content/clean_midi/The Cranberries/Animal Instinct.mid: data byte must be in range 0..127
File /content/clean_midi/The Cranberries/How.mid tokenizzato con successo.
File /content/clean_midi/The Cranberries/Zombie.2.mid tokenizzato con successo.
File /content/clean_midi/The Cranberries/Hollywood.mid tokenizzato con successo.
File /content/clean_midi/The Cranberries/Ode to My Family.2.mid

In [None]:
# 2
max_sequence_length = max(len(seq) for seq in tokenized_data)  # Calcola la lunghezza massima
padded_data = pad_sequences(tokenized_data, maxlen=max_sequence_length)

In [None]:
# 3
input_data = np.array(padded_data)

import numpy as np

assert not np.any(np.isnan(input_data))
normalized_x, original_min, original_max = normalize_to_range(input_data, 0, 1)
assert (np.max(normalized_x)) == 1
assert (np.min(normalized_x)) == 0
print(normalized_x)


[[0.69607843 0.70588235 0.69607843 ... 0.         0.         0.        ]
 [0.39215686 0.39215686 0.39215686 ... 0.         0.         0.        ]
 [0.35294118 0.48039216 0.35294118 ... 0.         0.         0.        ]
 ...
 [0.34313725 0.55882353 0.45098039 ... 0.         0.         0.        ]
 [0.30392157 0.30392157 0.30392157 ... 0.         0.         0.        ]
 [0.49019608 0.57843137 0.53921569 ... 0.         0.         0.        ]]


In [None]:
autoencoder = create_autoencoder((normalized_x.shape[1],1),[512,256,128],2,'elu','sigmoid')

y = np.roll(normalized_x, shift=-1, axis=1)
reshaped_x = normalized_x.reshape(normalized_x.shape[0], normalized_x.shape[1], 1)
reshaped_y = y.reshape(y.shape[0], y.shape[1], 1)
print(reshaped_x)
print(reshaped_y)

ValueError: Cannot convert '2' to a shape.

In [None]:
# 4
autoencoder.fit(reshaped_x,
                 reshaped_y,  # Same data for autoencoder input and output
                 epochs=10,
                 batch_size=32)

Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4s/step - loss: 0.1376
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step - loss: 0.1348
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step - loss: 0.1300
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3s/step - loss: 0.1230
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3s/step - loss: 0.1187
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3s/step - loss: 0.1068
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3s/step - loss: 0.0928
Epoch 8/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3s/step - loss: 0.0822
Epoch 9/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3s/step - loss: 0.0810
Epoch 10/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step - loss: 0.0795


<keras.src.callbacks.history.History at 0x786750301030>

In [None]:
generate_midi(autoencoder, num_files=1, original_min=original_min, original_max=original_max)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step
[[[0.1143358 ]
  [0.19844712]
  [0.25868213]
  ...
  [0.39076546]
  [0.39076546]
  [0.39076546]]]
[11 20 26 ... 39 39 39]
(12800,)


# Utility functions

In [None]:
def random_file(root, keyword=None):
    import glob
    import os
    import random
    mid_files = glob.glob(os.path.join(root, "**", "*.mid"), recursive=True)
    if keyword is not None:
      mid_files = [file for file in mid_files if keyword in file.lower()]
    return random.choice(mid_files)

def generate_midi_from_tokens(tokens, tokenizer, output_path):
  from pathlib import Path
  # Convert to MIDI and save it
  generated_midi = tokenizer(tokens)  # MidiTok can handle PyTorch/Numpy/Tensorflow tensors
  generated_midi.dump_midi(Path(output_path))

# MIDI playing

## Installing the required libraries

In [None]:
!apt-get update -qq && apt-get install -y fluidsynth
!pip install pretty_midi midi-clip

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  fluid-soundfont-gm libevdev2 libfluidsynth3 libgudev-1.0-0 libinput-bin libinput10
  libinstpatch-1.0-2 libmd4c0 libmtdev1 libqt5core5a libqt5dbus5 libqt5gui5 libqt5network5
  libqt5svg5 libqt5widgets5 libwacom-bin libwacom-common libwacom9 libxcb-icccm4 libxcb-image0
  libxcb-keysyms1 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xinput0 libxcb-xkb1
  libxkbcommon-x11-0 qsynth qt5-gtk-platformtheme qttranslations5-l10n timgm6mb-soundfont
Suggested packages:
  fluid-soundfont-gs qt5-image-formats-plugins qtwayland5 jackd
The following NEW packages will be installed:
  fluid-soundfont-gm fluidsynth libevdev2 libfluidsynth3 libgudev-1.0-0 lib

## Download example Soundfonts (GeneralUser GS v2 and PICONICA)

In [None]:
!gdown 1wlpTIS70nQHMrYBjDT0M6nyg07kUejUv
!unzip GeneralUser_GS_v2.0.0--doc_r2.zip
!rm -rf GeneralUser_GS_v2.0.0--doc_r2.zip support documentation demo\ MIDIs
!mv GeneralUser\ GS\ v2.0.0.sf2 guGS.sf2

# PICONICA
!gdown 1uk51T9Gvo1n2JRl3_CHCg2FVGWiNI4qJ

Downloading...
From (original): https://drive.google.com/uc?id=1wlpTIS70nQHMrYBjDT0M6nyg07kUejUv
From (redirected): https://drive.google.com/uc?id=1wlpTIS70nQHMrYBjDT0M6nyg07kUejUv&confirm=t&uuid=20915a7a-f38b-4d28-ba1a-861b0a263940
To: /content/GeneralUser_GS_v2.0.0--doc_r2.zip
100% 62.5M/62.5M [00:01<00:00, 31.4MB/s]
Archive:  GeneralUser_GS_v2.0.0--doc_r2.zip
   creating: demo MIDIs/
   creating: demo MIDIs/audio/
  inflating: demo MIDIs/audio/Bond.ogg  
  inflating: demo MIDIs/audio/Breakout.ogg  
  inflating: demo MIDIs/audio/Dance.ogg  
  inflating: demo MIDIs/audio/Earth Day - by Richard Audd (fixed).ogg  
  inflating: demo MIDIs/audio/J-cycle.ogg  
  inflating: demo MIDIs/audio/Jump!.ogg  
  inflating: demo MIDIs/audio/Santa Claus is Comin' to Town.ogg  
  inflating: demo MIDIs/audio/The HYBRID Collage (v2.0) - by S. Christian Collins.ogg  
  inflating: demo MIDIs/audio/Umi no Mieru Machi.ogg  
  inflating: demo MIDIs/Bond.mid     
  inflating: demo MIDIs/Breakout.mid  
  infla

## Optional: download other soundfonts

In [None]:
# Pokemon
!gdown 1vDK_xH7WeAqQrrBFXfh4Q205x6oNhTQt

## Utility function to generate the audio on Colab

### Taken from https://github.com/bzamecnik/midi2audio/blob/master/midi2audio.py

In [None]:
import argparse
import os
import subprocess

__all__ = ['FluidSynth']

DEFAULT_SOUND_FONT = '~/.fluidsynth/default_sound_font.sf2'
DEFAULT_SAMPLE_RATE = 44100
DEFAULT_GAIN = 0.2

class FluidSynth():
    def __init__(self, sound_font=DEFAULT_SOUND_FONT, sample_rate=DEFAULT_SAMPLE_RATE, gain=DEFAULT_GAIN):
        self.sample_rate = sample_rate
        self.sound_font = os.path.expanduser(sound_font)
        self.gain = gain

    def midi_to_audio(self, midi_file: str, audio_file: str, verbose=True):
        if verbose:
            stdout = None
        else:
            stdout = subprocess.DEVNULL
        subprocess.call(
            ['fluidsynth', '-ni', '-g', str(self.gain), self.sound_font, midi_file, '-F', audio_file, '-r', str(self.sample_rate)],
            stdout=stdout,
        )

    def play_midi(self, midi_file):
        subprocess.call(['fluidsynth', '-i', '-g', str(self.gain), self.sound_font, midi_file, '-r', str(self.sample_rate)])

### Other utility functions

In [None]:
import pretty_midi
import os
import librosa.display

def show_midi_info(midi_path, print_notes=False):
  midi_data = pretty_midi.PrettyMIDI(midi_path)
  print("Instruments: ", [instrument.name for instrument in midi_data.instruments])
  print("MIDI duration: {duration:.2f} seconds".format(duration=midi_data.get_end_time()))
  if print_notes:
    for instrument in midi_data.instruments:
      print(instrument.name)
      for note in instrument.notes:
        print(note.start, note.end, note.pitch, note.velocity)

def piano_roll(midi_path):
  plt.figure(figsize=(12, 4))
  plot_piano_roll(path, 24, 84)

def plot_piano_roll(path, start_pitch, end_pitch, fs=100):
    midi_data = pretty_midi.PrettyMIDI(path)
    # Use librosa's specshow function for displaying the piano roll
    librosa.display.specshow(midi_data.get_piano_roll(fs)[start_pitch:end_pitch],
                             hop_length=1, sr=fs, x_axis='time', y_axis='cqt_note',
                             fmin=pretty_midi.note_number_to_hz(start_pitch))

def change_midi_velocity(midi_path, output_path, delta=0): # Renamed the function to avoid name conflict
  midi_data = pretty_midi.PrettyMIDI(midi_path)
  for instrument in midi_data.instruments:
    for note in instrument.notes:
      note.velocity += delta
  midi_data.write(output_path)

def convert_midi_to_wav(soundfont_path, midi_path, output_path, gain=None, velocity_change=0): # Renamed the argument
  change_midi_velocity(midi_path, "temp.mid", delta=velocity_change) # Call the renamed function
  FluidSynth(soundfont_path, gain=gain).midi_to_audio("temp.mid", output_path)
  os.remove("temp.mid")


def trim_midi(midi_path, start, end):
  import mido
  import midi_clip
  mid = mido.MidiFile(midi_path)
  trimmed_midi = midi_clip.midi_clip(mid, start, end)

  dir_name, base_name = os.path.split(midi_path)
  new_base_name = "trimmed_" + base_name
  output_path = os.path.join(dir_name, new_base_name)
  trimmed_midi.save(output_path)
  return output_path

def playMidi(midi_file_path,
             soundfont_path="/content/guGS.sf2",
             output_path="audio.wav",
             start=None,
             end=None,
             gain=DEFAULT_GAIN,
             velocity_change=0
             ):
    from IPython.display import Audio

    if start is not None and end is not None:
      midi_file_path = trim_midi(midi_file_path, start, end)
      convert_midi_to_wav(soundfont_path, midi_file_path, output_path, gain=gain, velocity_change=velocity_change)
      os.remove(midi_file_path)
    else:
      convert_midi_to_wav(soundfont_path, midi_file_path, output_path, gain=gain, velocity_change=velocity_change)
    return Audio(output_path)

# Play a specific MIDI

In [None]:
playMidi("gen_res/1.mid", soundfont_path="PICONICA.sf2")

## Play a random MIDI of the Lakh dataset

In [None]:
path = random_file(dataset_path)
print("Converting: " + path)
print("Midi info:")
show_midi_info(path)
print("Synthetized:")
playMidi(path)

Converting: /content/clean_midi/Scorpions/Send Me an Angel.mid
Midi info:
Instruments:  ['Melodie', 'Strings', 'Choir', 'Akk-Guita', 'Solo-Guit', 'Solovox', 'Bass', 'Drums']
MIDI duration: 268.73 seconds
Synthetized:




FileNotFoundError: [Errno 2] No such file or directory: 'temp.mid'

## Play a random MIDI of the NESMDB dataset

In [None]:
path = random_file("nesmdb_midi")
print("Converting: " + path)
print("Midi info:")
show_midi_info(path)
print("Synthetized:")
playMidi(path, soundfont_path="PICONICA.sf2", velocity_change=30, gain=1)

In [None]:
show_midi_info("generated_midi_0.mid")

Instruments:  ['']
MIDI duration: 1280.00 seconds


In [None]:
playMidi("generated_midi_0.mid", soundfont_path="PICONICA.sf2")

FileNotFoundError: [Errno 2] No such file or directory: 'temp.mid'