In [45]:
!pip install pretty_midi



In [46]:
!pip install gdown
!pip install miditok
!pip install midi-clip

!wget https://raw.githubusercontent.com/roostico/NesGen/refs/heads/main/utility.py

Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown
Successfully installed gdown-5.2.0
Collecting miditok
  Downloading miditok-3.0.4-py3-none-any.whl.metadata (10 kB)
Collecting symusic>=0.5.0 (from miditok)
  Downloading symusic-0.5.5-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (11 kB)
Collecting pySmartDL (from symusic>=0.5.0->miditok)
  Downloading pySmartDL-1.3.4-py3-none-any.whl.metadata (2.8 kB)
Downloading miditok-3.0.4-py3-none-any.whl (157 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m157.2/157.2 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading symusic-0.5.5-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m65.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hDownloading pySmartDL-1.3.4-py3-none-any.whl (20 kB

In [47]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import glob

import os
import random
import shutil
from pathlib import Path
import pretty_midi
import numpy as np
from miditok import REMI, TokenizerConfig
import json
from miditok.utils import split_files_for_training
from miditok.data_augmentation import augment_dataset
from random import shuffle
from tqdm import tqdm

import sys
import pickle
     

In [48]:
# Get Maestro Dataset
# Download the MAESTRO v3.0.0 MIDI-only archive, extract it into the current
# working directory (the archive unpacks into a "maestro-v3.0.0/" folder), then
# delete the archive.
!wget https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
!unzip 'maestro-v3.0.0-midi.zip'
!rm 'maestro-v3.0.0-midi.zip'
# Absolute path used by the following cells to find the extracted files.
# NOTE(review): this only matches the extraction folder when the notebook's
# working directory is /kaggle/working — confirm when running elsewhere.
dataset_path = "/kaggle/working/maestro-v3.0.0"

--2024-12-18 16:08:28--  https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.96.207, 142.250.153.207, 142.251.18.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.96.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 58416533 (56M) [application/octet-stream]
Saving to: 'maestro-v3.0.0-midi.zip'


2024-12-18 16:08:30 (28.1 MB/s) - 'maestro-v3.0.0-midi.zip' saved [58416533/58416533]

Archive:  maestro-v3.0.0-midi.zip
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_08_R1_2004_01-02_ORIG_MID--AUDIO_08_R1_2004_01_Track01_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_09_R1_2004_05_ORIG_MID--AUDIO_09_R1_2004_06_Track06_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_14_R1_2004_01-03_ORIG_MID--AUDIO_14_R1_2004_01_Track01_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_01_R1_2004

In [49]:
# Collect every MIDI file of the dataset and flatten them into a single folder.

# Both extensions are searched recursively. Sorting makes the "<index>.midi"
# numbering below reproducible, since glob order is filesystem dependent.
midi_paths = sorted(
    path
    for pattern in ("**/*.mid", "**/*.midi")
    for path in Path(dataset_path).resolve().glob(pattern)
)

midis_dir = "midis"
os.makedirs(midis_dir, exist_ok=True)

# Move (not copy) each file to "<midis_dir>/<index>.midi".
for i, midi_path in enumerate(midi_paths):
    new_midi_path = os.path.join(midis_dir, f"{i}.midi")
    shutil.move(str(midi_path), new_midi_path)

# List the flattened files from the very directory they were moved into,
# instead of a hard-coded absolute path that is only correct when the working
# directory happens to be /kaggle/working.
midis = sorted(
    path
    for pattern in ("**/*.mid", "**/*.midi")
    for path in Path(midis_dir).resolve().glob(pattern)
)

def sample():
    """Pick one entry of the module-level `midis` list at random and return it as a string."""
    chosen = random.choice(midis)
    return str(chosen)

NameError: name 'dataset_path' is not defined

In [None]:
# Value passed as the tokenizer's "beat_res" option: a mapping from
# (start, end) tuples to an integer resolution.
BEAT_RES = {
    (0, 1): 12,
    (1, 2): 4,
    (2, 4): 2,
    (4, 8): 1,
}

# Keyword arguments forwarded unchanged to TokenizerConfig.
TOKENIZER_PARAMS = dict(
    pitch_range=(21, 109),
    beat_res=BEAT_RES,
    num_velocities=6,
    special_tokens=["BOS", "EOS"],
    use_chords=True,
    use_rests=True,
    use_tempos=True,
    num_tempos=8,
    tempo_range=(50, 200),  # (min_tempo, max_tempo)
)

# Build the REMI tokenizer from the configuration above.
config = TokenizerConfig(**TOKENIZER_PARAMS)
tokenizer = REMI(config)

In [None]:
vocab_size = 1000
# Train the tokenizer on the flattened MIDI files with the requested vocabulary size.
tokenizer.train(vocab_size=vocab_size, files_paths=midis)

# Sort before shuffling so the train/validation split depends only on the seed
# below and not on the order in which the files were listed.
processed = sorted(Path(f"{s}") for s in midis)
print(len(processed))

valid_perc = 0.05  # fraction of the files held out for validation
augment = False
split_seed = 42  # fixed seed: the same files land in each subset on every run

total_num_files = len(processed)
num_files_valid = round(total_num_files * valid_perc)
# A dedicated Random instance keeps the split reproducible without touching
# the global random state used elsewhere in the notebook.
random.Random(split_seed).shuffle(processed)
midi_paths_valid = processed[:num_files_valid]
midi_paths_train = processed[num_files_valid:]

# Chunk MIDIs and perform data augmentation on each subset independently

for files_paths, subset_name in (
    (midi_paths_train, "train"),
    (midi_paths_valid, "valid"),
):
    # With very few input files a subset can be empty; indexing it would raise
    # IndexError, so report it and move on.
    if not files_paths:
        print(f"No files in the {subset_name} subset, skipping.")
        continue

    print(files_paths[0])

    # Split the MIDIs into chunks of sizes approximately about 1024 tokens

    subset_chunks_dir = Path(f"Maestro_{subset_name}")

    split_files_for_training(
        files_paths=files_paths,
        tokenizer=tokenizer,
        save_dir=subset_chunks_dir,
        max_seq_len=1024,
        num_overlap_bars=2,
    )

    # Perform data augmentation
    if augment:
        augment_dataset(
            subset_chunks_dir,
            pitch_offsets=[-12, 12],
            velocity_offsets=[-3, 3],
            duration_offsets=[-0.5, 0.5],
        )

# From here on the two names refer to the chunk files written above, not to
# the original (unsplit) MIDI files.
midi_paths_train = list(Path("Maestro_train").glob("**/*.mid")) + list(Path("Maestro_train").glob("**/*.midi"))
midi_paths_valid = list(Path("Maestro_valid").glob("**/*.mid")) + list(Path("Maestro_valid").glob("**/*.midi"))

In [None]:
def midi_valid(midi) -> bool:
    """Return True when every time signature of `midi` has a numerator of 4.

    The time signatures are read from `midi.time_signatures` when that
    attribute exists (the name used by symusic `Score` objects, which is what
    miditok 3.x hands to validation callbacks) and from
    `midi.time_signature_changes` otherwise (the attribute this function
    originally read).

    Args:
        midi: Object exposing one of the two attributes above; each element
            must have a `numerator` attribute.

    Returns:
        False if any time signature has a numerator other than 4, True
        otherwise (including when there are no time signatures at all).
    """
    time_signatures = getattr(midi, "time_signatures", None)
    if time_signatures is None:
        time_signatures = midi.time_signature_changes
    return all(ts.numerator == 4 for ts in time_signatures)


# Output folder name that the cleanup originally targeted; still removed so
# leftovers from older runs do not linger.
if os.path.exists("tokenized"):
    shutil.rmtree("tokenized")

for subset in ("train", "valid"):
    chunks_dir = Path(f"/kaggle/working/Maestro_{subset}")
    tokens_dir = Path(f"/kaggle/working/tokenized_{subset}")

    # Clear the folder that is actually written to, so JSON files from a
    # previous run cannot be mixed with the new ones.
    if tokens_dir.exists():
        shutil.rmtree(tokens_dir)

    # validation_fn must be passed by keyword: the third positional parameter
    # of tokenize_dataset in miditok 3.x is overwrite_mode, so a positional
    # midi_valid would never be called as a validator.
    tokenizer.tokenize_dataset(
        chunks_dir,
        tokens_dir,
        validation_fn=midi_valid,
    )

In [None]:
def read_json(path: str) -> dict:
    """Open the file at `path` in text mode and return its parsed JSON content."""
    with open(path, "r") as handle:
        parsed = json.load(handle)
    return parsed

def read_json_files(json_file_paths):
    """Load several JSON files, showing a progress bar while doing so.

    Args:
        json_file_paths: Iterable of paths to JSON files.

    Returns:
        A list with the parsed content of every file, in input order.
        If a file is missing or is not valid JSON, a message is printed and
        an empty list is returned instead (results read so far are discarded).
    """
    loaded = []
    for json_path in tqdm(json_file_paths):
        try:
            payload = read_json(json_path)
        except FileNotFoundError:
            print(f"Error: File not found - {json_path}")
            return []  # all-or-nothing: give up on the first missing file
        except json.JSONDecodeError:
            print(f"Error decoding JSON in file: {json_path}")
            return []  # all-or-nothing: give up on the first malformed file
        loaded.append(payload)
    return loaded

In [None]:
# Locate the JSON files produced by the tokenization step for each subset.
train_json_dir = Path("tokenized_train").resolve()
valid_json_dir = Path("tokenized_valid").resolve()
tokenized_train = list(train_json_dir.glob("**/*.json"))
tokenized_valid = list(valid_json_dir.glob("**/*.json"))

# Parse them, training subset first.
data_objects_train = read_json_files(tokenized_train)
data_objects_valid = read_json_files(tokenized_valid)

# read_json_files returns an empty list on failure, so an empty result is
# reported as an error.
if not data_objects_train:
    print("Error reading JSON files.")
else:
    print(f"\nSuccessfully read {len(data_objects_train)} training JSON files.")

In [None]:
# For each loaded file keep only the first entry of its "ids" field, as a NumPy array.
encoded_train, encoded_valid = (
    [np.array(song["ids"][0]) for song in songs]
    for songs in (data_objects_train, data_objects_valid)
)

In [None]:
import datetime

# Sanity check: decode the first 1024 ids of the first training sequence and
# write the result to a MIDI file.
tokenizer.decode([encoded_train[0][:1024]]).dump_midi("sample.mid")

# Join all sequences into one flat stream per subset. Token ids are integers,
# so cast once here, before saving.
all_ids_train = np.concatenate(encoded_train).astype(dtype=np.int32)
all_ids_valid = np.concatenate(encoded_valid).astype(dtype=np.int32)

# File names carry the vocabulary size and today's date as MMDD.
today = datetime.datetime.today()
day = today.day
month = today.month
name = "tokenizer{:d}_{:02d}{:02d}.json".format(vocab_size, month, day)
tokenizer.save(name)

# fmt="%d" writes plain integers. np.savetxt's default format ("%.18e") would
# store every id as a 25-character float, inflating the files many times over.
# The files still load with np.loadtxt(...).astype(np.int32) and yield the
# same values.
np.savetxt("ids_train_{:02d}{:02d}.txt".format(month, day), all_ids_train, fmt="%d")
np.savetxt("ids_valid_{:02d}{:02d}.txt".format(month, day), all_ids_valid, fmt="%d")

In [95]:
# Shortcut: run this cell to skip the dataset preparation cells above.
# It downloads a previously saved tokenizer and training-id file with gdown and
# loads them, overwriting `tokenizer` and `all_ids_train`. The validation ids
# are not downloaded or loaded (those lines are commented out).
!gdown 1ZIPjenm4tEzAKPc-ONE4gYLzILR3YYqe # tokenizer1000_1217.json
!gdown 1LN8wrTcUOzlPkQs7Gh-RD9Z2ftbua_E6 # ids_train_1217.txt
#!gdown 12SOuWNUM9ofo5dhGWvNEj09c_dYisB7g # ids_valid_1217.txt
# Rebuild the tokenizer from its saved parameters file.
tokenizer = REMI(params="tokenizer1000_1217.json")
# The text file is parsed as floats by np.loadtxt, hence the cast back to int32.
all_ids_train = np.loadtxt("ids_train_1217.txt").astype(dtype=np.int32)
#all_ids_valid = np.loadtxt("ids_valid_1217.txt").astype(dtype=np.int32)

Downloading...
From: https://drive.google.com/uc?id=1ZIPjenm4tEzAKPc-ONE4gYLzILR3YYqe
To: /kaggle/working/tokenizer1000_1217.json
100%|██████████████████████████████████████| 64.2k/64.2k [00:00<00:00, 79.9MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1LN8wrTcUOzlPkQs7Gh-RD9Z2ftbua_E6
From (redirected): https://drive.google.com/uc?id=1LN8wrTcUOzlPkQs7Gh-RD9Z2ftbua_E6&confirm=t&uuid=2c2f6c63-f036-4dc3-9494-4404126662fd
To: /kaggle/working/ids_train_1217.txt
100%|█████████████████████████████████████████| 397M/397M [00:02<00:00, 196MB/s]


In [96]:
# Rescale token ids around half the tokenizer length: id 0 maps to -1 and an id
# equal to len(tokenizer) would map to +1.
vocab_size = len(tokenizer)
half_vocab = vocab_size / 2
normalized_seq = (all_ids_train - half_vocab) / half_vocab

In [97]:
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim

# Generator
class Generator(nn.Module):
    """Fully connected network mapping `input_dim` features to `output_dim` values.

    Six hidden Linear layers (128, 256, 512, 256, 128, 64 units), each followed
    by an in-place ReLU, then a final Linear layer and a Tanh, so every output
    value lies in [-1, 1].
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        widths = [input_dim, 128, 256, 512, 256, 128, 64]
        layers = []
        # One Linear + ReLU pair per consecutive pair of widths.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU(True))
        layers.append(nn.Linear(widths[-1], output_dim))
        layers.append(nn.Tanh())
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the result."""
        return self.main(x)

# Discriminator
class Discriminator(nn.Module):
    """Fully connected network mapping `input_dim` features to one value in [0, 1].

    Two hidden Linear layers (256 and 128 units) with in-place LeakyReLU(0.2)
    activations, followed by a single-unit Linear layer and a Sigmoid.
    """

    def __init__(self, input_dim):
        super().__init__()
        stack = [
            nn.Linear(input_dim, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        ]
        self.main = nn.Sequential(*stack)

    def forward(self, x):
        """Run `x` through the layer stack and return the result."""
        score = self.main(x)
        return score

seq_length = 100

# Split the normalized stream into consecutive, non-overlapping windows of
# exactly seq_length values. The "+ 1" in the range bound keeps the final
# window when the stream length is an exact multiple of seq_length; a shorter
# trailing remainder is still dropped.
all_ids_train_seq = [
    normalized_seq[start:start + seq_length]
    for start in range(0, len(normalized_seq) - seq_length + 1, seq_length)
]

# Custom dataset
class TokenDataset(Dataset):
    """Dataset over an indexable collection, returning each item as a float32 tensor."""

    def __init__(self, data):
        # Kept as given; items are converted lazily in __getitem__.
        self.data = data

    def __len__(self):
        n_items = len(self.data)
        return n_items

    def __getitem__(self, idx):
        item = self.data[idx]
        return torch.tensor(item, dtype=torch.float32)

# Wrap the windows in a dataset and serve them in shuffled mini-batches.
batch_size = 256
dataset = TokenDataset(all_ids_train_seq)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialization
z_dim = 100  # size of the noise vector fed to the generator
data_dim = seq_length  # one generated value per position of a window
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The generator is created before the discriminator.
G = Generator(z_dim, data_dim).to(device)
D = Discriminator(data_dim).to(device)

# Binary cross-entropy loss and one Adam optimizer per network, same learning rate.
criterion = nn.BCELoss()
learning_rate = 0.0002
optimizer_G = optim.Adam(G.parameters(), lr=learning_rate)
optimizer_D = optim.Adam(D.parameters(), lr=learning_rate)

# Training loop
# Each batch performs one discriminator update followed by one generator update.
epochs = 100
for epoch in range(epochs):
    for real_data in dataloader:
        real_data = real_data.to(device)
        # Size of this particular batch (the last one of an epoch may be
        # smaller). A separate name keeps the `batch_size` setting used to
        # build the DataLoader from being overwritten.
        current_batch_size = real_data.size(0)

        # Train Discriminator
        D.zero_grad()
        outputs_real = D(real_data)

        # Labels: 1 for real data, 0 for generated data. Built from the
        # discriminator output so they match its shape and device without an
        # extra host-to-device copy.
        real_labels = torch.ones_like(outputs_real)
        fake_labels = torch.zeros_like(outputs_real)
        loss_real = criterion(outputs_real, real_labels)

        # Noise is sampled directly on the target device.
        z = torch.randn(current_batch_size, z_dim, device=device)
        fake_data = G(z)
        # detach() keeps the discriminator's backward pass out of the generator.
        outputs_fake = D(fake_data.detach())
        loss_fake = criterion(outputs_fake, fake_labels)

        loss_D = loss_real + loss_fake
        loss_D.backward()
        optimizer_D.step()

        # Train Generator: it is rewarded when the discriminator labels its
        # output as real. This backward pass also leaves gradients on D; they
        # are cleared by D.zero_grad() at the start of the next batch.
        G.zero_grad()
        outputs = D(fake_data)
        loss_G = criterion(outputs, real_labels)
        loss_G.backward()
        optimizer_G.step()

    # Reports the losses of the last batch of the epoch only.
    print(f'Epoca [{epoch+1}/{epochs}] Loss D: {loss_D.item():.4f}, Loss G: {loss_G.item():.4f}')

Epoca [1/100] Loss D: 2.2863, Loss G: 0.6172
Epoca [2/100] Loss D: 1.5040, Loss G: 0.8382
Epoca [3/100] Loss D: 1.4601, Loss G: 0.6178
Epoca [4/100] Loss D: 1.3780, Loss G: 0.7270
Epoca [5/100] Loss D: 1.1020, Loss G: 0.9251
Epoca [6/100] Loss D: 0.9805, Loss G: 1.2206
Epoca [7/100] Loss D: 1.5544, Loss G: 0.5620
Epoca [8/100] Loss D: 1.2891, Loss G: 0.6197
Epoca [9/100] Loss D: 1.4388, Loss G: 0.7834
Epoca [10/100] Loss D: 1.4517, Loss G: 0.6763
Epoca [11/100] Loss D: 1.5818, Loss G: 0.9500
Epoca [12/100] Loss D: 1.3403, Loss G: 0.5613
Epoca [13/100] Loss D: 1.3159, Loss G: 0.7915
Epoca [14/100] Loss D: 1.2357, Loss G: 0.9513
Epoca [15/100] Loss D: 1.8059, Loss G: 1.0398
Epoca [16/100] Loss D: 1.5322, Loss G: 0.5444
Epoca [17/100] Loss D: 1.1402, Loss G: 0.9126
Epoca [18/100] Loss D: 1.2227, Loss G: 1.0797
Epoca [19/100] Loss D: 0.6044, Loss G: 1.7313
Epoca [20/100] Loss D: 1.3704, Loss G: 0.6153
Epoca [21/100] Loss D: 1.3550, Loss G: 0.9648
Epoca [22/100] Loss D: 1.6418, Loss G: 0.61

In [98]:
# Noise dimensions
num_samples = 1  # Number of songs to generate
# Random noise, drawn on the CPU and then moved to the training device.
z = torch.randn(num_samples, z_dim).to(device)
# Generate the sample and bring it back as a NumPy array.
generated_data = G(z).detach().cpu().numpy()

In [99]:
# Map generator outputs back to token ids by inverting
# (id - vocab_size / 2) / (vocab_size / 2), the normalization applied to the
# training ids.
half_vocab = vocab_size / 2
boundary = int(half_vocab)  # kept for any later cell that still reads it

# Ids must be integers inside [0, vocab_size - 1]: round to the nearest id and
# clip, because an output of exactly +1 would otherwise map to vocab_size,
# one past the last valid id.
token_ids = np.rint(generated_data * half_vocab + half_vocab)
token_ids = np.clip(token_ids, 0, vocab_size - 1).astype(int)

# One list of plain Python ints per generated sample.
pred_token = token_ids.tolist()

In [100]:
# Decode the predicted ids with the tokenizer and write the result to a MIDI file.
output_midi_path = "generated.mid"
decoded = tokenizer.decode(pred_token)
decoded.dump_midi(output_midi_path)

In [101]:
from IPython.display import FileLink
# Last expression of the cell: displays a link to the generated MIDI file.
FileLink(r'generated.mid')