In [1]:
!pip install pretty_midi

Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting mido>=1.1.16 (from pretty_midi)
  Downloading mido-1.3.3-py3-none-any.whl.metadata (6.4 kB)
Downloading mido-1.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py) ... [?25ldone
[?25h  Created wheel for pretty_midi: filename=pretty_midi-0.2.10-py3-none-any.whl size=5592292 sha256=dbbb783e9430e780a51241102f06d8784d36763af543c0203b10ac867818ac81
  Stored in directory: /root/.cache/pip/wheels/cd/a5/30/7b8b7f58709f5150f67f98fde4b891ebf0be9ef07a8af49f25
Successfully built pretty_midi
Installing collected packages: mido, pretty_m

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import pretty_midi
import glob
import numpy as np

In [3]:
# Get Maestro Dataset
!wget https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
!unzip 'maestro-v3.0.0-midi.zip'
!rm 'maestro-v3.0.0-midi.zip'
!mv 'maestro-v3.0.0' 'archive'

--2024-12-16 13:26:29--  https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 64.233.189.207, 173.194.174.207, 74.125.23.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.189.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 58416533 (56M) [application/octet-stream]
Saving to: 'maestro-v3.0.0-midi.zip'


2024-12-16 13:26:33 (17.7 MB/s) - 'maestro-v3.0.0-midi.zip' saved [58416533/58416533]

Archive:  maestro-v3.0.0-midi.zip
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_08_R1_2004_01-02_ORIG_MID--AUDIO_08_R1_2004_01_Track01_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_09_R1_2004_05_ORIG_MID--AUDIO_09_R1_2004_06_Track06_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_14_R1_2004_01-03_ORIG_MID--AUDIO_14_R1_2004_01_Track01_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_01_R1_2004_

In [4]:
# Percorso ai file MIDI del dataset Maestro
midi_files = glob.glob('archive/*/*.midi', recursive=True)

In [5]:
len(midi_files)

1276

In [6]:
from tqdm import tqdm

In [7]:
# Funzione per convertire MIDI in rappresentazione numerica
def midi_to_numpy(file_path):
    try:
        midi_data = pretty_midi.PrettyMIDI(file_path)
        piano_roll = midi_data.get_piano_roll(fs=10)
        return piano_roll
    except:
        return None

# Caricamento e preprocessamento dei dati
data = []
for file in tqdm(midi_files[:50]):
    piano_roll = midi_to_numpy(file)
    if piano_roll is not None:
        data.append(piano_roll)

# Converti in Tensor e crea DataLoader
data = [torch.tensor(d, dtype=torch.float32) for d in data]
def collate_fn(batch):
    # Trova la lunghezza massima nel batch
    max_length = max([item.shape[1] for item in batch])
    # Padda le sequenze
    padded_batch = [torch.nn.functional.pad(item, (0, max_length - item.shape[1])) for item in batch]
    return torch.stack(padded_batch, dim=0)
dataloader = DataLoader(data, batch_size=128, shuffle=True, collate_fn=collate_fn)

100%|██████████| 50/50 [00:23<00:00,  2.17it/s]


In [18]:
# Generator
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 256),
            nn.ReLU(True),
            nn.Linear(256, output_dim),
            nn.Tanh()
        )

    def forward(self, x):
        return self.main(x)

# Discriminator
class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        return self.main(x)

In [29]:
# Parametri
z_dim = 100  # Dimensione del rumore di input
# Supponendo che ogni sequenza abbia dimensione (note, time_steps)
sample_data = next(iter(dataloader))
data_dim = sample_data.shape[1] * sample_data.shape[2]

# Inizializzazione dei modelli
G = Generator(z_dim, data_dim)
D = Discriminator(data_dim)

# Loss function e ottimizzatori
criterion = nn.BCELoss()
optimizer_G = optim.Adam(G.parameters(), lr=0.0002)
optimizer_D = optim.Adam(D.parameters(), lr=0.0002)

# Training loop
epochs = 65
for epoch in range(epochs):
    for real_data in dataloader:
        batch_size = real_data.size(0)
        real_data = real_data.view(batch_size, -1)      
        
        # Etichette reali e false
        real_labels = torch.ones(batch_size, 1)
        fake_labels = torch.zeros(batch_size, 1)
        
        # ---------------------
        # Train Discriminator
        # ---------------------
        D.zero_grad()
        
        # Output per dati reali
        outputs_real = D(real_data)
        loss_real = criterion(outputs_real, real_labels)
        
        # Output per dati falsi
        z = torch.randn(batch_size, z_dim)
        fake_data = G(z)
        outputs_fake = D(fake_data.detach())
        loss_fake = criterion(outputs_fake, fake_labels)
        
        # Totale perdita e backprop
        loss_D = loss_real + loss_fake
        loss_D.backward()
        optimizer_D.step()
        
        # -----------------
        # Train Generator
        # -----------------
        G.zero_grad()
        
        outputs = D(fake_data)
        loss_G = criterion(outputs, real_labels)
        
        loss_G.backward()
        optimizer_G.step()
        
    print(f'Epoca [{epoch+1}/{epochs}] Loss D: {loss_D.item():.4f}, Loss G: {loss_G.item():.4f}')

Epoca [1/65] Loss D: 1.3075, Loss G: 21.8494
Epoca [2/65] Loss D: 0.0000, Loss G: 29.9004
Epoca [3/65] Loss D: 0.0000, Loss G: 33.3574
Epoca [4/65] Loss D: 0.0000, Loss G: 33.5289
Epoca [5/65] Loss D: 0.0000, Loss G: 32.8124
Epoca [6/65] Loss D: 0.0000, Loss G: 30.1148
Epoca [7/65] Loss D: 0.0000, Loss G: 25.1716
Epoca [8/65] Loss D: 0.0000, Loss G: 20.1532
Epoca [9/65] Loss D: 0.0000, Loss G: 14.2744
Epoca [10/65] Loss D: 0.0482, Loss G: 9.5491
Epoca [11/65] Loss D: 2.1024, Loss G: 6.5706
Epoca [12/65] Loss D: 8.2648, Loss G: 6.9258
Epoca [13/65] Loss D: 14.1600, Loss G: 7.1259
Epoca [14/65] Loss D: 21.9708, Loss G: 7.2002
Epoca [15/65] Loss D: 30.3616, Loss G: 7.1453
Epoca [16/65] Loss D: 25.4632, Loss G: 8.7205
Epoca [17/65] Loss D: 15.5676, Loss G: 10.6035
Epoca [18/65] Loss D: 3.5850, Loss G: 13.8346
Epoca [19/65] Loss D: 2.2094, Loss G: 16.2952
Epoca [20/65] Loss D: 0.0983, Loss G: 18.0965
Epoca [21/65] Loss D: 0.2098, Loss G: 20.2796
Epoca [22/65] Loss D: 0.0002, Loss G: 21.2952

In [30]:
# Dimensioni del rumore
num_samples = 1  # Numero di canzoni da generare
z = torch.randn(num_samples, z_dim)  # Rumore casuale

# Genera il campione
generated_data = G(z).detach().numpy()

# Reshape per riportare la sequenza alla forma originale
generated_sequence = generated_data.reshape(-1, sample_data.shape[1], sample_data.shape[2])

In [31]:
def sequence_to_midi(sequence, output_file='generated_song.mid'):
    midi = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=0)  # Strumento: Piano

    for note_idx, note_data in enumerate(sequence):
        pitch = int(note_data[0])  # Nota (ad esempio, C4 = 60)
        start = note_data[1]       # Tempo di inizio
        duration = note_data[2]    # Durata
        end = start + duration

        # Crea una nota MIDI
        note = pretty_midi.Note(velocity=100, pitch=pitch, start=start, end=end)
        instrument.notes.append(note)

    midi.instruments.append(instrument)
    midi.write(output_file)

# Esempio di conversione
sequence_to_midi(generated_sequence[0], 'generated_song_60_epoch.mid')