In [None]:
# Mount Google Drive at /content/drive; the MIDI folder and the soundfont used
# further down are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Célula de Configuração do Monitoramento (adicionar no início)

# 1. Instalar bibliotecas para monitoramento
!pip install psutil pynvml

# 2. Importar tudo o que vamos precisar
import time
import threading
import psutil
import pynvml
import pandas as pd
import matplotlib.pyplot as plt

# 3. Definir a classe que fará o monitoramento
class ResourceMonitor:
    """Background sampler of system RAM, GPU RAM and disk usage.

    A daemon thread appends one row to ``self.data`` every ``interval`` seconds:
    ``[elapsed_seconds, system_ram_gb, gpu_ram_gb, disk_gb]``. GPU memory is read
    through NVML for device index 0 only; when NVML cannot be initialised the
    GPU column is recorded as 0.
    """

    def __init__(self, interval=5):
        """Prepare (but do not start) the sampling thread.

        Args:
            interval: Seconds to wait between two consecutive samples.
        """
        self.interval = interval
        self.data = []
        self._stop_event = threading.Event()
        self.thread = threading.Thread(target=self.run, daemon=True)

        # Initialise NVML for GPU monitoring; fall back to "no GPU" on failure.
        try:
            pynvml.nvmlInit()
            self.gpu_count = pynvml.nvmlDeviceGetCount()
        except pynvml.NVMLError:
            self.gpu_count = 0
            print("AVISO: Placa NVIDIA não encontrada ou driver indisponível. O monitoramento da GPU será desativado.")

    def _get_gpu_ram_usage(self):
        """Return the memory in use on GPU 0 in GB, or 0 when no GPU is monitored."""
        if self.gpu_count == 0:
            return 0
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return info.used / (1024**3)  # bytes -> GB

    def run(self):
        """Thread body: collect samples until ``stop()`` sets the stop event."""
        start_time = time.time()
        while not self._stop_event.is_set():
            timestamp = time.time() - start_time

            # Take one sample of each resource (all values in GB).
            sys_ram_used = psutil.virtual_memory().used / (1024**3)
            gpu_ram_used = self._get_gpu_ram_usage()
            disk_used = psutil.disk_usage('/').used / (1024**3)

            self.data.append([timestamp, sys_ram_used, gpu_ram_used, disk_used])
            # Event.wait() returns as soon as stop() is called, so stopping no
            # longer blocks for up to a whole interval the way time.sleep() did.
            self._stop_event.wait(self.interval)

        if self.gpu_count > 0:
            pynvml.nvmlShutdown()

    def start(self):
        """Start the background sampling thread."""
        print("Iniciando monitoramento de recursos...")
        self.thread.start()

    def stop(self):
        """Stop sampling and return the collected samples.

        Returns:
            A ``pandas.DataFrame`` with one row per sample and the columns
            'Tempo (s)', 'RAM Sistema (GB)', 'RAM GPU (GB)' and 'Disco (GB)'.
        """
        self._stop_event.set()
        # Thread.join() raises RuntimeError on a thread that was never started,
        # so only wait when the sampler is actually running.
        if self.thread.is_alive():
            self.thread.join()
        print("Monitoramento de recursos finalizado.")
        return pd.DataFrame(self.data, columns=['Tempo (s)', 'RAM Sistema (GB)', 'RAM GPU (GB)', 'Disco (GB)'])

    def plot(self):
        """Stop the monitor and plot the three resource curves against time."""
        df = self.stop()

        if df.empty:
            print("Nenhum dado de monitoramento foi coletado.")
            return

        fig, axes = plt.subplots(3, 1, figsize=(12, 15), sharex=True)
        fig.suptitle('Utilização de Recursos do Sistema Durante a Execução', fontsize=16)

        # System RAM panel
        axes[0].plot(df['Tempo (s)'], df['RAM Sistema (GB)'], label='RAM do Sistema Utilizada', color='blue')
        axes[0].set_ylabel('Uso (GB)')
        axes[0].set_title('Uso de RAM do Sistema')
        axes[0].grid(True)
        axes[0].legend()
        axes[0].fill_between(df['Tempo (s)'], df['RAM Sistema (GB)'], alpha=0.1, color='blue')

        # GPU RAM panel
        if self.gpu_count > 0:
            axes[1].plot(df['Tempo (s)'], df['RAM GPU (GB)'], label='RAM da GPU Utilizada', color='green')
            axes[1].fill_between(df['Tempo (s)'], df['RAM GPU (GB)'], alpha=0.1, color='green')
            # A legend is only drawn when a labelled line exists; calling
            # legend() on an axes without one makes matplotlib emit a warning.
            axes[1].legend()
        else:
            # Axes coordinates keep the notice centred whatever the data limits are.
            axes[1].text(0.5, 0.5, 'Monitoramento de GPU não disponível', ha='center', va='center',
                         transform=axes[1].transAxes)
        axes[1].set_ylabel('Uso (GB)')
        axes[1].set_title('Uso de RAM da GPU')
        axes[1].grid(True)

        # Disk usage panel
        axes[2].plot(df['Tempo (s)'], df['Disco (GB)'], label='Espaço em Disco Utilizado', color='red')
        axes[2].set_xlabel('Tempo (segundos)')
        axes[2].set_ylabel('Uso (GB)')
        axes[2].set_title('Uso de Disco')
        axes[2].grid(True)
        axes[2].legend()
        axes[2].fill_between(df['Tempo (s)'], df['Disco (GB)'], alpha=0.1, color='red')

        # Leave room at the top for the figure-level title.
        plt.tight_layout(rect=[0, 0, 1, 0.96])
        plt.show()

# 4. Start the stopwatch and the resource monitoring
# (Put these lines right before your main code starts running)
tempo_inicial = time.time()
monitor = ResourceMonitor(interval=5) # 'interval' is in seconds
monitor.start()

# Imports

In [None]:
!pip install pretty_midi
!apt-get install -y fluidsynth
!pip install midi2audio

In [None]:
from tqdm import tqdm
import os
import math
import random
import pandas as pd
import numpy as np
import IPython
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from IPython import *
import os
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import pretty_midi
import torch
import math as m
import torch.optim as optim
import collections
from itertools import chain
from torch import tensor
from midi2audio import FluidSynth
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Dataloading and Featurizing
Basically, a MIDI (Musical Instrument Digital Interface) note consists of four things: pitch, start time, end time, and volume. What I do first is take the MIDI file and convert it to a tensor containing the pitch, the time between the previous note and the current note, and the duration of the note (the volume is fixed to a constant in the code below). I changed the representation of the timings to reduce the range of the timing values.

Then I shifted the timings to be in 16th notes, tokenized the data, and clamped the range of some features so I could put all the values in one tensor without padding. I use preprocessed data because it takes about an hour to process MAESTRO.

In [None]:
# Velocity given to every note rebuilt by untenosrize().
volume = 50


def untenosrize(t):
    """Convert rows of ``(pitch, start, end)`` into ``pretty_midi`` notes.

    Args:
        t: Iterable of rows; element 0 is cast to ``int``, elements 1 and 2
            to ``float``.

    Returns:
        A list with one ``pretty_midi.containers.Note`` per row, each created
        with the module-level ``volume`` as its first argument.
    """
    notes = []
    for row in t:
        pitch, start, end = int(row[0]), float(row[1]), float(row[2])
        notes.append(pretty_midi.containers.Note(volume, pitch, start, end))
    return notes
def tenosrize(r):
    """Load a MIDI file and return the notes of its first instrument as a tensor.

    Args:
        r: Argument passed straight to ``pretty_midi.PrettyMIDI``.

    Returns:
        A tensor with one row per note: ``[pitch, start, duration]``.

    Raises:
        IndexError: If the file contains no instruments.
    """
    midi = pretty_midi.PrettyMIDI(r)
    # Only the first instrument was ever returned, so read it directly instead
    # of converting every instrument to a tensor and discarding the rest.
    # NOTE(review): notes of any further instruments are ignored — confirm
    # that this is intended for multi-track files.
    first_notes = midi.instruments[0].notes
    return tensor([[n.pitch, n.start, n.get_duration()] for n in first_notes])
def dic(t, dics):
    """Replace every raw feature value by its index in the matching vocabulary.

    Args:
        t: 2-D tensor whose first three columns hold values taken from
            ``dics[0]``, ``dics[1]`` and ``dics[2]`` respectively.
        dics: Three sequences of vocabulary values (NumPy scalars or plain
            numbers), one per column.

    Returns:
        A new ``torch.int`` tensor of the same shape holding vocabulary
        indices. ``t`` itself is not modified.

    Raises:
        ValueError: If a value does not occur in its vocabulary.
    """
    t = t.clone()
    for col in range(3):
        # One hash lookup per note instead of a linear list.index() scan.
        # Filling the table back-to-front keeps the first index for any
        # duplicated vocabulary value, exactly like list.index().
        lookup = {}
        for position in range(len(dics[col]) - 1, -1, -1):
            lookup[float(dics[col][position])] = position
        indices = []
        for value in t[:, col].tolist():
            if value not in lookup:
                raise ValueError(f"{value!r} is not in vocabulary {col}")
            indices.append(lookup[value])
        t[:, col] = torch.tensor(indices, dtype=t.dtype)
    return t.type(torch.int)
def undic(t, dics):
    """Map vocabulary indices back to their raw values.

    Args:
        t: Integer tensor whose first three columns index into ``dics[0]``,
            ``dics[1]`` and ``dics[2]``.
        dics: Three vocabularies, one per column.

    Returns:
        A tensor with the shape of ``t`` holding the looked-up values.
    """
    columns = [
        tensor([dics[col][idx] for idx in t[:, col]])
        for col in range(3)
    ]
    return torch.stack(columns, dim=1).reshape(t.shape)
def featurize(t):
    """Sort notes by start time and replace absolute starts with onset deltas.

    Args:
        t: 2-D tensor of notes; column 1 is the start time used as sort key
            (rows are ``[pitch, start, duration]`` when they come from
            ``tenosrize``).

    Returns:
        A new tensor with the rows in ascending start order. Column 1 of the
        first row keeps its absolute value; in every later row it holds the
        difference to the previous row's start. The input is left unchanged.
    """
    order = torch.argsort(t[:, 1], dim=0)
    # Advanced indexing copies the rows in sorted order in one call; unlike
    # torch.stack over a Python list it also works for a tensor with no rows.
    t = t[order]
    t[1:, 1] = t[1:, 1] - t[:-1, 1]
    return t
def unfeaturize(t):
    """Undo the delta encoding of ``featurize`` on a copy of ``t``.

    Column 1 is turned back into absolute start times by a running sum, and
    column 2 is then replaced by ``column 2 + column 1``.
    """
    result = t.clone()
    # Running sum: every row adds its delta to the previous absolute start.
    for row in range(1, len(result)):
        result[row, 1] = result[row, 1] + result[row - 1, 1]
    # With absolute starts in place, column 2 becomes start + column 2.
    result[:, 2] = result[:, 2] + result[:, 1]
    return result

In [None]:
# Collect the MIDI files that make up the corpus.

import os
from tqdm import tqdm # kept so this cell can also be run on its own

path = '/content/drive/MyDrive/test-code/classical-music-midi/chopin' # make sure this path matches your Drive layout

# Only files directly inside `path` are considered. os.listdir() returns
# entries in arbitrary order, so the list is sorted: that keeps data[0],
# data[:2] and the train/test split identical from one run to the next.
data = sorted(
    os.path.join(path, filename)
    for filename in os.listdir(path)
    if filename.lower().endswith(('.mid', '.midi'))
)

# Sanity check: report how many MIDI files were found.
print(f"VERIFICAÇÃO: Foram encontrados {len(data)} arquivos MIDI.")

In [None]:
# Vocabulary construction
def get_dics(directory):
    """Featurize every MIDI file and build one vocabulary per feature column.

    Reads the module-level ``time_step`` when called.

    Args:
        directory: Iterable of MIDI file paths.

    Returns:
        ``(t, dics)``: ``t`` is the concatenation of all featurized songs with
        columns 1 and 2 clamped and rounded to multiples of ``1/time_step``;
        ``dics`` is a list of three lists of ``np.float32`` values. The first
        one is always 0..127, the other two are the unique values found in
        columns 1 and 2.
    """
    featurized = []
    for midi_path in tqdm(directory):
        notes = featurize(tenosrize(midi_path))
        # Bound the onset delta from above and the duration from both sides.
        notes[:, 1] = torch.clamp(notes[:, 1], max=3.9687)
        notes[:, 2] = torch.clamp(notes[:, 2], min=1/time_step , max=4)
        featurized.append(notes)
    t = torch.cat(featurized)
    # Snap both timing columns onto the 1/time_step grid.
    t[:, [1,2]] = torch.round(t[:, [1,2]]*time_step)/time_step
    dics = []
    for column in range(3):
        unique_values = torch.unique(t[:, column]).type(torch.float)
        dics.append(list(np.array(unique_values)))
    # Column 0 always gets the full 0..127 range, whatever values occur in the data.
    dics[0] = [np.float32(pitch) for pitch in range(128)]
    return t, dics

# Run everything from scratch (no saved files are used)

time_step = 32
t, dics = get_dics(data)  # <- 'data' must already be loaded

songs = dic(t, dics)      # <- this function must also be defined correctly

# Reconstruction helper
def unlatent(t, dics=dics):
    """Decode a tensor of vocabulary indices into a list of ``pretty_midi`` notes.

    The default ``dics`` is the module-level vocabulary that exists when this
    function is defined.
    """
    values = undic(t, dics)
    absolute = unfeaturize(values)
    return untenosrize(absolute)


In [None]:
batch_size = 32
sequence_len = 128
# Cut the token stream into chunks of sequence_len notes. The last chunk is
# dropped because torch.split leaves it shorter whenever the number of notes
# is not a multiple of sequence_len.
split = torch.split(songs, sequence_len)[:-1]
chunks = torch.stack(split)
# Input is chunk i, target is chunk i+1. The two were swapped before, which
# trained the model to predict the *previous* chunk even though gener() feeds
# every output back in as the continuation of the song.
x , y = chunks[:-1], chunks[1:]
# 9/10 of the pairs (in units of len(x)//10) train the model; the rest is held out.
dslen = len(x)//10
xtrain, ytrain = x[:dslen*9], y[:dslen*9]
xtest, ytest = x[dslen*9:], y[dslen*9:]
class trainset(Dataset):
    """Dataset pairing input token sequences with one-hot encoded targets.

    ``data`` is a pair ``(x, y)``; item ``i`` is built from ``x[i]`` and ``y[i]``.
    """

    def __init__(self, data):
        inputs, targets = data
        self.x = inputs
        self.y = targets

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(x, (y_pitch, y_dtime, y_dur))`` for one sequence.

        Each target column is one-hot encoded with the size of its own
        vocabulary in the module-level ``dics``.
        """
        tokens = self.y[index].long()  # target tokens; columns 0..2 are encoded below
        one_hot_targets = tuple(
            nn.functional.one_hot(tokens[:, feature], num_classes=len(dics[feature]))
            for feature in range(3)
        )
        return self.x[index], one_hot_targets

# Wrap both splits in datasets, then in shuffling DataLoaders. Note that the
# names `train` and `test` are rebound from datasets to DataLoaders here.
train, test = trainset([xtrain, ytrain]), trainset([xtest, ytest])
train, test = DataLoader(train, batch_size=batch_size, shuffle = True), DataLoader(test, batch_size=batch_size, shuffle = True)

This is a test song after going through featurizing and compression. The difference is definitely noticeable, but it will make the model work a lot better. In particular, notes that are split into three parts aren't nicely preserved by 16th-note compression. The only way I could get audio to display was through a synthesizer library, which is why all the displayed audio sounds like it's from a synthesizer; the higher-quality unsynthesized audio is in the output of this notebook.

In [None]:
# Round-trip the first two files through featurizing and tokenisation so the
# effect of the encoding can be heard.
# NOTE: this rebinds the module-level name `t` that the build cell also assigns.
t, tok = get_dics(data[:2])
lat = dic(t, tok)
qwe = unlatent(lat, tok)
mid = pretty_midi.PrettyMIDI(data[0])
p1 = "test_uncompressed.mid"
p2 = "test_compressed.mid"
# Write the first file as loaded, then again with the notes of its first
# instrument replaced by the decoded notes.
mid.write(p1)
mid.instruments[0].notes = qwe
mid.write(p2)
# Sampling rate passed to synthesize() and Audio() in the playback cells.
fs = 44000

In [None]:
# Original file, synthesized at `fs` for in-notebook playback
mid = pretty_midi.PrettyMIDI(p1)
IPython.display.Audio(mid.synthesize(fs=fs), rate=fs)

In [None]:
# Compressed (encoded and decoded) file, synthesized at `fs` for in-notebook playback
mid = pretty_midi.PrettyMIDI(p2)
IPython.display.Audio(mid.synthesize(fs=fs), rate=fs)

# Generate music with model

In [None]:
# Template MIDI file: make_song() loads it and overwrites the notes of its first instrument.
p = data[0]
# Shared counter that numbers the generated "song <n>.mid" files (0..998).
numiter = range(999).__iter__()
def gener(gen, x, dis= None, sequences=100, escape_count=10):
    """Generate ``sequences`` chunks by feeding each output back in as input.

    Args:
        gen: Generator model, called as ``gen(x, generate=True)``.
        x: Seed token tensor.
        dis: Optional discriminator. When given, ``escape_count`` candidates
            are generated per step and the one it scores highest is kept.
        sequences: Number of chunks to generate.
        escape_count: Candidates per step when ``dis`` is given.

    Returns:
        All generated chunks concatenated along dim 1, squeezed, cast to int
        and mapped back to raw values with ``undic`` and the global ``dics``.
    """
    gen.to(device)
    if dis is not None:
        # Loop-invariant: move the discriminator once, not on every step.
        dis.to(device)
    output = []
    for _ in range(sequences):
        if dis is None:
            x = gen(x, generate=True)
        else:
            samples = [gen(x.type(torch.int), generate=True).type(torch.float32) for _ in range(escape_count)]
            score = [dis(sample) for sample in samples]
            x = samples[score.index(max(score))]
        output.append(x)
    return undic(torch.cat(output, dim=1).squeeze().type(torch.int), dics)
def make_song(gen, p=p, sequence_len=sequence_len, dis=None, sequences= 25, escape_count=10, evalu=False):
    """Generate a song with ``gen`` and write it to ``"song <n>.mid"``.

    Args:
        gen: Generator model.
        p: MIDI file used as a container; the notes of its first instrument
            are replaced by the generated notes.
        sequence_len: Unused; kept for backward compatibility.
        dis: Optional discriminator forwarded to ``gener``.
        sequences: Number of chunks to generate.
        escape_count: Candidates per step when ``dis`` is given.
        evalu: Generate in eval mode when true, in train mode (dropout active)
            otherwise.

    The file number ``<n>`` comes from the module-level ``numiter`` counter and
    the seed is the first sequence of a batch drawn from the global ``train``
    loader.
    """
    with torch.no_grad():
        # Switch the mode of the model that was passed in; this used to act on
        # the global `model` and ignored `gen`.
        if evalu:
            gen.eval()
        else:
            gen.train()
        x, y = next(iter(train))
        x = x[0].unsqueeze(dim=0)
        preds = gener(gen, x.to(device), dis=dis, sequences=sequences, escape_count=escape_count)
        out = untenosrize(unfeaturize(preds.squeeze()))
        mid = pretty_midi.PrettyMIDI(p)
        mid.instruments[0].notes = out
        itera = str(next(numiter))
        mid.write("song " + itera + '.mid')

# Model
I adapted this transformer from the PyTorch docs (https://pytorch.org/tutorials/beginner/transformer_tutorial.html).
A lot of the material I read said sparse transformers work better, which I might try at some point.

In [None]:
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding followed by dropout.

    The table is stored in the buffer ``pe`` with shape
    ``(1, max_len, 1, d_model)`` and is added to 4-D inputs whose dim 1 is the
    sequence position and whose last dim is ``d_model``.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = sequence_len):
        """
        Args:
            d_model: Size of the last input dimension.
            dropout: Dropout probability applied after adding the encoding.
            max_len: Longest sequence the table covers.
        """
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        # The table is identical for every sample, so keep a single copy and
        # let broadcasting expand it. Storing `batch_size` copies tied the
        # module to a global and broke for any larger batch.
        pe = torch.zeros(1, max_len, 1, d_model)
        pe[:, :, 0, 0::2] = torch.sin(position * div_term)
        pe[:, :, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding for the first ``x.size(1)`` positions, then apply dropout."""
        # [:1] selects one copy of the table (this also works for a pickled
        # model that still carries the old batch-sized buffer); the second
        # slice trims the table to the actual sequence length.
        x = x + self.pe[:1, :x.size(1)]
        return self.dropout(x)
def compose(f, x):
    """Apply ``f`` to ``x`` and return the result."""
    result = f(x)
    return result
class transformer(nn.Module):
    """Causally masked transformer encoder over three-feature note tokens.

    Every token of the input is embedded with one shared embedding table, the
    three embeddings of a note are concatenated into a vector of size
    ``3 * d_model``, passed through the encoder, and decoded into three
    groups of ``nembeds`` logits.
    """

    def __init__(self, d_model=1024, nhead=16, d_hid=16, nlayers=6, dropout= 0.25, nembeds=128):
        """
        Args:
            d_model: Embedding size per feature.
            nhead: Number of attention heads.
            d_hid: Feed-forward dimension of each encoder layer.
            nlayers: Number of encoder layers.
            dropout: Dropout probability.
            nembeds: Size of the embedding table and of each logit group.
        """
        super().__init__()

        # NOTE(review): pos_encoder and dropout are not used in forward(); they
        # are kept so the module's attributes and saved models stay unchanged.
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        self.embeds = nn.Embedding(nembeds, d_model)
        self.pos_embeds = PositionalEncoding(d_model, dropout)
        layers = nn.TransformerEncoderLayer(d_model*3, nhead, d_hid, dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(layers, nlayers)
        self.decoder = nn.Linear(d_model*3, nembeds*3)

        self.dropout = nn.Dropout(dropout)
        self.d_model = d_model
        # Additive causal mask: -inf above the diagonal, 0 elsewhere.
        self.mask = torch.triu(torch.ones(sequence_len, sequence_len) * float('-inf'), diagonal=1).to(device)

    def forward(self, x, generate=False):
        """Run the model.

        Args:
            x: Integer token tensor; the reshape below requires the embedded
                input to have ``3 * d_model`` values per position.
            generate: When true, return the argmax token per feature instead
                of the logits.

        Returns:
            Logits of shape ``(batch, seq, 3, nembeds)``, or their argmax over
            the last dimension when ``generate`` is true.
        """
        x = self.embeds(x) * math.sqrt(self.d_model)
        x = self.pos_embeds(x)
        sp = x.shape
        # Concatenate the three feature embeddings of every note.
        x = torch.reshape(x, (sp[0], sp[1], (self.d_model*3)))
        # The mask is a plain attribute, so Module.to() does not move it:
        # slice it to the input length and put it on the input's device.
        mask = self.mask[:sp[1], :sp[1]].to(x.device)
        x = self.transformer_encoder(x, mask)
        x = self.decoder(x)
        shap = x.shape
        x = torch.reshape(x, (shap[0], shap[1], 3, shap[2]//3))
        if generate:
            # NOTE(review): the argmax runs over all nembeds classes, while the
            # loss only looks at the first len(vocabulary) logits of a feature;
            # confirm that generated indices always fall inside the vocabulary.
            x = torch.argmax(x, dim=3)
        return x

# Evaluation function

In [None]:
# Evaluation on the held-out loader.
# NOTE: the name `eval` shadows Python's built-in eval(); it is kept because
# fit() and traindis() call it under this name.

def eval(model, dis=None):
    """Evaluate ``model`` on the global ``test`` loader and print the losses.

    Prints the mean ``creloss`` per batch ("batch loss") and that mean divided
    by the number of elements of the first input batch ("numel loss"). When a
    discriminator is given, its mean score on generated tokens ("False") and on
    the inputs ("True") is printed as well.

    Args:
        model: Model to evaluate.
        dis: Optional discriminator.
    """
    # Remember the current mode so the caller gets the model back as it was:
    # leaving it in eval mode silently disabled dropout for the rest of training.
    was_training = model.training
    model.eval()
    model.to(device)
    if dis is not None:
        dis.to(device)
    try:
        with torch.no_grad():
            loss, false_preds, true_preds = [], [], []
            shape = None
            for x_batch, y_batch in test:
                x = x_batch.to(device)
                # The targets are a tuple of tensors; move each one separately.
                y = (y_batch[0].to(device), y_batch[1].to(device), y_batch[2].to(device))

                if shape is None:
                    shape = torch.numel(x)
                preds = model(x)
                if dis is not None:
                    gen = torch.argmax(preds, dim=3)
                    false_preds.append(dis(gen).mean().item())
                    true_preds.append(dis(x).mean().item())
                loss.append(creloss(preds, y).item())
            if not loss:
                # An empty loader used to end in a ZeroDivisionError below.
                print("Eval skipped: the test loader yielded no batches")
                return
            numelloss = round((sum(loss)/len(loss))/shape, 4)
            avloss = round(sum(loss)/len(loss), 4)
            if dis is not None:
                false_preds = round(sum(false_preds)/len(false_preds), 4)
                true_preds = round(sum(true_preds)/len(true_preds), 4)
                print("Eval batch loss " + str(avloss) + " numel loss " + str(numelloss) + " False " + str(false_preds) + " True " + str(true_preds))
            else:
                print("Eval batch loss " + str(avloss) + " numel loss " + str(numelloss))
    finally:
        model.train(was_training)

# Training loop
Here there's a custom cross-entropy loss because PyTorch's built-in CrossEntropyLoss doesn't work on tensors of these dimensions.

In [None]:
from tqdm import tqdm
# CÓDIGO NOVO
import torch.nn.functional as F

def smax(t):
    """Return the softmax of ``t`` over its last dimension."""
    # Tensor.softmax is PyTorch's native, numerically stable softmax.
    return t.softmax(dim=-1)

def creloss(preds, targets):
    """Summed cross-entropy over the pitch, delta-time and duration outputs.

    Args:
        preds: Logits of shape ``(batch, seq, 3, n_classes)``.
        targets: Tuple ``(y_pitch, y_dtime, y_dur)`` of one-hot tensors. The
            size of each one's last dimension is that feature's vocabulary
            size; only that many leading logits of the feature are used.

    Returns:
        A scalar tensor: the sum (not the mean) of the three cross-entropies
        over all positions.
    """
    y_pitch, y_dtime, y_dur = targets
    loss = preds.new_zeros(())
    for feature, one_hot in enumerate((y_pitch, y_dtime, y_dur)):
        # Keep only the logits that correspond to this feature's vocabulary.
        logits = preds[:, :, feature, :one_hot.shape[-1]]
        # log_softmax works in log space. softmax followed by log(p + 1e-8)
        # capped the per-token loss at about 18.4 and lost the gradient
        # whenever the true class probability underflowed to 0.
        log_probs = F.log_softmax(logits, dim=-1)
        loss = loss - torch.sum(one_hot.float() * log_probs)
    return loss
# CÓDIGO NOVO (PARA COLAR NO LUGAR DO ANTIGO)

def fit(model, dl, epochs=1, lr=0.0001):
    """Train ``model`` on ``dl`` with AdamW, reporting losses after every epoch.

    After each epoch the mean ``creloss`` per batch of that epoch is printed
    together with the same value divided by the number of elements of the
    first input batch, and ``eval(model)`` is run.

    Args:
        model: Model to train.
        dl: Iterable of ``(x, (y_pitch, y_dtime, y_dur))`` batches.
        epochs: Number of passes over ``dl``.
        lr: Learning rate.

    Raises:
        ValueError: If ``dl`` yields no batches.
    """
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    shape = None
    for _ in range(epochs):
        # eval() calls model.eval(); switch back to training mode at the start
        # of every epoch so dropout is active in all epochs, not only the first.
        model.train()
        # Losses are collected per epoch. A single list shared by all epochs
        # made the printed value a running average over the whole run.
        epoch_loss = []
        for x_batch, y_batch in tqdm(dl):
            if shape is None:
                shape = torch.numel(x_batch)

            x = x_batch.to(device)
            # The targets are a tuple of tensors; move each one separately.
            y = (y_batch[0].to(device), y_batch[1].to(device), y_batch[2].to(device))

            preds = model(x)
            loss = creloss(preds, y)
            epoch_loss.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if not epoch_loss:
            raise ValueError("fit() received a DataLoader that yields no batches")
        avloss = sum(epoch_loss)/len(epoch_loss)
        print("training batch loss " + str(avloss) + " numel loss " + str(avloss/shape))
        eval(model)

In [None]:
# Train the transformer, save the whole model object, then write two songs:
# the first generated in train mode, the second in eval mode (evalu=True).
model = transformer()
fit(model, train, lr=0.0001, epochs=50)
torch.save(model, "music_transformer.pkl")
make_song(model)
make_song(model, evalu=True)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Values copied from the training log (Transformer + Chopin, run 2)
epochs = list(range(1, 51))

# Mean loss per element ("numel loss") reported after every epoch
train_numel_loss = [
    3.2927, 2.9649, 2.8228, 2.7348, 2.6712, 2.6172, 2.5695, 2.5265, 2.4896, 2.4570,
    2.4269, 2.3972, 2.3711, 2.3469, 2.3249, 2.3030, 2.2805, 2.2579, 2.2364, 2.2155,
    2.1960, 2.1767, 2.1578, 2.1390, 2.1209, 2.1040, 2.0870, 2.0700, 2.0528, 2.0355,
    2.0182, 2.0007, 1.9828, 1.9648, 1.9468, 1.9289, 1.9113, 1.8933, 1.8752, 1.8568,
    1.8382, 1.8192, 1.8001, 1.7809, 1.7621, 1.7431, 1.7240, 1.7049, 1.6856, 1.6662
]

eval_numel_loss = [
    1.7244, 1.6751, 1.7006, 1.7573, 1.6933, 1.6892, 1.6747, 1.7268, 1.8042, 1.7868,
    1.7781, 1.8144, 1.7896, 1.8747, 1.7948, 1.8404, 1.8610, 1.8879, 1.8775, 1.9045,
    1.8799, 1.8986, 1.9363, 1.9314, 1.9433, 1.9082, 1.9112, 1.9579, 1.9203, 1.9453,
    1.9753, 1.9712, 2.0107, 1.9644, 2.0101, 2.0168, 2.0415, 2.0842, 2.0567, 2.0855,
    2.1184, 2.1327, 2.1257, 2.1751, 2.1685, 2.1912, 2.2239, 2.2452, 2.2453, 2.2542
]

# One figure with a single axes; both curves are drawn against the epoch number.
fig, ax = plt.subplots(figsize=(12, 7))
ax.plot(epochs, train_numel_loss, 'bo-', label='Perda de Treinamento', markersize=4)
ax.plot(epochs, eval_numel_loss, 'rs-', label='Perda de Avaliação', markersize=4)

# Title, axis labels, grid and legend
ax.set_title('Curva de Perda Média por Epoch', fontsize=16)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Valor da Perda', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(fontsize=12)

# One x tick every 5 epochs (0, 5, 10, ...)
ax.set_xticks(np.arange(0, 51, 5))

plt.show()

In [None]:
# Play "song 0.mid", the first file written by make_song().
mid = pretty_midi.PrettyMIDI("song 0.mid")
IPython.display.Audio(mid.synthesize(fs=fs), rate=fs)

In [None]:
# Play "song 1.mid", the second file written by make_song().
mid = pretty_midi.PrettyMIDI("song 1.mid")
IPython.display.Audio(mid.synthesize(fs=fs), rate=fs)

# Discriminator
Since the transformer uses dropout when generating new songs, there is some randomness in the generated samples. To take advantage of this, a discriminator picks the best sample out of a few generated samples. The discriminator is an LSTM followed by convolutions that is heavily regularized (50% dropout).

In [None]:
class discriminator(nn.Module):
    """Scores token sequences: MLP -> LSTM -> two strided convolutions -> sigmoid.

    The final ``nn.Linear(3, 1)`` fixes the length that must reach it: the two
    convolutions have to reduce the sequence to exactly 3 steps (which a
    sequence of 128 steps does).
    """

    def __init__(self, input_dim=3, lin_dim=256, lstm_dim=256, lstm_layers=4, dropout=0.5):
        """
        Args:
            input_dim: Number of features per sequence step.
            lin_dim: Output size of the MLP and input size of the LSTM.
            lstm_dim: Hidden size of the LSTM.
            lstm_layers: Number of stacked LSTM layers.
            dropout: Dropout probability between LSTM layers.
        """
        super().__init__()
        # NOTE(review): `droupout` and `lin` are not used in forward(); they
        # are kept so the module's attributes and saved models stay unchanged.
        self.droupout = nn.Dropout(dropout)
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, lin_dim // 2),
            nn.LeakyReLU(0.2),
            nn.Linear(lin_dim // 2, lin_dim),
            nn.LeakyReLU(0.2))
        self.conv = nn.Sequential(
            nn.Conv1d(lstm_dim, 8, 8, 4),
            # Was LeakyReLU(0,1): negative_slope=0 with inplace=1, i.e. an
            # in-place ReLU. The comma was a typo for a slope of 0.1.
            nn.LeakyReLU(0.1),
            nn.Conv1d(8, 1, 8, 8),
            nn.LeakyReLU(0.1),
            nn.Linear(3, 1))
        self.act= nn.Sigmoid()
        self.lin = nn.Sequential(nn.Linear(4, 1))
        self.lstm = nn.LSTM(input_size=lin_dim, hidden_size=lstm_dim, num_layers=lstm_layers, batch_first=True, dropout=dropout, bidirectional=False)

    def forward(self, x):
        """Return sigmoid scores for ``x`` of shape ``(batch, seq, input_dim)``.

        The result is squeezed, so a batch of one yields a 0-dim tensor.
        """
        x = x.type(torch.float)
        x = self.mlp(x)
        x, _ = self.lstm(x)
        # Conv1d expects (batch, channels, length); the LSTM emits (batch, length, channels).
        x = self.conv(x.permute(0, 2, 1))
        return self.act(x.squeeze())

In [None]:


def traindis(gen, dis, epochs=1, lr=0.001, noise_scale=10):
    """Train the discriminator to tell real sequences from generated ones.

    For every batch of the global ``train`` loader the inputs are the "real"
    samples (label 1) and ``gen(x, generate=True)`` are the "fake" ones
    (label 0). Only ``dis`` is updated. After each epoch ``eval(gen, dis)`` is
    run.

    Args:
        gen: Generator model (not updated).
        dis: Discriminator to train.
        epochs: Number of passes over the training loader.
        lr: Learning rate of the Adam optimizer.
        noise_scale: Unused; kept for backward compatibility.
    """
    dis_opt = optim.Adam(dis.parameters(), lr=lr)
    gen.train().to(device)
    dis.train().to(device)
    lossfunc = nn.BCELoss().to(device)
    for _ in range(epochs):
        # eval() calls model.eval() on the generator; re-enter training mode
        # every epoch so the generator's dropout stays active for all of them.
        gen.train()
        dis.train()
        for x_batch, _ in tqdm(train):
            x = x_batch.to(device)
            y_real = x.type(torch.float32)

            dis_opt.zero_grad()
            # The generator is frozen here: no gradients flow into it.
            with torch.no_grad():
                fake = gen(x, generate=True).type(torch.float32)

            fake_preds = dis(fake)
            real_preds = dis(y_real)

            fake_loss = lossfunc(fake_preds, (torch.zeros_like(fake_preds)))
            real_loss = lossfunc(real_preds,(torch.ones_like(real_preds)))
            dis_loss = (fake_loss + real_loss)/2
            dis_loss.backward()
            dis_opt.step()
        # Evaluate the generator that was passed in; this used to evaluate the
        # global `model` and ignored `gen`.
        eval(gen, dis)

In [None]:
# Train a discriminator against the trained model, generate a song with
# discriminator-guided sampling, then load "song 2.mid" for playback.
dis = discriminator()
traindis(model, dis)
make_song(model, dis=dis)
mid = pretty_midi.PrettyMIDI("song 2.mid")
IPython.display.Audio(mid.synthesize(fs=fs), rate=fs)

In [None]:
from midi2audio import FluidSynth
# Use a dedicated name: `fs` already holds the integer sample rate that the
# playback cells pass to synthesize() and Audio(). Rebinding it to a FluidSynth
# object made those cells fail when they were run again afterwards.
synth = FluidSynth('/content/drive/MyDrive/soundfonts/FluidR3_GM.sf2')  # the .sf2 file must exist at this path
# NOTE(review): no cell in this notebook writes 'test.mid' — confirm which
# MIDI file is meant to be rendered here.
synth.midi_to_audio('test.mid', 'test.wav')


# Conclusion
The music is alright: about a quarter of the time the music is locally comparable to human-made samples, but it lacks any long-term consistency. This was still a good project, with lots of ways to improve this model.

In [None]:
# Wrap-up and plotting cell (keep it at the end of the notebook)

# 1. Stop the stopwatch and compute the total run time
tempo_final = time.time()
tempo_total_segundos = tempo_final - tempo_inicial

# Split the elapsed time into hours, minutes and seconds
horas = int(tempo_total_segundos // 3600)
minutos = int((tempo_total_segundos % 3600) // 60)
segundos = int(tempo_total_segundos % 60)

print("\n--- Relatório Final de Execução ---")
print(f"Tempo de Execução Total: {horas}h {minutos}min {segundos}s")

# 2. Stop the monitor and draw the resource usage charts
# plot() calls stop() itself and processes the collected data
monitor.plot()