In [3]:
import sys

import numpy as np
import torch
import pretty_midi

# Report the interpreter and library versions this notebook was run with, so
# results can be tied to a concrete environment.
version_report = (
    ("Python", sys.version),
    ("NumPy", np.__version__),
    ("PyTorch", torch.__version__),
    ("pretty_midi", pretty_midi.__version__),
)
for label, version in version_report:
    print(f"{label} version:", version)

# Report CUDA availability and, when present, every visible GPU model.
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
if not cuda_available:
    print("No CUDA-compatible GPU detected.")
else:
    print("CUDA version:", torch.version.cuda)

    num_gpus = torch.cuda.device_count()
    print(f"Number of GPUs: {num_gpus}")
    for gpu_index in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        print(f"GPU {gpu_index}: {gpu_name}")


Python version: 3.10.9 (tags/v3.10.9:1dd9be6, Dec  6 2022, 20:01:21) [MSC v.1934 64 bit (AMD64)]
NumPy version: 1.26.4
PyTorch version: 2.2.2+cu118
pretty_midi version: 0.2.10
CUDA available: True
CUDA version: 11.8
Number of GPUs: 1
GPU 0: NVIDIA GeForce RTX 3060 Laptop GPU


In [None]:
import torch
import torch.nn as nn


class Discriminator(nn.Module):
    """Conditional critic that scores a piano roll given a condition label.

    Four stride-2 convolutions each halve both spatial dimensions; the
    flattened feature map is concatenated with a 32-dimensional embedding of
    the condition and mapped to one unbounded score per sample.

    Args:
        note_dim: Height of the input piano roll (must be >= 16).
        time_steps: Width of the input piano roll (must be >= 16).
        num_conditions: Number of distinct condition labels (embedding rows).

    Raises:
        ValueError: If ``note_dim`` or ``time_steps`` is smaller than 16, since
            four halvings would leave an empty feature map.
    """

    def __init__(self, note_dim, time_steps, num_conditions):
        super(Discriminator, self).__init__()
        if note_dim < 16 or time_steps < 16:
            raise ValueError(
                f"note_dim and time_steps must be >= 16, got ({note_dim}, {time_steps})")
        self.note_dim = note_dim
        self.time_steps = time_steps
        self.condition_dim = num_conditions

        # Condition embedding
        self.condition_embed = nn.Embedding(num_conditions, 32)

        # Convolutional layers. With kernel 4, stride 2, padding 1 each layer
        # maps a spatial size n to n // 2. The shape comments assume the
        # default 128 x 32 input.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(4, 4), stride=(
            2, 2), padding=(1, 1))  # Output: (64, 16)
        self.leaky_relu1 = nn.LeakyReLU(0.2, inplace=True)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(
            4, 4), stride=(2, 2), padding=(1, 1))  # Output: (32, 8)
        self.leaky_relu2 = nn.LeakyReLU(0.2, inplace=True)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=(
            4, 4), stride=(2, 2), padding=(1, 1))  # Output: (16, 4)
        self.leaky_relu3 = nn.LeakyReLU(0.2, inplace=True)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=(
            4, 4), stride=(2, 2), padding=(1, 1))  # Output: (8, 2)
        self.leaky_relu4 = nn.LeakyReLU(0.2, inplace=True)
        self.flatten = nn.Flatten()

        # Fully connected layer. The flattened size is derived from the
        # constructor arguments instead of being hard-coded for 128 x 32.
        feature_height = note_dim // 16
        feature_width = time_steps // 16
        self.fc = nn.Linear(512 * feature_height * feature_width + 32, 1)

    def forward(self, x, condition):
        """Score a batch of piano rolls.

        Args:
            x: Tensor of shape [batch, note_dim, time_steps] or
                [batch, 1, note_dim, time_steps].
            condition: Integer tensor of shape [batch] with condition labels.

        Returns:
            Tensor of shape [batch, 1] with one score per sample.
        """
        # Only add the channel axis when it is missing; a 4-D input with the
        # wrong channel count is left for conv1 to reject.
        if x.dim() == 3:
            x = x.unsqueeze(1)  # [batch_size, 1, note_dim, time_steps]

        # Embed condition
        c = self.condition_embed(condition)  # [batch_size, 32]

        # Apply convolutional layers
        x = self.leaky_relu1(self.conv1(x))
        x = self.leaky_relu2(self.conv2(x))
        x = self.leaky_relu3(self.conv3(x))
        x = self.leaky_relu4(self.conv4(x))
        x = self.flatten(x)  # [batch_size, 512 * (note_dim // 16) * (time_steps // 16)]

        # Concatenate condition embedding and apply fully connected layer
        x = torch.cat([x, c], dim=1)
        return self.fc(x)  # [batch_size, 1]


In [None]:
import torch
import torch.nn as nn


class Generator(nn.Module):
    """Conditional generator mapping (latent vector, condition) to a piano roll.

    The latent vector is concatenated with a 32-dimensional condition
    embedding and projected to a 256-channel seed feature map. Three
    transposed convolutions double both spatial dimensions, and a final one
    doubles only the height. A sigmoid keeps the output in [0, 1].

    Args:
        latent_dim: Size of the latent noise vector.
        note_dim: Height of the generated piano roll; must be a positive
            multiple of 16 (height is upsampled 16x from the seed map).
        time_steps: Width of the generated piano roll; must be a positive
            multiple of 8 (width is upsampled 8x from the seed map).
        num_conditions: Number of distinct condition labels (embedding rows).

    Raises:
        ValueError: If ``note_dim`` or ``time_steps`` violates the
            divisibility requirements above.
    """

    def __init__(self, latent_dim, note_dim, time_steps, num_conditions):
        super(Generator, self).__init__()
        if note_dim < 16 or note_dim % 16 != 0:
            raise ValueError(
                f"note_dim must be a positive multiple of 16, got {note_dim}")
        if time_steps < 8 or time_steps % 8 != 0:
            raise ValueError(
                f"time_steps must be a positive multiple of 8, got {time_steps}")
        self.note_dim = note_dim  # 128 by default
        self.time_steps = time_steps  # 32 by default
        self.condition_dim = num_conditions

        # Seed feature-map size, derived from the requested output size so the
        # constructor arguments are actually honoured (8 x 4 for 128 x 32).
        self.base_height = note_dim // 16
        self.base_width = time_steps // 8

        # Condition embedding
        self.condition_embed = nn.Embedding(num_conditions, 32)

        # Project latent vector + condition
        self.project = nn.Linear(
            latent_dim + 32, 256 * self.base_height * self.base_width)

        # Convolutional layers. The shape comments assume the default
        # 128 x 32 output.
        self.batch_norm1 = nn.BatchNorm2d(256)
        self.relu1 = nn.ReLU(True)
        self.conv_transpose1 = nn.ConvTranspose2d(
            256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))  # (16, 8)
        self.batch_norm2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU(True)
        self.conv_transpose2 = nn.ConvTranspose2d(128, 64, kernel_size=(
            4, 4), stride=(2, 2), padding=(1, 1))  # (32, 16)
        self.batch_norm3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU(True)
        self.conv_transpose3 = nn.ConvTranspose2d(64, 32, kernel_size=(
            4, 4), stride=(2, 2), padding=(1, 1))  # (64, 32)
        self.batch_norm4 = nn.BatchNorm2d(32)
        self.relu4 = nn.ReLU(True)
        # Doubles the height only, leaving the time axis untouched.
        self.conv_transpose4 = nn.ConvTranspose2d(32, 1, kernel_size=(
            2, 1), stride=(2, 1), padding=(0, 0))  # (128, 32)
        self.sigmoid = nn.Sigmoid()

    def forward(self, z, condition):
        """Generate a batch of piano rolls.

        Args:
            z: Tensor of shape (batch, latent_dim).
            condition: Integer tensor of shape (batch,) with condition labels.

        Returns:
            Tensor of shape (batch, note_dim, time_steps) with values in [0, 1].
        """
        # Embed condition
        c = self.condition_embed(condition)  # (batch_size, 32)
        x = torch.cat([z, c], dim=1)  # (batch_size, latent_dim + 32)
        x = self.project(x).view(-1, 256, self.base_height, self.base_width)

        # Apply layers explicitly
        x = self.conv_transpose1(self.relu1(self.batch_norm1(x)))
        x = self.conv_transpose2(self.relu2(self.batch_norm2(x)))
        x = self.conv_transpose3(self.relu3(self.batch_norm3(x)))
        x = self.conv_transpose4(self.relu4(self.batch_norm4(x)))
        x = self.sigmoid(x)

        return x.squeeze(1)  # (batch_size, note_dim, time_steps)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import visualkeras

class GANTrainer:
    """Trains a conditional generator/critic pair with a gradient-penalty loss.

    The constructor builds the two networks, their Adam optimizers, the data
    loader and the MIDI writer, and creates the output directory.

    Args:
        config: Object exposing ``latent_dim``, ``note_dim``, ``time_steps``,
            ``num_keys``, ``lr``, ``beta1``, ``beta2``, ``data_dir``,
            ``max_files`` and ``output_dir``; ``train`` additionally reads
            ``epochs``, ``n_critic``, ``batch_size`` and
            ``gradient_penalty_weight``.
    """

    # Number of training iterations between progress logs, sample MIDI dumps
    # and generator checkpoints.
    LOG_INTERVAL = 50

    def __init__(self, config):
        self.config = config
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.generator = Generator(
            config.latent_dim, config.note_dim, config.time_steps, config.num_keys).to(self.device)
        self.discriminator = Discriminator(
            config.note_dim, config.time_steps, config.num_keys).to(self.device)
        self.optimizer_G = optim.Adam(self.generator.parameters(
        ), lr=config.lr, betas=(config.beta1, config.beta2))
        self.optimizer_D = optim.Adam(self.discriminator.parameters(
        ), lr=config.lr, betas=(config.beta1, config.beta2))
        self.data_loader = DataLoader(
            config.data_dir, max_files=config.max_files)
        self.midi_converter = MidiConverter()
        os.makedirs(config.output_dir, exist_ok=True)

    def compute_gradient_penalty(self, real_samples, fake_samples, condition):
        """Return the mean squared deviation of the critic's gradient norm from 1.

        The gradient is taken with respect to random per-sample interpolations
        between ``real_samples`` and ``fake_samples``.

        Args:
            real_samples: Tensor of shape (batch, H, W).
            fake_samples: Tensor with the same shape as ``real_samples``.
            condition: Condition labels forwarded to the discriminator.

        Returns:
            Scalar tensor, differentiable w.r.t. the discriminator parameters.

        Raises:
            ValueError: If the two sample tensors differ in shape.
        """
        if real_samples.shape != fake_samples.shape:
            raise ValueError(
                f"Shape mismatch: real_samples {real_samples.shape}, fake_samples {fake_samples.shape}")
        # The penalty constrains the critic only. Detaching stops backward()
        # from also running through whatever produced the samples (the
        # generator), which would waste compute and leave stray gradients there.
        real_samples = real_samples.detach()
        fake_samples = fake_samples.detach()

        # One mixing coefficient per sample, broadcast over the roll.
        alpha = torch.rand(real_samples.size(0), 1, 1, device=self.device)
        interpolates = (alpha * real_samples + (1 - alpha)
                        * fake_samples).requires_grad_(True)
        d_interpolates = self.discriminator(interpolates, condition)
        gradients = torch.autograd.grad(
            outputs=d_interpolates,
            inputs=interpolates,
            grad_outputs=torch.ones_like(d_interpolates),
            create_graph=True,  # the penalty itself must be differentiable
            retain_graph=True,
            only_inputs=True,
        )[0]
        gradients = gradients.reshape(gradients.size(0), -1)
        return ((gradients.norm(2, dim=1) - 1) ** 2).mean()

    def evaluate_piano_roll(self, piano_roll):
        """Compute simple statistics of a piano roll.

        Args:
            piano_roll: 2-D array; rows are summed to obtain per-pitch counts.

        Returns:
            Dict with ``note_density`` (mean of all cells) and
            ``pitch_entropy`` (base-2 entropy of the per-row totals).
        """
        note_density = np.mean(piano_roll)
        pitch_counts = np.sum(piano_roll, axis=1)
        pitch_probs = pitch_counts / (pitch_counts.sum() + 1e-10)
        active_probs = pitch_probs[pitch_probs > 0]
        pitch_entropy = -np.sum(active_probs * np.log2(active_probs + 1e-10))
        return {"note_density": note_density, "pitch_entropy": pitch_entropy}

    def validate_generator_output(self):
        """Assert that the generator emits (note_dim, time_steps) piano rolls."""
        # Run in eval mode so this probe does not touch the BatchNorm running
        # statistics; the previous mode is restored afterwards.
        was_training = self.generator.training
        self.generator.eval()
        try:
            with torch.no_grad():
                test_z = torch.randn(
                    1, self.config.latent_dim, device=self.device)
                test_key = torch.tensor([0], device=self.device)
                test_output = self.generator(test_z, test_key)
        finally:
            self.generator.train(was_training)
        assert test_output.shape[1:] == (self.config.note_dim, self.config.time_steps), \
            f"Generator output shape mismatch: {test_output.shape}"

    def train(self):
        """Run the full training loop, then write final samples.

        Each iteration performs ``n_critic`` discriminator updates followed by
        one generator update. Every ``LOG_INTERVAL`` iterations the losses are
        printed, a sample MIDI file is written and the generator weights are
        saved to ``config.output_dir``.
        """
        self.validate_generator_output()
        data, key_labels = self.data_loader.load_data()
        data = data.to(self.device)
        key_labels = key_labels.to(self.device)
        batch_size = self.config.batch_size
        latent_dim = self.config.latent_dim

        for epoch in range(self.config.epochs):
            # Train Discriminator
            for _ in range(self.config.n_critic):
                idx = np.random.randint(0, data.shape[0], batch_size)
                real_samples = data[idx]
                key_cond = key_labels[idx]
                z = torch.randn(batch_size, latent_dim, device=self.device)
                # The critic step never needs generator gradients, so skip
                # building the generator graph altogether.
                with torch.no_grad():
                    fake_samples = self.generator(z, key_cond)
                real_loss = - \
                    torch.mean(self.discriminator(real_samples, key_cond))
                fake_loss = torch.mean(
                    self.discriminator(fake_samples, key_cond))
                gradient_penalty = self.compute_gradient_penalty(
                    real_samples, fake_samples, key_cond)
                d_loss = real_loss + fake_loss + \
                    self.config.gradient_penalty_weight * gradient_penalty
                self.optimizer_D.zero_grad()
                d_loss.backward()
                self.optimizer_D.step()

            # Train Generator
            z = torch.randn(batch_size, latent_dim, device=self.device)
            key_cond = torch.randint(
                0, self.config.num_keys, (batch_size,), device=self.device)
            fake_samples = self.generator(z, key_cond)
            g_loss = -torch.mean(self.discriminator(fake_samples, key_cond))
            self.optimizer_G.zero_grad()
            g_loss.backward()
            self.optimizer_G.step()

            # Log and save
            if epoch % self.LOG_INTERVAL == 0:
                print(
                    f"Epoch {epoch}, D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")
                self.generator.eval()
                with torch.no_grad():
                    z = torch.randn(1, latent_dim, device=self.device)
                    key_cond = torch.tensor([0], device=self.device)  # C major
                    gen = self.generator(z, key_cond).cpu().numpy().squeeze(0)
                    # Each cell is binarised against its own random threshold
                    # drawn from [0.3, 0.7).
                    binary = (gen > np.random.uniform(
                        0.3, 0.7, gen.shape)).astype(int)
                    self.midi_converter.piano_roll_to_midi(
                        binary, f"{self.config.output_dir}/generated_epoch_{epoch:04}_cmajor.mid")
                self.generator.train()
                torch.save(self.generator.state_dict(
                ), f"{self.config.output_dir}/generator_epoch_{epoch:04}.pth")

        # Final output for multiple keys
        self.generate_final_outputs()

    def generate_final_outputs(self):
        """Write one thresholded sample per key for the first (up to) three keys."""
        self.generator.eval()
        key_names = ["C_major", "C#_major", "D_major", "D#_major", "E_major", "F_major", "F#_major",
                     "G_major", "G#_major", "A_major", "A#_major", "B_major",
                     "C_minor", "C#_minor", "D_minor", "D#_minor", "E_minor", "F_minor",
                     "F#_minor", "G_minor", "G#_minor", "A_minor", "A#_minor", "B_minor"]
        with torch.no_grad():
            for key_idx in range(min(self.config.num_keys, 3)):
                z = torch.randn(1, self.config.latent_dim, device=self.device)
                key_cond = torch.tensor([key_idx], device=self.device)
                gen = self.generator(z, key_cond).cpu().numpy().squeeze(0)
                binary = (gen > 0.5).astype(int)
                self.midi_converter.piano_roll_to_midi(
                    binary, f"{self.config.output_dir}/generated_final_{key_names[key_idx]}.mid")


In [None]:
import pretty_midi
import numpy as np
import torch  # Added import for torch
import os


class DataLoader:
    """Loads MIDI files from a folder as binary piano rolls with key labels.

    Args:
        folder: Directory that is scanned (non-recursively) for MIDI files.
        max_files: Maximum number of MIDI files to read; ``None`` means no limit.
    """

    # Key profiles for the 12 pitch classes, tonic first. Rolling a profile by
    # ``i`` positions yields the profile of the key whose tonic is pitch class ``i``.
    _MAJOR_PROFILE = np.array(
        [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
    _MINOR_PROFILE = np.array(
        [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])

    def __init__(self, folder, max_files=1000):
        self.folder = folder
        self.max_files = max_files

    def estimate_key(self, midi_data):
        """Estimate the musical key of a MIDI object from its chroma distribution.

        The time-summed chroma vector is compared with every rotation of the
        major and minor key profiles using the Pearson correlation. Pearson
        normalisation matters: a raw dot product favours whichever profile has
        the larger total weight (the minor one), regardless of the music.

        Args:
            midi_data: Object providing ``get_chroma(fs=...)`` that returns a
                (12, T) array.

        Returns:
            Key index in 0-23: 0-11 are major keys, 12-23 minor keys, by tonic
            pitch class. Returns 0 (C major) when the chroma carries no
            information (silent or perfectly flat) or when processing fails.
        """
        try:
            chroma = midi_data.get_chroma(fs=8)
            chroma_sum = np.sum(chroma, axis=1)
            centered = chroma_sum - chroma_sum.mean()
            chroma_norm = np.linalg.norm(centered)
            if not np.isfinite(chroma_norm) or chroma_norm == 0:
                return 0  # no tonal information: default to C major

            best_key, best_score = 0, -np.inf
            for mode_offset, profile in ((0, self._MAJOR_PROFILE), (12, self._MINOR_PROFILE)):
                profile_centered = profile - profile.mean()
                profile_centered = profile_centered / \
                    np.linalg.norm(profile_centered)
                for tonic in range(12):
                    score = float(
                        np.dot(centered, np.roll(profile_centered, tonic))) / chroma_norm
                    # Strict '>' keeps the first (major, lowest tonic) key on ties.
                    if score > best_score:
                        best_key, best_score = tonic + mode_offset, score
            return best_key
        except Exception as e:
            print(f"Error estimating key: {e}")
            return 0  # Default to C major

    def _piano_roll_from_midi(self, midi_data, fs=8, n_notes=128, length=32):
        """Convert a parsed MIDI object to a binary (n_notes, length) float32 roll."""
        piano_roll = midi_data.get_piano_roll(fs=fs)
        piano_roll = (piano_roll > 0).astype(np.float32)
        if piano_roll.shape[1] < length:
            # Too short: pad the time axis with silence.
            pad = length - piano_roll.shape[1]
            piano_roll = np.pad(
                piano_roll, ((0, 0), (0, pad)), mode='constant')
        else:
            # Long enough: keep only the first `length` frames.
            piano_roll = piano_roll[:, :length]
        if piano_roll.shape != (n_notes, length):
            raise ValueError(
                f"Invalid piano roll shape: {piano_roll.shape}, expected ({n_notes}, {length})")
        return piano_roll

    def midi_to_piano_roll(self, file_path, fs=8, n_notes=128, length=32):
        """Read a MIDI file and return its binary piano roll.

        The roll is sampled at ``fs`` frames per second and padded with zeros
        or truncated so that it has exactly ``length`` frames.

        Args:
            file_path: Path of the MIDI file.
            fs: Sampling frequency of the piano roll.
            n_notes: Expected number of pitch rows.
            length: Number of time frames to return.

        Returns:
            float32 array of shape (n_notes, length), or ``None`` if the file
            cannot be processed (the error is printed).
        """
        try:
            midi_data = pretty_midi.PrettyMIDI(file_path)
            return self._piano_roll_from_midi(midi_data, fs, n_notes, length)
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            return None

    # Data set -> https://codeload.github.com/jukedeck/nottingham-dataset/zip/refs/heads/master
    def load_data(self):
        """Load up to ``max_files`` MIDI files from ``folder``.

        Files are visited in sorted name order so repeated runs select the same
        subset, and extensions are matched case-insensitively. Each file is
        parsed once and reused for both the piano roll and the key estimate.
        Unreadable files are reported and skipped.

        Returns:
            Tuple ``(rolls, keys)``: a float32 tensor of shape (N, 128, 32) and
            a long tensor of shape (N,) with key indices.

        Raises:
            ValueError: If no MIDI file could be loaded.
        """
        midi_files = sorted(
            name for name in os.listdir(self.folder)
            if name.lower().endswith((".mid", ".midi")))
        # The limit applies to MIDI files only, not to every directory entry.
        limit = None if self.max_files is None else max(self.max_files, 0)

        X, keys = [], []
        for file in midi_files[:limit]:
            file_path = os.path.join(self.folder, file)
            try:
                midi_data = pretty_midi.PrettyMIDI(file_path)
                roll = self._piano_roll_from_midi(midi_data)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
                continue
            X.append(roll)
            keys.append(self.estimate_key(midi_data))
        if not X:
            raise ValueError("No valid MIDI files loaded")
        X = np.stack(X)
        assert X.shape[1:] == (
            128, 32), f"Loaded data shape mismatch: {X.shape}"
        return torch.tensor(X, dtype=torch.float32).reshape(-1, 128, 32), torch.tensor(keys, dtype=torch.long)


class MidiConverter:
    """Writes binary piano rolls to MIDI files using pretty_midi."""

    def piano_roll_to_midi(self, piano_roll, output_path, fs=8, min_duration=2):
        """Convert a (128, 32) piano roll to a single-instrument MIDI file.

        Every maximal run of non-zero cells in a pitch row becomes one note
        with velocity 100. Runs shorter than ``min_duration`` frames are
        dropped.

        Args:
            piano_roll: Array of shape (128, 32); a flat array of 4096 values
                is reshaped automatically. Any other shape is reported and
                nothing is written.
            output_path: Destination path of the MIDI file.
            fs: Frames per second of the piano roll (one frame = 1 / fs seconds).
            min_duration: Minimum note length, in frames, to keep.
        """
        if len(piano_roll.shape) == 1 and piano_roll.shape[0] == 128 * 32:
            print("Auto reshaping piano roll from (4096,) to (128, 32)")
            piano_roll = piano_roll.reshape(128, 32)
        if piano_roll.shape != (128, 32):
            print("Error: piano_roll should be shape (128, 32). Got:",
                  piano_roll.shape)
            return
        midi = pretty_midi.PrettyMIDI()
        instrument = pretty_midi.Instrument(
            program=0, name="Acoustic Grand Piano")
        time_step = 1.0 / fs
        n_pitches, n_steps = piano_roll.shape

        def add_note(pitch, start, end):
            # Keep only runs of at least `min_duration` frames.
            if end - start >= min_duration:
                instrument.notes.append(pretty_midi.Note(
                    velocity=100,
                    pitch=pitch,
                    start=start * time_step,
                    end=end * time_step
                ))

        for pitch in range(n_pitches):
            start = None  # frame index where the currently open note began
            for t in range(n_steps):
                value = piano_roll[pitch, t]
                if value > 0 and start is None:
                    start = t
                elif value == 0 and start is not None:
                    add_note(pitch, start, t)
                    start = None
            # Close a note still sounding at the end of the roll, including
            # one that only started on the very last frame.
            if start is not None:
                add_note(pitch, start, n_steps)
        midi.instruments.append(instrument)
        midi.write(output_path)


In [None]:
class Config:
    """Hyperparameters and paths for the MIDI GAN.

    Calling ``Config()`` yields the defaults below. Any of them can be
    overridden per instance with keyword arguments, e.g.
    ``Config(epochs=10, data_dir="data")``, so machine-specific paths do not
    have to be edited in place.

    Raises:
        TypeError: If an override names a setting that does not exist.
    """

    def __init__(self, **overrides):
        # Model dimensions
        self.latent_dim = 256
        self.note_dim = 128
        self.time_steps = 32
        self.num_keys = 24
        # Training schedule
        self.batch_size = 64
        self.epochs = 5000
        self.n_critic = 5
        # Adam optimizer settings
        self.lr = 0.0001
        self.beta1 = 0.5
        self.beta2 = 0.9
        self.gradient_penalty_weight = 10
        # Input / output locations (absolute, machine-specific defaults)
        self.output_dir = "C:/mtechpracticals/semester-3/gen-ai/genai-midi-generator-v1-pynb/output"
        self.data_dir = "C:/mtechpracticals/semester-3/gen-ai/genai-midi-generator-v3-oop/data"
        self.max_files = 1000

        # Apply overrides last; reject unknown names so typos are not silently ignored.
        for name, value in overrides.items():
            if not hasattr(self, name):
                raise TypeError(f"Unknown config option: {name!r}")
            setattr(self, name, value)


In [None]:

# Notebook entry point (the Python counterpart of Java's
# `public static void main(String[] args)`); command-line arguments are
# intentionally not handled yet.
if __name__ == "__main__":
    # Seed the NumPy and PyTorch RNGs so weight initialisation, batch sampling
    # and generated samples are reproducible; change SEED for a different run.
    SEED = 42
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    config = Config()  # Create an instance of the Config class
    trainer = GANTrainer(config)
    trainer.train()
