In [1]:
import numpy as np
import pandas as pd
import os
import torch
from torch.utils.data import Dataset, DataLoader
from multiprocessing import Pool
import pretty_midi as pm
import fluidsynth as fs

In [7]:
# Report whether PyTorch can reach CUDA and, if so, name each visible device
if not torch.cuda.is_available():
    print("PyTorch does not have access to CUDA.")
else:
    device_count = torch.cuda.device_count()
    print(f"PyTorch has access to {device_count} CUDA device(s).")

    # One line per device index reported by torch.cuda
    for i in range(device_count):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")

PyTorch has access to 1 CUDA device(s).
Device 0: NVIDIA GeForce RTX 4060 Laptop GPU


In [18]:
# Load the dataset using torch.utils.data.Dataset
class LoadData(Dataset):
    """Dataset that serves MIDI piano rolls and labels listed in a CSV file.

    The CSV is loaded once with pandas at construction time. Each row is
    read through its "midi_file" and "label" columns when it is indexed.

    Args:
        csv_file: CSV source handed straight to ``pandas.read_csv``.
        transform: Optional callable; when truthy it receives the sample
            dict and its return value is what indexing yields.
    """

    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the loaded CSV."""
        return len(self.data.index)

    def __getitem__(self, idx):
        """Build the sample for row ``idx``.

        Args:
            idx: Positional row index; a tensor index is converted with
                ``tolist()`` before the lookup.

        Returns:
            A dict with "midi" (tensor made from the array returned by
            ``get_piano_roll()``) and "label" (tensor made from the row's
            label), or whatever ``transform`` returns for that dict.
        """
        # Tensor indices become plain Python values before the positional lookup
        position = idx.tolist() if torch.is_tensor(idx) else idx
        record = self.data.iloc[position]

        # Parse the MIDI file named by the row and render its piano roll
        piano_roll = pm.PrettyMIDI(record["midi_file"]).get_piano_roll()

        sample = {
            "midi": torch.from_numpy(piano_roll),
            "label": torch.tensor(record["label"]),
        }

        # Without a (truthy) transform the raw sample is returned unchanged
        if not self.transform:
            return sample
        return self.transform(sample)

In [None]:
# Build the dataset from the preprocessed CSV and wrap it in a shuffling DataLoader.
# os.path.join keeps the relative path valid on every OS; the earlier literal
# hard-coded Windows backslashes, which other platforms treat as part of the
# file name. On Windows the resulting string is unchanged.
im = os.path.join("Data", "Preprocessed", "preprocessed_data.csv")

data = LoadData(im)
batch_size = 10

# NOTE(review): no collate_fn is passed, so default collation is used — presumably
# that needs every sample's "midi" tensor to share one shape; confirm the
# preprocessed files guarantee this, otherwise supply a padding collate_fn.
dataLoad = DataLoader(data, batch_size=batch_size, shuffle=True)