In [1]:
# Tim: Make source/target folders. I will go for some soundfiles for two different instruments (keyboard and brass)
import torch
from mlp64 import data
from mlp64 import experiment
from mlp64 import models
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import Audio
import soundfile as sf

# Location of the local nsynth-test split (contains examples.json and the audio/ folder).
path = Path("/home/tim/Desktop/MLP64/dataset/nsynth-test/")

# First set of notes: acoustic keyboard, "distortion"/"percussive" qualities excluded.
# velocity=[75, 75] is presumably an inclusive [min, max] range -- confirm in mlp64.data.
first = data.create_dataset_df(
    path / "examples.json",
    instrument_family_str="keyboard",
    instrument_source_str="acoustic",
    exclude_qualities=["distortion", "percussive"],
    velocity=[75, 75],
)

# Second set of notes: same filters for acoustic brass ...
second = data.create_dataset_df(
    path / "examples.json",
    instrument_family_str="brass",
    instrument_source_str="acoustic",
    exclude_qualities=["distortion", "percussive"],
    velocity=[75, 75],
)
# ... narrowed down to the single instrument "brass_acoustic_046".
second = second.loc[second["instrument_str"] == "brass_acoustic_046"]

JSON loaded into DataFrame!
JSON loaded into DataFrame!


In [6]:
# Scratch check: a two-row table with named columns whose index is labelled "Epoch".
a = [[1, 2, 3], [2, 3, 4]]
b = pd.DataFrame(data=a, columns=["Total", "AC", "Freq"]).rename_axis("Epoch")
b

Unnamed: 0_level_0,Total,AC,Freq
Epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,2,3
1,2,3,4


In [9]:
from time import time

In [11]:
# Time the sounddevice import: take a timestamp, run the import, then display the elapsed seconds.
# Relies on `time` having been imported (from time import time) in an earlier cell.
a = time()
import sounddevice as sd
time() - a

0.4337301254272461

In [2]:
# Load the notes of instrument 327 ("distortion"/"percussive" qualities excluded)
# from the test split and the validation split, in that order.
rawpath = "/home/tim/Desktop/MLP64/dataset/nsynth-{}/"
test, valid = (
    data.create_dataset_df(
        Path(rawpath.format(split)) / "examples.json",
        instrument=327,
        exclude_qualities=["distortion", "percussive"],
    )
    for split in ("test", "valid")
)

JSON loaded into DataFrame!
JSON loaded into DataFrame!


In [3]:
# Stack the test rows and the validation rows into one frame (row labels are kept as they are).
df = pd.concat([test, valid])

In [4]:
# Distinct pitches that exist at velocity 75, listed in ascending order.
df.loc[df["velocity"] == 75, "pitch"].sort_values().unique()

array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
       38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
       55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
       72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
       89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 102, 104, 105, 106,
       108], dtype=object)

In [5]:
# Copy files
# The notes selected in `first` go to ../baselines/melganvc/a and the notes selected in
# `second` go to ../baselines/melganvc/b. Sources are read from the audio/ folder under `path`.
import shutil

for notes_df, target_dir in (
    (first, Path("../baselines/melganvc/a")),
    (second, Path("../baselines/melganvc/b")),
):
    # shutil.copy does not create missing folders, so make sure the destination exists.
    target_dir.mkdir(parents=True, exist_ok=True)
    for f in data.dftostr(notes_df):
        shutil.copy(path / "audio" / f, target_dir / f)

In [6]:
from torch.utils.data import Dataset
import torchaudio as ta

class NSynth(Dataset):
    """PyTorch Dataset for the NSynth dataset.

    Each item is a 1-D waveform made of ``n`` NSynth notes placed back to back: the note at the requested
    index followed by ``n - 1`` notes drawn at random (with replacement) from the same dataset.
    """

    # Every NSynth note has 64000 values
    NOTE_LENGTH = 64000

    def __init__(self, root, df: pd.DataFrame, target_field: str, transform=None, normalise=True, n=1):
        """
        Creates NSynth dataset for PyTorch. Concatenates n notes to create small tunes. Set seed before loading to
        ensure reproducibility (the extra notes are drawn with np.random).
        :param root: Path or str to an nsynth-.../audio folder
        :param df: output of create_dataset_df(..). It is not modified; when target_field is "instrument_class"
                   the dataset keeps its own copy with the extra "instrument_class" column in self.df.
        :param target_field: one from ["instrument", "instrument_source", "instrument_family", "instrument_class"] to
                            make class labels based on instrument (out of the 1006 instrument), source (acoustic,
                            electronic or synthetic), family (out of the 10: guitar, reed, ...) or the 33 possibilites
                            when using both source and family (instrument_class). As there are e.g. no acoustic synth-
                            lead instruments, there will be a total of 28 classes (in the nsynth-train dataset).
        :param transform: function used to transform input data (e.g. torchaudio.transforms.MelSpectrogram())
        :param normalise: forwarded to torchaudio.load as its `normalization` argument
        :param n: number of notes concatenated into one item (must be at least 1)
        """
        if isinstance(root, (str, Path)):
            root = Path(root)
            # .name is "" for an empty path, so the assertion fires instead of an IndexError.
            assert root.name == "audio", "root needs to point to the nsynth-***/audio folder."
        assert target_field in ["instrument", "instrument_source", "instrument_family", "instrument_class"]
        # With n < 1 the first note would not fit into the output buffer of __getitem__.
        assert n >= 1, "n needs to be at least 1."

        self.root = root
        self.filenames = dftostr(df)

        if target_field == "instrument_class":
            # Encode source and family together to make 33 distinct classes.
            # assign() returns a new frame, so the caller's DataFrame does not get an extra column.
            df = df.assign(instrument_class=df["instrument_source"] * 11 + df["instrument_family"])
        self.df = df

        self.targets = df[target_field].to_list()
        self.transform = transform
        self.normalise = normalise
        self.n = n

    def __len__(self):
        """Number of notes (rows of the DataFrame) in the dataset."""
        return len(self.filenames)

    def __getitem__(self, index):
        """
        Loads the note at `index`, appends n - 1 randomly chosen notes and applies the optional transform.
        :param index: position of the first note in self.filenames
        :return: (waveform with NOTE_LENGTH * n values, or its transformed version; target of the note at `index`)
        """
        note_len = self.NOTE_LENGTH
        x = torch.empty(note_len * self.n)
        # NOTE(review): `normalization` is the keyword used by the torchaudio version this notebook ran with;
        # newer releases may expect `normalize` instead -- confirm against the installed version.
        x[:note_len], _ = ta.load(self.root / self.filenames[index], normalization=self.normalise)
        # Choose self.n - 1 other notes to concatenate (drawn with replacement; the first note may repeat)
        extra_notes = np.random.choice(len(self.filenames), self.n - 1)
        for slot, i in enumerate(extra_notes, start=1):
            x[note_len * slot:note_len * (slot + 1)], _ = ta.load(
                self.root / self.filenames[i], normalization=self.normalise
            )
        if self.transform is not None:
            x = self.transform(x)
        return x, self.targets[index]
    
def dftostr(df: pd.DataFrame):
    """
    Turns the notes of a DataFrame into their NSynth audio filenames
    (e.g. ["bass_synthetic_033-022-050.wav", ...]).
    :param df: output of create_dataset_df(..); only its "note_str" column is read
    :return: list with one "<note_str>.wav" string per row, in row order
    """
    return [note + ".wav" for note in df["note_str"]]


In [11]:
# Dataset over the notes in `test`, labelled by instrument; every item concatenates 5 notes.
ds = NSynth(root=path / "audio", df=test, target_field="instrument", n=5)

In [19]:
# Waveform of item 4 (first element of the (waveform, target) pair) as a NumPy array.
# The appended notes are drawn at random, so the result changes between runs unless np.random is seeded.
a = ds[4][0].numpy()

In [20]:
# Inline player for the generated waveform at 16 kHz (presumably the NSynth sample rate -- confirm).
Audio(data=a, rate=16000)

In [25]:
# Save the generated waveform as a wav file, using the same 16 kHz rate as the preview.
sf.write(file="../baselines/keys_5n.wav", data=a, samplerate=16000)