In [None]:
import sys
import os
import torch
from einops import rearrange
import numpy as np
import torchaudio
import matplotlib.pyplot as plt

from scipy.io.wavfile import write

sys.path.append('../')

from src.datamodule.maestro_datamodule import MaestroDataModule

# Maestro Reference

In [None]:
# Build the MAESTRO datamodule; the dataset location is read from the
# MAESTRO_DATASET_DIR environment variable (KeyError if it is not set).
# sample_length=44100*5 is presumably 5 s of audio at 44.1 kHz, the rate used
# when the clips are saved further down — confirm against MaestroDataModule.
datamodule = MaestroDataModule(root_dir=os.environ["MAESTRO_DATASET_DIR"], batch_size=64, num_workers=4, sample_length=44100*5)
datamodule.setup()
dataloader = datamodule.val_dataloader()
# NOTE(review): if this is a torch.utils.data.DataLoader, assigning `.shuffle`
# after construction does not change its sampler, so this line likely has no
# effect — confirm, and enable shuffling where the loader is built if random
# validation clips are intended.
dataloader.shuffle = True
data_iter = iter(dataloader)
# Pull a single batch; the following cells reuse it through the name `batch`.
batch = next(data_iter)


In [None]:

# Export every clip of the reference batch as its own 44.1 kHz WAV file.
reference_dir = "../eval/maestro/test"
# torchaudio.save does not create missing directories, so make sure it exists.
os.makedirs(reference_dir, exist_ok=True)
for i, audio in enumerate(batch):
    # Swap the two axes before saving; assumes each clip is (time, channels)
    # while torchaudio.save expects (channels, time) — TODO confirm.
    audio = audio.transpose(0, 1)
    torchaudio.save(f"{reference_dir}/test_{i}.wav", audio, sample_rate=44100)

## MAESTRO LVL1

In [None]:
from src.model.jukebox_vqvae import JukeboxVQVAEModel

# The model is placed on the CPU; `batch` is used exactly as the dataloader
# returned it (no device transfer).
vqvae = JukeboxVQVAEModel().to("cpu")
vqvae.eval()

# Encode/decode round trip; the second argument is presumably the VQ-VAE
# level (1 here, matching the section heading) — confirm in JukeboxVQVAEModel.
# This is pure inference, so run it under no_grad: no autograd graph is built
# for the whole batch and the decoded tensors can be written to disk directly.
with torch.no_grad():
    embeddings = vqvae.encode(batch, 1)
    batch_lvl1 = vqvae.decode(embeddings, 1)

recon_dir = "../eval/maestro/validation_lvl1"
# torchaudio.save does not create missing directories, so make sure it exists.
os.makedirs(recon_dir, exist_ok=True)
for i, audio in enumerate(batch_lvl1):
    # Same axis swap as for the reference clips before saving.
    audio = audio.transpose(0, 1)
    torchaudio.save(f"{recon_dir}/test_{i}.wav", audio, sample_rate=44100)

## MAESTRO LVL0

In [None]:
from src.model.jukebox_vqvae import JukeboxVQVAEModel

# The model is placed on the CPU; `batch` is used exactly as the dataloader
# returned it (no device transfer).
vqvae = JukeboxVQVAEModel().to("cpu")
vqvae.eval()

# Encode/decode round trip; the second argument is presumably the VQ-VAE
# level (0 here, matching the section heading) — confirm in JukeboxVQVAEModel.
# This is pure inference, so run it under no_grad: no autograd graph is built
# for the whole batch and the decoded tensors can be written to disk directly.
with torch.no_grad():
    embeddings = vqvae.encode(batch, 0)
    # NOTE(review): the result keeps the name `batch_lvl1` although it holds
    # the level-0 reconstruction; it replaces the level-1 tensor of that name.
    batch_lvl1 = vqvae.decode(embeddings, 0)

# NOTE(review): the level-0 clips are written into validation_lvl1 under the
# same file names the level-1 cell uses, so any level-1 files still named
# test_<i>.wav in that directory are overwritten. A later cell moves the files
# without "16k" in their name from here to validation_lvl0.
recon_dir = "../eval/maestro/validation_lvl1"
# torchaudio.save does not create missing directories, so make sure it exists.
os.makedirs(recon_dir, exist_ok=True)
for i, audio in enumerate(batch_lvl1):
    # Same axis swap as for the reference clips before saving.
    audio = audio.transpose(0, 1)
    torchaudio.save(f"{recon_dir}/test_{i}.wav", audio, sample_rate=44100)

In [None]:
# Move all files from eval/maestro/validation_lvl1 whose name does not contain
# "16k" to eval/maestro/validation_lvl0.

# Imported here because this cell runs before the cell that imports Path;
# without it a fresh "Restart & Run All" stops with a NameError.
from pathlib import Path

src_dir = Path("../eval/maestro/validation_lvl1")
dst_dir = Path("../eval/maestro/validation_lvl0")

# Snapshot the listing first: the directory is modified while we loop over it.
for f in list(src_dir.glob("*")):
    if "16k" not in f.name:
        # Path.rename fails when the target directory is missing; create it
        # lazily so nothing is created when there is nothing to move.
        dst_dir.mkdir(parents=True, exist_ok=True)
        f.rename(dst_dir / f.name)

In [None]:
from pathlib import Path

# Root folder of the evaluation audio; the cells below glob the files inside
# its sub-directories ("*/*") relative to this path.
p = Path("../eval/maestro/")

In [None]:
# Clean-up pass over eval/maestro/<subdir>/*: delete every file that either
#   * has no "16k" in its name, or
#   * carries a doubled suffix ("16k.wav_16k" / "16k_16k"), presumably left
#     behind by resampling an already resampled file.
# WARNING: destructive — the matching files are removed permanently.
for f in list(p.glob("*/*")):
    if f.is_dir():
        # "*/*" also matches nested directories; unlink() would fail on them.
        continue
    lacks_16k_tag = "16k" not in f.name
    has_doubled_tag = "16k.wav_16k" in f.name or "16k_16k" in f.name
    # One combined test and a single unlink(): a name matching both doubled
    # patterns used to be unlinked twice, raising FileNotFoundError.
    if lacks_16k_tag or has_doubled_tag:
        f.unlink()

In [None]:
# Tidy up the resampled file names, e.g. "test_20.wav_16k.wav" becomes
# "test_20_16k.wav". Names without ".wav_16k" map to themselves.
for f in p.glob("*/*"):
    tidy_name = f.name.replace(".wav_16k", "_16k")
    f.rename(f.parent / tidy_name)

In [None]:
def resample_files(source_dir, ffmpeg_bin=None):
    """Convert every full-rate WAV below ``source_dir`` to 16 kHz mono 16-bit PCM.

    Each ``<name>.wav`` whose file name does not contain ``"16k"`` is converted
    with ffmpeg into ``<name>_16k.wav`` in the same directory. The source file
    is deleted only after ffmpeg exited successfully and the output file
    exists, so a failed conversion never destroys the original recording.
    Files whose ``_16k`` counterpart already exists are skipped and left
    untouched.

    Args:
        source_dir: Directory that is searched recursively for ``*.wav`` files.
        ffmpeg_bin: Path or name of the ffmpeg executable. When ``None`` the
            previously hard-coded miniconda location is used if it exists,
            otherwise ffmpeg is looked up on ``PATH``.

    Raises:
        FileNotFoundError: If a file needs converting but no ffmpeg executable
            can be located.
    """
    # Local imports: the notebook's import cell does not provide these modules.
    import shutil
    import subprocess

    legacy_ffmpeg = "/usr/stud/steiger/miniconda3/bin/ffmpeg"

    for wav_path in sorted(Path(source_dir).glob("**/*.wav")):
        if "16k" in wav_path.name:
            # Already a resampled file (or at least tagged as one).
            continue

        target = wav_path.parent / f"{wav_path.stem}_16k.wav"
        if target.exists():
            continue

        if ffmpeg_bin is None:
            # Resolve lazily so a directory with nothing to convert never
            # needs ffmpeg to be installed.
            ffmpeg_bin = legacy_ffmpeg if os.path.isfile(legacy_ffmpeg) else shutil.which("ffmpeg")
            if ffmpeg_bin is None:
                raise FileNotFoundError(
                    "ffmpeg executable not found; install it or pass ffmpeg_bin explicitly"
                )

        # Equivalent shell command:
        #   ffmpeg -i '{}' -ar 16000 -ac 1 -vn -c:a pcm_s16le -y '{}'
        print(f"Resampling {wav_path} to 16k")
        # Argument list instead of a shell string: file names containing quotes
        # or other shell metacharacters are passed through verbatim.
        result = subprocess.run(
            [
                str(ffmpeg_bin),
                "-i", str(wav_path),
                "-ar", "16000",
                "-ac", "1",
                "-vn",
                "-c:a", "pcm_s16le",
                "-y", str(target),
            ],
            check=False,
        )

        if result.returncode != 0 or not target.exists():
            # Keep the source so the conversion can be retried.
            print(f"ffmpeg failed for {wav_path} (exit code {result.returncode}); keeping the original")
            continue

        os.remove(wav_path)


In [None]:
# Convert the WAV files in validation_lvl0 to 16 kHz mono. Note that
# resample_files removes each full-rate source file once it has been processed.
resample_files("../eval/maestro/validation_lvl0")