In [9]:
import os
import json
import math
import sys
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

import wandb

from accelerate import Accelerator

from config import Config
from audiomodel import AudioProcessing
from audiodataset import AudioDataset, TestDataset

def make_dir(path):
    """Create directory ``path`` (and any missing parents) if it does not exist.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race: when several processes
    # (e.g. spawned by an accelerate launcher) call this concurrently, a
    # separate os.path.exists() test can pass in two of them and the slower
    # os.makedirs() call would then raise.
    os.makedirs(path, exist_ok=True)

def build_model(cfg):
    """Load the pretrained 'facebook/audiogen-medium' compression model and LM.

    Args:
        cfg: Configuration object; only ``cfg.device`` is read here and is
            passed to both audiocraft loaders as the target device.

    Returns:
        A ``(compression_model, lm)`` tuple as returned by audiocraft's
        ``load_compression_model`` and ``load_lm_model``.
    """
    # Imported lazily so that merely defining this function does not require
    # audiocraft to be installed/importable.
    from audiocraft.models.loaders import load_compression_model, load_lm_model

    compression_model = load_compression_model('facebook/audiogen-medium', device=cfg.device)
    lm = load_lm_model('facebook/audiogen-medium', device=cfg.device)
    return compression_model, lm
    
def main(prompts=["beep"]):
    """Run inference for the given prompts and play each generated clip inline.

    Builds the config, loads the pretrained AudioGen components, restores the
    ``footsteps.pth`` weights from ``cfg.output_dir`` into ``AudioProcessing``,
    then iterates over ``TestDataset`` and displays the generated audio in the
    notebook. Generation and display happen only on the main process.

    Args:
        prompts: Iterable of prompt strings; a copy is stored in
            ``cfg.prompts``. The default list is never mutated.
    """
    cfg = Config()
    cfg.update(**{"prompts": list(prompts)})

    accelerator = Accelerator(gradient_accumulation_steps=cfg.gradient_accumulation_steps)
    save_path = "./test"
    make_dir(save_path)
    cfg.update(**{"save_path": save_path})

    compression_model, lm = build_model(cfg)
    model = AudioProcessing(cfg, lm)

    test_dataset = TestDataset(cfg)
    test_dataloader = DataLoader(test_dataset, batch_size=1)

    model, compression_model = accelerator.prepare(model, compression_model)
    model_path = os.path.join(cfg.output_dir, "footsteps.pth")
    # map_location remaps tensors that were saved on another device (e.g. a
    # different GPU index, or GPU -> CPU) onto the device this process uses.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location=accelerator.device)
    model.load_state_dict(state_dict)

    model.eval()
    compression_model.eval()
    if accelerator.is_main_process:
        # Imported once, outside the loop. `display` is imported explicitly
        # rather than relying on the IPython-injected builtin.
        from IPython.display import Audio, display

        unwrapped_model = accelerator.unwrap_model(model)
        unwrapped_vae = accelerator.unwrap_model(compression_model)
        # Pure inference: disable autograd so no graph/activations are kept.
        with torch.no_grad():
            for test_step, batch in enumerate(test_dataloader):
                gen_tokens, gen_audio = unwrapped_model.inference(batch, unwrapped_vae)
                # To write the clip to disk instead of (or in addition to)
                # playing it, the model presumably exposes
                # save_audio(gen_audio, filename, cfg) -- TODO confirm.
                print(gen_audio.shape)
                # batch_size is 1, so index 0 is the only generated clip.
                display(Audio(data=gen_audio[0].detach().cpu().numpy(), rate=cfg.sample_rate))

In [None]:
from accelerate import notebook_launcher

# Four copies of the same text prompt; main() stores a copy in cfg.prompts.
prompts = ["footsteps" for _ in range(4)]
# notebook_launcher expects the positional arguments for main() as a tuple.
args = (prompts,)
# Launch main() from inside the notebook with a single process.
notebook_launcher(main, args=args, num_processes=1)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Launching training on one GPU.




In [21]:
# Play back a previously saved clip from disk as the cell's rich output.
# NOTE(review): the path is hard-coded and no cell visible here writes to
# ./generated_audios3/ -- confirm the file exists before re-running.
from IPython.display import Audio
Audio("./generated_audios3/epoch_6_4.wav")