In [None]:
import gc
import json
import math
import os
import sys

import numpy as np
import torch
import torch.nn as nn
from accelerate import Accelerator
from audiotools import AudioSignal
from IPython.display import Audio, display
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from audiodataset import AudioDataset, TestDataset
from audiomodel import AudioProcessing
from config import Config

def make_dir(path):
    """Create directory ``path`` (including missing parents) if it is absent.

    Uses ``exist_ok=True`` instead of a separate existence check so that the
    call is idempotent and cannot fail when another process creates the
    directory between the check and the creation.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

def build_model(cfg, model_name='facebook/audiogen-medium'):
    """Load the pretrained compression model and language model.

    Both models are loaded from the same checkpoint identifier and placed on
    ``cfg.device``. No optimizer is created here.

    Args:
        cfg: Configuration object; only ``cfg.device`` is read.
        model_name: Checkpoint identifier passed to both audiocraft loaders.
            Defaults to the AudioGen medium checkpoint used by this notebook.

    Returns:
        Tuple ``(compression_model, lm)``.
    """
    # Imported lazily so that audiocraft is only required when models are built.
    from audiocraft.models.loaders import load_compression_model, load_lm_model

    compression_model = load_compression_model(model_name, device=cfg.device)
    lm = load_lm_model(model_name, device=cfg.device)
    return compression_model, lm

In [None]:
# Text prompts used as generation inputs by the evaluation cells in this
# notebook. Those cells expand the list so that every prompt appears three
# times in cfg.prompts before the test dataset is built.
# NOTE: these strings are fed to the model verbatim; do not "correct" their
# wording without intending to change the generated audio.
prompts = [
    "frog followed by woosh",
    "rain, frog",
    "fire, wood building falling",
    "gun sound and then child crying",
    "crying monkey",
    "busy office, ambience",
    "restaurant kitchen, ambience",
    "big, church bell",
    "cartoon, crying",
    "morning alarm",
    "machine exploding, parts falling",
    "missile firing and exploding",
    "whoosh, ice",
    "dragon wings flapping",
    "biting an apple",
    "walking on the shallow water",
    "engine starting up",
    "violin, concert hall",
    "rolling dice",
    "wine glass falling",
    "printer printing paper",
    "laser gun",
    "multimedia, notification",
    "truck accelerating",
    "frying chicken in a oil",
    "running, basketball court",
    "lightning hits tree",
    "rifle reloading",
    "fart sound machine gun",
    "crickets chirping",
    "chainsaw cutting tree",
    "girl, whispering"
]

In [None]:
def main(prompts=None, n=5, repeats=3):
    """Generate audio for each prompt with the ``./compare/44.pth`` checkpoint.

    Builds the pretrained models, loads the fine-tuned weights, runs inference
    over a ``TestDataset`` built from the prompts, saves every clip under
    ``./test22`` and plays it inline in the notebook.

    Args:
        prompts: List of text prompts. Defaults to ``["beep"]``.
        n: Upper bound of the cycling filename suffix; clips are named
            ``{prompt}_{k}.wav`` with ``k`` running 1..n and then wrapping.
            (The original hard-coded this bound as 5 and ignored ``n``.)
        repeats: How many times each prompt is duplicated in ``cfg.prompts``.

    NOTE(review): the suffix counter runs over the whole dataset, not per
    prompt, so with ``repeats != n`` the suffix is not the per-prompt sample
    index (e.g. the second prompt gets 4, 5, 1 with the defaults). Names stay
    unique per prompt as long as ``repeats <= n``.
    """
    # Avoid a shared mutable default argument.
    if prompts is None:
        prompts = ["beep"]

    cfg = Config()
    cfg.update(prompts=[p for p in prompts for _ in range(repeats)])

    accelerator = Accelerator(gradient_accumulation_steps=cfg.gradient_accumulation_steps)
    save_path = "./test22"
    make_dir(save_path)
    cfg.update(save_path=save_path)

    compression_model, lm = build_model(cfg)
    model = AudioProcessing(cfg, lm)

    test_dataset = TestDataset(cfg)
    test_dataloader = DataLoader(test_dataset, batch_size=1)

    model, compression_model = accelerator.prepare(model, compression_model)
    model_path = "./compare/44.pth"
    # Deserialize on CPU; load_state_dict copies the tensors onto the model's
    # own device, so the checkpoint never needs its original device to exist.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))

    model.eval()
    compression_model.eval()
    if not accelerator.is_main_process:
        return

    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_vae = accelerator.unwrap_model(compression_model)
    audio_num = 1
    # Inference only: do not record autograd graphs for the generated audio.
    with torch.no_grad():
        for batch in test_dataloader:
            _, gen_audio = unwrapped_model.inference(batch, unwrapped_vae)
            prompt = batch[0]
            print(prompt)
            audio_filename = f"{prompt}_{audio_num}.wav"
            unwrapped_model.save_audio(gen_audio, audio_filename, cfg)
            display(Audio(data=gen_audio[0].detach().cpu().numpy(), rate=cfg.sample_rate))
            audio_num += 1
            if audio_num > n:
                audio_num = 1

In [None]:
# Evaluate checkpoint ./compare/base_19.pth; clips are written to ./test_mix.
torch.cuda.empty_cache()

cfg = Config()
# Every prompt appears three times in cfg.prompts.
cfg.update(prompts=[p for p in prompts for _ in range(3)])

accelerator = Accelerator(gradient_accumulation_steps=cfg.gradient_accumulation_steps)
save_path = "./test_mix"
make_dir(save_path)
cfg.update(save_path=save_path)

compression_model, lm = build_model(cfg)
model = AudioProcessing(cfg, lm)

test_dataset = TestDataset(cfg)
test_dataloader = DataLoader(test_dataset, batch_size=1)

model, compression_model = accelerator.prepare(model, compression_model)
model_path = "./compare/base_19.pth"
# Deserialize on CPU; load_state_dict copies tensors onto the model's device.
model.load_state_dict(torch.load(model_path, map_location="cpu"))

model.eval()
compression_model.eval()
if accelerator.is_main_process:
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_vae = accelerator.unwrap_model(compression_model)
    audio_num = 1
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # tqdm wraps the dataloader (not enumerate) so the bar knows the total.
        for test_step, batch in enumerate(tqdm(test_dataloader)):
            gen_tokens, gen_audio = unwrapped_model.inference(batch, unwrapped_vae)
            prompt = batch[0]
            print(prompt)
            audio_filename = f"{prompt}_{audio_num}.wav"
            unwrapped_model.save_audio(gen_audio, audio_filename, cfg)
            AudioSignal(gen_audio[0].detach().cpu().numpy(), sample_rate=cfg.sample_rate).widget()
            # Filename suffix cycles 1..5 across the whole dataset (not per prompt).
            audio_num += 1
            if audio_num > 5:
                audio_num = 1

In [None]:
# Evaluate checkpoint ./output_dir_total22/19.pth.
# NOTE(review): this writes to the same ./test_mix directory and uses the same
# file names as the base_19 cell, so earlier clips are presumably overwritten
# (depends on how save_audio builds its path) - confirm this is intended.

# Release the previous cell's models before loading the next checkpoint.
# Deleting only `model`/`compression_model` is not enough: `lm`, the unwrapped
# aliases and the Accelerator itself still reference the same modules.
if "accelerator" in globals():
    accelerator.free_memory()
for _stale in ("model", "compression_model", "lm", "unwrapped_model", "unwrapped_vae"):
    globals().pop(_stale, None)
gc.collect()
torch.cuda.empty_cache()

cfg = Config()
# Every prompt appears three times in cfg.prompts.
cfg.update(prompts=[p for p in prompts for _ in range(3)])

accelerator = Accelerator(gradient_accumulation_steps=cfg.gradient_accumulation_steps)
save_path = "./test_mix"
make_dir(save_path)
cfg.update(save_path=save_path)

compression_model, lm = build_model(cfg)
model = AudioProcessing(cfg, lm)

test_dataset = TestDataset(cfg)
test_dataloader = DataLoader(test_dataset, batch_size=1)

model, compression_model = accelerator.prepare(model, compression_model)
model_path = "./output_dir_total22/19.pth"
# Deserialize on CPU; load_state_dict copies tensors onto the model's device.
model.load_state_dict(torch.load(model_path, map_location="cpu"))

model.eval()
compression_model.eval()
if accelerator.is_main_process:
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_vae = accelerator.unwrap_model(compression_model)
    audio_num = 1
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # tqdm wraps the dataloader (not enumerate) so the bar knows the total.
        for test_step, batch in enumerate(tqdm(test_dataloader)):
            gen_tokens, gen_audio = unwrapped_model.inference(batch, unwrapped_vae)
            prompt = batch[0]
            print(prompt)
            audio_filename = f"{prompt}_{audio_num}.wav"
            unwrapped_model.save_audio(gen_audio, audio_filename, cfg)
            AudioSignal(gen_audio[0].detach().cpu().numpy(), sample_rate=cfg.sample_rate).widget()
            # Filename suffix cycles 1..5 across the whole dataset (not per prompt).
            audio_num += 1
            if audio_num > 5:
                audio_num = 1

In [None]:
# Evaluate checkpoint ./compare/base_19.pth; clips are written to ./test_mix.
# NOTE(review): checkpoint and output directory are identical to an earlier
# cell in this notebook - confirm the repeat run is intentional.

# empty_cache() cannot return memory that is still referenced, so drop the
# previous run's models (and the Accelerator's references to them) first.
if "accelerator" in globals():
    accelerator.free_memory()
for _stale in ("model", "compression_model", "lm", "unwrapped_model", "unwrapped_vae"):
    globals().pop(_stale, None)
gc.collect()
torch.cuda.empty_cache()

cfg = Config()
# Every prompt appears three times in cfg.prompts.
cfg.update(prompts=[p for p in prompts for _ in range(3)])

accelerator = Accelerator(gradient_accumulation_steps=cfg.gradient_accumulation_steps)
save_path = "./test_mix"
make_dir(save_path)
cfg.update(save_path=save_path)

compression_model, lm = build_model(cfg)
model = AudioProcessing(cfg, lm)

test_dataset = TestDataset(cfg)
test_dataloader = DataLoader(test_dataset, batch_size=1)

model, compression_model = accelerator.prepare(model, compression_model)
model_path = "./compare/base_19.pth"
# Deserialize on CPU; load_state_dict copies tensors onto the model's device.
model.load_state_dict(torch.load(model_path, map_location="cpu"))

model.eval()
compression_model.eval()
if accelerator.is_main_process:
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_vae = accelerator.unwrap_model(compression_model)
    audio_num = 1
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # tqdm wraps the dataloader (not enumerate) so the bar knows the total.
        for test_step, batch in enumerate(tqdm(test_dataloader)):
            gen_tokens, gen_audio = unwrapped_model.inference(batch, unwrapped_vae)
            prompt = batch[0]
            print(prompt)
            audio_filename = f"{prompt}_{audio_num}.wav"
            unwrapped_model.save_audio(gen_audio, audio_filename, cfg)
            AudioSignal(gen_audio[0].detach().cpu().numpy(), sample_rate=cfg.sample_rate).widget()
            # Filename suffix cycles 1..5 across the whole dataset (not per prompt).
            audio_num += 1
            if audio_num > 5:
                audio_num = 1

In [None]:
# Free the models from the previous run before the next checkpoint is loaded.
# `lm`, the unwrapped aliases and the Accelerator reference the same modules as
# `model`/`compression_model`, so all of them must be dropped for the memory to
# be reclaimable. Popping with a default keeps this cell safe to re-run.
if "accelerator" in globals():
    accelerator.free_memory()
for _stale in ("model", "compression_model", "lm", "unwrapped_model", "unwrapped_vae"):
    globals().pop(_stale, None)
gc.collect()

torch.cuda.empty_cache()

In [None]:
# Evaluate checkpoint ./compare/concat_20.pth; clips are written to ./test_concat.
cfg = Config()
# Every prompt appears three times in cfg.prompts.
cfg.update(prompts=[p for p in prompts for _ in range(3)])

accelerator = Accelerator(gradient_accumulation_steps=cfg.gradient_accumulation_steps)
save_path = "./test_concat"
make_dir(save_path)
cfg.update(save_path=save_path)

compression_model, lm = build_model(cfg)
model = AudioProcessing(cfg, lm)

test_dataset = TestDataset(cfg)
test_dataloader = DataLoader(test_dataset, batch_size=1)

model, compression_model = accelerator.prepare(model, compression_model)
model_path = "./compare/concat_20.pth"
# Deserialize on CPU; load_state_dict copies tensors onto the model's device.
model.load_state_dict(torch.load(model_path, map_location="cpu"))

model.eval()
compression_model.eval()
if accelerator.is_main_process:
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_vae = accelerator.unwrap_model(compression_model)
    audio_num = 1
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # tqdm wraps the dataloader (not enumerate) so the bar knows the total.
        for test_step, batch in enumerate(tqdm(test_dataloader)):
            gen_tokens, gen_audio = unwrapped_model.inference(batch, unwrapped_vae)
            prompt = batch[0]
            print(prompt)
            audio_filename = f"{prompt}_{audio_num}.wav"
            unwrapped_model.save_audio(gen_audio, audio_filename, cfg)
            AudioSignal(gen_audio[0].detach().cpu().numpy(), sample_rate=cfg.sample_rate).widget()
            # Filename suffix cycles 1..5 across the whole dataset (not per prompt).
            audio_num += 1
            if audio_num > 5:
                audio_num = 1