# Setup

In [None]:
# Ubuntu/Debian environments (e.g. Colab) only: ALSA and PortAudio development packages.
# `apt-get ... -y` is used because a notebook cell has no interactive stdin to
# answer the confirmation prompt, and `apt` itself is not meant for scripted use.
!apt-get install -y libasound2-dev portaudio19-dev

In [None]:
# Fetch AudioCraft and install it in editable mode from the checkout.
# NOTE: `%cd` changes the kernel's working directory for every following cell.
!git clone https://github.com/facebookresearch/audiocraft.git
%cd audiocraft
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install -e .

%pip install dora-search numba
%pip install git+https://github.com/tnadav/prompt-synth.git#subdirectory=audiomanip
# Pin torchvision last so the earlier installs cannot leave a conflicting version behind.
%pip install torchvision==0.16

In [None]:
from google.colab import drive

# Mount Google Drive (Colab only). The dataset, model export, MIDI and sample
# paths used later in this notebook all live under /content/drive/MyDrive/.
drive.mount("/content/drive/")

# Note Generation

In [None]:
import os

import librosa
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from IPython.display import Audio, Markdown, display


def plot_spectrogram(y, sr, title):
    """Plot the mel spectrogram of an audio signal on a dB scale.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.melspectrogram``.
        sr: Sample rate of ``y``.
        title: Text used as the prefix of the figure title.
    """
    # ``import librosa`` alone does not guarantee that the ``display`` submodule
    # is loaded (older librosa releases require importing it explicitly).
    from librosa.display import specshow

    plt.figure(figsize=(10, 4))
    spec = librosa.feature.melspectrogram(y=y, sr=sr)
    # Express power relative to the loudest bin, so the peak sits at 0 dB.
    spec_db = librosa.power_to_db(spec, ref=np.max)
    specshow(spec_db, sr=sr, x_axis="time", y_axis="mel")
    plt.colorbar(format="%+2.0f dB")
    plt.title(f"{title} - Mel-frequency spectrogram")
    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")
    plt.show()


def display_audio_widget(y, sr):
    """Show an inline audio player for samples ``y`` at sample rate ``sr``.

    Playback does not start automatically.
    """
    player = Audio(y, rate=sr, autoplay=False)
    display(player)


def display_label(title):
    """Render ``title`` in the cell output as a level-3 Markdown heading."""
    heading = Markdown(f"### {title}")
    display(heading)


def display_sample(y, sr, title):
    """Present one audio sample: heading, mel spectrogram, then audio player.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        title: Heading text, also used as the spectrogram title prefix.
    """
    # Heading first, so the figure and player appear underneath it.
    display(Markdown(f"### {title}"))
    plot_spectrogram(y, sr, title)
    display_audio_widget(y, sr)


def display_note_gen(note_gen, title, target_file=None):
    """Generate one note with ``note_gen``, display it and optionally save it.

    Args:
        note_gen: Object exposing ``gen_note(...)`` and ``sample_rate``.
        title: Label shown with the sample and in the save message.
        target_file: Destination audio path; when ``None`` nothing is written.
    """
    # Fixed test note: gen_note(440.0, 2).
    # NOTE(review): presumably 440 Hz for 2 seconds -- confirm against gen_note.
    samples = note_gen.gen_note(440.0, 2)
    rate = note_gen.sample_rate
    display_sample(samples, rate, title)

    if target_file is None:
        return
    sf.write(target_file, samples, rate)
    print(f"{title}: Audio saved to: {target_file}")

In [None]:
# Root folder (on the mounted Google Drive) that receives every exported audio
# sample; the cells below create their own sub-folders inside it.
AUDIO_SAMPLES_EXPORT_DIR = "/content/drive/MyDrive/prompt-synth/samples"
# exist_ok=True keeps this cell safe to re-run.
os.makedirs(AUDIO_SAMPLES_EXPORT_DIR, exist_ok=True)

## Single note evaluation

### Reference

In [None]:
from audiomanip import NSynthDataset, SampleNoteGenerator, make_osc

# NSynth test data on the mounted Drive.
nsynth_data = NSynthDataset("/content/drive/MyDrive/prompt-synth/nsynth-test")

# Only the first note is needed, so take it straight from the iterator instead
# of materialising the whole dataset into a list.
note = next(iter(nsynth_data), None)
if note is None:
    raise IndexError("NSynth dataset is empty: no note available for the sample-based reference")
# No `sr` is passed, so librosa resamples to its default rate and returns it.
sample, sample_rate = librosa.load(note.wav_path)

# (title, note generator) pairs used as the baseline: four basic oscillators
# plus one generator built from the recorded NSynth sample.
reference_note_genrators = [
    ("Sine Wave", make_osc("sine")),
    ("Sawtooth Wave", make_osc("sawtooth")),
    ("Square Wave", make_osc("square")),
    ("Triangle Wave", make_osc("triangle")),
    (note.name, SampleNoteGenerator.from_sample(sample, note.base_freq, sample_rate)),
]

In [None]:
# Set to False to only display the reference notes without writing .wav files.
export_reference = True
output_dir = os.path.join(AUDIO_SAMPLES_EXPORT_DIR, "reference")
if export_reference:
    os.makedirs(output_dir, exist_ok=True)

for title, note_gen in reference_note_genrators:
    # A target of None tells display_note_gen to skip saving.
    if export_reference:
        target_file = os.path.join(output_dir, f"{title}.wav")
    else:
        target_file = None
    display_note_gen(note_gen, title, target_file=target_file)

### AudioCraft

In [None]:
import os

from audiomanip import ModelType, make_audiocraft_note_generator

# (model type, checkpoint) pairs to evaluate, grouped by architecture.
# Names starting with "facebook" are passed through unchanged; the others are
# resolved against the exports folder by get_audiocraft_note_generators().
models = [
    *(
        (ModelType.MAGNeT, checkpoint)
        for checkpoint in (
            "facebook/magnet-small-10secs",
            "magnet-nsynth-full-fixed-ext-5-epochs-8ddef1d4",
            "magnet-nsynth-full-fixed-ext-20-epochs-d0d4466c",
        )
    ),
    *(
        (ModelType.MusicGen, checkpoint)
        for checkpoint in (
            "facebook/musicgen-small",
            "musicgen-nsynth-full-fixed-ext-5-epochs-2cc84dbe",
            "musicgen-nsynth-full-fixed-ext-20-epochs-965a196a",
        )
    ),
]

# Text prompts; every model above is evaluated with every prompt.
prompts = [
    "flute",
    "dark flute",
    "bright flute",
    "keyboard flute with reverb",
    "flute keyboard with reverb",
    "keyboard with distortion",
]


def get_audiocraft_note_generators():
    """Lazily yield a ``(title, note_generator)`` pair per model/prompt combination.

    Iterates over the module-level ``models`` and ``prompts``. Each model is
    built once and then specialised for every prompt. The title has the form
    ``"<model_name> - <prompt>"``.
    """
    for model_type, model_name in models:
        if model_name.startswith("facebook"):
            # Passed to the factory as-is.
            checkpoint = model_name
        else:
            # Local export stored on the mounted Drive.
            checkpoint = os.path.join(
                "/content/drive/MyDrive/prompt-synth/exports", model_name
            )

        per_model = make_audiocraft_note_generator(model_type, checkpoint)
        yield from (
            (f"{model_name} - {prompt}", per_model.from_prompt(prompt))
            for prompt in prompts
        )

In [None]:
# Every AudioCraft sample is displayed and written to the "audiocraft" sub-folder.
output_dir = os.path.join(AUDIO_SAMPLES_EXPORT_DIR, "audiocraft")
os.makedirs(output_dir, exist_ok=True)

for raw_title, note_gen in get_audiocraft_note_generators():
    # Titles can contain "/" (e.g. "facebook/..."), which would otherwise be
    # treated as a directory separator in the target file name.
    title = raw_title.replace("/", "_")
    target_file = os.path.join(output_dir, title + ".wav")
    display_note_gen(note_gen, title, target_file=target_file)

# MIDI to Audio

## Reference

In [None]:
import glob

from audiomanip.midi2audio import midi2audio

# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the index used below always selects the same file across runs.
midi_files = sorted(glob.glob("/content/drive/MyDrive/prompt-synth/midi/*.mid"))
if len(midi_files) < 3:
    raise IndexError(
        f"Expected at least 3 MIDI files in /content/drive/MyDrive/prompt-synth/midi, "
        f"found {len(midi_files)}"
    )

# Reference rendering: the MIDI file played with a plain triangle oscillator.
note_gen = make_osc("triangle")
with open(midi_files[2], "rb") as f:
    audio = midi2audio(f, note_gen)

display_audio_widget(audio, note_gen.sample_rate)

## AudioCraft

In [None]:
# The checkpoint is a MusicGen export, so load it with the MusicGen model type
# (this matches its entry in the `models` table above).
model_note_gen = make_audiocraft_note_generator(
    ModelType.MusicGen,
    os.path.join(
        "/content/drive/MyDrive/prompt-synth/exports",
        "musicgen-nsynth-full-fixed-ext-20-epochs-965a196a",
    ),
)
# Build the prompt-conditioned note generator from the model. Without this step
# the MIDI file would be rendered with the oscillator left over from the
# reference cell. The first entry of `prompts` is used; change it to try others.
audiocraft_note_gen = model_note_gen.from_prompt(prompts[0])

with open(midi_files[2], "rb") as f:
    audio = midi2audio(f, audiocraft_note_gen)

display_audio_widget(audio, audiocraft_note_gen.sample_rate)