# Audio Generation from Descriptions

This Python script uses the `audiocraft` library to generate audio samples from textual descriptions with the `facebook/audiogen-medium` model (a 1.5B-parameter text-to-sound model). The prompts are read from the `descriptions.txt` file, one per line, and the generated audio samples are saved as WAV files in the `output` folder. This audio generation script has been successfully tested on the NVIDIA Jetson AGX Orin 64GB Developer Kit.

If you encounter out-of-memory errors, consider reducing the `chunk_size` to decrease the amount of GPU memory required in each iteration. The purge function in the provided code is responsible for freeing up GPU resources.

In [None]:
import torchaudio
from audiocraft.models import AudioGen
from audiocraft.data.audio import audio_write
import os
import torch
import gc

def purge():
    """Release unreachable Python objects and cached CUDA memory.

    Intended to be called between generation chunks to keep peak GPU
    memory down. Takes no arguments and returns ``None``.
    """
    # Collect garbage first: tensors kept alive only by reference cycles
    # must be destroyed before their blocks can be handed back by
    # empty_cache().
    gc.collect()
    # Skip the CUDA calls when no GPU is present so the function is also
    # safe to call on CPU-only machines.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

# Load the text prompts, one per line, from a text file.
descriptions_file = 'descriptions.txt'  # Update with your file name
# Read as UTF-8 explicitly so prompts containing non-ASCII characters are
# decoded the same way regardless of the platform's default encoding.
with open(descriptions_file, 'r', encoding='utf-8') as file:
    descriptions = file.read().splitlines()

# Initialize the AudioGen model
model = AudioGen.get_pretrained('facebook/audiogen-medium')
model.set_generation_params(duration=2)  # generate 2 seconds.

# Create an output folder if it doesn't exist
output_folder = 'output'
os.makedirs(output_folder, exist_ok=True)

# Generate and save audio samples in chunks of `chunk_size` descriptions.
# Lower chunk_size if generation runs out of GPU memory.
chunk_size = 50
num_chunks = (len(descriptions) + chunk_size - 1) // chunk_size  # ceiling division

for chunk_idx in range(num_chunks):
    start_idx = chunk_idx * chunk_size
    end_idx = (chunk_idx + 1) * chunk_size
    # Slicing past the end is safe: the last chunk is simply shorter.
    current_descriptions = descriptions[start_idx:end_idx]

    # Generate one audio sample per description in this chunk
    wav = model.generate(current_descriptions, progress=True)

    # Save each sample under its global description index (0, 1, 2, ...).
    # The path is passed without a file extension.
    for idx, one_wav in enumerate(wav):
        output_file_path = os.path.join(output_folder, f'{start_idx + idx}')
        audio_write(output_file_path, one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True)

    # Drop every reference to this chunk's output before purging; `one_wav`
    # still points into `wav`, so both names must be cleared or the memory
    # stays in use and cannot be released.
    wav = one_wav = None

    # Purge memory after processing each chunk
    purge()

print(f'Audio samples saved in the "{output_folder}" folder.')

# Music Generation from Descriptions

MusicGen comes with four pretrained models:
- facebook/musicgen-small: A 300M model, for text to music generation only.
- facebook/musicgen-medium: A 1.5B model, for text to music generation only.
- facebook/musicgen-melody: A 1.5B model, for text to music and text+melody to music.
- facebook/musicgen-large: A 3.3B model, for text to music generation only.

In [None]:
import torchaudio
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import os
import torch
import gc

def purge():
    """Release unreachable Python objects and cached CUDA memory.

    Intended to be called between generation chunks to keep peak GPU
    memory down. Takes no arguments and returns ``None``.
    """
    # Collect garbage first: tensors kept alive only by reference cycles
    # must be destroyed before their blocks can be handed back by
    # empty_cache().
    gc.collect()
    # Skip the CUDA calls when no GPU is present so the function is also
    # safe to call on CPU-only machines.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

# Load the text prompts, one per line, from a text file.
descriptions_file = 'descriptions.txt'  # Update with your file name
# Read as UTF-8 explicitly so prompts containing non-ASCII characters are
# decoded the same way regardless of the platform's default encoding.
with open(descriptions_file, 'r', encoding='utf-8') as file:
    descriptions = file.read().splitlines()

# Initialize the MusicGen model
model = MusicGen.get_pretrained('facebook/musicgen-small')
model.set_generation_params(duration=2)  # generate 2 seconds of music.

# Create an output folder if it doesn't exist
output_folder = 'output'
os.makedirs(output_folder, exist_ok=True)

# Generate and save audio samples in chunks of `chunk_size` descriptions.
# Lower chunk_size if generation runs out of GPU memory.
chunk_size = 50
num_chunks = (len(descriptions) + chunk_size - 1) // chunk_size  # ceiling division

for chunk_idx in range(num_chunks):
    start_idx = chunk_idx * chunk_size
    end_idx = (chunk_idx + 1) * chunk_size
    # Slicing past the end is safe: the last chunk is simply shorter.
    current_descriptions = descriptions[start_idx:end_idx]

    # Generate one audio sample per description in this chunk
    wav = model.generate(current_descriptions, progress=True)

    # Save each sample under its global description index (0, 1, 2, ...).
    # The path is passed without a file extension.
    for idx, one_wav in enumerate(wav):
        output_file_path = os.path.join(output_folder, f'{start_idx + idx}')
        audio_write(output_file_path, one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True)

    # Drop every reference to this chunk's output before purging; `one_wav`
    # still points into `wav`, so both names must be cleared or the memory
    # stays in use and cannot be released.
    wav = one_wav = None

    # Purge memory after processing each chunk
    purge()

print(f'Audio samples saved in the "{output_folder}" folder.')