```
Generative AI with Python, by Fernando Amaral
```

# Setup

In [1]:
#gpu
!pip install diffusers transformers accelerate safetensors

Collecting diffusers
  Downloading diffusers-0.30.3-py3-none-any.whl.metadata (18 kB)
Downloading diffusers-0.30.3-py3-none-any.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: diffusers
Successfully installed diffusers-0.30.3


In [2]:
from diffusers import AudioLDM2Pipeline
import torch
import scipy
from google.colab import files

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

In [3]:
def download_file(file_path):
    """Hand ``file_path`` to ``files.download`` so the file is offered for download.

    ``files`` is the ``google.colab`` helper imported at the top of the
    notebook. Nothing is returned.
    """
    trigger_download = files.download
    trigger_download(file_path)

# Code

In [4]:
def generate_audio_from_prompt(model_path, prompt, num_steps, audio_length, num_waveforms, save_path, rate=None, seed=None):
    """Generate audio from a text prompt with AudioLDM 2 and save it as a WAV file.

    Args:
        model_path: Hub id or local path given to ``AudioLDM2Pipeline.from_pretrained``.
        prompt: Text description of the audio to generate.
        num_steps: Number of denoising steps (``num_inference_steps``).
        audio_length: Requested clip length in seconds (``audio_length_in_s``).
        num_waveforms: Number of candidate waveforms generated for the prompt
            (``num_waveforms_per_prompt``). Only the first returned waveform
            is written to disk.
        save_path: Destination path of the WAV file.
        rate: Sample rate written into the WAV header. ``None`` (default) uses
            the sampling rate declared by the pipeline's vocoder, so the file
            plays back at the speed and duration the model produced. Pass an
            explicit value only to override that on purpose.
        seed: Optional integer seed for reproducible generation. ``None``
            leaves the pipeline's default (unseeded) sampling.

    Returns:
        None. The audio is written to ``save_path``.
    """
    # Import the submodule explicitly instead of relying on ``import scipy``
    # having made ``scipy.io.wavfile`` available as an attribute.
    from scipy.io import wavfile

    # Half precision is only used on GPU; fall back to float32 on CPU so the
    # function still runs (slowly) when no CUDA device is present.
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    dtype = torch.float16 if use_cuda else torch.float32

    pipeline = AudioLDM2Pipeline.from_pretrained(
        pretrained_model_name_or_path=model_path,
        torch_dtype=dtype
    ).to(device)

    generator = None
    if seed is not None:
        generator = torch.Generator(device=device).manual_seed(seed)

    audios = pipeline(
        prompt=prompt,
        num_inference_steps=num_steps,
        audio_length_in_s=audio_length,
        num_waveforms_per_prompt=num_waveforms,
        generator=generator
    ).audios

    if rate is None:
        # The previous hard-coded 32000 did not match the model output rate
        # (the AudioLDM 2 examples write at 16 kHz), which made the saved clip
        # play too fast and too short. Ask the vocoder for the real value.
        rate = pipeline.vocoder.config.sampling_rate

    wavfile.write(save_path, rate=rate, data=audios[0])


# Execution

In [5]:
# What to generate and where to store it.
pretrained_model_name_or_path = "cvssp/audioldm2-large"
prompt = "Audio simulation of a busy and huge city, with guns shots and cars "
save_path = "city.wav"

# Sampling settings: denoising steps, clip length in seconds, and number of
# candidate waveforms (only the first returned one is written to save_path).
num_inference_steps, audio_length_in_s, num_waveforms_per_prompt = 200, 10.0, 3

generate_audio_from_prompt(
    model_path=pretrained_model_name_or_path,
    prompt=prompt,
    num_steps=num_inference_steps,
    audio_length=audio_length_in_s,
    num_waveforms=num_waveforms_per_prompt,
    save_path=save_path,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/805 [00:00<?, ?B/s]

Fetching 26 files:   0%|          | 0/26 [00:00<?, ?it/s]

language_model/config.json:   0%|          | 0.00/902 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/507 [00:00<?, ?B/s]

projection_model/config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/4.74M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/776M [00:00<?, ?B/s]

text_encoder_2/config.json:   0%|          | 0.00/766 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/494 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer_2/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/2.35k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/2.87G [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.56k [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

vocoder/config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/222M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/221M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

In [7]:
# Offer the WAV file written by the generation cell for download.
download_file(file_path=save_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>