In [None]:
!python3 -m pip install -U git+https://github.com/facebookresearch/audiocraft

from audiocraft.models import musicgen
from audiocraft.utils.notebook import display_audio
import torch
import torchaudio

In [None]:
# Load the pretrained 'large' MusicGen checkpoint.
# Prefer the GPU, but fall back to the CPU instead of failing outright when CUDA is
# not available (generation will be much slower there).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = musicgen.MusicGen.get_pretrained('large', device=device)

In [37]:
# PROMPT ARCHIVE
# Previously used text prompts, kept for reference; one list entry per prompt.
# Every entry ends with a comma: without it Python silently joins adjacent string
# literals into one long prompt.
pa = [
    'lofi, lofi hip hop, high quality, chill, otherworldy, enchanted forest, soft drums, relaxing, late night',
    'lofi, lofi hip hop, high quality, chill, trippy sample, japanese city pop sample, soft drums, relaxing, late night',
]

In [51]:
# USER PARAMETERS
# Run this block every time you update a parameter

prompt = ['lofi, lofi hip hop, high quality, chill, jazz fusion sample, japanese city pop sample, soft drums, relaxing, late night'] # Used for both types of generation
gen_duration = 30 # Seconds of audio requested from the model per call
filepath = '/notebooks/output/' # Output folder; keep the trailing slash, file names are appended directly

# For continuation:
prompt_audio = '/notebooks/output/drained ext3.wav'
num_generations = 1 # Number of times to generate audio when using continuation
prompt_duration = 20 # Select how much of the audio is used as context each cycle

# The continuation cells keep only the last (gen_duration - prompt_duration) seconds
# of each model call as new audio. If that difference is zero or negative the slice
# no longer selects "the new part", so reject such settings up front.
if not 0 < prompt_duration < gen_duration:
    raise ValueError(
        f"prompt_duration ({prompt_duration}) must be greater than 0 and "
        f"smaller than gen_duration ({gen_duration})"
    )

# Load the audio that the continuation cells extend; prompt_sr is the file's own sample rate
prompt_waveform, prompt_sr = torchaudio.load(prompt_audio)
output_duration = gen_duration - prompt_duration # Seconds of new audio gained per cycle

# Other model parameters:
user_temp = 0.90

model.set_generation_params(
    use_sampling=True,
    temperature=user_temp,
    top_k=250,
    duration=gen_duration
)

print('Success')

Success


In [54]:
# GENERATE SINGLE OUTPUT FROM TEXT

# One clip is generated per entry in `prompt`, using the parameters set in the
# USER PARAMETERS cell.
single_output = model.generate(descriptions=prompt, progress=True)

# Play back at the sample rate reported by the model instead of a hard-coded 32000,
# so the player stays correct if a different checkpoint is loaded.
display_audio(single_output, sample_rate=model.sample_rate)

  1503 /   1500

In [48]:
# SAVE SINGLE OUTPUT

# Name your audio file:
output_name = input('Song name?:').strip()
if not output_name:
    # An empty name would silently write a hidden file called ".wav"
    raise ValueError('Song name must not be empty')

# Take the first clip of the batch and move it to the CPU. Indexing (rather than
# squeeze/unsqueeze) always leaves the 2D tensor torchaudio.save expects, even when
# several prompts were generated or the output has more than one channel.
# NOTE(review): assumes model.generate returns (batch, channels, samples) - confirm.
output_cpu = single_output[0].cpu()

# Build the path once so the file written and the path reported cannot drift apart
output_path = f"{filepath}{output_name}.wav"
torchaudio.save(output_path, output_cpu, model.sample_rate)
print(f"Saved to: {output_path}")

Saved to: /notebooks/output/cool sample.wav


In [None]:
# Trim Audio (optional)
# Keeps only the first `length` seconds of the loaded prompt audio; the rest is dropped.
length = 41

# Seconds -> number of samples at the prompt file's own sample rate
length_samples = int(prompt_sr * length)
prompt_waveform = prompt_waveform[..., 0:length_samples]
print(f"Audio trimmed to {length} seconds")

In [None]:
# GENERATE CONTINUOUS AUDIO OF ANY LENGTH
#
# Repeatedly feeds the tail of the audio produced so far back into the model and
# appends the newly generated part, num_generations times.

model_sr = model.sample_rate  # sample rate of the audio the model returns

# Bring the prompt into the model's format *before* mixing it with generated audio.
# Otherwise audio at two different sample rates / channel counts would be concatenated
# into one tensor and the second-based slicing below would be off.
output_waveform = prompt_waveform
if output_waveform.shape[0] != model.audio_channels:
    # Downmix to mono, then repeat to the channel count the model produces
    output_waveform = output_waveform.mean(dim=0, keepdim=True).expand(model.audio_channels, -1)
if prompt_sr != model_sr:
    output_waveform = torchaudio.functional.resample(output_waveform, prompt_sr, model_sr)

context_samples = int(prompt_duration * model_sr)

for cycle in range(num_generations):
    # Use (at most) the last prompt_duration seconds as context
    context_waveform = output_waveform[..., -context_samples:]
    output = model.generate_continuation(context_waveform, prompt_sample_rate=model_sr, descriptions=prompt, progress=True)
    output = output[0].cpu()  # Drop the batch dimension: 3D output tensor -> 2D
    # The model output starts with the context; keep only what follows it. Slicing by
    # the actual context length stays correct when the prompt is shorter than
    # prompt_duration.
    output = output[..., context_waveform.shape[-1]:]
    output_waveform = torch.cat([output_waveform, output], dim=-1)  # Append the new audio
    print(f"Cycle {cycle+1}/{num_generations} completed")

display_audio(output_waveform, sample_rate=model_sr)

In [None]:
# SAVE CONTINUOUS OUTPUT

# Name your audio file
name = input('Song name?:').strip()
if not name:
    # An empty name would silently write a hidden file called ".wav"
    raise ValueError('Song name must not be empty')

# Build the path once so the file written and the path reported cannot drift apart
save_path = f"{filepath}{name}.wav"
# Use the sample rate reported by the model rather than a hard-coded 32000
torchaudio.save(save_path, output_waveform, model.sample_rate)
print(f"Saved to: {save_path}")

In [None]:
# GENERATE AUDIO ***BEFORE*** THE PROMPT AUDIO
#
# Trick: reverse the prompt in time, let the model continue the reversed audio,
# then reverse the result again so the generated part ends up in front of the prompt.

model_sr = model.sample_rate  # sample rate of the audio the model returns

# Bring the prompt into the model's format *before* mixing it with generated audio.
# Otherwise audio at two different sample rates / channel counts would be concatenated
# into one tensor and the second-based slicing below would be off.
prepared_waveform = prompt_waveform
if prepared_waveform.shape[0] != model.audio_channels:
    # Downmix to mono, then repeat to the channel count the model produces
    prepared_waveform = prepared_waveform.mean(dim=0, keepdim=True).expand(model.audio_channels, -1)
if prompt_sr != model_sr:
    prepared_waveform = torchaudio.functional.resample(prepared_waveform, prompt_sr, model_sr)

reversed_waveform = prepared_waveform.flip(dims=(-1,))
context_samples = int(prompt_duration * model_sr)

for cycle in range(num_generations):
    # Use (at most) the last prompt_duration seconds of the reversed audio as context
    context_waveform = reversed_waveform[..., -context_samples:]
    output = model.generate_continuation(context_waveform, prompt_sample_rate=model_sr, descriptions=prompt, progress=True)
    output = output[0].cpu()  # Drop the batch dimension: 3D output tensor -> 2D
    # The model output starts with the context; keep only what follows it. Slicing by
    # the actual context length stays correct when the prompt is shorter than
    # prompt_duration.
    output = output[..., context_waveform.shape[-1]:]
    reversed_waveform = torch.cat([reversed_waveform, output], dim=-1)  # Append the new audio
    print(f"Cycle {cycle+1}/{num_generations} completed")

# Undo the time reversal: the generated audio now precedes the original prompt
output_waveform = reversed_waveform.flip(dims=(-1,))
display_audio(output_waveform, sample_rate=model_sr)


In [None]:
# SAVE CONTINUOUS OUTPUT

# Name your audio file
name = input('Song name?:').strip()
if not name:
    # An empty name would silently write a hidden file called ".wav"
    raise ValueError('Song name must not be empty')

# Build the path once so the file written and the path reported cannot drift apart
save_path = f"{filepath}{name}.wav"
# Use the sample rate reported by the model rather than a hard-coded 32000
torchaudio.save(save_path, output_waveform, model.sample_rate)
print(f"Saved to: {save_path}")