<font color=DeepSkyBlue size=7> 🎶 **Music Generation** ➕ **- *AudioCraft***</font>

In [ ]:
#@title # <font color="lime">**1.**</font> Install requirements. 📥
#@markdown - Modules
#@markdown   - [**`AudioCraft`**](https://github.com/facebookresearch/audiocraft) - Audio generation model 🎶

from tensorflow import config
GPU = config.list_physical_devices("GPU")
GPU = GPU[0].device_type == "GPU" if GPU else 0

if not GPU:
	raise SystemExit("GPU unavailable!\nThe continuation is pointless.")

#-=-=-=-#

print('Installing dependencies for "AudioCraft"...')
!pip install -U git+https://github.com/facebookresearch/audiocraft#egg=audiocraft --quiet --exists-action i | grep -v "already"

import torch, torchaudio
from audiocraft.models import musicgen
from audiocraft.utils.notebook import display_audio

In [ ]:
#@title # <font color=gold>**2.**</font> Configure. ⚙️
Model_Name = "Medium" #@param ["Small", "Medium", "Melody", "Large"]
# The dropdown shows capitalised labels; the lower-cased form is what gets
# passed to `get_pretrained` below.
Model_Name = Model_Name.lower()

#@markdown Notices:
#@markdown - ### ⛔ <font color="red">**Can't be less than `2` prompts**</font>. **(RAM failure)**
#@markdown - ### 🙊 **Unable to generate realistic vocals**.
#@markdown - ### 🇬🇧 Has been trained with **English descriptions <font color=red>and will not</font>** perform as well in other languages.
#@markdown - ### 🎶 <font color=gold>Does not perform **equally well for all music styles and cultures**</font>.
#@markdown - 📉 <font color=gold>**Sometimes generates end of songs, collapsing to silence**</font>.
#@markdown - 🤷‍♂️ It is sometimes difficult to assess what types of text descriptions provide the best generations.<br>Prompt engineering may be required to obtain satisfying results.

# Guard: `musicgen` is imported by the first cell, so its absence means that
# cell has not been run in this kernel.
if "musicgen" not in globals():
	raise SystemExit("Requirements were not installed!\nPlease run the first cell.")

# When this cell is re-run (e.g. to switch model size) the previous model would
# otherwise stay referenced — and resident on the GPU — until the new one has
# finished loading, because the right-hand side of the assignment is evaluated
# before `Model` is rebound. Drop it first so both never coexist in GPU memory.
if "Model" in globals():
	import gc, torch
	del Model
	gc.collect()
	torch.cuda.empty_cache()

# Load the selected pretrained MusicGen checkpoint onto the GPU.
Model = musicgen.MusicGen.get_pretrained(Model_Name, device = "cuda")

# <font color="lime">**3.**</font> Generate. 🎶
### ℹ️ For good quality results, **provide as much detail as possible.**

## Text to audio 📝

In [ ]:
#@markdown ## Audio Prompts 📝
#@markdown Individual prompts can be left empty, but at least `2` must be filled in.
Description_1 = "Crazy EDM, heavy bang" #@param {"type": "string"}
Description_2 = "Classic Reggae track with an electronic guitar solo" #@param {"type": "string"}
Description_3 = "LoFi electro chill with organic samples" #@param {"type": "string"}
Description_4 = "Rock with saturated guitars, a heavy bass line and crazy drum break and fills" #@param {"type": "string"}
Description_5 = "Earthy tones, environmentally conscious, ukulele-infused, harmonic, breezy, easygoing, organic instrumentation, gentle grooves" #@param {"type": "string"}
Remove_Duplicated_Descriptions = False #@param {"type": "boolean"}

#@markdown <br>

#@markdown ## Generation parameters
#@markdown #### <font color=red>**For developers only!**</font>
Use_ArgMax_Decoding = False #@param {type: "boolean"}
# Per the AudioCraft `MusicGen.set_generation_params` documentation, `top_k` is
# an integer token count (library default 250) and `top_p` is a cumulative
# probability (library default 0.0, meaning "fall back to top-k").
# The two form fields previously carried each other's value and range, so the
# generation call received top_k=0.0 and top_p=250.
Top_K = 250 #@param {type: "slider", min: 0, max: 500, step: 1}
Top_P = 0.0 #@param {type: "slider", min: 0.0, max: 1.0, step: 0.01}
SoftMax_Temperature = 1.0 #@param {type: "slider", min: 1.0, max: 10.0, step: 0.1}
CFG_Coefficient = 3.0 #@param {type: "slider", min: 1.0, max: 10.0, step: 0.1}
Use_2_CFG = False #@param {type: "boolean"}

#@markdown <br>

#@markdown ## Music related settings
#@markdown Experimental — do not rely on these.
BPM = 135 #@param {type: "slider", min: 60, max: 170, step: 1}
# `BPM` is rebound from the slider number to a pair of text hints; the
# generation code unpacks this tuple into every prompt.
BPM = f"{BPM} BPM", f"{BPM} Beats Per Minute"
Time_Signature = "4/4" #@param ["3/4", "3/8", "4/4", "6/8"]
# Likewise turned into the text hint that is appended to every prompt.
Time_Signature = f"{Time_Signature} Time Signature"
Duration_Seconds = 25 #@param {type: "slider", min: 1.0, max: 60.0, step: 0.1}
Target_Key_Note = "None" #@param ["C", "C♯ (D♭)", "D", "D♯ (E♭)", "E", "F", "F♯ (G♭)", "G", "G♯ (A♭)", "A", "A♯ (B♭)", "B", "None"]
Target_Key_Mode = "Major" #@param ["Major", "Minor", "Dorian", "Phrygian", "Mixolydian"]
if Target_Key_Note == "None": Target_Key_Note = ""
# Join only the non-empty parts so an unset note does not leave a double space
# ("in  Major key scale") inside the prompt.
Target_Key = " ".join(Part for Part in ("in", Target_Key_Note, Target_Key_Mode, "key scale") if Part)

#-=-=-=-#

# Guard: `Model` is created by the "Configure" cell.
if "Model" not in globals():
	raise SystemExit("Model was not downloaded!\nPlease run the previous cell.")

# Build the final prompts: every non-blank description gets the tempo,
# time-signature and key hints appended, separated by ", ".
Descriptions_ = Description_1, Description_2, Description_3, Description_4, Description_5
Descriptions = [
	", ".join((Prompt.strip(), *BPM, Time_Signature, Target_Key))
	for Prompt in Descriptions_
	if Prompt.strip()  # a whitespace-only field counts as empty
]

if Remove_Duplicated_Descriptions:
	# dict.fromkeys removes duplicates while keeping the first-seen order, so
	# the generated clips stay aligned with the order of the form fields
	# (a plain set() would return them in arbitrary order).
	Descriptions = list(dict.fromkeys(Descriptions))
if len(Descriptions) < 2:
	raise SystemExit("Amount of prompts have to be > 1.")

# Keyword arguments make the mapping to the AudioCraft parameters explicit
# instead of relying on positional order.
Model.set_generation_params(
	use_sampling = not Use_ArgMax_Decoding,
	top_k = int(Top_K),  # top-k is a token count and must be an integer
	top_p = Top_P,
	temperature = SoftMax_Temperature,
	duration = Duration_Seconds,
	cfg_coef = CFG_Coefficient,
	two_step_cfg = Use_2_CFG,
)

#-=-=-=-#

print("Final prompts:")
print("\t" + "\n\t".join(Descriptions))
print("\n" + "━" * 64 + "\n")

try:
	Result = Model.generate(Descriptions, progress = True)
	# Use the rate reported by the loaded model rather than a hard-coded 32 kHz.
	display_audio(Result, Model.sample_rate)
except KeyboardInterrupt:
	# Interrupting the kernel during generation ends the cell with a short message.
	raise SystemExit("Cancelled.")