Release 15.02.2024

kubinka0505 · Jan 2, 2024 · 136e196 · 136e196
commit 136e196
Show file tree

Hide file tree

Showing 24 changed files with 8,053 additions and 0 deletions.
diff --git a/Documents/Pictures/Google_Colab.svg b/Documents/Pictures/Google_Colab.svg
diff --git a/License.txt b/License.txt
diff --git a/Notebooks/AI/Audio/Generate/AudioCraft.ipynb b/Notebooks/AI/Audio/Generate/AudioCraft.ipynb
@@ -0,0 +1,178 @@
+{
+	"nbformat": 4,
+	"nbformat_minor": 0,
+	"metadata": {
+		"colab": {
+			"name": "Music Generation - AudioCraft",
+			"collapsed_sections": [
+				"pcKlg7MAdaBl"
+			],
+			"provenance": []
+		},
+		"kernelspec": {
+			"name": "python3",
+			"display_name": "Python 3"
+		},
+		"language_info": {
+			"name": "python"
+		},
+		"accelerator": "GPU",
+		"gpuClass": "standard"
+	},
+	"cells": [
+		{
+			"cell_type": "markdown",
+			"source": [
+				"<font color=DeepSkyBlue size=7> 🎶 **Music Generation** ➕ **- *AudioCraft***</font>"
+			],
+			"metadata": {
+				"id": "SV_zN6b_8UK9"
+			}
+		},
+		{
+			"cell_type": "code",
+			"metadata": {
+				"cellView": "form",
+				"id": "WO1EH3sEbcdM"
+			},
+			"source": [
+				"#@title # <font color=\"lime\">**1.**</font> Install requirements. 📥\n",
+				"#@markdown - Modules\n",
+				"#@markdown   - [**`AudioCraft`**](https://github.com/facebookresearch/audiocraft) - Audio generation model 🎶\n",
+				"\n",
+				"from tensorflow import config\n",
+				"GPU = config.list_physical_devices(\"GPU\")\n",
+				"GPU = GPU[0].device_type == \"GPU\" if GPU else 0\n",
+				"\n",
+				"if not GPU:\n",
+				"\traise SystemExit(\"GPU unavailable!\\nThe continuation is pointless.\")\n",
+				"\n",
+				"#-=-=-=-#\n",
+				"\n",
+				"print('Installing dependencies for \"AudioCraft\"...')\n",
+				"!pip install -U git+https://github.com/facebookresearch/audiocraft#egg=audiocraft --quiet --exists-action i | grep -v \"already\"\n",
+				"\n",
+				"import torch, torchaudio\n",
+				"from audiocraft.models import musicgen\n",
+				"from audiocraft.utils.notebook import display_audio"
+			]
+		},
+		{
+			"cell_type": "code",
+			"source": [
+				"#@title # <font color=gold>**2.**</font> Configure. ⚙️\n",
+				"Model_Name = \"Medium\" #@param [\"Small\", \"Medium\", \"Melody\", \"Large\"]\n",
+				"Model_Name = Model_Name.lower()\n",
+				"\n",
+				"#@markdown Notices:\n",
+				"#@markdown - ### ⛔ <font color=\"red\">**Can't be less than `2` prompts</font>. (RAM failure)**\n",
+				"#@markdown - ### 🙊 **Unable to generate realistic vocals**.\n",
+				"#@markdown - ### 🇬🇧 Has been trained with **English descriptions <font color=red>and will not</font>** perform as well in other languages.\n",
+				"#@markdown - ### 🎶 <font color=gold>Does not perform **equally well for all music styles and cultures**</font>.\n",
+				"#@markdown - 📉 <font color=gold>**Sometimes generates end of songs, collapsing to silence**</font>.\n",
+				"#@markdown - 🤷‍♂️ It is sometimes difficult to assess what types of text descriptions provide the best generations.<br>Prompt engineering may be required to obtain satisfying results.\n",
+				"\n",
+				"if not \"musicgen\" in locals():\n",
+				"\traise SystemExit(\"Requirements were not installed!\\nPlease run the first cell.\")\n",
+				"\n",
+				"Model = musicgen.MusicGen.get_pretrained(Model_Name, device = \"cuda\")"
+			],
+			"metadata": {
+				"cellView": "form",
+				"id": "jY8jPyEncCWs"
+			}
+		},
+		{
+			"cell_type": "markdown",
+			"source": [
+				"# <font color=\"lime\">**3.**</font> Generate. 🎶\n",
+				"### ℹ️ For good quality results, **provide as much details as possible.**"
+			],
+			"metadata": {
+				"id": "pcKlg7MAdaBl"
+			}
+		},
+		{
+			"cell_type": "markdown",
+			"source": [
+				"## Text to audio 📝"
+			],
+			"metadata": {
+				"id": "Qh52G6fBdlCz"
+			}
+		},
+		{
+			"cell_type": "code",
+			"source": [
+				"#@markdown ## Audio Prompts 📝\n",
+				"#@markdown Can be empty.\n",
+				"Description_1 = \"Crazy EDM, heavy bang\" #@param {\"type\": \"string\"}\n",
+				"Description_2 = \"Classic Reggae track with an electronic guitar solo\" #@param {\"type\": \"string\"}\n",
+				"Description_3 = \"LoFi electro chill with organic samples\" #@param {\"type\": \"string\"}\n",
+				"Description_4 = \"Rock with saturated guitars, a heavy bass line and crazy drum break and fills\" #@param {\"type\": \"string\"}\n",
+				"Description_5 = \"Earthy tones, environmentally conscious, ukulele-infused, harmonic, breezy, easygoing, organic instrumentation, gentle grooves\" #@param {\"type\": \"string\"}\n",
+				"Remove_Duplicated_Descriptions = False #@param {\"type\": \"boolean\"}\n",
+				"\n",
+				"#@markdown <br>\n",
+				"\n",
+				"#@markdown ## Generation parameters\n",
+				"#@markdown #### <font color=red>**For developers only!**</font>\n",
+				"Use_ArgMax_Decoding = False #@param {type: \"boolean\"}\n",
+				"Top_P = 250 #@param {type: \"slider\", min: 100, max: 500.0, step: 1}\n",
+				"Top_K = 0.0 #@param {type: \"slider\", min: 0.0, max: 10.0, step: 0.1}\n",
+				"SoftMax_Temperature = 1.0 #@param {type: \"slider\", min: 1.0, max: 10.0, step: 0.1}\n",
+				"CFG_Coefficient = 3.0 #@param {type: \"slider\", min: 1.0, max: 10.0, step: 0.1}\n",
+				"Use_2_CFG = False #@param {type: \"boolean\"}\n",
+				"\n",
+				"#@markdown <br>\n",
+				"\n",
+				"#@markdown ## Music related settings\n",
+				"#@markdown Experimental, do not rely.\n",
+				"BPM = 135 #@param {type: \"slider\", min: 60, max: 170, step: 1}\n",
+				"BPM = f\"{BPM} BPM\", f\"{BPM} Beats Per Minute\"\n",
+				"Time_Signature = \"4/4\" #@param [\"3/4\", \"3/8\", \"4/4\", \"6/8\"]\n",
+				"Time_Signature = f\"{Time_Signature} Time Signature\"\n",
+				"Duration_Seconds = 25 #@param {type: \"slider\", min: 1.0, max: 60.0, step: 0.1}\n",
+				"Target_Key_Note = \"None\" #@param [\"C\", \"C♯ (D♭)\", \"D\", \"D♯ (E♭)\", \"E\", \"F\", \"F♯ (G♭)\", \"G\", \"G♯ (A♭)\", \"A\", \"A♯ (B♭)\", \"B\", \"None\"]\n",
+				"Target_Key_Mode = \"Major\" #@param [\"Major\", \"Minor\", \"Dorian\", \"Phyrgian\", \"Mixolydian\"]\n",
+				"if Target_Key_Note == \"None\": Target_Key_Note = \"\"\n",
+				"Target_Key = \"in {0} {1} key scale\".format(Target_Key_Note, Target_Key_Mode)\n",
+				"\n",
+				"#-=-=-=-#\n",
+				"\n",
+				"if not \"Model\" in locals():\n",
+				"\traise SystemExit(\"Model was not downloaded!\\nPlease run the previous cell.\")\n",
+				"\n",
+				"Descriptions_ = Description_1, Description_2, Description_3, Description_4, Description_5\n",
+				"Descriptions = [\", \".join((Prompt.strip(), *BPM, Time_Signature, Target_Key)) for Prompt in Descriptions_ if Prompt]\n",
+				"\n",
+				"if Remove_Duplicated_Descriptions:\n",
+				"\tDescriptions = list(set(Descriptions))\n",
+				"if len(Descriptions) < 2:\n",
+				"\traise SystemExit(\"Amount of prompts have to be > 1.\")\n",
+				"\n",
+				"Model.set_generation_params(\n",
+				"\t0 if Use_ArgMax_Decoding else 1,\n",
+				"\tTop_K, Top_P, SoftMax_Temperature,\n",
+				"\tDuration_Seconds, CFG_Coefficient, Use_2_CFG\n",
+				")\n",
+				"\n",
+				"#-=-=-=-#\n",
+				"\n",
+				"print(\"Final prompts:\")\n",
+				"print(\"\\t\" + \"\\n\\t\".join(Descriptions))\n",
+				"print(\"\\n\" + \"━\" * 64 + \"\\n\")\n",
+				"\n",
+				"try:\n",
+				"\tResult = Model.generate(Descriptions, progress = 1)\n",
+				"\tdisplay_audio(Result, 32E3)\n",
+				"except KeyboardInterrupt:\n",
+				"\traise SystemExit(\"Cancelled.\")"
+			],
+			"metadata": {
+				"cellView": "form",
+				"id": "r19GIsZNdmO0"
+			}
+		}
+	]
+}