# Setup

In [None]:
# (Re)load IPython's autoreload extension and switch it to mode 2.
# NOTE(review): mode 2 presumably re-imports edited modules before each cell
# runs, which would cover the local `musicgen` package imported further down — confirm.
%reload_ext autoreload
%autoreload 2

from IPython.core.interactiveshell import InteractiveShell
# NOTE(review): "all" should make IPython echo every top-level expression of a
# cell, not only the last one — likely why a later cell assigns `_ = ...click(...)`.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
from google.colab import drive

# Attach Google Drive under /content/gdrive, the root used by the SYS_* paths.
# NOTE(review): force_remount=True presumably re-mounts even when a mount from
# an earlier run is still present — confirm against the Colab docs.
drive.mount(
    '/content/gdrive',
    force_remount=True,
)

Mounted at /content/gdrive


In [None]:
# Root of the project on the mounted Drive; every other location hangs off it.
SYS_PROJECT_DIR  = '/content/gdrive/MyDrive/Colab Notebooks/musicgen'

# Stage-specific sub-folders, derived from the root so that moving the project
# means editing exactly one line (the values are identical to the old literals).
SYS_INPUT_DIR    = f'{SYS_PROJECT_DIR}/input'
SYS_DP_DIR       = f'{SYS_PROJECT_DIR}/dp'
SYS_MODELING_DIR = f'{SYS_PROJECT_DIR}/modeling'
SYS_OUTPUT_DIR   = f'{SYS_PROJECT_DIR}/output'

In [None]:
import os
import pandas as pd
import shutil

# Lift pandas' display limits so frames are rendered in full.
pd.options.display.max_columns = None   # no cap on the number of columns shown
pd.options.display.width = None         # no fixed line width for the output
pd.options.display.max_colwidth = None  # no truncation of long cell contents

In [None]:
import locale

# Force every "preferred encoding" query in this process to report UTF-8.
# The replacement keeps the optional `do_setlocale` parameter of the real
# function, so callers that pass it — e.g. locale.getpreferredencoding(False) —
# keep working instead of raising TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

# Installs

In [None]:
!pip install -q miditoolkit
!pip install -q music21

!apt-get install fluidsynth | grep -E "error|warning"
!apt-get install libfluidsynth3 | grep -E "error|warning"
!apt-get install fluid-soundfont-gm | grep -E "error|warning"
!pip install -q pyfluidsynth

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Extracting templates from packages: 100%
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pyfluidsynth (setup.py) ... [?25l[?25hdone


In [None]:
# General-purpose dependencies, installed quietly (-q).
# NOTE(review): none of these is version-pinned, unlike the modeling stack below.
!pip install -q pandas
!pip install -q pydantic
!pip install -q pydantic_argparse

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Modeling stack, each package pinned to an exact version and installed quietly (-q).
!pip install -q transformers==4.38.2
!pip install -q datasets==2.18.0
!pip install -q accelerate==0.28.0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.1/290.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Packages used by the generation/UI cells: note_seq and gradio are imported
# below; bokeh and gradio are pinned, note_seq is not.
# NOTE(review): these three run without -q, unlike the other install cells.
!pip install bokeh==2.4.3
!pip install note_seq
!pip install gradio==3.48.0

In [None]:
import sys
import os

# Put the project root on the import path exactly once, so packages stored
# there (presumably the local `musicgen` package imported below) can be found.
musicgen_utils_path = os.path.join(SYS_PROJECT_DIR)
if sys.path.count(musicgen_utils_path) == 0:
    sys.path.append(musicgen_utils_path)

# Deployment - Generate song

## Generate

In [None]:
from typing import List, Tuple
from matplotlib.figure import Figure
from numpy import ndarray
import note_seq
from musicgen.generation.string_to_notes import token_sequence_to_note_sequence
from musicgen.generation.constants import GM_INSTRUMENTS, SAMPLE_RATE
from musicgen.generation.model import get_model_and_tokenizer
import gradio as gr

In [None]:
from pathlib import Path
# Load the model and its tokenizer from the "model" folder inside the
# modeling directory; both are used as globals by the generation functions.
model_path = Path(SYS_MODELING_DIR) / "model"
model, tokenizer = get_model_and_tokenizer(model_path)

In [None]:
# ### $$$ Visualize model structure
# model

In [None]:
def create_seed_string(genre: str = "OTHER") -> str:
    """
    Build the token prefix used to prompt the model for a new piece.

    Args:
        genre (str, optional): Genre to condition on. The special value
            "RANDOM" produces a seed without any genre or track token.
            Defaults to "OTHER".
    Returns:
        str: "PIECE_START" for "RANDOM", otherwise
            "PIECE_START GENRE=<genre> TRACK_START".
    """
    # Guard clause: any concrete genre gets the full conditioned prefix.
    if genre != "RANDOM":
        return f"PIECE_START GENRE={genre} TRACK_START"
    return "PIECE_START"

In [None]:
def get_instruments(text_sequence: str) -> List[str]:
    """
    Collect the instrument names referenced by a token sequence.

    Every whitespace-separated token starting with "INST=" contributes one
    entry, in order of appearance (duplicates are kept).

    Args:
        text_sequence (str): The token sequence to scan.
    Returns:
        List[str]: "Drums" for "INST=DRUMS"; otherwise the GM_INSTRUMENTS entry
            at the integer index that follows "INST=".
    """
    prefix = "INST="
    names: List[str] = []
    for token in text_sequence.split():
        if not token.startswith(prefix):
            continue
        value = token[len(prefix):]
        # Drums are spelled out; everything else is an index into GM_INSTRUMENTS.
        names.append("Drums" if value == "DRUMS" else GM_INSTRUMENTS[int(value)])
    return names

In [None]:
def generate_new_instrument(seed: str, temperature: float = 0.75) -> str:
    """
    Generates a new instrument sequence from a given seed and temperature.

    Sampling is repeated until the newly generated part (everything after the
    seed tokens) contains "NOTE_ON", so a track without notes is never
    returned. There is no upper bound on the number of attempts.

    Uses the module-level `tokenizer` and `model`.

    Args:
        seed (str): The seed string for the generation.
        temperature (float, optional): The temperature for the generation, which controls the randomness. Defaults to 0.75.
    Returns:
        str: The decoded full sequence (seed followed by the generated tokens).
    """
    # Everything that depends only on the seed is computed once, not per attempt.
    seed_length = len(tokenizer.encode(seed))

    # Encode the conditioning tokens and move them to the same device as the model.
    input_ids = tokenizer.encode(seed, return_tensors="pt").to(model.device)

    # Generation stops at the first "TRACK_END" token.
    eos_token_id = tokenizer.encode("TRACK_END")[0]

    while True:
        # Generate more tokens.
        generated_ids = model.generate(
            input_ids,
            max_new_tokens=2048,
            do_sample=True,
            temperature=temperature,
            eos_token_id=eos_token_id,
        )
        sequence_ids = generated_ids[0]

        # Accept the attempt only if it contains "NOTE_ON" beyond the seed.
        if "NOTE_ON" in tokenizer.decode(sequence_ids[seed_length:]):
            return tokenizer.decode(sequence_ids)

In [None]:
def get_outputs_from_string(
    generated_sequence: str, qpm: int = 120
) -> Tuple[ndarray, str, Figure, str, str]:
    """
    Render a generated token sequence into everything needed to present it.

    :param generated_sequence: Whitespace-separated tokens produced by the model.
    :type generated_sequence: str
    :param qpm: Tempo in quarter notes per minute.
    :type qpm: int, optional
    :return: ``(audio, note_sequence, fig, instruments_str, num_tokens)`` where
            ``audio`` is the value returned by ``gr.make_waveform``,
            ``note_sequence`` comes from ``token_sequence_to_note_sequence``,
            ``fig`` from ``note_seq.plot_sequence``, ``instruments_str`` is a
            "- name" list with one instrument per line and ``num_tokens`` is
            the token count as a string.
    :rtype: Tuple[ndarray, str, Figure, str, str]

    NOTE(review): the annotated element types look stale — the first two
    elements are whatever ``gr.make_waveform`` and
    ``token_sequence_to_note_sequence`` return; confirm and update.
    """
    # Pure string work first: token count and the instrument bullet list.
    num_tokens = str(len(generated_sequence.split()))
    bullet_lines = [f"- {name}" for name in get_instruments(generated_sequence)]
    instruments_str = "\n".join(bullet_lines)

    note_sequence = token_sequence_to_note_sequence(generated_sequence, qpm=qpm)

    # Synthesize float samples, then convert them to int16 for the waveform.
    float_samples = note_seq.fluidsynth(note_sequence, sample_rate=SAMPLE_RATE)
    int16_samples = note_seq.audio_io.float_samples_to_int16(float_samples)

    fig = note_seq.plot_sequence(note_sequence, show_figure=False)
    audio = gr.make_waveform((SAMPLE_RATE, int16_samples))

    return audio, note_sequence, fig, instruments_str, num_tokens

In [None]:
def convert_mid_audio(midi_file, out_type='wav', sf2 : str = '/usr/share/sounds/sf2/FluidR3_GM.sf2'):
    """
    Convert a single midi file to an audio file with the fluidsynth command-line tool.

    The audio file is written next to the midi file, using the same base name
    and `out_type` as its extension.

    Args:
        midi_file (str | Path): the file path for the .mid midi file to convert
        out_type (str):         the output audio type (see 'fluidsynth -T help' for options)
        sf2 (str):              the file path for a .sf2 soundfont file
    Returns:
        Path: the path of the audio file
    Raises:
        FileNotFoundError: if the fluidsynth executable cannot be found
        subprocess.CalledProcessError: if fluidsynth exits with a non-zero status
    """
    import os
    import subprocess
    from pathlib import Path

    # Accept plain strings as well as Path objects; `.parent` needs a Path.
    midi_path = Path(midi_file)
    fbase = os.path.splitext(midi_path.name)[0]
    out_file = midi_path.parent / (fbase + '.' + out_type)

    command = [
        'fluidsynth',
        '-T', out_type,
        '-F', str(out_file),
        '-ni', str(sf2),
        str(midi_path),
    ]
    # check=True surfaces a failed conversion here instead of handing the caller
    # the path of a file that was never written.
    subprocess.run(command, check=True)

    return out_file

In [None]:
def generate_audio(
    genre: str,
    temperature: float,
    text_sequence: str,
    qpm: int,
    midi_file_path: Path
) -> dict:
    """
    Generates a song and writes it to disk as a MIDI file plus a rendered audio file.

    Args:
        genre (str): The genre used to build the seed. Only used when `text_sequence` is "".
        temperature (float): The temperature for the generation, which controls the randomness.
        text_sequence (str): The initial text sequence for the song. Pass "" to seed from `genre`.
        qpm (int): The quarter notes per minute.
        midi_file_path (Path): Where the MIDI file is written. Missing parent folders are
                               created; `convert_mid_audio` derives the audio path from it.
    Returns:
        dict: A dictionary with the keys
            'audio_file_path'    - the path returned by `convert_mid_audio`,
            'instruments_str'    - the instruments string,
            'generated_sequence' - the generated song string,
            'num_tokens'         - the number of tokens string,
            'duration'           - the end time reported by pretty_midi for the MIDI file.
    """

    import pretty_midi

    seed_string = text_sequence if text_sequence != "" else create_seed_string(genre)

    # Create the target folder up front: a fresh project has no
    # output/<musician> folder yet, and failing after generation wastes the run.
    midi_path = Path(midi_file_path)
    midi_path.parent.mkdir(parents=True, exist_ok=True)

    generated_sequence = generate_new_instrument(seed=seed_string, temperature=temperature)
    # The waveform and the plot are not needed here; only the note sequence and
    # the two summary strings are used.
    _, note_sequence, _, instruments_str, num_tokens = get_outputs_from_string(
        generated_sequence, qpm
    )

    note_seq.note_sequence_to_midi_file(note_sequence, str(midi_path))
    # Re-read the file that was just written to obtain its end time.
    midi_data = pretty_midi.PrettyMIDI(str(midi_path))
    audio_file_path = convert_mid_audio(midi_path)

    return {
        'audio_file_path': audio_file_path,
        'instruments_str': instruments_str,
        'generated_sequence': generated_sequence,
        'num_tokens': num_tokens,
        'duration': midi_data.get_end_time(),
    }

In [None]:
from pathlib import Path

# Output location: <SYS_OUTPUT_DIR>/<musician>/<midi file name>.
data_musician = "Roxette"
midi_file_name = "output.mid"
# Reuse the shared output-directory constant instead of re-spelling
# "<project>/output" here; the resulting path is the same.
midi_file_path = Path(SYS_OUTPUT_DIR, data_musician, midi_file_name)

# Keyword arguments for generate_audio(). An empty text_sequence means the
# seed is built from `genre`.
generate_audio_config = {
    'genre' : "OTHER",
    'temperature' : 0.75,
    'text_sequence' : "",
    'qpm' : 120,
    'midi_file_path' : midi_file_path
}

In [None]:
# ### $$$ Test generate_audio
# audio_data = generate_audio(**generate_audio_config)

# Interface

In [None]:
import gradio as gr

# Initial values for the text areas, defined by some variables
data_musician = "Roxette"

def generate_audio_interface():
    """
    Gradio click handler: run one generation and unpack the result for the UI.

    Calls generate_audio() with the module-level generate_audio_config.

    Returns:
        tuple: (audio file path, instruments string, generated sequence,
            duration, number of tokens) — the order of the `outputs` list the
            button is wired to.
    """
    result = generate_audio(**generate_audio_config)
    field_order = (
        'audio_file_path',
        'instruments_str',
        'generated_sequence',
        'duration',
        'num_tokens',
    )
    return tuple(result[field] for field in field_order)

# Build the Gradio UI: a controls row, a metadata row and a sequence row.
with gr.Blocks() as app:
    # Row 1: fixed musician name, the trigger button and the audio player.
    with gr.Row():
        ta_musician = gr.Textbox(
            label="Musician",
            value=data_musician,
            lines=1,
            interactive=False,
        )
        btn_generate_audio = gr.Button("Generate Audio")
        output_audio = gr.Audio(label="Play Audio", type="filepath")

    # Row 2: read-only facts about the generated piece.
    with gr.Row():
        ta_instruments = gr.Textbox(
            label="Instruments",
            placeholder="Instruments will be displayed here",
            lines=1,
            interactive=False,
        )
        ta_duration = gr.Textbox(
            label="Duration (sec)",
            placeholder="",
            lines=1,
            interactive=False,
        )
        ta_num_tokens = gr.Textbox(
            label="Number of tokens",
            placeholder="",
            lines=1,
            interactive=False,
        )

    # Row 3: the raw generated token sequence.
    with gr.Row():
        ta_sequence = gr.Textbox(
            label="Sequence",
            placeholder="Sequence will be displayed here",
            lines=5,
            interactive=False,
        )

    # The outputs are listed in the order generate_audio_interface returns them.
    _ = btn_generate_audio.click(
        fn=generate_audio_interface,
        inputs=[],
        outputs=[output_audio, ta_instruments, ta_sequence, ta_duration, ta_num_tokens],
    )

# Serve the app through a public share link; with debug=True the cell keeps
# running (showing errors and logs) until it is interrupted.
app.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://62e2fcffb73244c28c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


  y1 = self.convert_yunits(self._y0 + self._height)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7887 <> https://62e2fcffb73244c28c.gradio.live




# Sleep

In [None]:
import time
# Block the kernel for 7200 seconds (2 hours).
# NOTE(review): presumably a keep-alive so the Colab runtime stays busy after
# the app cell is interrupted — confirm it is still needed.
time.sleep(7200)