In [1]:
import os
import mido
import json
import openai
from datetime import datetime
from dotenv import load_dotenv
from midi2audio import FluidSynth

load_dotenv()

False

In [7]:
!echo "inst 1" | fluidsynth FluidR3_GM.sf2 

FluidSynth runtime version 2.3.1
Copyright (C) 2000-2022 Peter Hanappe and others.
Distributed under the LGPL license.
SoundFont(R) is a registered trademark of Creative Technology Ltd.

Type 'help' for help topics.

> inst 1
000-000 Yamaha Grand Piano
000-001 Bright Yamaha Grand
000-002 Electric Piano
000-003 Honky Tonk
000-004 Rhodes EP
000-005 Legend EP 2
000-006 Harpsichord
000-007 Clavinet
000-008 Celesta
000-009 Glockenspiel
000-010 Music Box
000-011 Vibraphone
000-012 Marimba
000-013 Xylophone
000-014 Tubular Bells
000-015 Dulcimer
000-016 DrawbarOrgan
000-017 Percussive Organ
000-018 Rock Organ
000-019 Church Organ
000-020 Reed Organ
000-021 Accordian
000-022 Harmonica
000-023 Bandoneon
000-024 Nylon String Guitar
000-025 Steel String Guitar
000-026 Jazz Guitar
000-027 Clean Guitar
000-028 Palm Muted Guitar
000-029 Overdrive Guitar
000-030 Distortion Guitar
000-031 Guitar Harmonics
000-032 Acoustic Bass
000-033 Fingered Bass
000-034 Picked Bass
000-035 Fretless Bass
000-036 Sla

In [31]:
import re
from typing import List, Union

from pydantic import BaseModel, confloat, conint, constr, validator

NOTE_REGEX = r"(?P<name>[A-G][#b]?)(?P<octave>[1-9])-(?P<duration>\d+?[.]?\d+?)"
TIME_SIGNATURE_REGEX = r"(?P<numerator>\d+?)/(?P<denominator>\d+?)"


class JsonTrackNote(BaseModel):
    name: constr(regex=r"[A-G][#b]?")
    octave: conint(ge=1, le=9)
    duration: confloat(ge=0)


class JsonTrack(BaseModel):
    instrument: str
    notes: Union[constr(regex=NOTE_REGEX, min_length=1), List[JsonTrackNote]]

    @validator("notes")
    def validate_notes(cls, v) -> List[JsonTrackNote]:
        """Parse notes string into a list of Tuples(name, octave, duration)"""
        if not v:
            return []
        if isinstance(v, str):
            v = [
                JsonTrackNote.parse_obj(m.groupdict())
                for m in re.finditer(NOTE_REGEX, v)
            ]
        return v


class JsonTimeSignature(BaseModel):
    numerator: conint(ge=0, le=255)
    denominator: conint(ge=0, le=255)


class JsonAudio(BaseModel):
    tempo: conint(ge=0)  # bpm - beats per minute
    time_signature: Union[constr(regex=TIME_SIGNATURE_REGEX), JsonTimeSignature]
    tracks: List[JsonTrack]

    @validator("time_signature")
    def check_time_signature(cls, v) -> JsonTimeSignature:
        if isinstance(v, str):
            v = v.replace(" ", "")
            values = v.split("/")
            return JsonTimeSignature(numerator=values[0], denominator=values[1])
        return v


In [43]:
import json
import os
from datetime import datetime

import mido
from midi2audio import FluidSynth


class Json2Midi:
    # instrument_to_channel =
    def __init__(self):
        pass

    def convert(self, json_audio: JsonAudio):
        file_name = "temp"
        # Create a MIDI file object
        mid = mido.MidiFile()

        # create instrument_to_channel mapping
        instrument_to_channel = self.get_instrument_to_channel_mapping(json_audio)

        # Create a track for each track in the score
        for track in json_audio.tracks:
            # Add a track name meta message
            mid_track = mido.MidiTrack()
            mid_track.append(mido.MetaMessage("track_name", name=track.instrument))

            # Set the tempo meta message based on the score tempo
            mid_track.append(
                mido.MetaMessage("set_tempo", tempo=mido.bpm2tempo(json_audio.tempo))
            )

            # Set the time signature meta message based on the score time signature
            mid_track.append(
                mido.MetaMessage(
                    "time_signature",
                    numerator=json_audio.time_signature.numerator,
                    denominator=json_audio.time_signature.denominator,
                )
            )

            # Add note messages for each note in the track
            for note in track.notes:
                # Extract pitch and duration from the note string
                pitch = self.parse_note(note)
                duration = int(
                    note.duration * mid.ticks_per_beat * (json_audio.tempo / 60)
                )

                # Add note on and note off messages with velocity 64 and time 0
                mid_track.append(
                    mido.Message(
                        "note_on",
                        channel=instrument_to_channel[track.instrument],
                        note=pitch,
                        velocity=64,
                        time=0,
                    )
                )
                mid_track.append(
                    mido.Message(
                        "note_off",
                        channel=instrument_to_channel[track.instrument],
                        note=pitch,
                        velocity=64,
                        time=duration,
                    )
                )

            # Add end of track meta message
            mid_track.append(mido.MetaMessage("end_of_track"))

            # Append the track to the MIDI file object
            mid.tracks.append(mid_track)

        return mid

        # Create output directory with all relevant resources
        output_directory = f"./output/{datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}"
        os.mkdir(output_directory)
        json.dump(score, open(f"{output_directory}/{file_name}.json", "w"), indent=4)
        # Save the MIDI file object as "labyrinth_timestamp.mid"
        mid.save(f"{output_directory}/{file_name}.mid")
        # Create a FluidSynth object with a sound font file
        # fs = FluidSynth("Essential Keys-sforzando-v9.6.sf2")
        fs = FluidSynth("FluidR3_GM.sf2")
        # Convert the MIDI file to a WAV file
        fs.midi_to_audio(
            f"{output_directory}/{file_name}.mid", f"{output_directory}/{file_name}.wav"
        )

    @staticmethod
    def get_instrument_to_channel_mapping(json_audio: JsonAudio) -> dict:
        instrument_set = set(track.instrument for track in json_audio.tracks) - {
            "Drums"
        }
        channels = list(range(0, 9)) + list(range(10, 15))
        instrument_to_channel = {
            instrument: channel for channel, instrument in zip(channels, instrument_set)
        }
        instrument_to_channel["Drums"] = 0  # set Drums to channel 9
        return instrument_to_channel

    @staticmethod
    def parse_note(note: JsonTrackNote):
        """Convert a pitch name to a MIDI note number."""
        # Define a dictionary of note names and their offsets from C
        note_names = {
            "C": 0,
            "C#": 1,
            "Db": 1,
            "D": 2,
            "D#": 3,
            "Eb": 3,
            "E": 4,
            "F": 5,
            "F#": 6,
            "Gb": 6,
            "G": 7,
            "G#": 8,
            "Ab": 8,
            "A": 9,
            "A#": 10,
            "Bb": 10,
            "B": 11,
        }

        # Calculate the MIDI note number based on the formula:
        # note_number = (octave + 1) * 12 + note_names[note_name]
        note_number = (note.octave + 1) * 12 + note_names[note.name]

        # Return the MIDI note number
        return note_number


In [44]:
# import json

# with open("./output/2023_03_03_15_11_07/labyrinth.json", "r") as f:
#     score = json.load(f)

In [106]:
score = {
    "tempo": 120,
    "time_signature": "4/4",
    "tracks": [
        {
            "instrument": "Yamaha Grand Piano",
            "notes": "E5-0.5, F#5-0.5, G5-0.5, A5-0.5, G5-1.0"
        },
        # {
        #     "instrument": "Bass",
        #     "notes": "E2-1.0, A2-1.0, E2-1.0, B2-1.0, E2-1.0"
        # },
        # {
        #     "instrument": "Drums",
        #     "notes": "B2-0.25, B2-0.25, B2-0.25, B2-0.25"
        # }
    ]
} 

In [107]:
from io import BytesIO
import mido
from midi2audio import FluidSynth
import tempfile

class Midi2Wav:
    def __init__(self, sound_font: str):
        self.fs = FluidSynth(sound_font)

    def convert(self, mid: mido.MidiFile) -> BytesIO:
        with tempfile.NamedTemporaryFile(dir=".", suffix=".mid", delete=True) as midi_file:
            with tempfile.NamedTemporaryFile(dir=".", suffix=".wav", delete=True) as wav_file:
                # Save the mid object to the temporary MIDI file
                mid.save(midi_file.name)
                # Convert the MIDI file to a WAV file
                self.fs.midi_to_audio(midi_file.name, wav_file.name)
                # Return a FileIO object containing the WAV data
                return BytesIO(wav_file.read())



In [108]:
json_audio = JsonAudio.parse_obj(score)
midi_converter = Json2Midi()
midi = midi_converter.convert(json_audio)
wav_file = Midi2Wav(sound_font="FluidR3_GM.sf2").convert(midi)

FluidSynth runtime version 2.3.1
Copyright (C) 2000-2022 Peter Hanappe and others.
Distributed under the LGPL license.
SoundFont(R) is a registered trademark of Creative Technology Ltd.

Rendering audio to file '/Users/jeffrycacho/Documents/personal/projects/songGPT/notebooks/tmpx534f1fg.wav'..


In [109]:
import subprocess
import shlex
import re


def get_file_instruments(sound_font):
    command = f'fluidsynth {sound_font}'
    input_str = 'inst 1'

    args = shlex.split(command)
    process = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
    output, _ = process.communicate(input=input_str)
    process.kill()

    INSTR_REGEX = r"\n(?P<bank>\d{3})-(?P<num>\d{3}) (?P<instrument>[\w\d ]+)\n"
    matches = [m.groupdict() for m in re.finditer(INSTR_REGEX, output)]
    instruments = {m['instrument']: {"bank": int(m["bank"]), "num": int(m["num"])}  for m in matches}
    return instruments

In [123]:
import time
import wave
import fluidsynth
from io import BytesIO
import numpy as np

# Initialize FluidSynth
fs = fluidsynth.Synth()
fs.start()

# Load a SoundFont
sound_font = 'FluidR3_GM.sf2'

instruments = get_file_instruments(sound_font)
sfid = fs.sfload(sound_font)


total_duration = 0
# Set instrument for each track
for track in json_audio.tracks:
    instrument_name = track.instrument
    instrument_num = instruments[instrument_name]["num"] # function to get MIDI program number for instrument name
    fs.program_select(0, sfid, 0, instrument_num)

    # Play notes for each track
    track_duration = 0
    for note in track.notes:
        # Convert note name and octave to MIDI note number
        midi_note_num = Json2Midi.parse_note(note) # function to convert note name and octave to MIDI note number
        track_duration += note.duration * 60 / json_audio.tempo
        
        # Play the note
        fs.noteon(0, midi_note_num, 127)
        time.sleep(track_duration)
        fs.noteoff(0, midi_note_num)

    total_duration = max(total_duration, track_duration)


    
# Get raw audio data from FluidSynth
raw_audio = fs.get_samples(int(44100 * total_duration))
audio = np.array(np.frombuffer(raw_audio, dtype='int16'), dtype='float32') / 32767.0

# Save audio to .wav file
with wave.open('output.wav', 'wb') as wav_file:
    wav_file.setparams((1, 2, 44100, 0, 'NONE', 'not compressed'))
    wav_file.writeframes(audio.tobytes())

# Clean up FluidSynth instance
fs.delete()


fluidsynth: error: Unknown integer parameter 'synth.sample-rate'
fluidsynth: panic: An error occurred while reading from stdin.
