In [1]:
import os
import mido
import json
import openai
from datetime import datetime
from dotenv import load_dotenv
from midi2audio import FluidSynth

load_dotenv()

False

In [3]:
import re
from typing import List, Union

from pydantic import BaseModel, confloat, conint, constr, validator

NOTE_REGEX = r"(?P<name>[A-G][#b]?)(?P<octave>[1-9])-(?P<duration>\d+?[.]?\d+?)"
TIME_SIGNATURE_REGEX = r"(?P<numerator>\d+?)/(?P<denominator>\d+?)"


class JsonTrackNote(BaseModel):
    name: constr(regex=r"[A-G][#b]?")
    octave: conint(ge=1, le=9)
    duration: confloat(ge=0)


class JsonTrack(BaseModel):
    instrument: str
    notes: Union[constr(regex=NOTE_REGEX, min_length=1), List[JsonTrackNote]]

    @validator("notes")
    def validate_notes(cls, v) -> List[JsonTrackNote]:
        """Parse notes string into a list of Tuples(name, octave, duration)"""
        if not v:
            return []
        if isinstance(v, str):
            v = [
                JsonTrackNote.parse_obj(m.groupdict())
                for m in re.finditer(NOTE_REGEX, v)
            ]
        return v


class JsonTimeSignature(BaseModel):
    numerator: conint(ge=0, le=255)
    denominator: conint(ge=0, le=255)


class JsonAudio(BaseModel):
    tempo: conint(ge=0)  # bpm - beats per minute
    time_signature: Union[constr(regex=TIME_SIGNATURE_REGEX), JsonTimeSignature]
    tracks: List[JsonTrack]

    @validator("time_signature")
    def check_time_signature(cls, v) -> JsonTimeSignature:
        if isinstance(v, str):
            v = v.replace(" ", "")
            values = v.split("/")
            return JsonTimeSignature(numerator=values[0], denominator=values[1])
        return v


In [41]:
import json
import os
from datetime import datetime

import mido
from midi2audio import FluidSynth


class Json2Midi:
    # instrument_to_channel =
    def __init__(self):
        pass

    def convert(self, json_audio: JsonAudio):
        file_name = "temp"
        # Create a MIDI file object
        mid = mido.MidiFile()

        # create instrument_to_channel mapping
        instrument_to_channel = self.get_instrument_to_channel_mapping(json_audio)

        # Create a track for each track in the score
        for track in json_audio.tracks:
            # Add a track name meta message
            mid_track = mido.MidiTrack()
            mid_track.append(mido.MetaMessage("track_name", name=track.instrument))

            # Set the tempo meta message based on the score tempo
            mid_track.append(
                mido.MetaMessage("set_tempo", tempo=mido.bpm2tempo(json_audio.tempo))
            )

            # Set the time signature meta message based on the score time signature
            mid_track.append(
                mido.MetaMessage(
                    "time_signature",
                    numerator=json_audio.time_signature.numerator,
                    denominator=json_audio.time_signature.denominator,
                )
            )

            # Add note messages for each note in the track
            for note in track.notes:
                # Extract pitch and duration from the note string
                pitch = self.parse_note(note)
                duration = int(
                    note.duration * mid.ticks_per_beat * (json_audio.tempo / 60)
                )

                # Add note on and note off messages with velocity 64 and time 0
                mid_track.append(
                    mido.Message(
                        "note_on",
                        channel=instrument_to_channel[track.instrument],
                        note=pitch,
                        velocity=64,
                        time=0,
                    )
                )
                mid_track.append(
                    mido.Message(
                        "note_off",
                        channel=instrument_to_channel[track.instrument],
                        note=pitch,
                        velocity=64,
                        time=duration,
                    )
                )

            # Add end of track meta message
            mid_track.append(mido.MetaMessage("end_of_track"))

            # Append the track to the MIDI file object
            mid.tracks.append(mid_track)

        return mid

    @staticmethod
    def get_instrument_to_channel_mapping(json_audio: JsonAudio) -> dict:
        instrument_set = set(track.instrument for track in json_audio.tracks) - {
            "Drums"
        }
        channels = list(range(0, 9)) + list(range(10, 15))
        instrument_to_channel = {
            instrument: channel for channel, instrument in zip(channels, instrument_set)
        }
        return instrument_to_channel

    @staticmethod
    def parse_note(note: JsonTrackNote):
        """Convert a pitch name to a MIDI note number."""
        # Define a dictionary of note names and their offsets from C
        note_names = {
            "C": 0,
            "C#": 1,
            "Db": 1,
            "D": 2,
            "D#": 3,
            "Eb": 3,
            "E": 4,
            "F": 5,
            "F#": 6,
            "Gb": 6,
            "G": 7,
            "G#": 8,
            "Ab": 8,
            "A": 9,
            "A#": 10,
            "Bb": 10,
            "B": 11,
        }

        # Calculate the MIDI note number based on the formula:
        # note_number = (octave + 1) * 12 + note_names[note_name]
        note_number = (note.octave + 1) * 12 + note_names[note.name]

        # Return the MIDI note number
        return note_number


In [42]:
# import json

# with open("./output/2023_03_03_15_11_07/labyrinth.json", "r") as f:
#     score = json.load(f)

In [44]:
score = {
    "tempo": 120,
    "time_signature": "4/4",
    "tracks": [
        {
            "instrument": "Yamaha Grand Piano",
            "notes": "E5-0.5, F#5-0.5, G5-0.5, A5-0.5, G5-1.0"
        },
        {
            "instrument": "Violin",
            "notes": "E2-1.0, A2-1.0, E2-1.0, B2-1.0, E2-1.0"
        },
        {
            "instrument": "Harp",
            "notes": "B2-0.25, B2-0.25, B2-0.25, B2-0.25"
        }
    ]
} 

In [55]:
import subprocess
import shlex
import re


def get_file_instruments(sound_font):
    command = f'fluidsynth {sound_font}'
    input_str = 'inst 1'

    args = shlex.split(command)
    process = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
    output, _ = process.communicate(input=input_str)
    process.kill()

    INSTR_REGEX = r"\n(?P<bank>\d{3})-(?P<num>\d{3}) (?P<instrument>[\w\d ]+)\n"
    matches = [m.groupdict() for m in re.finditer(INSTR_REGEX, output)]
    instruments = {m['instrument']: {"bank": int(m["bank"]), "num": int(m["num"])}  for m in matches}
    return instruments

In [64]:
from io import BytesIO
import mido
import tempfile
import subprocess

class Midi2Wav:
    def __init__(self, sound_font: str):
        self.sound_font = sound_font
        self.instruments = get_file_instruments(sound_font)


    def convert(self, mid: mido.MidiFile, instr_to_channel: dict) -> BytesIO:
        with tempfile.NamedTemporaryFile(dir=".", suffix=".mid", delete=True) as midi_file:
            with tempfile.NamedTemporaryFile(dir=".", suffix=".wav", delete=True) as wav_file:
                with tempfile.NamedTemporaryFile(dir=".", suffix=".sh", mode="w", delete=True) as config_file:
                    # Save the mid object to the temporary MIDI file
                    mid.save(midi_file.name)
                    # Convert the MIDI file to a WAV file
                    for instr, channel in instr_to_channel.items():
                        config_file.write(f"prog {channel} {self.instruments[instr]['num']}\n")
                    
                    
                    subprocess.call(
                        ['fluidsynth', '-ni', '-g', "0.2", self.sound_font, midi_file.name, '-F', wav_file.name, '-r', "44100", "-f", config_file.name], 
                        stdout=None, 
                    )
                    # fluidsynth FluidR3_GM.sf2 midi_file.name -F wav_file.name -r 44100 -f config_file.name
                    # Return a FileIO object containing the WAV data
                    return BytesIO(wav_file.read())
                
    
                


In [65]:
json_audio = JsonAudio.parse_obj(score)
midi_converter = Json2Midi()
midi = midi_converter.convert(json_audio)
wav_file = Midi2Wav(sound_font="FluidR3_GM.sf2").convert(
        midi, 
        Json2Midi.get_instrument_to_channel_mapping(json_audio)
)

fluidsynth: panic: An error occurred while reading from stdin.


FluidSynth runtime version 2.3.1
Copyright (C) 2000-2022 Peter Hanappe and others.
Distributed under the LGPL license.
SoundFont(R) is a registered trademark of Creative Technology Ltd.

Rendering audio to file '/Users/jeffrycacho/Documents/personal/projects/songGPT/notebooks/tmp1lbrdc8m.wav'..
