In [3]:
import os
import mido
import json
import openai
from datetime import datetime
from dotenv import load_dotenv
from midi2audio import FluidSynth

# Presumably pulls settings (e.g. API keys) from a local .env file into the
# process environment via python-dotenv — confirm which variables the project
# expects. The bare call is the cell's last expression, so its return value is
# what the notebook displays below.
load_dotenv()

True

In [4]:
import re
from typing import List, Union

from pydantic import BaseModel, confloat, conint, constr, validator

# One note in the compact string notation: "<name><octave>-<duration>",
# e.g. "C#4-0.5" or "G3-1".
#   name     - letter A-G with an optional "#" or "b" accidental
#   octave   - a single digit 1-9
#   duration - a non-negative decimal number; the fractional part is optional.
# The duration group is greedy on purpose: the previous lazy form
# (\d+?[.]?\d+?) required at least two digits, so "C4-1" never matched, and it
# stopped after the first fractional digit, so "0.25" was captured as "0.2".
NOTE_REGEX = r"(?P<name>[A-G][#b]?)(?P<octave>[1-9])-(?P<duration>\d+(?:\.\d+)?)"

# A time signature written as "<numerator>/<denominator>", e.g. "3/4" or
# "12/16". Optional spaces around the numbers are tolerated because the
# time-signature validator strips spaces before splitting on "/". Greedy
# quantifiers make the named groups capture whole numbers ("16", not "1").
TIME_SIGNATURE_REGEX = r" *(?P<numerator>\d+) */ *(?P<denominator>\d+)"


class JsonTrackNote(BaseModel):
    """A single note: pitch name, octave and duration."""

    # Pitch letter A-G with an optional "#" or "b" accidental. The pattern is
    # anchored at both ends because the constraint is applied with a prefix
    # match: without "$" a value such as "C#foo" would pass validation and only
    # fail later, when the name is looked up during MIDI conversion.
    name: constr(regex=r"^[A-G][#b]?$")
    # Octave number, restricted to 1-9.
    octave: conint(ge=1, le=9)
    # Non-negative note length (the unit is decided by the consumer of the model).
    duration: confloat(ge=0)


class JsonTrack(BaseModel):
    # Name of the instrument that plays this track.
    instrument: str
    # Either a compact note string matching NOTE_REGEX or a list of note objects.
    notes: Union[constr(regex=NOTE_REGEX, min_length=1), List[JsonTrackNote]]

    @validator("notes")
    def validate_notes(cls, v) -> List[JsonTrackNote]:
        """Normalise ``notes`` into a list of ``JsonTrackNote`` objects.

        A string value is scanned for every NOTE_REGEX occurrence and each
        match becomes one ``JsonTrackNote``; text between matches is ignored.
        A list value is passed through unchanged, and any empty value yields
        an empty list.
        """
        if isinstance(v, str):
            parsed_notes = []
            for match in re.finditer(NOTE_REGEX, v):
                parsed_notes.append(JsonTrackNote.parse_obj(match.groupdict()))
            return parsed_notes
        return v if v else []


class JsonTimeSignature(BaseModel):
    """Time signature expressed as numerator / denominator."""

    # Beats per bar; must fit in one byte.
    numerator: conint(ge=0, le=255)
    # Note value that represents one beat. Zero is rejected here because it is
    # not a meaningful denominator and cannot be encoded in a MIDI
    # time-signature event.
    denominator: conint(ge=1, le=255)

    @validator("denominator")
    def check_denominator_is_power_of_two(cls, v: int) -> int:
        """Reject denominators that a MIDI time-signature event cannot encode.

        MIDI stores the denominator as an exponent of two, so only 1, 2, 4, 8,
        ... are representable. Failing here gives a validation error instead of
        a late failure while the MIDI message is being built.
        """
        # v >= 1 is guaranteed by the field constraint; a power of two has
        # exactly one bit set, so clearing the lowest set bit must give zero.
        if v & (v - 1):
            raise ValueError("time signature denominator must be a power of 2")
        return v


class JsonAudio(BaseModel):
    """A complete score: tempo, time signature and the tracks to play."""

    # Beats per minute. Must be strictly positive: a tempo of 0 used to pass
    # validation and then caused a division by zero when the bpm value was
    # converted to a MIDI tempo.
    tempo: conint(gt=0)
    # Either a "numerator/denominator" string or an explicit object.
    time_signature: Union[constr(regex=TIME_SIGNATURE_REGEX), JsonTimeSignature]
    # Tracks of the score, one per instrument part.
    tracks: List[JsonTrack]

    @validator("time_signature")
    def check_time_signature(cls, v) -> JsonTimeSignature:
        """Turn a "numerator/denominator" string into a ``JsonTimeSignature``.

        Values that are already ``JsonTimeSignature`` objects are returned
        unchanged. Spaces inside the string form are ignored.
        """
        if isinstance(v, str):
            # The field's regex constraint has already guaranteed that the
            # string starts with "<digits>/<digits>", so two parts exist.
            parts = v.replace(" ", "").split("/")
            return JsonTimeSignature(numerator=parts[0], denominator=parts[1])
        return v


In [5]:
import json
import os
from datetime import datetime

import mido
from midi2audio import FluidSynth


class Json2Midi:
    """Convert a validated ``JsonAudio`` score into a ``mido.MidiFile``."""

    # Track name that denotes percussion, and the zero-based MIDI channel that
    # General MIDI reserves for it.
    DRUM_INSTRUMENT = "Drums"
    DRUM_CHANNEL = 9

    # Semitone offset of each natural note from C, and the shift applied by an
    # accidental. Composing the two covers every spelling the note-name
    # pattern allows, including Cb, E#, Fb and B#.
    NATURAL_OFFSETS = {"C": 0, "D": 2, "E": 4, "F": 5, "G": 7, "A": 9, "B": 11}
    ACCIDENTAL_OFFSETS = {"": 0, "#": 1, "b": -1}

    def __init__(self):
        pass

    def convert(self, json_audio: JsonAudio):
        """Build a MIDI file with one MIDI track per score track.

        Every track starts with its name, the score tempo and the score time
        signature, followed by a note_on/note_off pair per note (velocity 64),
        so notes within a track play strictly one after another.

        Note durations are converted with
        ``duration * ticks_per_beat * (tempo / 60)``, i.e. they are treated as
        seconds.

        Args:
            json_audio: The validated score.

        Returns:
            A ``(mid, instrument_to_channel)`` tuple: the ``mido.MidiFile`` and
            the mapping of non-drum instrument names to MIDI channels.

        Raises:
            ValueError: If the score uses more non-drum instruments than there
                are free MIDI channels.
            KeyError: If a note name cannot be converted to a pitch.
        """
        mid = mido.MidiFile()
        instrument_to_channel = self.get_instrument_to_channel_mapping(json_audio)

        for track in json_audio.tracks:
            # Percussion is deliberately absent from the mapping (it has no
            # melodic program to select), so route it to the drum channel
            # instead of failing the dictionary lookup.
            if track.instrument == self.DRUM_INSTRUMENT:
                channel = self.DRUM_CHANNEL
            else:
                channel = instrument_to_channel[track.instrument]

            mid_track = mido.MidiTrack()
            mid_track.append(mido.MetaMessage("track_name", name=track.instrument))
            mid_track.append(
                mido.MetaMessage("set_tempo", tempo=mido.bpm2tempo(json_audio.tempo))
            )
            mid_track.append(
                mido.MetaMessage(
                    "time_signature",
                    numerator=json_audio.time_signature.numerator,
                    denominator=json_audio.time_signature.denominator,
                )
            )

            for note in track.notes:
                pitch = self.parse_note(note)
                # Seconds -> ticks: ticks_per_beat * beats-per-second.
                duration = int(
                    note.duration * mid.ticks_per_beat * (json_audio.tempo / 60)
                )

                # note_on fires immediately after the previous event (delta 0);
                # note_off follows after the note's length in ticks.
                mid_track.append(
                    mido.Message(
                        "note_on", channel=channel, note=pitch, velocity=64, time=0
                    )
                )
                mid_track.append(
                    mido.Message(
                        "note_off",
                        channel=channel,
                        note=pitch,
                        velocity=64,
                        time=duration,
                    )
                )

            mid_track.append(mido.MetaMessage("end_of_track"))
            mid.tracks.append(mid_track)

        return mid, instrument_to_channel

    @staticmethod
    def get_instrument_to_channel_mapping(json_audio: JsonAudio) -> dict:
        """Assign a distinct MIDI channel to every non-drum instrument.

        Channels are handed out in order of first appearance in the score, so
        the mapping is the same on every run. The drum channel is skipped.

        Raises:
            ValueError: If there are more distinct non-drum instruments than
                the 15 available channels.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order.
        instruments = [
            name
            for name in dict.fromkeys(track.instrument for track in json_audio.tracks)
            if name != Json2Midi.DRUM_INSTRUMENT
        ]
        # All 16 MIDI channels (0-15) except the percussion channel.
        channels = [ch for ch in range(16) if ch != Json2Midi.DRUM_CHANNEL]
        if len(instruments) > len(channels):
            raise ValueError(
                f"score uses {len(instruments)} non-drum instruments but only "
                f"{len(channels)} MIDI channels are available"
            )
        return dict(zip(instruments, channels))

    @staticmethod
    def parse_note(note: JsonTrackNote):
        """Convert a note (name + octave) to its MIDI note number.

        Uses ``(octave + 1) * 12 + semitone offset from C``. The offset is the
        natural note's offset plus -1 for "b" or +1 for "#", so enharmonic
        spellings such as Cb (one below C) and B# (one above B) are handled.

        Raises:
            KeyError: If the name's letter or accidental is not recognised.
        """
        letter, accidental = note.name[:1], note.name[1:]
        semitone = (
            Json2Midi.NATURAL_OFFSETS[letter] + Json2Midi.ACCIDENTAL_OFFSETS[accidental]
        )
        return (note.octave + 1) * 12 + semitone


In [6]:
import subprocess
import shlex
import re


def get_file_instruments(sound_font):
    """List the presets of a SoundFont by running the ``fluidsynth`` CLI.

    Runs ``fluidsynth <sound_font> -f list_instruments.txt -in`` and parses
    every ``BBB-NNN name`` entry found on its standard output.

    Args:
        sound_font: Path to the SoundFont file. It is passed to the process as
            a single argument, so paths containing spaces work.

    Returns:
        dict mapping instrument name -> ``{"bank": int, "num": int}``. If a
        name occurs more than once, the last occurrence wins.

    Raises:
        subprocess.TimeoutExpired: If fluidsynth does not finish within 10 s
            (the child process is killed before the exception propagates).
        FileNotFoundError: If the ``fluidsynth`` executable is not on PATH.
    """
    # Build the argument list directly rather than formatting a command string
    # and re-splitting it: splitting would break a path that contains spaces.
    # NOTE(review): list_instruments.txt is resolved relative to the current
    # working directory and is assumed to contain the listing command(s).
    args = ["fluidsynth", sound_font, "-f", "list_instruments.txt", "-in"]

    # subprocess.run kills and reaps the child on timeout, so no process is
    # left behind. stdin is given an immediate EOF; stderr is not captured.
    completed = subprocess.run(
        args,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        text=True,
        timeout=10,
    )

    # NOTE(review): the name group only allows word characters and spaces, so
    # a preset name containing punctuation would be cut at the first such
    # character — confirm against the actual SoundFont listing.
    INSTR_REGEX = r"\n?(?P<bank>\d{3})-(?P<num>\d{3}) (?P<instrument>[\w\d ]+)\n?"
    return {
        match["instrument"]: {"bank": int(match["bank"]), "num": int(match["num"])}
        for match in re.finditer(INSTR_REGEX, completed.stdout)
    }



In [7]:
# get_file_instruments('./FluidR3_GM.sf2')

In [8]:
import wave
import fluidsynth
import numpy as np
from io import BytesIO

class Midi2Wav:
    """Render a ``mido.MidiFile`` to in-memory WAV audio using FluidSynth."""

    def __init__(self, sound_font: str, sample_rate: int = 44100):
        """
        Args:
            sound_font: Path to the SoundFont used both for listing presets
                and for synthesis.
            sample_rate: Output sample rate in Hz (defaults to 44100).
        """
        self.sound_font = sound_font
        self.sample_rate = sample_rate
        # name -> {"bank": int, "num": int} for every preset in the SoundFont.
        self.instruments = get_file_instruments(sound_font)

    def convert(self, mid: mido.MidiFile, instr_to_channel: dict) -> BytesIO:
        """Synthesize ``mid`` and return it as a 16-bit stereo WAV stream.

        Args:
            mid: The MIDI file to render.
            instr_to_channel: Mapping of instrument name -> MIDI channel; each
                name must be a key of ``self.instruments``.

        Returns:
            A ``BytesIO`` positioned at offset 0 containing the WAV data.

        Raises:
            RuntimeError: If the SoundFont cannot be loaded.
            KeyError: If an instrument name is not present in the SoundFont.
        """
        fl = fluidsynth.Synth(samplerate=float(self.sample_rate))
        chunks = []
        try:
            # Load the SoundFont this instance was configured with (not a
            # file name relative to the working directory).
            sfid = fl.sfload(self.sound_font)
            if sfid == -1:
                raise RuntimeError(f"could not load sound font {self.sound_font!r}")

            # Select each instrument's preset using both its bank and number.
            for instr, channel in instr_to_channel.items():
                preset = self.instruments[instr]
                fl.program_select(channel, sfid, preset["bank"], preset["num"])

            # Iterating the file yields the merged messages with ``time`` set
            # to the seconds elapsed since the previous message, without the
            # real-time sleeping that ``play()`` performs. Rendering that gap
            # *before* applying each event keeps timing right for any mix of
            # overlapping notes, tracks and meta messages.
            for msg in mid:
                frames = int(msg.time * self.sample_rate)
                if frames > 0:
                    chunks.append(fl.get_samples(frames))
                if msg.type == "note_on":
                    fl.noteon(msg.channel, msg.note, msg.velocity)
                elif msg.type == "note_off":
                    fl.noteoff(msg.channel, msg.note)
        finally:
            # Always release the native synthesizer, even on failure.
            fl.delete()

        # Join once at the end instead of re-allocating the array per message.
        samples = np.concatenate(chunks) if chunks else np.zeros(0, dtype=np.int16)
        samps = fluidsynth.raw_audio_string(samples)

        file = BytesIO()
        # Closing the wave writer finalises the header but leaves the BytesIO
        # open, because the wave module did not open it itself.
        with wave.open(file, "wb") as wav_file:
            # 2 channels, 2 bytes per sample, uncompressed PCM.
            wav_file.setparams((2, 2, self.sample_rate, 0, "NONE", "not compressed"))
            wav_file.writeframes(samps)

        file.seek(0)
        return file

In [9]:
# Example score: one Violin track at 90 bpm in 3/4. Every note is in octave 4;
# each (name, duration) pair below becomes one note object.
score = {
    "tempo": 90,
    "time_signature": {"numerator": 3, "denominator": 4},
    "tracks": [
        {
            "instrument": "Violin",
            "notes": [
                {"name": pitch_name, "octave": 4, "duration": length}
                for pitch_name, length in [
                    ("E", 0.5), ("D", 0.5), ("C", 0.5), ("D", 0.5),
                    ("E", 0.5), ("E", 0.5), ("E", 0.5), ("D", 0.5),
                    ("D", 0.5), ("D", 0.5), ("E", 0.5), ("G", 0.5),
                    ("G", 0.5), ("E", 0.5), ("D", 0.5), ("C", 1.0),
                ]
            ],
        }
    ],
}

In [10]:
# Validate the example score, convert it to MIDI, then render it to WAV.
json_audio = JsonAudio.parse_obj(score)
midi_converter = Json2Midi()
midi, instr_to_channel = midi_converter.convert(json_audio)

# The SoundFont location is machine-specific. Allow it to be supplied through
# the SOUNDFONT_PATH environment variable so the notebook can run elsewhere;
# the original absolute path is kept as the fallback.
sound_font_path = os.environ.get(
    "SOUNDFONT_PATH",
    "/Users/jeffrycacho/Documents/personal/projects/songGPT/back-end/app/data/soundfonts/FluidR3_GM.sf2",
)
wav_file = Midi2Wav(sound_font=sound_font_path).convert(midi, instr_to_channel)

0 note_on 0 64 64
0.50000025 note_off 0 64 64
0 note_on 0 62 64
0.50000025 note_off 0 62 64
0 note_on 0 60 64
0.50000025 note_off 0 60 64
0 note_on 0 62 64
0.50000025 note_off 0 62 64
0 note_on 0 64 64
0.50000025 note_off 0 64 64
0 note_on 0 64 64
0.50000025 note_off 0 64 64
0 note_on 0 64 64
0.50000025 note_off 0 64 64
0 note_on 0 62 64
0.50000025 note_off 0 62 64
0 note_on 0 62 64
0.50000025 note_off 0 62 64
0 note_on 0 62 64
0.50000025 note_off 0 62 64
0 note_on 0 64 64
0.50000025 note_off 0 64 64
0 note_on 0 67 64
0.50000025 note_off 0 67 64
0 note_on 0 67 64
0.50000025 note_off 0 67 64
0 note_on 0 64 64
0.50000025 note_off 0 64 64
0 note_on 0 62 64
0.50000025 note_off 0 62 64
0 note_on 0 60 64
1.0000005 note_off 0 60 64


In [12]:
# Persist the rendered audio. Create the output directory first so the cell
# also works on a fresh checkout where temp/ does not exist yet.
# NOTE(review): wav_file is expected to be a readable binary stream here —
# confirm that the rendering cell actually produced one.
os.makedirs('temp', exist_ok=True)
wav_file.seek(0)
with open('temp/test.wav', 'wb') as f:
    f.write(wav_file.read())

In [18]:
import mido
import numpy as np
import fluidsynth

# Render `midi` to WAV bytes held in the in-memory buffer `file`.
SOUND_FONT = "FluidR3_GM.sf2"
SAMPLE_RATE = 44100

# Set up fluidsynth Synth object
fl = fluidsynth.Synth(samplerate=float(SAMPLE_RATE))
chunks = []
try:
    sfid = fl.sfload(SOUND_FONT)
    if sfid == -1:
        raise RuntimeError(f"could not load sound font {SOUND_FONT!r}")
    instruments = get_file_instruments(SOUND_FONT)

    # Select each instrument's preset (bank and number) on its channel
    for instr, channel in instr_to_channel.items():
        fl.program_select(
            channel, sfid, instruments[instr]['bank'], instruments[instr]['num']
        )

    # Generate audio data from MIDI messages. Iterating the file yields each
    # message with `time` = seconds since the previous message and, unlike
    # play(), does not sleep in real time. Rendering that gap before applying
    # the event keeps timing right for overlapping notes and multiple tracks.
    for msg in midi:
        frames = int(msg.time * SAMPLE_RATE)
        if frames > 0:
            chunks.append(fl.get_samples(frames))
        if msg.type == 'note_on':
            fl.noteon(msg.channel, msg.note, msg.velocity)
        elif msg.type == 'note_off':
            fl.noteoff(msg.channel, msg.note)
finally:
    # Always release the native synthesizer, even if rendering failed
    fl.delete()

# Join the rendered chunks once instead of re-allocating on every message
s = np.concatenate(chunks) if chunks else np.zeros(0, dtype=np.int16)

# Convert audio data to string
samps = fluidsynth.raw_audio_string(s)

# Write a 2-channel, 16-bit, uncompressed WAV into the buffer; leaving the
# `with` block finalises the header while keeping `file` open for reading
file = BytesIO()
with wave.open(file, 'wb') as wav_file:
    wav_file.setparams((2, 2, SAMPLE_RATE, 0, 'NONE', 'not compressed'))
    wav_file.writeframes(samps)

In [19]:
# Persist the in-memory WAV buffer. Create the output directory first so the
# cell also works on a fresh checkout where temp/ does not exist yet.
os.makedirs('temp', exist_ok=True)
file.seek(0)
with open('temp/test.wav', 'wb') as f:
    f.write(file.read())