In [1]:
import sys, os

venv_path = os.path.dirname(sys.executable)
ROOT = os.path.abspath(os.path.join(venv_path, "..", ".."))

In [2]:
from music21 import stream, note, meter, clef, key

In [3]:
def parse_semantic_encoding(tokens):
    parsed_stream = stream.Stream()
    
    for token in tokens:
        try:
            if token.startswith("clef-"):
                clef_type = token.split("-")[1]
                parsed_stream.append(clef.clefFromString(clef_type))
            elif token.startswith("keySignature-"):
                key_sig = token.split("-")[1]
                parsed_stream.append(key.Key(key_sig))
            elif token.startswith("timeSignature-"):
                time_sig = token.split("-")[1]
                try:
                    parsed_stream.append(meter.TimeSignature(time_sig))
                except:
                    parsed_stream.append(meter.TimeSignature(time_sig[:-1]))
            elif token.startswith("note-"):
                pitch, duration = token.split("-")[1].split("_")
                parsed_stream.append(note.Note(pitch, quarterLength=parse_duration(duration)))
            elif token.startswith("rest-"):
                duration = token.split("-")[1]
                parsed_stream.append(note.Rest(quarterLength=parse_duration(duration)))
            elif token.startswith("barline"):
                parsed_stream.append(stream.Measure())  # Add a barline
        except:
            continue
    
    return parsed_stream

In [4]:
def parse_duration(duration_token):
    match duration_token:
        case "quarter":
            return 1.0
        case "half":
            return 2.0
        case "whole":
            return 4.0
        case "eighth":
            return 0.5
        case "sixteenth":
            return 0.25
        case "thirty_second":
            return 0.125
        case "sixty_fourth":
            return 0.0625
        case t if "dot" in t:
            base_duration = parse_duration(t.split(".")[0])
            return base_duration * 1.5
        case _:
            return 1.0  # Default to quarter note

In [5]:
semantic_tokens = [
    'clef-G2',
    'timeSignature-C',
    'note-E3_quarter',
    'note-D3_quarter',
    'note-C3_quarter',
    'note-D3_quarter.',
    'barline',
    'note-E3_quarter',
    'note-E3_quarter',
    'note-E3_half',
    'barline',
    'note-D3_quarter',
    'note-D3_quarter',
    'note-D3_half',
    'barline',
    'note-E3_quarter',
    'note-G3_quarter',
    'note-G3_half',
    'barline',
    'note-E3_quarter',
    'note-D3_quarter',
    'note-C3_quarter',
    'note-D3_quarter.',
    'barline',
    'note-E3_quarter',
    'note-E3_quarter',
    'note-E3_quarter',
    'note-E3_quarter',
    'barline',
    'note-D3_quarter',
    'note-D3_quarter',
    'note-E3_quarter',
    'note-D3_quarter',
    'barline',
    'note-C3_whole',
    'barline',
    ''
]

# Parse and play
parsed_stream = parse_semantic_encoding(semantic_tokens)
parsed_stream.show('text')  # Show textual representation
parsed_stream.show('midi')  # Play the music

{0.0} <music21.clef.TrebleClef>
{0.0} <music21.meter.TimeSignature 4/4>
{0.0} <music21.note.Note E>
{1.0} <music21.note.Note D>
{2.0} <music21.note.Note C>
{3.0} <music21.note.Note D>
{4.0} <music21.stream.Measure 0 offset=4.0>

{4.0} <music21.note.Note E>
{5.0} <music21.note.Note E>
{6.0} <music21.note.Note E>
{8.0} <music21.stream.Measure 0 offset=8.0>

{8.0} <music21.note.Note D>
{9.0} <music21.note.Note D>
{10.0} <music21.note.Note D>
{12.0} <music21.stream.Measure 0 offset=12.0>

{12.0} <music21.note.Note E>
{13.0} <music21.note.Note G>
{14.0} <music21.note.Note G>
{16.0} <music21.stream.Measure 0 offset=16.0>

{16.0} <music21.note.Note E>
{17.0} <music21.note.Note D>
{18.0} <music21.note.Note C>
{19.0} <music21.note.Note D>
{20.0} <music21.stream.Measure 0 offset=20.0>

{20.0} <music21.note.Note E>
{21.0} <music21.note.Note E>
{22.0} <music21.note.Note E>
{23.0} <music21.note.Note E>
{24.0} <music21.stream.Measure 0 offset=24.0>

{24.0} <music21.note.Note D>
{25.0} <music21.note.

In [33]:
os.chdir("./CRNN_tf")
from ctc_predict import run

In [34]:
example = f"{ROOT}/CRNN_tf/Data/Example/000051652-1_2_1.png"
agnostic_demo = run(f"{ROOT}/agnostic_vocab.txt", f"{ROOT}/CRNN_tf/models/agnostic_model.meta", f"{ROOT}/JingleBells.png")

I0000 00:00:1733775633.947195   41213 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4277 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


'model_variables' collection should be of type 'byte_list', but instead is of type 'node_list'.
INFO:tensorflow:Restoring parameters from /home/muku/Coursework/Sem_5/Deep Learning/project/DL-2024/CRNN_tf/models/agnostic_model


In [35]:
agnostic_demo

['clef.G-L2',
 'digit.4-S5',
 'digit.4-L4',
 'note.quarter-L-2',
 'note.half-L-1',
 'accidental.natural-L0',
 'note.half-L-2',
 'note.half-L1',
 'barline-L1',
 'note.quarter-L-2',
 'note.quarter-L-2',
 'note.half-L-2',
 'note.half-L1',
 'barline-L1']

In [36]:
os.chdir("..")

In [37]:
os.listdir()

['last',
 'package_aa.csv',
 '.gitignore',
 'JingleBells.png',
 'lacrimosa.png',
 'agnostic_vocab.txt',
 'seq2seq',
 '.git',
 '.venv',
 'demo_img.png',
 '.ipynb_checkpoints',
 'semantic_vocab.txt',
 'Demo.ipynb',
 'utils.py',
 'data.csv',
 'CRNN_torch',
 'package_ab.csv',
 'README.md',
 'requirements.txt',
 'CRNN_tf',
 'PrIMuS',
 'pre-model']

In [38]:
from seq2seq.Model_Testing import run

In [40]:
test = run(agnostic_demo)
test

  df_pre = df_filtered.applymap(lambda x: x.split('\t')[0:-1])


Train size: 70142
Validation size: 8768
Test size: 8768


  model.load_state_dict(torch.load(f"{ROOT}/seq2seq/models/best_model_2_257_at.pt"))


Input String:

digit.4-S5 digit.4-L4 note.quarter-L-2 note.half-L-1 accidental.natural-L0 note.half-L-2 note.half-L1 barline-L1 note.quarter-L-2 note.quarter-L-2 note.half-L-2 note.half-L1

Model Output:

clef-G2 timeSignature-4/2 note-F3_quarter note-A3_half note-A3_half note-E4_half barline note-F3_quarter note-F3_quarter note-D5_half note-E4_half barline



['clef-G2',
 'timeSignature-4/2',
 'note-F3_quarter',
 'note-A3_half',
 'note-A3_half',
 'note-E4_half',
 'barline',
 'note-F3_quarter',
 'note-F3_quarter',
 'note-D5_half',
 'note-E4_half',
 'barline']

In [41]:
parsed_stream = parse_semantic_encoding(test)
parsed_stream.show('text')  # Show textual representation
parsed_stream.show('midi')  # Play the music

{0.0} <music21.clef.TrebleClef>
{0.0} <music21.meter.TimeSignature 4/2>
{0.0} <music21.note.Note F>
{1.0} <music21.note.Note A>
{3.0} <music21.note.Note A>
{5.0} <music21.note.Note E>
{7.0} <music21.stream.Measure 0 offset=7.0>

{7.0} <music21.note.Note F>
{8.0} <music21.note.Note F>
{9.0} <music21.note.Note D>
{11.0} <music21.note.Note E>
{13.0} <music21.stream.Measure 0 offset=13.0>

