In [1]:
# --- User options ---
# Name of the harmony tokenizer to use. It must be one of the keys of the
# `tokenizers` dictionary defined in a later cell, for example:
#   'ChordSymbolTokenizer', 'RootTypeTokenizer', 'PitchClassTokenizer'
tokenizer_name = 'PitchClassTokenizer'
# Folder holding one CSV per tokenizer; the file loaded later is
# csvs_folder + tokenizer_name + '.csv'.
csvs_folder = 'tokenized/gpt/'

In [2]:
from data_utils import MergedMelHarmDataset, PureGenCollator
import os
import numpy as np
from harmony_tokenizers_m21 import ChordSymbolTokenizer, RootTypeTokenizer, \
    PitchClassTokenizer, RootPCTokenizer, GCTRootPCTokenizer, \
    GCTSymbolTokenizer, GCTRootTypeTokenizer, MelodyPitchTokenizer, \
    MergedMelHarmTokenizer
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Registry of the available harmony tokenizer classes, keyed by the name
# that `tokenizer_name` is expected to hold.
tokenizers = dict(
    ChordSymbolTokenizer=ChordSymbolTokenizer,
    RootTypeTokenizer=RootTypeTokenizer,
    PitchClassTokenizer=PitchClassTokenizer,
    RootPCTokenizer=RootPCTokenizer,
    GCTRootPCTokenizer=GCTRootPCTokenizer,
    GCTSymbolTokenizer=GCTSymbolTokenizer,
    GCTRootTypeTokenizer=GCTRootTypeTokenizer,
)

In [4]:
# Load the saved melody tokenizer from its folder under saved_tokenizers/.
melody_tokenizer = MelodyPitchTokenizer.from_pretrained('saved_tokenizers/MelodyPitchTokenizer')

# Pick the harmony tokenizer class selected in the user options and load its
# saved state from the folder that carries the same name.
harmony_tokenizer_cls = tokenizers[tokenizer_name]
harmony_tokenizer = harmony_tokenizer_cls.from_pretrained('saved_tokenizers/' + tokenizer_name)

# Combine both tokenizers into the merged melody + harmony tokenizer.
tokenizer = MergedMelHarmTokenizer(melody_tokenizer, harmony_tokenizer)

In [5]:
# Read the CSV that belongs to the selected tokenizer:
# <csvs_folder><tokenizer_name>.csv
csv_path = csvs_folder + tokenizer_name + '.csv'
c = pd.read_csv(csv_path)

In [6]:
# Take the first row of the CSV, split its 'melody' and 'real' columns on
# whitespace, and concatenate the two token lists (melody tokens first).
melody_tokens = c['melody'].iloc[0].split()
real_tokens = c['real'].iloc[0].split()
x = melody_tokens + real_tokens
print(x)

['<s>', '<bar>', 'ts_4x4', 'position_0x00', 'P:69', 'position_2x00', 'P:69', 'position_2x25', 'P:71', 'position_2x50', 'P:72', '<bar>', 'position_0x00', 'P:74', 'position_2x00', 'P:74', 'position_2x25', 'P:76', 'position_2x50', 'P:77', '<bar>', 'position_0x00', 'P:69', 'position_2x00', 'P:69', 'position_2x25', 'P:71', 'position_2x50', 'P:72', '<bar>', 'position_0x00', 'P:74', 'position_2x00', 'P:74', 'position_2x25', 'P:72', 'position_2x50', 'P:71', 'position_2x75', 'P:69', 'position_3x00', '<rest>', '<bar>', 'position_0x00', 'P:69', 'position_2x00', 'P:69', 'position_2x25', 'P:71', 'position_2x50', 'P:72', '<bar>', 'position_0x00', 'P:74', 'position_2x00', 'P:74', 'position_2x25', 'P:76', 'position_2x50', 'P:77', '<bar>', 'position_0x00', 'P:69', 'position_2x00', 'P:69', 'position_2x25', 'P:71', 'position_2x50', 'P:72', '<bar>', 'position_0x00', 'P:74', 'position_2x00', 'P:74', 'position_2x25', 'P:72', 'position_2x50', 'P:71', 'position_2x75', 'P:69', 'position_3x00', '<rest>', '<h>',

In [7]:
# Decode the combined melody + 'real' token list with the merged tokenizer.
# The recorded output below shows chord labels followed by a music21 stream
# listing; whether these are printed inside decode() or come from its return
# value is not visible from this notebook — TODO confirm.
tokenizer.decode(x)

['A', 'min']
['C', 'maj']
['D', 'min']
['F', 'maj']
['A', 'min']
['C', 'maj']
['D', 'min']
['F', 'maj']
['A', 'min']
['C', 'maj']
['D', 'min']
['F', 'maj']
['A', 'min']
['C', 'maj']
['D', 'min']
['F', 'maj']
{0.0} <music21.stream.Measure 1 offset=0.0>
    {0.0} <music21.meter.TimeSignature 4/4>
    {0.0} <music21.note.Note A>
    {2.0} <music21.note.Note A>
    {2.25} <music21.note.Note B>
    {2.5} <music21.note.Note C>
{4.0} <music21.stream.Measure 2 offset=4.0>
    {0.0} <music21.note.Note D>
    {2.0} <music21.note.Note D>
    {2.25} <music21.note.Note E>
    {2.5} <music21.note.Note F>
{8.0} <music21.stream.Measure 3 offset=8.0>
    {0.0} <music21.note.Note A>
    {2.0} <music21.note.Note A>
    {2.25} <music21.note.Note B>
    {2.5} <music21.note.Note C>
{12.0} <music21.stream.Measure 4 offset=12.0>
    {0.0} <music21.note.Note D>
    {2.0} <music21.note.Note D>
    {2.25} <music21.note.Note C>
    {2.5} <music21.note.Note B>
    {2.75} <music21.note.Note A>
    {3.0} <music21.no