In [1]:
import os

import torch

import GridMLM_tokenizers
from GridMLM_tokenizers import CSGridMLMTokenizer
from data_utils import CSGridMLMDataset
from models import make_graph_from_input_ids, remove_consecutive_duplicates, remove_out_of_dict_ids, compute_edge_features

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tokenizer instance, created with fixed_length=256.
tokenizer = CSGridMLMTokenizer(fixed_length=256)

# Chord-name -> feature-list table exposed by the tokenizer module.
chord_features = GridMLM_tokenizers.CHORD_FEATURES

# The same table re-keyed by vocabulary id, so it can be indexed directly
# with the token ids that appear in the dataset.
chord_id_features = dict(
    (tokenizer.vocab[chord_name], feature_vector)
    for chord_name, feature_vector in chord_features.items()
)
print(chord_features)

{'C:maj': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], 'C#:maj': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], 'D:maj': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], 'D#:maj': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], 'E:maj': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], 'F:maj': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0], 'F#:maj': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0], 'G:maj': [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1], 'G#:maj': [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0], 'A:maj': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], 'A#:maj': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0], 'B:maj': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1]

In [3]:
# Spot-check two entries of the name-keyed table, one per output line.
for chord_name in ('C:maj', 'D:maj'):
    print(chord_features[chord_name])

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0]


In [4]:
# Look a chord up by token id rather than by name. In the recorded run the
# printed list equals chord_features['C:maj'].
# NOTE(review): the literal 7 depends on the tokenizer's vocabulary order —
# confirm it still maps to the intended chord if the vocabulary changes.
print(chord_id_features[7])

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]


In [5]:
# Root folder of the Hooktheory MIDI data. Set the HOOKTHEORY_DATA_DIR
# environment variable to point the notebook at another machine's copy
# instead of editing this cell; the fallback is the location this cell
# used before. (The previously commented-out alternative was
# /mnt/ssd2/maximos/data/hooktheory_midi_hr.)
DATA_ROOT = os.environ.get('HOOKTHEORY_DATA_DIR', '/media/maindisk/data/hooktheory_midi_hr')

# Train / validation folders live side by side under the data root.
train_dir = os.path.join(DATA_ROOT, 'CA_train')
val_dir = os.path.join(DATA_ROOT, 'CA_test')

In [6]:
# Only the validation split is loaded in this notebook; no training dataset
# is built here. Options are passed through to the dataset class unchanged.
val_dataset = CSGridMLMDataset(
    val_dir,
    tokenizer,
    frontloading=True,
    name_suffix='Q4_L80_bar_PC',
)

Loading data file.


In [7]:
# Inspect one raw dataset item. In the recorded output the item is a dict
# with the keys 'harmony_ids', 'attention_mask', 'pianoroll',
# 'time_signature' and 'h_density_complexity'.
print(val_dataset[0])

{'harmony_ids': [6, 269, 269, 269, 269, 6, 152, 152, 66, 66, 6, 269, 269, 269, 269, 6, 152, 152, 66, 66, 6, 269, 269, 269, 269, 6, 152, 152, 66, 66, 6, 269, 269, 269, 269, 6, 152, 152, 66, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'pianoroll': array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), 'time_signature': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 'h_density_complexity': [0, 1, 0, 0, 1, 0, 0, 0]}


In [8]:
# Use the harmony token ids of validation item 6 as the working example.
# The sequence is kept raw here, i.e. it still contains repeated ids.
chord_id_duplicates_sequence = val_dataset[6]['harmony_ids']

In [9]:
# Show the raw id sequence before any clean-up.
# NOTE(review): the trailing run of 1s looks like padding and the recurring 6
# like a bar/separator token — confirm against the tokenizer vocabulary.
print(chord_id_duplicates_sequence)

[6, 7, 7, 7, 210, 6, 210, 210, 210, 210, 6, 269, 269, 269, 13, 6, 13, 13, 13, 13, 6, 152, 152, 152, 152, 6, 152, 152, 152, 152, 6, 7, 7, 7, 7, 6, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [10]:
# 1) Collapse runs of identical neighbouring ids.
chord_ids_sequence = remove_consecutive_duplicates(chord_id_duplicates_sequence)
print(chord_ids_sequence)

# 2) Filter the sequence against chord_id_features (per the recorded output,
#    ids without an entry in the table are dropped).
chord_ids_sequence = remove_out_of_dict_ids(chord_ids_sequence, chord_id_features)
print(chord_ids_sequence)

# 3) Distinct ids. The order is whatever set iteration yields, which is not
#    necessarily the order of first appearance in the sequence.
unique_chord_ids_sequence = [*set(chord_ids_sequence)]
print(unique_chord_ids_sequence)

# 4) One report line per distinct id: id, chord name, feature list.
for c in unique_chord_ids_sequence:
    chord_name = tokenizer.ids_to_tokens[c]
    print(c, chord_name, chord_id_features[c])

[6, 7, 210, 6, 210, 6, 269, 13, 6, 13, 6, 152, 6, 152, 6, 7, 6, 7, 1]
[7, 210, 210, 269, 13, 13, 152, 152, 7, 7]
[7, 13, 269, 210, 152]
7 C:maj [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
13 C:7 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0]
269 A:min [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]
210 G:maj [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1]
152 F:maj [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]


In [11]:
# Take the first two distinct chord ids as the (source, destination) pair.
src_id, dst_id = unique_chord_ids_sequence[0], unique_chord_ids_sequence[1]

# Their feature lists as integer tensors, source first.
src_feat, dst_feat = (
    torch.LongTensor(chord_id_features[chord_id])
    for chord_id in (src_id, dst_id)
)

# Edge features for the src -> dst pair, as computed by the models helper.
feats = compute_edge_features(src_feat, dst_feat)

# Report both endpoints (id, chord name, tensor), then the edge features.
for endpoint_id, endpoint_feat in ((src_id, src_feat), (dst_id, dst_feat)):
    print(endpoint_id, tokenizer.ids_to_tokens[endpoint_id], endpoint_feat)
print('Features: ', feats)

7 C:maj tensor([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
13 C:7 tensor([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0])
Features:  tensor([0.0000, 0.7500])


In [12]:
# Build a graph from the raw (still duplicated) id sequence and the
# id -> feature table.
# NOTE(review): the effect of use_probabilities=True is defined inside
# models.make_graph_from_input_ids and is not visible here — confirm there.
# Uncomment the next line to try a tiny hand-written sequence instead:
# chord_id_duplicates_sequence = [10,11,12,10]
g = make_graph_from_input_ids(chord_id_duplicates_sequence, chord_id_features, use_probabilities=True)

In [13]:
# Input sequence first, for side-by-side comparison with the graph.
print(chord_id_duplicates_sequence)

# Then each graph field in turn: node features, node-index -> chord-id map,
# edge index, edge attributes and edge weights.
for graph_field in ('x', 'node_ids', 'edge_index', 'edge_attr', 'edge_weight'):
    print(getattr(g, graph_field))

[6, 7, 7, 7, 210, 6, 210, 210, 210, 210, 6, 269, 269, 269, 13, 6, 13, 13, 13, 13, 6, 152, 152, 152, 152, 6, 152, 152, 152, 152, 6, 7, 7, 7, 7, 6, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0.,
         0., 1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0.,
         0., 1., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0.,
         0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 1., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
         0., 0., 0., 1., 0., 0.]])
{0: 7, 1: 13, 2: 269, 3: 210, 4: 152}
tensor([[0, 3, 3, 2, 1, 1, 4, 4, 0],
        [3, 3, 2, 1, 1, 4, 4, 0, 0]])
tensor([[0.1667, 0.2000],
       