In [None]:
import os, miditoolkit
import numpy as np
import os, pickle
import collections
import subprocess

In [None]:
# Note position bins (in ticks): a 30-tick grid merged with a 40-tick grid.
double_positions_bins = set(range(0, 64 * 30, 30))
triplet_positions_bins = set(range(0, 48 * 40, 40))
positions_bins = sorted(double_positions_bins.union(triplet_positions_bins))  # union of both grids

# Duration bins; default resolution = 480 ticks per beat.
double_duration = {30 * step for step in range(1, 257)}
triplet_duration = {40, 80, 160, 320, 640}
duration_bins = sorted(double_duration.union(triplet_duration))

In [None]:
def build_dict(save_path, positions=None, durations=None):
    """Build the prosody/melody vocabularies and pickle them to ``save_path``.

    For every vocabulary class ('Phrase', 'CPProsody', 'Prosody', 'Strength',
    'Length', 'Token', 'Bar', 'Pos', 'Pitch', 'Dur') an ordered token list is
    assembled, always starting with the special tokens '<s>', '<pad>', '</s>'.
    The index of a token in its list is its vocabulary id, so the order of the
    lists below must not be changed.

    Args:
        save_path: Output directory. Anything already at this path is deleted
            and the directory is recreated empty before writing.
        positions: Iterable of position values used for the ``Pos_*`` tokens.
            Defaults to the notebook-level ``positions_bins``.
        durations: Iterable of duration values used for the ``Dur_*`` tokens.
            Defaults to the notebook-level ``duration_bins``.

    Returns:
        Tuple ``(event2word, word2event)``. Both are dicts keyed by class name;
        ``event2word[cls]`` maps token -> id and ``word2event[cls]`` maps
        id -> token. The same tuple is pickled to
        ``{save_path}/music_dict.pkl``.

    Raises:
        OSError: If the existing path cannot be removed or the directory or
            pickle file cannot be created.
    """
    import shutil  # function-scope import keeps this cell self-contained

    # Fall back to the notebook-level bins only when none are passed in, so
    # the function can also be used without relying on earlier cells.
    if positions is None:
        positions = positions_bins
    if durations is None:
        durations = duration_bins

    ## Create output directories ##
    # Pure-Python removal instead of `rm -rf` through a shell: portable, and
    # safe for paths containing quotes, `$` or other shell metacharacters.
    if os.path.lexists(save_path):
        if os.path.isdir(save_path) and not os.path.islink(save_path):
            shutil.rmtree(save_path)
        else:
            # plain file or symlink: remove the entry itself
            os.remove(save_path)
    os.makedirs(save_path)

    ## Shared building blocks ##
    special_tokens = [
        '<s>',    ## start of sequence
        '<pad>',  ## padding symbol
        '</s>',   ## end of sequence
    ]
    strengths = ['<strong>', '<substrong>', '<weak>']   ## syllable strength symbols
    lengths = ['<long>', '<short>']                     ## syllable length symbols
    ## prosody symbols
    prosody = [
        '<strong, long>',
        '<strong, short>',
        '<substrong, long>',
        '<substrong, short>',
        '<weak, long>',
        '<weak, short>'
    ]
    # Materialise once so `positions` / `durations` may be any iterable.
    pos_tokens = [f"Pos_{pos}" for pos in positions]
    pitch_tokens = [f"Pitch_{pitch}" for pitch in range(0, 128)]
    dur_tokens = [f"Dur_{dur}" for dur in durations]
    bar_tokens = [f"Bar_{i}" for i in range(0, 256)]

    ## Create melody dictionary ##
    # Key order is kept because it drives the printed report and the key order
    # of the pickled dicts.
    melody_dict = {
        ## phrase boundary symbols
        'Phrase': special_tokens + ['<true>', '<false>'],
        'CPProsody': special_tokens + prosody,
        ## ------------ Prosody Template -----------------
        'Prosody': special_tokens + strengths + lengths + ["<sent>"],
        'Strength': special_tokens + strengths,
        'Length': special_tokens + lengths,
        ## ------------ Melody Attribute Symbols -----------------
        'Token': (special_tokens + ["Bar"] + pos_tokens + pitch_tokens
                  + dur_tokens + ["<sent>"]),
        'Bar': special_tokens + bar_tokens,
        'Pos': special_tokens + pos_tokens,
        'Pitch': special_tokens + pitch_tokens,
        'Dur': special_tokens + dur_tokens,
    }

    for k, v in melody_dict.items():
        print(f"{k:<15s} : {v}\n")

    # melody dictionary: token -> id and id -> token for every class
    event2word = {
        cls: {token: idx for idx, token in enumerate(tokens)}
        for cls, tokens in melody_dict.items()
    }
    word2event = {
        cls: dict(enumerate(tokens))
        for cls, tokens in melody_dict.items()
    }

    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(f'{save_path}/music_dict.pkl', 'wb') as dict_file:
        pickle.dump((event2word, word2event), dict_file)

    # print dictionary size
    print('Melody Dict [class size]')
    for key in melody_dict:
        print('> {:20s} : {}'.format(key, len(event2word[key])))

    return event2word, word2event

In [None]:
dict_dir = './binary'  ## replace with your own path
# Bind the result: a bare call as the cell's last expression would echo both
# full vocabularies as cell output and then discard them.
event2word, word2event = build_dict(dict_dir)