In [50]:
import os, miditoolkit
import numpy as np
import os, pickle
import collections
import subprocess

In [51]:
# Position
double_positions_bins = set([i * 30 for i in range(0, 64)])
triplet_positions_bins = set([i * 40 for i in range(0, 48)])
positions_bins = sorted((double_positions_bins | triplet_positions_bins))  # 并集

# duration bins, default resol = 480 ticks per beat
double_duration = set([i * 30 for i in range(1, 257)])
triplet_duration = set([40, 80, 160, 320, 640])
duration_bins = list(sorted(double_duration | triplet_duration))

In [52]:
def build_dict(save_path):
    # create save dir
    if os.path.exists(save_path):
        subprocess.check_call(f'rm -rf "{save_path}"', shell=True)  # 运行由args参数提供的命令，等待命 执行结束并返回返回码。
        os.makedirs(save_path)
    else:
        os.makedirs(save_path)
    
    # create melody dictionary
    melody_dict = collections.defaultdict(list)
    
    special_tokens = [
        '<pad>',
        '<s>', ## start of sequence
        '</s>', ## end of sequence
        '<unk>', ## unknown
    ]
    strengths = ['<strong>', '<substrong>', '<weak>']
    lengths = ['<long>', '<short>']
    prosody = [
        '<strong, long>',
        '<strong, short>',
        '<substrong, long>',
        '<substrong, short>',
        '<weak, long>',
        '<weak, short>'
    ]
    
    ## phrase boundary
    for st in special_tokens:
        melody_dict['Phrase'].append(st)
    melody_dict['Phrase'].append('<true>')
    melody_dict['Phrase'].append('<false>')
    
    for st in special_tokens:
        melody_dict['CPProsody'].append(st)
    for pt in prosody:
        melody_dict['CPProsody'].append(pt)
        
        
    max_sent_num = 128
    
    ## ------------ Prosody Template -----------------
    ## special tokens
    for st in special_tokens:
        melody_dict['Prosody'].append(st)
    ## prosody tokens
    for x in strengths:
        melody_dict['Prosody'].append(x)
    for x in lengths:
        melody_dict['Prosody'].append(x)
    """
    ## for basic melody
    for pitch in range (0, 128):
        melody_dict['Prosody'].append(f"Pitch_{pitch}")
    ## for rhythm templates
    ## bar
    melody_dict['Prosody'].append(f"Bar")
    ## position
    for pos in positions_bins:
        melody_dict['Prosody'].append(f"Pos_{pos}")
    """
    ## note numbers
    """
    for x in range(200):
        melody_dict['Prosody'].append(f"<note_{x}>")
    """
    ## for basic melody
    for pitch in range (0, 128):
        melody_dict['Prosody'].append(f"Pitch_{pitch}")
    ## for phrase boundary
    """
    for pid in range(max_sent_num):
        melody_dict['Prosody'].append(f"<sent_{pid}>")
    """
    melody_dict['Prosody'].append(f"<sent>")
    
    
    ## strength
    for st in special_tokens:
        melody_dict['Strength'].append(st)
    for s in strengths:
        melody_dict['Strength'].append(s)
        
    ## length
    for st in special_tokens:
        melody_dict['Length'].append(st)
    for l in lengths:
        melody_dict['Length'].append(l)
    
    ## phrases
    """
    for st in special_tokens:
        melody_dict['Phrase'].append(st)
    for i in range(294):
        melody_dict['Phrase'].append(f"Phrase_{i}")
        """;
    
    for st in special_tokens:
        melody_dict['Token'].append(st)
    # for i in range (0, 256):
    melody_dict['Token'].append(f"Bar")
    for pos in positions_bins:
        melody_dict['Token'].append(f"Pos_{pos}")
    for pitch in range (0, 128):
        melody_dict['Token'].append(f"Pitch_{pitch}")
    for dur in duration_bins:
        melody_dict['Token'].append(f"Dur_{dur}") 
    """
    for pid in range(max_sent_num):
        melody_dict['Token'].append(f"<sent_{pid}>")
    """
    melody_dict['Token'].append(f"<sent>")
    
    ## bar
    for st in special_tokens:
        melody_dict['Bar'].append(st)
    for i in range (0, 256):
        melody_dict['Bar'].append(f"Bar_{i}")
    
    ## position
    for st in special_tokens:
        melody_dict['Pos'].append(st)
    for pos in positions_bins:
        melody_dict['Pos'].append(f"Pos_{pos}")
    
    ## pitch
    for st in special_tokens:
        melody_dict['Pitch'].append(st)
    for pitch in range (0, 128):
        melody_dict['Pitch'].append(f"Pitch_{pitch}")
    
    ## duration
    for st in special_tokens:
        melody_dict['Dur'].append(st)
    for dur in duration_bins:
        melody_dict['Dur'].append(f"Dur_{dur}") 

    for k, v in melody_dict.items():
         print(f"{k:<15s} : {v}\n")
  
    # melody dictionary
    event2word, word2event = {}, {}
    melody_class = melody_dict.keys()

    for cls in melody_class:
        event2word[cls] = {v:k for k,v in enumerate(melody_dict[cls])}
        word2event[cls] = {k:v for k,v in enumerate(melody_dict[cls])}
            
    pickle.dump((event2word, word2event), open(f'{save_path}/music_dict.pkl', 'wb'))
    
    # print
    print('Melody Dict [class size]')
    for key in melody_class:
        print('> {:20s} : {}'.format(key, len(event2word[key])))

    return event2word, word2event

In [53]:
build_dict('/home/qihao/CS6207/binary')

Phrase          : ['<pad>', '<s>', '</s>', '<unk>', '<true>', '<false>']

CPProsody       : ['<pad>', '<s>', '</s>', '<unk>', '<strong, long>', '<strong, short>', '<substrong, long>', '<substrong, short>', '<weak, long>', '<weak, short>']

Prosody         : ['<pad>', '<s>', '</s>', '<unk>', '<strong>', '<substrong>', '<weak>', '<long>', '<short>', 'Pitch_0', 'Pitch_1', 'Pitch_2', 'Pitch_3', 'Pitch_4', 'Pitch_5', 'Pitch_6', 'Pitch_7', 'Pitch_8', 'Pitch_9', 'Pitch_10', 'Pitch_11', 'Pitch_12', 'Pitch_13', 'Pitch_14', 'Pitch_15', 'Pitch_16', 'Pitch_17', 'Pitch_18', 'Pitch_19', 'Pitch_20', 'Pitch_21', 'Pitch_22', 'Pitch_23', 'Pitch_24', 'Pitch_25', 'Pitch_26', 'Pitch_27', 'Pitch_28', 'Pitch_29', 'Pitch_30', 'Pitch_31', 'Pitch_32', 'Pitch_33', 'Pitch_34', 'Pitch_35', 'Pitch_36', 'Pitch_37', 'Pitch_38', 'Pitch_39', 'Pitch_40', 'Pitch_41', 'Pitch_42', 'Pitch_43', 'Pitch_44', 'Pitch_45', 'Pitch_46', 'Pitch_47', 'Pitch_48', 'Pitch_49', 'Pitch_50', 'Pitch_51', 'Pitch_52', 'Pitch_53', 'Pitch_54', 

({'Phrase': {'<pad>': 0,
   '<s>': 1,
   '</s>': 2,
   '<unk>': 3,
   '<true>': 4,
   '<false>': 5},
  'CPProsody': {'<pad>': 0,
   '<s>': 1,
   '</s>': 2,
   '<unk>': 3,
   '<strong, long>': 4,
   '<strong, short>': 5,
   '<substrong, long>': 6,
   '<substrong, short>': 7,
   '<weak, long>': 8,
   '<weak, short>': 9},
  'Prosody': {'<pad>': 0,
   '<s>': 1,
   '</s>': 2,
   '<unk>': 3,
   '<strong>': 4,
   '<substrong>': 5,
   '<weak>': 6,
   '<long>': 7,
   '<short>': 8,
   'Pitch_0': 9,
   'Pitch_1': 10,
   'Pitch_2': 11,
   'Pitch_3': 12,
   'Pitch_4': 13,
   'Pitch_5': 14,
   'Pitch_6': 15,
   'Pitch_7': 16,
   'Pitch_8': 17,
   'Pitch_9': 18,
   'Pitch_10': 19,
   'Pitch_11': 20,
   'Pitch_12': 21,
   'Pitch_13': 22,
   'Pitch_14': 23,
   'Pitch_15': 24,
   'Pitch_16': 25,
   'Pitch_17': 26,
   'Pitch_18': 27,
   'Pitch_19': 28,
   'Pitch_20': 29,
   'Pitch_21': 30,
   'Pitch_22': 31,
   'Pitch_23': 32,
   'Pitch_24': 33,
   'Pitch_25': 34,
   'Pitch_26': 35,
   'Pitch_27': 36,
  

In [45]:
# Create a new tokenizer with the desired vocabulary
"""
templ_vocab = ['<pad>', '<s>', '</s>', '<unk>', 
               '<strong,long>', '<substrong,long>', '<weak,long>', 
               '<strong,short>', '<substrong,short>', '<weak,short>', 
               '<stop>']
"""
templ_vocab = ['<pad>', '<s>', '</s>', '<unk>', 
               '<strong>', '<substrong>', '<weak>', 
               '<long>', '<short>', 
               '<stop>']
templ_tokenizer = tokenizers.CharBPETokenizer()
templ_tokenizer.train_from_iterator(templ_vocab)

NameError: name 'tokenizers' is not defined