In [81]:
# check the percentages of random same-bar chords in generated without constraints
import pandas as pd
import numpy as np

In [82]:
bart_cs_df = pd.read_csv('tokenized/no_bart_beam_7_temp_0x0/ChordSymbolTokenizer.csv')
bart_pc_df = pd.read_csv('tokenized/no_bart_beam_7_temp_0x0/PitchClassTokenizer.csv')
gpt_cs_df = pd.read_csv('tokenized/no_gpt_beam_7_temp_0x0/ChordSymbolTokenizer.csv')
gpt_pc_df = pd.read_csv('tokenized/no_gpt_beam_7_temp_0x0/PitchClassTokenizer.csv')

In [83]:
def is_sublist_contiguous(q, d):
    q_len = len(q)
    for i in range(len(d) - q_len + 1):
        if d[i:i + q_len] == q:
            return True
    return False

In [84]:
def check_random_chord_coinsidence(real, gen, mode='cs'):
    real_tokens = real.split()
    gen_tokens = gen.split()
    # get number of bars
    num_bars = min(real_tokens.count('<bar>') , gen_tokens.count('<bar>') )
    if num_bars <= 0:
        return 0
    # assume no chord token as constraint so far
    chord_token = None
    position_token = None
    pcs = []
    # Find indices of all bars
    real_indices = [i for i, val in enumerate(real_tokens) if val == '<bar>']
    gen_indices = [i for i, val in enumerate(gen_tokens) if val == '<bar>']
    bar_index = -1
    tries = 10
    chord_token = None
    while chord_token == None and tries > 0:
        tries -= 1
        # get a random bar
        rand_bar_num = np.random.randint( num_bars )
        # REAL: Get the index of the rand_bar_num occurrence (zero-based index)
        if len(real_indices) > rand_bar_num+1:
            real_bar_index = real_indices[rand_bar_num]
            real_next_bar_index = real_indices[rand_bar_num+1]
        else:
            # check if there are any bars at all
            if len(real_indices) == 0:
                print('This piece has no bars.')
                break
            # the last bar
            real_bar_index = real_indices[-1]
            real_next_bar_index = len(real_tokens)
        # get all tokens between rand_bar and its next
        real_bar_tokens = real_tokens[real_bar_index:real_next_bar_index]
        # check if bar has a chord
        i = 0
        while i < len(real_bar_tokens):
            if 'position_' in real_bar_tokens[i]:
                # keep position token
                position_token = real_bar_tokens[i]
                if mode == 'cs':
                    if i+1 < len(real_bar_tokens):
                        chord_token = real_bar_tokens[i+1]
                    break
                elif mode == 'pc':
                    if i+1 < len(real_bar_tokens):
                        i += 1
                        while i < len(real_bar_tokens) and 'bar' not in real_bar_tokens[i] and \
                            'position' not in real_bar_tokens[i] and \
                            '</s>' not in real_bar_tokens[i]:
                            if 'chord_pc_' in real_bar_tokens[i]:
                                pcs.append( real_bar_tokens[i] )
                            i += 1
            i += 1
            if position_token is not None:
                break
        # end bar while
    # end tries while
    if mode == 'cs':
        if position_token is not None and chord_token is not None:
            real_target = [position_token, chord_token]
        else:
            real_target = ['x']
    elif mode == 'pc':
        if position_token is not None and len(pcs) > 0:
            real_target = [position_token] + pcs
        else:
            real_target = ['x']
    # GEN: Get the index of the rand_bar_num occurrence (zero-based index)
    if len(gen_indices) > rand_bar_num+1:
        gen_bar_index = gen_indices[rand_bar_num]
        gen_next_bar_index = gen_indices[rand_bar_num+1]
    else:
        # check if there are any bars at all
        if len(gen_indices) == 0:
            print('This piece has no bars.')
        # the last bar
        gen_bar_index = gen_indices[-1]
        gen_next_bar_index = len(gen_tokens)
    # get all tokens between rand_bar and its next
    gen_bar_tokens = gen_tokens[gen_bar_index:gen_next_bar_index]
    
    res = is_sublist_contiguous(real_target, gen_bar_tokens)
    return res

In [85]:
def get_success_of_df(df_in, mode='cs'):
    successes = 0
    total = 0
    for i in range( len(df_in['real']) ):
        successes += check_random_chord_coinsidence( df_in['real'].iloc[i], df_in['generated'].iloc[i] , mode=mode )
        total += 1
    return successes/total

In [97]:
bart_cs_successes = 0
bart_pc_successes = 0
gpt_cs_successes = 0
gpt_pc_successes = 0
total_runs = 100

for i in range( total_runs ):
    print(i, end='\r')
    bart_cs_successes += get_success_of_df(bart_cs_df, mode='cs')/total_runs
    bart_pc_successes += get_success_of_df(bart_pc_df, mode='pc')/total_runs
    gpt_cs_successes += get_success_of_df(gpt_cs_df, mode='cs')/total_runs
    gpt_pc_successes += get_success_of_df(gpt_pc_df, mode='pc')/total_runs

print('bart_cs_successes:', bart_cs_successes)
print('bart_pc_successes:', bart_pc_successes)
print('gpt_cs_successes:', gpt_cs_successes)
print('gpt_pc_successes:', gpt_pc_successes)

bart_cs_successes: 0.27292763157894734
bart_pc_successes: 0.20171710526315795
gpt_cs_successes: 0.22133552631578945
gpt_pc_successes: 0.2181184210526315
