# Sequence Dictionary

This python notebook will determine the frequency of an n-long sequence in a text file formatted as in the List Generator notebook.

In [1]:
def build_n_sequence_dictionary(n : int, txt : str) -> dict:
    """
    Given a length n, returns a dictionary containing sequences
    in txt of length n mapped to the number of times they appear
    in txt.
    """
    seq_dict = dict()
    
    blocks = txt.split('\n')
    for block in blocks:
        elems = block.split(',')
        for i in range(len(elems) - n):
            seq = elems[i:i+n]
            seq_dict[tuple(seq)] = seq_dict.get(tuple(seq), 0) + 1
            
    return seq_dict            

In [2]:
# test

txt = "vi,V,I,IV,I,V,V,i,V,i,VI,V,i,V,i,i,i,I,IV,ii,vii,I,vi,I,I,IV,vii,I,vi,vi,I,IV,IV,I,I,IV,ii,V,I,V,V,ii,ii,vi,IV,V,I,I,V,V,ii,VI,VI,iv,V,V,i,V,I,V,IV,I,vi,V,I,I,i,i,VII,VII,iv,VI,i,V,i,ii,V,I,vi,V,vi,V,vi,i,V,I,V,ii,V,I,V,vi,i,i,V,i,V,I,i,V,i,V,i,V,I,ii,I,ii,V,I,I,vi,V,II,V,I,IV,ii,vii,V,i,V,I,i,V,i,V,I,ii,I,ii,V,I,ii,I,ii,V,I,I,vi,vi,II,V,I,vi,V,i,V,I,i,i,i,i,i,ii,V,i,V,i,i,i,v,IV,III,V,i,V,i,i,II,III,v,IV,i,vii,i,i,V,V,V,I,vii,I,I,vii,I,V,V,i,VII,v,i,V,V,i,V,I,I,V,vi,V,IV,ii,V,i,V,i,I,IV,IV,I,ii,I,ii,V,i,V,i,V,vi,ii,V,IV,vii,iii,IV,V,V,i,V,iii,vi,ii,V,I,I,vi,ii,I,IV,V,I,V,I,IV,V,vi,iii,i,V,i,VII,v,iv,III,iv,iv,III,I,ii,v,i,V,I"

seq_dict = build_n_sequence_dictionary(3, txt)
# print(seq_dict)
print(sorted(seq_dict.items(), key=lambda kv:(kv[1], kv[0]), reverse=True))

[(('V', 'i', 'V'), 16), (('i', 'V', 'i'), 12), (('i', 'V', 'I'), 8), (('ii', 'V', 'I'), 7), (('V', 'I', 'I'), 6), (('i', 'i', 'i'), 5), (('V', 'V', 'i'), 5), (('V', 'I', 'V'), 5), (('ii', 'I', 'ii'), 4), (('I', 'vi', 'V'), 4), (('I', 'ii', 'V'), 4), (('I', 'ii', 'I'), 4), (('I', 'V', 'V'), 4), (('ii', 'V', 'i'), 3), (('V', 'i', 'i'), 3), (('V', 'I', 'ii'), 3), (('V', 'I', 'i'), 3), (('V', 'I', 'IV'), 3), (('I', 'IV', 'ii'), 3), (('I', 'I', 'vi'), 3), (('vii', 'I', 'vi'), 2), (('vi', 'ii', 'V'), 2), (('vi', 'V', 'vi'), 2), (('vi', 'V', 'I'), 2), (('v', 'i', 'V'), 2), (('i', 'ii', 'V'), 2), (('i', 'i', 'V'), 2), (('i', 'VII', 'v'), 2), (('i', 'V', 'V'), 2), (('i', 'I', 'IV'), 2), (('V', 'vi', 'i'), 2), (('V', 'vi', 'V'), 2), (('V', 'i', 'VII'), 2), (('V', 'V', 'ii'), 2), (('V', 'I', 'vi'), 2), (('IV', 'ii', 'vii'), 2), (('IV', 'ii', 'V'), 2), (('IV', 'V', 'I'), 2), (('IV', 'IV', 'I'), 2), (('II', 'V', 'I'), 2), (('I', 'vii', 'I'), 2), (('I', 'vi', 'vi'), 2), (('I', 'i', 'i'), 2), (('I', 

## Using the Datasets

In [65]:
def progression_conventionality(prog: str, composer: str, with_inversions=False) -> str:
    '''
    Given a chord progression formatted as roman numerals separated by commas
    (if with_inversions=True, parentheses containing the inversion number, 0 
    being the root inversion, follow each roman numeral) and a composer 
    (currently 'bach' and 'monteverdi' are the only ones supported),
    returns string message containing number of times the given progression
    appears in the composer's repertoire.
    '''
    # reads the correct dataset
    txt = None
    if composer == 'bach':
        if with_inversions:
            txt = open("inv-dataset-bach.txt","r").read()
        else:
            txt = open("simple-dataset-bach.txt","r").read()
    elif composer == 'monteverdi':
        if with_inversions:
            txt = open("inv-dataset-monteverdi.txt","r").read()
        else:
            txt = open("simple-dataset-monteverdi.txt","r").read()
    else:
        raise NotImplementedError("The composer you've entered is not in the database.")
        
    # creates the dictionary given the progression
    prog_list = tuple(prog.split(','))
    seq_dict = build_n_sequence_dictionary(len(prog_list), txt)
    total_progs = sum(seq_dict.values())
    try:
        num_times = seq_dict[prog_list]
    except:
        raise ValueError('The progression you provided is not in the repertoire.')
    
    # returns message with information
    msg = ''
    if num_times == 1:
        msg = composer + ' uses [' + prog + '] ' + str(num_times) + ' time in their repertoire.\n'
    else: 
        msg = composer + ' uses [' + prog + '] ' + str(num_times) + ' times in their repertoire.\n'
    msg += 'Compared to other length-' + str(len(prog_list)) + ' progressions, [' + prog + '] is used {:.2f}'.format(num_times/total_progs*100) + '%.'
    return msg

In [71]:
# test without inversion
prog = 'V,I'
print(progression_conventionality(prog, 'monteverdi'))

monteverdi uses [V,I] 446 times in their repertoire.
Compared to other length-2 progressions, [V,I] is used 6.74%.


In [70]:
# test with inversion
prog = 'V(0),I(0)'
print(progression_conventionality(prog, 'monteverdi', True))

monteverdi uses [V(0),I(0)] 312 times in their repertoire.
Compared to other length-2 progressions, [V(0),I(0)] is used 4.72%.


## Stretch Goals
* automate roman numeral analysis for music21 parseable composers
* return locations of matching progressions in scores