In [140]:
import pandas as pd
import torch
import torch.nn as nn
import re

from io import StringIO

### Import data into class

In [141]:
class NoteArray(torch.utils.data.Dataset):
    '''
    All the info about the note array.

    Attributes set by ``__init__``:
        lines: the whole note-array file, line-by-line (as returned by ``readlines()``)
        meta_lines: ``{line number: line}`` for every line that contains ``'!'``
        notelines: the music part of the file -- the header line (with every
            ``%`` removed) followed by the data rows, without the "totals" row
        df: df-ization of ``notelines`` (columns = voices+, rows = time); the
            final data row is dropped and the un-suffixed columns of the first
            voice are renamed to ``b40.0``, ``attk.0``, ``last.0``, ``next.0``
        df_totals: one-row df holding the "totals" row, same column names as ``df``
        blen, mlen: maximum of the ``abs`` / ``bar`` columns (total number of
            beats, measures), taken before the final row is dropped
        composer, title, genre: values of the ``COM`` / ``OTL`` / ``AGN``
            metadata records (``genre`` is None when the record is absent)
        num_voices: number of voices (int)

    NOTE(review): the class derives from ``torch.utils.data.Dataset`` but
    defines neither ``__len__`` nor ``__getitem__``.
    '''

    def __init__(self,notearray_path):
        """
        initialize: convert to df and metadata

        Raises:
            ValueError: if the file has fewer than two non-metadata lines
                (a header and a totals row are required).
            KeyError: if the composer, title or voice-count record is missing.
        """
        # Only the read itself needs the file handle; parse after closing it.
        with open(notearray_path, "r") as notearray:
            self.lines = notearray.readlines()

        self.meta_lines = {
            ii: line for ii, line in enumerate(self.lines) if '!' in line
        }

        # Keep every non-metadata line. Filtering (rather than slicing between
        # the first and last non-metadata line) keeps metadata records that sit
        # between data rows out of the CSV text.
        notelines = [
            line for ii, line in enumerate(self.lines) if ii not in self.meta_lines
        ]
        if len(notelines) < 2:
            raise ValueError(
                "note array %r has no header/totals lines" % (notearray_path,)
            )

        # the header line is written as "%%idx\t%%line\t..."; strip the markers
        notelines[0] = notelines[0].replace('%', '')

        # totals: header + the first data row
        notelines_totals = notelines[:2]

        # just the music itself
        del notelines[1]  # deletes the "totals"
        self.notelines = notelines

        # The lines still end in '\n', so joining with '\n' yields blank lines
        # in between; read_csv skips blank lines by default.
        cleaned_notearray = StringIO('\n'.join(self.notelines))
        cleaned_totals = StringIO('\n'.join(notelines_totals))

        # read_csv leaves the first occurrence of a repeated column name
        # un-suffixed (b40, b40.1, ...); give voice 0 the same "<name>.<voice>" form
        first_voice_names = {
            col: col + ".0" for col in ['b40', 'attk', 'last', 'next']
        }

        df = pd.read_csv(cleaned_notearray, delimiter='\t')
        self.blen = df['abs'].max()
        self.mlen = df['bar'].max()
        # the last row contains these max values only, and nothing about the
        # music itself, so we drop it
        self.df = df.iloc[:-1].rename(columns=first_voice_names)

        self.df_totals = pd.read_csv(cleaned_totals, delimiter='\t').rename(
            columns=first_voice_names
        )

        # metadata: "<key>\t<value>" records; anything else is ignored
        meta_dic = {}
        for line in self.meta_lines.values():
            fields = re.split(r'\t+', line.rstrip('\n'))
            if len(fields) != 2:
                continue
            key, value = fields
            meta_dic[key] = value
        # self.meta_dic = meta_dic

        try:
            self.num_voices = int(meta_dic['% !!!voices:'])
        except (KeyError, ValueError):
            # fall back to the alternative record when the plain one is
            # missing or not an integer
            self.num_voices = int(meta_dic['% !!!voices-ORP:'])

        self.composer = meta_dic['% !!!COM:']
        self.title = meta_dic['% !!!OTL:']
        self.genre = meta_dic.get('% !!!AGN:')


## Basic Analysis

In [146]:
def create_rhythm_df(note_array,pitchnumbering = 'b40'):
    """
    Build one table of note events per voice.

    A row of ``note_array.df`` counts as an event for voice ``v`` when its
    ``<pitchnumbering>.<v>`` value is >= 0.
    NOTE(review): presumably negative values mark a note that is still
    sounding from an earlier row -- confirm against the note-array format.

    Input:
        - note_array: object exposing ``num_voices`` (int), ``df`` (DataFrame
          with ``mdur``, ``beat``, ``abs`` and the per-voice pitch columns)
          and ``blen`` (the final absolute beat)
        - pitchnumbering: prefix of the per-voice pitch columns
    Output:
        - rhythm_df: a list of dfs (one per voice) listing the occurrences of
          notes in that voice; each column name is prefixed with the voice number
            mdur: duration of the measure containing the note
            abs: absolute beat of the note
            beat: beat in the measure of the note
            durs: duration of the note (distance to the voice's next event;
                  the last one is measured back from ``note_array.blen``)
          A voice without any event yields an empty df with the same columns.
    """
    num_voices = note_array.num_voices
    df = note_array.df
    lastbeat = note_array.blen

    rhythm_df = []
    for v in range(num_voices):
        voice_col = pitchnumbering + '.' + str(v)

        # boolean mask instead of per-row .iloc lookups: works for any index
        # and keeps each column's own dtype
        events = df.loc[df[voice_col] >= 0, ['mdur', 'abs', 'beat']]

        # each event lasts until the next event of the same voice; the final
        # event lasts until the end of the piece. With no events the zip is
        # empty, so nothing is indexed.
        starts = events['abs'].tolist()
        ends = starts[1:] + [lastbeat]
        durs = [end - start for start, end in zip(starts, ends)]

        rhythm_df.append(pd.DataFrame({
            str(v)+'mdur' : events['mdur'].to_numpy(),
            str(v)+'abs' : events['abs'].to_numpy(),
            str(v)+'beat' : events['beat'].to_numpy(),
            str(v)+'durs' : durs
        }))

    return rhythm_df

# Testing the Scripts

In [143]:
# Path of the note-array text file opened and parsed by the cells below
# (relative to the notebook's working directory).
notearray_path = "../jrp.txt"

In [4]:
# Load the raw file as a list of lines (line endings kept) for inspection.
with open(notearray_path, "r") as raw_file:
    lines = list(raw_file)

In [10]:
# Preview the first lines of the raw file. Slicing (instead of indexing
# range(10)) avoids an IndexError when the file has fewer than 10 lines.
for i, line in enumerate(lines[:10]):
    print("Line", i, line)

Line 0 % !!!COM:	Anonymous

Line 1 % !!!OTL:	De sartor nui sian maestri

Line 2 % !!!AGN:	Song; Carnival song

Line 3 % !!!SCT:	Ano3237

Line 4 % !!!SCA:	Ano3237

Line 5 % !!!SMS:	Canto dei sartori (Perugia G.20)

Line 6 % !!!folio:	

Line 7 % !!!voices:	4

Line 8 %%idx	%%line	%%bar	%%mdur	%%bdur	%%beat	%%abs	%%b40	%%attk	%%last	%%next	%%b40	%%attk	%%last	%%next	%%b40	%%attk	%%last	%%next	%%b40	%%attk	%%last	%%next

Line 9 0	2000	3000	3100	3200	4000	5000	1040	7100	7200	7300	1040	7100	7200	7300	1040	7100	7200	7300	1040	7100	7200	7300



In [144]:
# Parse the file once, then pull out the pieces the later cells use.
note_array = NoteArray(notearray_path)
num_voices = note_array.num_voices
df_totals = note_array.df_totals
df = note_array.df
df.head()

Unnamed: 0,idx,line,bar,mdur,bdur,beat,abs,b40.0,attk.0,last.0,...,last.1,next.1,b40.2,attk.2,last.2,next.2,b40.3,attk.3,last.3,next.3
0,1,19,1,8,4,0,0,145,1,-1,...,-1,2,168,1,-1,2,185,1,-1,2
1,2,20,1,8,4,4,4,145,2,1,...,1,3,168,2,1,3,185,2,1,3
2,3,22,2,8,4,0,8,128,3,2,...,2,4,168,3,2,4,191,3,2,4
3,4,23,2,8,4,4,12,145,4,3,...,3,6,0,4,3,5,197,4,3,6
4,5,24,2,8,4,6,14,-145,4,3,...,3,6,185,5,4,6,-197,4,3,6


In [115]:
# NOTE(review): `rhythm_data` is only assigned further down, in the "OLD CODE"
# section, so this cell fails when the notebook is run top to bottom on a
# fresh kernel.
rhythm_data

Unnamed: 0,mdur,beat,abs,b40.0,b40.1,b40.2,b40.3
0,8,0,0,145,185,168,185
1,8,4,4,145,185,168,185
2,8,0,8,128,179,168,191
3,8,4,12,145,168,0,197
4,8,6,14,-145,-168,185,-197
...,...,...,...,...,...,...,...
84,12,0,292,145,157,168,185
85,12,4,296,128,151,168,-185
86,12,8,300,-128,-151,-168,180
87,12,0,304,168,145,157,185


In [149]:
# One DataFrame per voice (see create_rhythm_df); display the table for voice 0.
r_df = create_rhythm_df(note_array)
r_df[0]

Unnamed: 0,0mdur,0abs,0beat,0durs
0,8,0,0,4
1,8,4,4,4
2,8,8,0,4
3,8,12,4,4
4,8,16,0,4
...,...,...,...,...
67,12,280,0,8
68,12,288,8,4
69,12,292,0,4
70,12,296,4,8


## OLD CODE

In [58]:
# Per-voice pitch columns (b40.0, b40.1, ...) plus the timing columns.
voices = [f'b40.{voice_idx}' for voice_idx in range(num_voices)]
rhythm_cols = ['mdur', 'beat', 'abs', *voices]
rhythm_data = df[rhythm_cols]
rhythm_data.head()

Unnamed: 0,mdur,beat,abs,b40.0,b40.1,b40.2,b40.3
0,8,0,0,145,185,168,185
1,8,4,4,145,185,168,185
2,8,0,8,128,179,168,191
3,8,4,12,145,168,0,197
4,8,6,14,-145,-168,185,-197


In [70]:
# Duration of each event = distance to the voice's next event; the final
# event of every voice gets a placeholder duration of 0.
rhythm_durs = []
for v in range(num_voices):
    onsets = rhythm_abs[v]
    durs = [later - earlier for earlier, later in zip(onsets, onsets[1:])]
    durs = durs + [0]
    rhythm_durs.append(durs)

In [85]:
# One DataFrame per voice; the last entry of every list is left out.
rhythm_df = []
for v in range(num_voices):
    per_voice = {
        'mdur': rhythm_mdur[v],
        'abs': rhythm_abs[v],
        'beat': rhythm_beat[v],
        'durs': rhythm_durs[v],
    }
    frame = pd.DataFrame(
        {str(v) + label: values[:-1] for label, values in per_voice.items()}
    )
    rhythm_df.append(frame)
print(rhythm_df[3])

    3mdur  3abs  3beat  3durs
0       8     0      0      4
1       8     4      4      4
2       8     8      0      4
3       8    12      4      4
4       8    16      0      4
..    ...   ...    ...    ...
71     12   288      8      2
72     12   290     10      2
73     12   292      0      8
74     12   300      8      4
75     12   304      0     12

[76 rows x 4 columns]


In [133]:
# don't need this anymore
# the last row tells us about the measure after the last measure, so it is skipped
rows_without_last = rhythm_data.index[:-1]
# per voice: the rows whose pitch value for that voice is >= 0
onset_rows = [
    [i for i in rows_without_last if rhythm_data[voices[v]].iloc[i] >= 0]
    for v in range(num_voices)
]
rhythm_abs = [
    [rhythm_data.iloc[i]['abs'] for i in onset_rows[v]]
    for v in range(num_voices)
]
rhythm_beat = [
    [rhythm_data.iloc[i]['beat'] for i in onset_rows[v]]
    for v in range(num_voices)
]
rhythm_mdur = [
    [rhythm_data.iloc[i]['mdur'] for i in onset_rows[v]]
    for v in range(num_voices)
]

In [69]:
# ----- DON'T RUN THIS ONE ANYMORE! -----
# Loop version: per voice, collect abs / beat / mdur of every row whose pitch
# value for that voice is >= 0.
rhythm_abs, rhythm_beat, rhythm_mdur = [], [], []
for v in range(num_voices):
    voice_abs, voice_beat, voice_mdur = [], [], []
    for i in rhythm_data.index:
        if not (rhythm_data[voices[v]].iloc[i] >= 0):
            continue
        voice_abs.append(rhythm_data.iloc[i]['abs'])
        voice_beat.append(rhythm_data.iloc[i]['beat'])
        voice_mdur.append(rhythm_data.iloc[i]['mdur'])
    rhythm_beat.append(voice_beat)
    rhythm_abs.append(voice_abs)
    rhythm_mdur.append(voice_mdur)
# this removes the first row, which is the "totals"
# rhythm_abs[v], rhythm_beat[v] = rhythm_abs[v][1:], rhythm_beat[v][1:]    