In [1]:
import numpy as np
import pandas as pd
import ms3 as ms
from preprocessing import transform_chords_abs
import warnings

# Importing development version of dimcat
import dimcat as dc

# Silence every FutureWarning for the rest of the session, regardless of which library emits it.
# NOTE(review): this also hides deprecation notices that would otherwise point at code that
# needs updating before a dependency upgrade.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Disable pandas' chained-assignment check (no warning or error is raised when a value is
# assigned to something that may be a copy of another frame).
pd.options.mode.chained_assignment = None

In [2]:
# Load the ABC corpus.
# This takes some time because it parses the original scores, not the preprocessed tsv files.
corpus = dc.Corpus()
corpus.load("../../ABC", parse_tsv=False, parse_scores=True) # make sure to parse directly from MuseScore files
# NOTE(review): bare expression in the middle of the cell, so nothing is displayed here;
# presumably a leftover from interactive inspection -- confirm it has no needed side effect.
corpus.data

# Facet named "expanded"; it is passed to preprocessPiece() as `labels` further down.
labels = corpus.get_facet("expanded")

# Slice the corpus with dimcat's NoteSlicer and keep the "notes" facet of the result.
# `salami_notes` is read as a global by preprocessPiece().
salami_crp = dc.NoteSlicer().process_data(corpus)
salami_notes = salami_crp.get_facet("notes")

	The incomplete MC 221 (timesig 1, act_dur 3/4) is completed by 1 incorrect duration (expected: 1/4):
	{79: Fraction(1, 1)}
	The incomplete MC 67 (timesig 1, act_dur 5/8) is completed by 1 incorrect duration (expected: 3/8):
	{68: Fraction(1, 1)}
	The incomplete MC 40 (timesig 3/2, act_dur 7/8) is completed by 1 incorrect duration (expected: 5/8):
	{41: Fraction(3, 8)}
	The incomplete MC 79 (timesig 1, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/2):
	{80: Fraction(3, 4)}
	Score contains 1 labels that don't (and 464 that do) match the DCML standard:
	    mc  mn      label harmony_layer
	22  18  18  V7[viio64             1
DCML_HARMONY_INCOMPLETE_PEDAL_COLUMN_ERROR (13,) ms3.Parse.ABC.n06op18-6_04.mscx -- /opt/homebrew/lib/python3.10/site-packages/ms3-0.5.3.post0.dev224+ga917290-py3.10.egg/ms3/expand_dcml.py (line 155) expand_labels():
	propagate_pedal() failed with
	1 organ points started, 2 ended:
	     mc pedal   mc pedalend
	0   106     I   20        ]
	1  <NA>   N

# Helper Functions


In [28]:
def to_pitch(midi, tpc):
    """Build a pitch string from a MIDI number and a tonal pitch class.

    The note name comes from ``ms.fifths2name(tpc)`` and is followed directly by
    the octave number, which is computed as ``midi // 12``.

    NOTE(review): there is no ``- 1`` shift in the octave computation, so MIDI 60
    yields octave 5 -- confirm that the consumer of these strings expects this
    convention.
    """
    return ms.fifths2name(tpc) + str(midi // 12)

def get_chord_offset(numeral: str, globalkey_is_minor):
    """Return the root of a Roman numeral as a line-of-fifths offset from the tonic, modulo 12.

    Parameters
    ----------
    numeral : str
        Roman numeral I-VII in upper or lower case, optionally surrounded by
        ``#`` / ``b`` accidentals (e.g. ``"V"``, ``"bII"``, ``"#iv"``).
    globalkey_is_minor : bool
        Selects the minor-mode degree table instead of the major-mode one.

    Returns
    -------
    int
        Value in ``range(12)``. 0 is the tonic, 1 a fifth above it, 11 a fifth below it.

    Raises
    ------
    KeyError
        If the numeral, stripped of accidentals, is not one of I-VII.

    Notes
    -----
    The previous implementation reduced the result modulo 7. Because every
    accidental contributes a multiple of 7, accidentals were silently discarded,
    and the minor-table entries 8, 9 and 10 collapsed onto other degrees. IV was
    also stored as 5, the same value as major VII. With modulo 12 and IV = 11
    (one fifth below the tonic) every degree is distinct and the two tables
    agree with each other (e.g. major ``bVI`` equals minor ``VI``).
    NOTE(review): confirm that consumers of ``rootoffset`` expect fifths modulo 12.
    """
    # One accidental moves the root by seven steps along the line of fifths.
    alteration = (numeral.count("#") - numeral.count("b")) * 7

    degree = numeral.strip("#b").upper()

    # Fifths above the tonic, modulo 12, for each scale degree.
    fifths_major = {"I": 0, "II": 2, "III": 4, "IV": 11, "V": 1, "VI": 3, "VII": 5}
    fifths_minor = {"I": 0, "II": 2, "III": 9, "IV": 11, "V": 1, "VI": 8, "VII": 10}

    table = fifths_minor if globalkey_is_minor else fifths_major
    return (table[degree] + alteration) % 12

def interval_union(i1,i2):
    """Return the left-closed interval spanning from the start of ``i1`` to the end of ``i2``.

    Only ``i1.left`` and ``i2.right`` are read; the closedness of the inputs is ignored.
    """
    start, end = i1.left, i2.right
    return pd.Interval(start, end, closed='left')

def transform_chords_abs(df):
    """Add a ``rootoffset`` column to ``df`` in place; nothing is returned.

    Each row's value is ``get_chord_offset(row.numeral, row.globalkey_is_minor)``
    converted to ``int``, so ``df`` must have ``numeral`` and
    ``globalkey_is_minor`` columns.

    This definition shadows the ``transform_chords_abs`` imported from
    ``preprocessing`` at the top of the file.
    """
    def _row_offset(row):
        return int(get_chord_offset(row.numeral, row.globalkey_is_minor))

    df['rootoffset'] = df.apply(_row_offset, axis=1)

In [30]:
# Returns two dataframes, one for the chords, one for the slices
def preprocessPiece(piece : str, labels, notes=None):
    """Build the chord-segment table and the note-slice table for one piece of the ABC corpus.

    Parameters
    ----------
    piece : str
        Piece name; used as the second key in ``('ABC', piece)`` lookups.
    labels : DataFrame
        Chord label table that can be indexed with ``('ABC', piece)``. The
        notebook passes the "expanded" facet of the corpus.
    notes : DataFrame, optional
        Slice table that can be indexed with ``("ABC", piece)`` and has ``midi``,
        ``tpc`` and ``tied`` columns. Defaults to the notebook-level
        ``salami_notes`` so existing two-argument calls keep working.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        * chords: one row per merged chord segment with columns
          ``chord_type``, ``rootoffset``, ``globalkey`` (index named ``segment_id``).
        * slices: one row per note with columns ``new_segment``, ``new_slice``,
          ``pitch``, ``tied``.

    Side effects
    ------------
    Writes ``chordsbefore.csv``, ``chords.csv`` and ``salamis.csv`` to the current
    working directory; they are overwritten on every call.
    """
    # Fall back to the notebook global when no slice table is passed explicitly.
    if notes is None:
        notes = salami_notes

    # Zoom in on the chords of one piece.
    chords = labels.loc[('ABC', piece)]

    # The first reset_index turns the index levels into columns; the second one adds a
    # positional 'index' column. Both are kept so chordsbefore.csv stays unchanged.
    chordz = chords.copy().reset_index()
    chordz = chordz.reset_index()

    # Translate the labelled chords to offsets from the global tonic. '@none' rows carry no
    # numeral, so they are set aside and re-attached afterwards.
    is_none = chordz['chord'] == '@none'
    clean_chords = chordz[~is_none].copy()
    ms.labels2global_tonic(clean_chords, inplace=True)
    clean_chords.to_csv("chordsbefore.csv")
    transform_chords_abs(clean_chords)

    # Recombine with the '@none' segments in their original order; they get root offset 0.
    full_chords_abs = pd.concat([clean_chords, chordz[is_none]]).sort_index()
    # Assign the result instead of a chained in-place fillna, which may act on a temporary.
    full_chords_abs['rootoffset'] = full_chords_abs['rootoffset'].fillna(0)

    # Merge runs of consecutive rows that have the same chord type and root offset.
    relevant_columns = ["interval", "chord_type", "rootoffset", "globalkey"]
    merged = []
    prev = None
    for _, row in full_chords_abs[relevant_columns].iterrows():
        repeats_previous = (
            prev is not None
            and row.chord_type == prev.chord_type
            and row.rootoffset == prev.rootoffset
        )
        if repeats_previous:
            # Extend the current segment up to the end of this row's interval.
            current = merged[-1]
            current["interval"] = pd.Interval(current["interval"].left, row.interval.right, "left")
        else:
            merged.append({column: row[column] for column in relevant_columns})
        prev = row

    # Build the frame once rather than appending row by row (DataFrame.append was removed
    # in pandas 2.0 and copies the whole frame on every call).
    full_chords_abs = pd.DataFrame(merged, columns=relevant_columns)
    full_chords_abs['rootoffset'] = full_chords_abs['rootoffset'].astype(int)
    full_chords_abs.index.name = 'segment_id'
    full_chords_abs[["chord_type", "rootoffset", "globalkey"]].to_csv('chords.csv')

    salamis = notes.loc[("ABC", piece)]

    mini_salamis = salamis[['midi', 'tpc', 'tied']].copy()
    mini_salamis['tied'] = mini_salamis['tied'].fillna(0).astype('bool')

    # Give every slice the id of each chord segment it overlaps. A slice that overlaps
    # several segments appears once per segment.
    slice_intervals = mini_salamis.index.get_level_values(0)
    per_segment = []
    for segment, interval in enumerate(full_chords_abs["interval"]):
        slices_in_segment = mini_salamis[slice_intervals.overlaps(interval)].copy()
        slices_in_segment.insert(0, 'segment_id', segment)
        per_segment.append(slices_in_segment)

    segmented_salamis = pd.concat(per_segment)

    segmented_salamis['slice_id'] = pd.factorize(segmented_salamis.reset_index()['onset_slice'])[0]

    segmented_salamis['pitch'] = segmented_salamis.apply(lambda x: to_pitch(x.midi, x.tpc), axis=1)

    final_salamis_columns = ['segment_id', 'slice_id', 'pitch', 'tied']
    final_salamis = segmented_salamis.reset_index()[final_salamis_columns].copy()

    # diff() is NaN on the first row; filling it with 0 marks that row as "not new" and,
    # unlike writing to label 0 afterwards, cannot insert a row into an empty frame.
    final_salamis["new_segment"] = final_salamis["segment_id"].diff().fillna(0).astype(bool)
    final_salamis['new_slice'] = final_salamis["slice_id"].diff().fillna(0).astype(bool)

    final_salamis.to_csv('salamis.csv', columns=["new_segment", "new_slice", "pitch", "tied"], index=False)

    return (full_chords_abs[["chord_type", "rootoffset", "globalkey"]], final_salamis[["new_segment", "new_slice", "pitch", "tied"]])




In [33]:
# Unique values of the first index level of the ABC labels (used below as piece names).
pieces = labels.loc["ABC"].index.unique(level=0).tolist()


In [36]:
# Preprocess every piece and write one chord file and one slice file per piece.
# The inputs/chords and inputs/slices directories must already exist.
for piece in pieces:
    chords, slices = preprocessPiece(piece, labels)
    chords.to_csv("inputs/chords/{}.csv".format(piece))
    slices.to_csv("inputs/slices/{}.csv".format(piece), index=False)