In [1]:
import pickle
import re
import pandas as pd

notes = pickle.load(open("pickle/notes.p", "rb" ))
links = pickle.load(open("pickle/links.p", "rb" ))

print(len(notes))
print(len(links))

130325
285557


In [2]:
def process_abc_note(note):
    lines = note.strip().splitlines()
    title = [x for x in lines if re.compile("T:").match(x)]
    title = title[0].split(":")[1].strip() if len(title)>0 else None

    time = [x for x in lines if re.compile("M:").match(x)]
    time = time[0].split(":")[1].strip() if len(time)>0 else None

    length = [x for x in lines if re.compile("L:").match(x)]
    length = length[0].split(":")[1].strip() if len(length)>0 else None

    tune_type = [x for x in lines if re.compile("R:").match(x)]
    tune_type = tune_type[0].split(":")[1].strip() if len(tune_type)>0 else None

    keyindex = [i for i, x in enumerate(lines) if re.compile("K:").match(x)]
    key = lines[keyindex[0]].split(":")[1].strip() if len(keyindex)>0 else None
    keyindex = 0 if len(keyindex)==0 else keyindex[0]

    tune = [x for x in lines[keyindex:] if re.compile(".*\\|").match(x)]
    tune = "".join(tune) if len(tune)>0 else None
    
    barsearch = re.compile(":?\|+:?\]?")
    bars = None
    if tune is not None:
        bars = [bar for bar in barsearch.split(tune) if bar!=""]
        barlines = barsearch.findall(tune)
        bars = [b+l for b, l in zip(bars,barlines)]

    features = {
        "title": title,
        "time": time,
        "length": length,
        "tune_type": tune_type,
        "key": key,
        "tune": tune,
        "bars": bars,
        "opening": None,
        "middle": None,
        "ending": None,
        "full_text": note
    }
    
    if bars is not None:
        features["opening"] = bars[:4]
        features["middle"] = bars[-4:4]
        features["ending"] = bars[-4:]
    
    features["complete"] = None not in features.values()
    return(features)

In [10]:
features = [process_abc_note(note) for note in notes]
pickle.dump(features, open("pickle/features.p", "wb"))
len(features)

130325

In [14]:
complete_features = [x for x in features if x['complete']]

pickle.dump(complete_features, open("pickle/complete_features.p", "wb"))
len(complete_features)

67260

In [5]:
print(notes[1234])


X:1
%
T:37th Regiment
T:March of the 37th Regt.
M:C
L:1/8
R:March
S: Seth Johnson – Woburn Fife Manuscript (c. 1807-40?, p. 39)
Z:AK/Fiddler’s Companion
K:G
G>B|d3e dc/B/ cB/A/|G2 G>G G2 A>c|B>G d>B edcB|d3B A2 G/A/B/c/|
d3e dc/B/ cB/A/|G2 G>G G2 A>c|B>de>d B>d d/c/B/A/|G2 G>G G2:|
|:A>A|A3B dcBA|B3c edcB|BGdB edcB|d3B ABcA|
G3B BAcB|B3d dced|(3def (3gfe (3dec (3BcA|G2 G>G G2:|]



In [15]:
df = pd.DataFrame(complete_features)

In [16]:
df

Unnamed: 0,bars,complete,ending,full_text,key,length,middle,opening,time,title,tune,tune_type
0,"[[P:A] G |, ""C""czc |, czd |, eze |, ezg |, ...",True,"[1 b3 |, g3 :|, 2 fdb |, ""C""c'3 |]]",\nX:1\nT:01 Und wenn's im Tal scho nachte tuet...,C %%MIDI gchordon,1/4,[],"[[P:A] G |, ""C""czc |, czd |, eze |]",3/4,01 Und wenn's im Tal scho nachte tuet. Walzer ...,"[P:A] G |""C""czc | czd | eze | ezg | gzc | azg ...",Walzer
1,"[g [P:A] |:, ""C""c'>gg |, e>cc |, ""G7""dzB |, ...",True,"[ ""G7""d>dd |, agB |, ""C""c>cc |, czz |]]",\nX:1\nT:02 Und z'Luterbach hani. Walzer C (...,C %%MIDI gchordon,1/4,[],"[g [P:A] |:, ""C""c'>gg |, e>cc |, ""G7""dzB |]",3/4,02 Und z'Luterbach hani. Walzer C (wz07872),"g [P:A] |:""C""c'>gg | e>cc | ""G7""dzB | Gzg | b>...",Walzer
2,"[ ""F""a3 |, f2g |, a3 |, a2 c |, ""C7""a2a |,...",True,"[2 ""C7""ff/e/ ^d/e/ |, a2g |, ""F""(f3 |, f)zz...",\nX:1\nT:03 Im Storchenhaus. Walzer F (wz078...,F %%MIDI gchordon,1/4,[],"[ ""F""a3 |, f2g |, a3 |, a2 c |]",3/4,03 Im Storchenhaus. Walzer F (wz07868),"""F""a3 | f2g | a3 | a2 c | ""C7""a2a | a2g | (""F...",Walzer
3,"[[P:A] ""C""c'3 |, g2e |, ""F""a3 |, fzd |, ""G...",True,"[ c'2G/G/ :|, 2 g2B |, ""C""c>cc |, czz |]]",\nX:1\nT:04 Rümliker Walzer C (wz07876)\nS:H...,C %%MIDI gchordon,1/4,[],"[[P:A] ""C""c'3 |, g2e |, ""F""a3 |, fzd |]",3/4,04 Rümliker Walzer C (wz07876),"[P:A] ""C""c'3 | g2e | ""F""a3 | fzd | ""G7""g3 | gz...",Walzer
4,"[G3A GE~E2|, CE~E2 GAcd|, eddc AGAc|, dfec dcA...",True,"[2eccB c3d|, AF~F2 Ac~c2|, eddc AcGE|, FAGF EC...",\nX:66\nT:066\nM:4/4\nL:1/8\nR:Reel\nZ:Grk-066...,C,1/8,[],"[G3A GE~E2|, CE~E2 GAcd|, eddc AGAc|, dfec dcAG|]",4/4,066,G3A GE~E2|CE~E2 GAcd|eddc AGAc|dfec dcAG|EG~G2...,Reel
5,"[""No key signature shown"" G4 |, G2F2 |, E2D2...",True,"[ B2G2 |, A2c2 |, B2A2 |, G4 |]]",\nI:abc-charset utf-8\n\nX:10112\nT:100 Psalm....,G,1/4,[],"[""No key signature shown"" G4 |, G2F2 |, E2D2...",2/2,100 Psalm. JMP.102,"""No key signature shown"" G4 | G2F2 | E2D2 | G2...",.Psalm
6,"[""No key signature shown"" G4 |, G2F2 |, E2D2...",True,"[ B2G2 |, A2c2 |, B2A2 |, G4 |]]",\nX:102\nT:100 Psalm. JMP.102\nT:Old Hundredth...,G major,1/4,[],"[""No key signature shown"" G4 |, G2F2 |, E2D2...",2/2,100 Psalm. JMP.102,"""No key signature shown"" G4 | G2F2 | E2D2 | G2...",.Psalm
7,"[[|:, ""A"".d.DD/D/D .A.DD/D/D |, .d.DD/D/D c2...",True,"[ ""D"" a3.A f2.A.c|, d3.A c2Bc ""etc."" |||, ""D...",\nX: 1\nT: 100 Watt Reels\nR: reel\nM: 4/4\nL:...,Ddor,1/8,[],"[[|:, ""A"".d.DD/D/D .A.DD/D/D |, .d.DD/D/D c2...",4/4,100 Watt Reels,"[|: ""A"".d.DD/D/D .A.DD/D/D | .d.DD/D/D c2Bc | ...",reel
8,"[""C""ceG |, ceG |, ceG |, !>!c2e |, ""G7""dfB...",True,"[ ""F""[Af]2 c |, ""C7""[Ge]2[Ec] |, ""F""[FAf] [...",\nX:1\nT:108 Walzer CF (wz06690) \nS:Hanny C...,C %%MIDI gchordon,1/4,[],"[""C""ceG |, ceG |, ceG |, !>!c2e |]",3/4,108 Walzer CF (wz06690),"""C""ceG | ceG | ceG | !>!c2e | ""G7""dfB | !>!d2...",Walzer
9,"[[P:A] !>!""Bb""f2fb !>!f2fb |, ""F7""a2ac cdef ...",True,"[ ""Bb7""agfe dcBA |, ""Eb""GBeg b4 |, ""Bb7""ABdf...",\nX:1\nT:10 Der Schöne (Polka) BbFEb (pk07667...,Bb %%MIDI gchordon,1/16,[],"[[P:A] !>!""Bb""f2fb !>!f2fb |, ""F7""a2ac cdef ...",2/4,10 Der Schöne (Polka) BbFEb (pk07667) ABACD,"[P:A] !>!""Bb""f2fb !>!f2fb | ""F7""a2ac cdef | ...",Polka
