In [1]:
run -i './ismir2020_make_datasets.py'

In [2]:
run -i './ismir2020_featsubsets_segmentation.py'

In [3]:
run -i './ismir2020_featuresubsets.py'

In [4]:
run -i './ismir2020_vizpgrams.py'

In [5]:
from MTCFeatures import MTCFeatureLoader
import music21 as m21
import pickle
from collections import Counter

# Data Preparation

The construction of the melody sequences and the extraction of the 5-grams are done in the notebook `ismir2020_extract_grams`.

Load the melody sequences

In [6]:
# Load the melody sequences of the three corpora and materialise each of them
# as a list.
seqs_mtc = list(MTCFeatureLoader('ismir2020_seqs_mtc_sel.jsonl.gz').sequences())

seqs_essen = list(MTCFeatureLoader('ismir2020_seqs_essen_sel.jsonl.gz').sequences())

seqs_chor = list(MTCFeatureLoader('ismir2020_seqs_chor_sel.jsonl.gz').sequences())

Load the pgrams

In [7]:
# Load, per corpus, the pgram table (a pickled pandas object) and the matching
# `arfftype` object that is indexed by feature name further down.
# The arfftype files are opened in a `with` block so the handles are closed
# again right after loading.
# NOTE: unpickling executes arbitrary code; only load files you produced yourself.
pgrams_mtc = pd.read_pickle("ismir2020_pgrams_mtc_sel.pkl")
with open("ismir2020_arfftype_mtc_sel.pkl", "rb") as arfftype_file:
    arfftype_mtc = pickle.load(arfftype_file)

pgrams_essen = pd.read_pickle("ismir2020_pgrams_essen_sel.pkl")
with open("ismir2020_arfftype_essen_sel.pkl", "rb") as arfftype_file:
    arfftype_essen = pickle.load(arfftype_file)

pgrams_chor = pd.read_pickle("ismir2020_pgrams_chor_sel.pkl")
with open("ismir2020_arfftype_chor_sel.pkl", "rb") as arfftype_file:
    arfftype_chor = pickle.load(arfftype_file)

Replace missing values (for random forest classifier)

In [8]:
#replace missing values
#
# Every rule below is a (columns, replacement) pair. The same rules are applied
# to all three corpora, so they are kept in one table and applied in a loop
# instead of being spelled out three times each.

#pitchproximity -> 13
pps = [
    'pitchproximityfirst',
    'pitchproximitysecond',
    'pitchproximitythird',
    'pitchproximityfourth',
    'pitchproximityfifth'
]

#pitchreversal -> -2
prs = [
    'pitchreversalfirst',
    'pitchreversalsecond',
    'pitchreversalthird',
    'pitchreversalfourth',
    'pitchreversalfifth'
]

#derived from intervalfirst -> '='
derived_from_intervalfirst = [
    'intervalsizefirstsecond',
    'intervalsizefirstthird',
    'intervalsizefirstfourth',
    'intervalsizefirstfifth',
    'intervaldirfirstsecond',
    'intervaldirfirstthird',
    'intervaldirfirstfourth',
    'intervaldirfirstfifth',
]

missing_value_rules = [
    (pps, 13),
    (prs, -2),
    (['intervalfirst'], 0),                  #intervalfirst -> 0
    (derived_from_intervalfirst, '='),
    (['VosCenterGravityfirst'], False),
    (['nextisrestthird'], True),             #n.b. affects all final cadences
    (['lbdmfirst'], 0.0),                    #localboundary first note -> 0
]

for pgrams in (pgrams_mtc, pgrams_essen, pgrams_chor):
    for columns, replacement in missing_value_rules:
        # Column assignment updates the existing frame in place, so the
        # pgrams_* names keep referring to the patched tables.
        pgrams[columns] = pgrams[columns].fillna(value=replacement)

#for the rest just remove the rows with missing values

Remove all final cadences

In [9]:
# Keep only the pgrams whose class label is not 'finalcadence', and set the
# 'cadence_class' entry of each arfftype table to the two remaining labels.
keep_mtc = pgrams_mtc['cadence_class'] != 'finalcadence'
pgrams_mtc = pgrams_mtc[keep_mtc]
arfftype_mtc['cadence_class'] = '{midcadence, nocadence}'

keep_essen = pgrams_essen['cadence_class'] != 'finalcadence'
pgrams_essen = pgrams_essen[keep_essen]
arfftype_essen['cadence_class'] = '{midcadence, nocadence}'

keep_chor = pgrams_chor['cadence_class'] != 'finalcadence'
pgrams_chor = pgrams_chor[keep_chor]
arfftype_chor['cadence_class'] = '{midcadence, nocadence}'

Selections have already been made:

In [10]:
# No further selection is applied here: the pgrams_sel_* names are plain
# aliases of the tables prepared above.
pgrams_sel_mtc, pgrams_sel_essen, pgrams_sel_chor = (
    pgrams_mtc,
    pgrams_essen,
    pgrams_chor,
)

Write separate arffs for the ismir2020 featsets

In [11]:
# Switch: set to True to write one ARFF file per feature set in ismir2020featsets.
writeISMIR2020ARFF = False

if writeISMIR2020ARFF:

    # (corpus tag used in the file name, pgram table, attribute-type table).
    # Each corpus is written with its own arfftype_* table: these are the
    # tables loaded in this notebook and whose 'cadence_class' entry was
    # narrowed to '{midcadence, nocadence}' above. The bare name `arfftype`
    # used previously is not defined anywhere in this notebook.
    arff_corpora = [
        ('mtc', pgrams_sel_mtc, arfftype_mtc),
        ('essen', pgrams_sel_essen, arfftype_essen),
        ('chor', pgrams_sel_chor, arfftype_chor),
    ]

    for featlist, featnames in ismir2020featsets.items():
        # Feature sets with 'lyr' in their name, and 'ismir2020_elementaryall',
        # are written for MTC only (presumably because they need lyrics
        # features that the other corpora lack -- TODO confirm).
        mtc_only = 'lyr' in featlist or featlist == 'ismir2020_elementaryall'

        for corpus, pgrams_sel, arfftype_corpus in arff_corpora:
            if mtc_only and corpus != 'mtc':
                continue

            # Attribute types for the selected features, class attribute last.
            toarff = {featname: arfftype_corpus[featname] for featname in featnames}
            toarff['cadence_class'] = arfftype_corpus['cadence_class']

            arff_fname = f'ismir2020_arff/ismir2020_pgram_{corpus}_{featlist}.arff'
            pgrams2arff(pgrams_sel, toarff, arff_fname, classfeat='cadence_class')

Prepare the datasets for classification

In [12]:
# This is the dataset to work with
# Retain only the features
# keep class labels separate

dataset_mtc_raw = pgrams_sel_mtc

#which feature to use as class label
classfeat = 'cadence_class'

#get class labels
y_mtc_str = dataset_mtc_raw[classfeat].values

#replace all cadence types with the same 'cadence' label
y_mtc_str = ['nocadence' if lab == 'nocadence' else 'cadence' for lab in y_mtc_str]

#select a subset of features
#featset = ismir2020featsets['ismir2020_elementarypitch']
#featset = ismir2020featsets['ismir2020_elementaryrhythm']
#featset = ismir2020featsets['ismir2020_elementarylyrics']
featset = ismir2020featsets['ismir2020_elementarypitchrhythm']
#featset = ismir2020featsets['ismir2020_elementaryall']
#featset = ismir2020featsets['ismir2020_othermodels']
#featset = ismir2020featsets['ismir2020_all']
#featset = ismir2020featsets['ismir2020_all_lyr']
#featset = ismir2020featsets['ismir2020_all_gt']
#featset = ismir2020featsets['ismir2020_all_lyr_gt']

dataset_mtc_raw = dataset_mtc_raw.loc[:, list(featset)]

# Label distribution before rows with missing values are dropped.
cnt = Counter(y_mtc_str)
print(cnt)

#remove rows with missing values
#after feature selection!
#also remove those from labels y_mtc_str
# A positional boolean mask filters features and labels alike, so no
# temporary label column has to be written into (a slice of) the feature table.
complete_rows = dataset_mtc_raw.notna().all(axis=1).values
y_mtc_str = pd.Series(y_mtc_str, dtype=object)[complete_rows].values
dataset_mtc_raw = dataset_mtc_raw.loc[complete_rows]

# Label distribution after dropping incomplete rows.
cnt = Counter(y_mtc_str)
print(cnt)

#make onehot features for all nominal features
# A column is one-hot encoded when its ARFF type is neither 'numeric' nor the
# boolean type '{True, False}'. The class feature is not among the columns any
# more at this point.
onehotcolumns = [
    name
    for name in dataset_mtc_raw.columns
    if arfftype_mtc[name] not in ('numeric', '{True, False}')
]

dataset_mtc_raw = pd.get_dummies(dataset_mtc_raw, columns=onehotcolumns)

dataset_mtc = dataset_mtc_raw

Counter({'nocadence': 63856, 'cadence': 7054})
Counter({'nocadence': 61215, 'cadence': 7049})


In [13]:
# This is the dataset to work with
# Retain only the features
# keep class labels separate

dataset_essen_raw = pgrams_sel_essen

#which feature to use as class label
essen_classfeat = 'cadence_class'

#get class labels (from the Essen class feature, not the MTC variable `classfeat`)
y_essen_str = dataset_essen_raw[essen_classfeat].values

#replace all cadence types with the same 'cadence' label
y_essen_str = ['nocadence' if lab == 'nocadence' else 'cadence' for lab in y_essen_str]

#use same featset as for MTC
essen_featset = featset

dataset_essen_raw = dataset_essen_raw.loc[:, list(essen_featset)]

# Label distribution before rows with missing values are dropped.
cnt = Counter(y_essen_str)
print(cnt)

#remove rows with missing values
#after feature selection!
#also remove from labels y_essen_str
# A positional boolean mask filters features and labels alike, so no
# temporary label column has to be written into (a slice of) the feature table.
complete_rows = dataset_essen_raw.notna().all(axis=1).values
y_essen_str = pd.Series(y_essen_str, dtype=object)[complete_rows].values
dataset_essen_raw = dataset_essen_raw.loc[complete_rows]

# Label distribution after dropping incomplete rows.
cnt = Counter(y_essen_str)
print(cnt)

#make onehot features for all nominal features
# A column is one-hot encoded when its ARFF type is neither 'numeric' nor the
# boolean type '{True, False}'. Both tests use the Essen type table; the
# boolean test previously looked the column up in arfftype_mtc.
onehotcolumns = [
    name
    for name in dataset_essen_raw.columns
    if arfftype_essen[name] not in ('numeric', '{True, False}')
]

dataset_essen_raw = pd.get_dummies(dataset_essen_raw, columns=onehotcolumns)

dataset_essen = dataset_essen_raw

Counter({'nocadence': 62490, 'cadence': 7703})
Counter({'nocadence': 59230, 'cadence': 7699})


In [14]:
# This is the dataset to work with
# Retain only the features
# keep class labels separate

dataset_chor_raw = pgrams_sel_chor

#which feature to use as class label
chor_classfeat = 'cadence_class'

#get class labels (from the chorale class feature, not the MTC variable `classfeat`)
y_chor_str = dataset_chor_raw[chor_classfeat].values

#replace all cadence types with the same 'cadence' label
if chor_classfeat == 'cadence_class':
    y_chor_str = ['nocadence' if lab == 'nocadence' else 'cadence' for lab in y_chor_str]

#use same featset as for MTC
chor_featset = featset

dataset_chor_raw = dataset_chor_raw.loc[:, list(chor_featset)]

# Label distribution before rows with missing values are dropped.
cnt = Counter(y_chor_str)
print(cnt)

#just remove rows with missing values
#after feature selection!
#also remove from labels y_chor_str
# A positional boolean mask filters features and labels alike, so no
# temporary label column has to be written into (a slice of) the feature table.
complete_rows = dataset_chor_raw.notna().all(axis=1).values
y_chor_str = pd.Series(y_chor_str, dtype=object)[complete_rows].values
dataset_chor_raw = dataset_chor_raw.loc[complete_rows]

# Label distribution after dropping incomplete rows.
cnt = Counter(y_chor_str)
print(cnt)

#make onehot features for all nominal features
# A column is one-hot encoded when its ARFF type is neither 'numeric' nor the
# boolean type '{True, False}'. Both tests use the chorale type table; the
# boolean test previously looked the column up in arfftype_mtc.
onehotcolumns = [
    name
    for name in dataset_chor_raw.columns
    if arfftype_chor[name] not in ('numeric', '{True, False}')
]

dataset_chor_raw = pd.get_dummies(dataset_chor_raw, columns=onehotcolumns)

dataset_chor = dataset_chor_raw

Counter({'nocadence': 15455, 'cadence': 1907})
Counter({'nocadence': 14716, 'cadence': 1906})


In [15]:
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

Prepare the groups for cross validation

In [16]:
# Derive one integer group label per pgram from its index value, so that
# GroupKFold can split on these groups.
# MTC: the song id is taken to be the first 12 characters of the pgram id.
pgramids_mtc = dataset_mtc.index.values
songids_mtc = [pgram_id[:12] for pgram_id in pgramids_mtc]
group_encoder_mtc = LabelEncoder()
groups_mtc = group_encoder_mtc.fit_transform(songids_mtc)

# Essen: the song id is the pgram id without its last 4 characters.
pgramids_essen = dataset_essen.index.values
songids_essen = [pgram_id[:-4] for pgram_id in pgramids_essen]
group_encoder_essen = LabelEncoder()
groups_essen = group_encoder_essen.fit_transform(songids_essen)

# Chorales: same id scheme as Essen (last 4 characters dropped).
pgramids_chor = dataset_chor.index.values
songids_chor = [pgram_id[:-4] for pgram_id in pgramids_chor]
group_encoder_chor = LabelEncoder()
groups_chor = group_encoder_chor.fit_transform(songids_chor)

Prepare X and Y

In [17]:
# Encode the string labels as integers. LabelEncoder sorts its classes, so
# 'cadence' becomes 0 and 'nocadence' becomes 1 in the reports below.
y_encoder = LabelEncoder()
y_encoder.fit(y_mtc_str)

# pd.get_dummies was applied to every corpus separately, so the three tables
# only have identical columns if every nominal value occurs in every corpus.
# The classifiers below are trained on one corpus and applied to another, which
# requires the same columns in the same order. Align all tables on the union
# of their columns (MTC order first); a dummy column that a corpus lacks is
# all zero. When the columns already agree this is a no-op.
feature_columns = list(dataset_mtc.columns)
for corpus_frame in (dataset_essen, dataset_chor):
    feature_columns.extend(
        col for col in corpus_frame.columns if col not in feature_columns
    )

dataset_mtc = dataset_mtc.reindex(columns=feature_columns, fill_value=0)
dataset_essen = dataset_essen.reindex(columns=feature_columns, fill_value=0)
dataset_chor = dataset_chor.reindex(columns=feature_columns, fill_value=0)

X_mtc = dataset_mtc.values
y_mtc = y_encoder.transform(y_mtc_str)

X_essen = dataset_essen.values
y_essen = y_encoder.transform(y_essen_str)

X_chor = dataset_chor.values
y_chor = y_encoder.transform(y_chor_str)

# Classification with Random Forest

Do classification of MTC

In [18]:
# 5-fold cross validation on MTC, with folds split on groups_mtc.
# Shuffling and the random forests are seeded so that re-running the notebook
# reproduces the reported scores.
RANDOM_SEED = 42

X_mtc_shuffled, y_mtc_shuffled, groups_mtc_shuffled = shuffle(
    X_mtc, y_mtc, groups_mtc, random_state=RANDOM_SEED
)

group_kfold = GroupKFold(n_splits=5)
# Placeholder predictions; every position is overwritten by the fold in which
# the sample is part of the test split.
y_mtc_shuffled_pred = np.ones(len(y_mtc), dtype=np.int8)
mtc_folds = group_kfold.split(X_mtc_shuffled, y_mtc_shuffled, groups_mtc_shuffled)
for fold, (train_index, test_index) in enumerate(mtc_folds):
    print(f"Fold {fold}")
    clf = RandomForestClassifier(n_jobs=12, n_estimators=40, random_state=RANDOM_SEED)
    clf.fit(X_mtc_shuffled[train_index], y_mtc_shuffled[train_index])
    y_mtc_shuffled_pred[test_index] = clf.predict(X_mtc_shuffled[test_index])
print(classification_report(y_mtc_shuffled, y_mtc_shuffled_pred))

Fold 0
Fold 1
Fold 2
Fold 3
Fold 4
              precision    recall  f1-score   support

           0       0.79      0.58      0.67      7049
           1       0.95      0.98      0.97     61215

    accuracy                           0.94     68264
   macro avg       0.87      0.78      0.82     68264
weighted avg       0.94      0.94      0.94     68264



Do classification for ESSEN

In [19]:
# 5-fold cross validation on Essen (folds split on groups_essen), followed by
# cross-corpus evaluation with classifiers trained on a complete corpus.
# Shuffling and the random forests are seeded so that re-running the notebook
# reproduces the reported scores.
RANDOM_SEED = 42

X_essen_shuffled, y_essen_shuffled, groups_essen_shuffled = shuffle(
    X_essen, y_essen, groups_essen, random_state=RANDOM_SEED
)

group_kfold = GroupKFold(n_splits=5)
# Placeholder predictions; every position is overwritten by the fold in which
# the sample is part of the test split.
y_essen_shuffled_pred = np.ones(len(y_essen), dtype=np.int8)
essen_folds = group_kfold.split(X_essen_shuffled, y_essen_shuffled, groups_essen_shuffled)
for fold, (train_index, test_index) in enumerate(essen_folds):
    print(f"Fold {fold}")
    clf = RandomForestClassifier(n_jobs=12, n_estimators=40, random_state=RANDOM_SEED)
    clf.fit(X_essen_shuffled[train_index], y_essen_shuffled[train_index])
    y_essen_shuffled_pred[test_index] = clf.predict(X_essen_shuffled[test_index])
print("5-fold CV on Essen:")
print(classification_report(y_essen_shuffled, y_essen_shuffled_pred))

#build classifier for entire dataset
clf_essen = RandomForestClassifier(n_jobs=12, n_estimators=40, random_state=RANDOM_SEED)
clf_essen.fit(X_essen, y_essen)

# Cross-corpus tests only make sense when both corpora use the same feature set.
if featset == essen_featset:
    #build classifier for mtc
    clf_mtc = RandomForestClassifier(n_jobs=12, n_estimators=40, random_state=RANDOM_SEED)
    clf_mtc.fit(X_mtc, y_mtc)
    y_essen_mtc_pred = clf_mtc.predict(X_essen)
    print("Essen classified with MTC training:")
    print(classification_report(y_essen, y_essen_mtc_pred))

    y_mtc_essen_pred = clf_essen.predict(X_mtc)
    print("MTC classified with Essen training:")
    print(classification_report(y_mtc, y_mtc_essen_pred))

if essen_featset == chor_featset:
    y_chor_essen_pred = clf_essen.predict(X_chor)
    print("Chorales classified with Essen training:")
    print(classification_report(y_chor, y_chor_essen_pred))

Fold 0
Fold 1
Fold 2
Fold 3
Fold 4
5-fold CV on Essen:
              precision    recall  f1-score   support

           0       0.83      0.69      0.76      7699
           1       0.96      0.98      0.97     59230

    accuracy                           0.95     66929
   macro avg       0.90      0.84      0.86     66929
weighted avg       0.95      0.95      0.95     66929

Essen classified with MTC training:
              precision    recall  f1-score   support

           0       0.84      0.57      0.68      7699
           1       0.95      0.99      0.97     59230

    accuracy                           0.94     66929
   macro avg       0.89      0.78      0.82     66929
weighted avg       0.93      0.94      0.93     66929

MTC classified with Essen training:
              precision    recall  f1-score   support

           0       0.76      0.62      0.68      7049
           1       0.96      0.98      0.97     61215

    accuracy                           0.94     68264
 

Do classification for CHOR

In [20]:
# 5-fold cross validation on the chorales (folds split on groups_chor),
# followed by cross-corpus evaluation with classifiers trained on a complete
# corpus. Shuffling and the random forests are seeded so that re-running the
# notebook reproduces the reported scores.
RANDOM_SEED = 42

X_chor_shuffled, y_chor_shuffled, groups_chor_shuffled = shuffle(
    X_chor, y_chor, groups_chor, random_state=RANDOM_SEED
)

group_kfold = GroupKFold(n_splits=5)
# Placeholder predictions; every position is overwritten by the fold in which
# the sample is part of the test split.
y_chor_shuffled_pred = np.ones(len(y_chor), dtype=np.int8)
chor_folds = group_kfold.split(X_chor_shuffled, y_chor_shuffled, groups_chor_shuffled)
for fold, (train_index, test_index) in enumerate(chor_folds):
    print(f"Fold {fold}")
    clf = RandomForestClassifier(n_jobs=12, n_estimators=40, random_state=RANDOM_SEED)
    clf.fit(X_chor_shuffled[train_index], y_chor_shuffled[train_index])
    y_chor_shuffled_pred[test_index] = clf.predict(X_chor_shuffled[test_index])
print("5-fold CV on Chor:")
print(classification_report(y_chor_shuffled, y_chor_shuffled_pred))

#build classifier for entire set
clf_chor = RandomForestClassifier(n_jobs=12, n_estimators=40, random_state=RANDOM_SEED)
clf_chor.fit(X_chor, y_chor)

# Cross-corpus tests only make sense when both corpora use the same feature set.
if featset == chor_featset:
    clf_mtc = RandomForestClassifier(n_jobs=12, n_estimators=40, random_state=RANDOM_SEED)
    clf_mtc.fit(X_mtc, y_mtc)
    y_chor_mtc_pred = clf_mtc.predict(X_chor)
    print("Chor classified with MTC training:")
    print(classification_report(y_chor, y_chor_mtc_pred))

    y_mtc_chor_pred = clf_chor.predict(X_mtc)
    print("MTC classified with Chor training:")
    print(classification_report(y_mtc, y_mtc_chor_pred))

if chor_featset == essen_featset:
    y_essen_chor_pred = clf_chor.predict(X_essen)
    print("Essen classified with Chorale training:")
    print(classification_report(y_essen, y_essen_chor_pred))

Fold 0
Fold 1
Fold 2
Fold 3
Fold 4
5-fold CV on Chor:
              precision    recall  f1-score   support

           0       0.94      0.86      0.90      1906
           1       0.98      0.99      0.99     14716

    accuracy                           0.98     16622
   macro avg       0.96      0.92      0.94     16622
weighted avg       0.98      0.98      0.98     16622

Chor classified with MTC training:
              precision    recall  f1-score   support

           0       0.81      0.47      0.59      1906
           1       0.93      0.99      0.96     14716

    accuracy                           0.93     16622
   macro avg       0.87      0.73      0.78     16622
weighted avg       0.92      0.93      0.92     16622

MTC classified with Chor training:
              precision    recall  f1-score   support

           0       0.76      0.33      0.46      7049
           1       0.93      0.99      0.96     61215

    accuracy                           0.92     68264
   m

# Visualise discovered rules and annotate melodies.

Functions for converting the rules to pandas queries.

In [21]:
# Nominal values that occur on the right-hand side of a rule condition and
# must be written as string literals in a pandas query.
ordvals = [
    '-',
    '+',
    '=',
    'start',
    'in',
    'end',
]


def _is_bare_literal(value):
    """Return True if `value` can stay unquoted in a query (boolean or number)."""
    if value in ('True', 'False'):
        return True
    try:
        float(value)
    except ValueError:
        return False
    return True


def transformOne(relation):
    """Convert one rule condition into a pandas-query expression.

    Surrounding blanks and parentheses are removed. A condition of the form
    ``name = value`` becomes ``name == value``; the value is wrapped in double
    quotes when it is one of `ordvals` or any other token that is neither
    ``True``/``False`` nor a number. Conditions using another operator
    (e.g. ``<=``, ``>=``) are returned unchanged apart from the stripping.

    The value is compared as a whole: matching only the end of the string
    would mangle values that merely end in one of `ordvals`
    (``begin`` -> ``beg"in"``).

    Parameters
    ----------
    relation : str
        A single condition, e.g. ``"(contourthird = -)"``.

    Returns
    -------
    str
        The condition as a query expression, e.g. ``'contourthird == "-"'``.
    """
    res = relation.strip(" ()")
    lhs, sep, rhs = res.partition(" = ")
    if not sep:
        # no equality test: nothing to rewrite
        return res
    if rhs in ordvals or not _is_bare_literal(rhs):
        rhs = '"' + rhs + '"'
    return lhs + " == " + rhs
    
def JRIP2pandaquery(jrip_query, invert=False):
    """Translate one JRip rule into a pandas query string.

    Everything from ``=>`` onwards (the rule's conclusion) is discarded. The
    remaining conditions are split on the literal separator ``' and '``, each
    one is converted with `transformOne`, and the results are joined with
    ``' & '``.

    Splitting on ``' and '`` (with the surrounding blanks) rather than on the
    bare substring ``and`` keeps attribute names or values that happen to
    contain the letters "and" intact.

    Parameters
    ----------
    jrip_query : str
        A single rule, e.g.
        ``"(a = -) and (b >= 1) => cadence_class=midcadence (10.0/1.0)"``.
    invert : bool, optional
        If True, the whole query is negated by wrapping it in ``~( ... )``.

    Returns
    -------
    str
        The query string, e.g. ``'a == "-" & b >= 1'``.
    """
    antecedent = jrip_query
    arrow_pos = jrip_query.find('=>')
    if arrow_pos > 0:
        antecedent = jrip_query[:arrow_pos]
    conditions = [transformOne(condition) for condition in antecedent.split(' and ')]
    query = ' & '.join(conditions)
    if invert:
        query = '~( ' + query + ')'
    return query

Function for pretty printing the rule set.

In [22]:
def pprint_rule(rule):
    """Print `rule` with a line break (and a two-space indent) after every ' and '."""
    print(rule.replace(' and ', ' and\n  '))

The rule sets.

In [23]:
jrip_rules_mtc_pitchrhythm = """(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (IOIbeatfractionthird >= 1.25) and (meternumerator >= 4) and (IOIbeatfractionfirst <= 0.666667) => cadence_class=midcadence (739.0/54.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (IOIbeatfractionthird >= 1) and (IOIbeatfractionsecondthird = +) and (beatstrengthfourth >= 1) => cadence_class=midcadence (705.0/88.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (IOIbeatfractionthird >= 1.25) and (IOIbeatfractionfifth <= 1.5) and (VosHarmonyfourth >= 4) and (intervalsecond <= 0) and (diatonicpitchthird <= 30) => cadence_class=midcadence (272.0/15.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (beatstrengthfirst <= 0.5) and (IOIbeatfractionthird >= 1.333333) and (meternumerator >= 4) and (beatstrengthsecond <= 0.25) => cadence_class=midcadence (136.0/14.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (intervalfifth >= 0) and (IOIbeatfractionfifth <= 0.333333) and (midipitchfourth <= 67) and (beatduration >= 1.5) => cadence_class=midcadence (102.0/12.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (intervaldirthirdfourth = +) and (intervalfifth >= 0) and (diatonicpitchthirdfourth = =) => cadence_class=midcadence (436.0/92.0)
(IOIbeatfractionthirdfourth = -) and (intervaldirthirdfourth = +) and (IOIbeatfractionthird >= 1.666667) and (completesbeatsong = False) => cadence_class=midcadence (206.0/17.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (contourthird = -) and (intervalfifth >= 0) and (contourfirst = -) and (VosHarmonythird <= 3) => cadence_class=midcadence (128.0/20.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (IOIbeatfractionfifth <= 0.5) and (intervalfifth >= 2) and (midipitchfourth <= 68) and (VosHarmonyfirst <= 4) => cadence_class=midcadence (87.0/10.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (IOIbeatfractionfifth <= 0.5) and (IOIbeatfractionsecondthird = +) and (intervaldirthirdfourth = +) and (VosCenterGravitysecond = True) => cadence_class=midcadence (113.0/19.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (IOIbeatfractionthird >= 1.333333) and (IOIbeatfractionfirst <= 1) and (diatonicpitchthirdfourth = =) => cadence_class=midcadence (154.0/47.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (beatstrengthfirst <= 0.5) and (IOIbeatfractionsecondthird = +) and (contourfirst = +) => cadence_class=midcadence (124.0/42.0)
(IOIbeatfractionthirdfourth = -) and (IOIbeatfractionthird >= 0.833333) and (beatstrengththird <= 0.5) and (intervalthird <= 0) and (IOIbeatfractionsecondthird = +) and (beatstrengthfourth >= 0.5) => cadence_class=midcadence (293.0/77.0)
(IOIbeatfractionthirdfourth = -) and (IOIbeatfractionthird >= 0.833333) and (intervalthird <= 1) and (beatstrengthfifth >= 1) and (IOIbeatfractionfirst <= 1) and (VosHarmonyfourth >= 6) => cadence_class=midcadence (99.0/18.0)
(IOIbeatfractionthirdfourth = -) and (IOIbeatfractionthird >= 1.666667) and (IOIbeatfractionfifth <= 0.75) and (intervalfourth >= 0) => cadence_class=midcadence (230.0/80.0)
(IOIbeatfractionthirdfourth = -) and (IOIbeatfractionthird >= 0.833333) and (completesmeasuresong = True) and (IOIbeatfractionsecond <= 0.666667) and (contoursecond = -) and (VosHarmonythird <= 2) and (intervalfourth >= 0) => cadence_class=midcadence (37.0/6.0)
(IOIbeatfractionthirdfourth = -) and (IOIbeatfractionthird >= 1.25) and (beatcount <= 2) and (intervalfifth >= 0) and (diatonicpitchfifth <= 32) => cadence_class=midcadence (214.0/65.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (contourfirst = -) and (intervalfourth <= -4) => cadence_class=midcadence (75.0/29.0)
(IOIbeatfractionthirdfourth = -) and (IOIbeatfractionthird >= 1) and (beatstrengththird <= 0.5) and (intervalthird <= 0) and (onthebeatfifth = True) and (IOIbeatfractionfifth <= 0.666667) => cadence_class=midcadence (138.0/49.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (IOIbeatfractionfifth <= 0.666667) and (intervalthird <= 1) and (IOIbeatfractionfirst <= 0.333333) and (beatstrengthfifth >= 1) and (VosHarmonysecond >= 3) => cadence_class=midcadence (78.0/22.0)
(IOIbeatfractionthirdfourth = -) and (IOIbeatfractionthird >= 2.5) and (intervalfourth >= 0) and (IOIbeatfractionsecond <= 2) => cadence_class=midcadence (86.0/14.0)
(IOIbeatfractionthirdfourth = -) and (IOIbeatfractionthird >= 1) and (completesmeasuresong = True) and (intervalthird <= 1) and (diatonicpitchfirst >= 33) and (IOIbeatfractionfirstsecond = +) => cadence_class=midcadence (73.0/21.0)
(IOIbeatfractionthirdfourth = -) and (completesmeasuresong = True) and (IOIbeatfractionfifth <= 0.75) and (intervalthird <= 1) and (VosHarmonyfourth <= 0) and (intervalfirst <= 1) => cadence_class=midcadence (180.0/84.0)
(IOIbeatfractionsecondthird = +) and (VosHarmonyfourth >= 4) and (IOIbeatfractionthird >= 2.5) => cadence_class=midcadence (59.0/17.0)
(IOIbeatfractionthird >= 0.666667) and (completesmeasuresong = True) and (IOIbeatfractionfifth <= 0.75) and (intervaldirthirdfourth = +) and (contoursecond = -) and (intervalfifth >= -1) => cadence_class=midcadence (198.0/79.0)
(completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (diatonicpitchsecondthird = -) and (contourfirst = -) and (midipitchsecond <= 66) and (IOIbeatfractionthirdfourth = =) and (intervalfourth >= 7) => cadence_class=midcadence (46.0/9.0)
(completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (onthebeatthird = True) and (IOIbeatfractionfifth <= 0.75) and (beatcount <= 2) and (VosHarmonyfirst <= 3) and (scaledegreesecond <= 3) => cadence_class=midcadence (187.0/75.0)
(completesmeasuresong = True) and (beatstrengththirdfourth = -) and (IOIbeatfractionfourthfifth = =) and (intervaldirthirdfourth = +) and (contourfirst = -) => cadence_class=midcadence (177.0/81.0)
(IOIbeatfractionthird >= 0.666667) and (completesmeasuresong = True) and (IOIbeatfractionfifth <= 0.75) and (intervalthird <= 0) and (VosHarmonyfourth >= 4) and (midipitchsecond >= 72) => cadence_class=midcadence (118.0/55.0)
(IOIbeatfractionthird >= 1.25) and (beatstrengththird <= 0.5) and (IOIbeatfractionfifth <= 1.5) and (VosHarmonyfourth >= 4) and (VosHarmonyfirst <= 3) => cadence_class=midcadence (114.0/48.0)
(IOIbeatfractionthird >= 0.666667) and (IOIbeatfractionfifth <= 0.75) and (beatstrengththird <= 0.5) and (IOIbeatfractionsecondthird = +) and (intervaldirthirdfourth = +) and (VosHarmonyfirst <= 3) and (intervalsecond <= -2) => cadence_class=midcadence (143.0/67.0)"""

In [24]:
jrip_rules_essen_pitchrhythm = """(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (contourthird = -) and (beatstrengthfirst <= 0.5) and (IOIbeatfractionthird >= 1.333333) and (IOIbeatfractionfifth <= 1) => cadence_class=midcadence (1284.0/41.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (contourthird = -) and (intervalfifth >= 0) and (scaledegreefourth <= 5) and (meternumerator >= 4) and (beatstrengthfifth >= 0.25) => cadence_class=midcadence (316.0/32.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfourthfifth = =) and (IOIbeatfractionthird >= 1.5) and (contoursecond = -) => cadence_class=midcadence (446.0/44.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (contourthird = -) and (intervaldirthirdfourth = +) and (intervalfifth >= 0) and (beatcount <= 2) and (VosCenterGravitysecond = True) => cadence_class=midcadence (100.0/7.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfifth <= 1) and (IOIbeatfractionthird >= 1.333333) and (beatstrengthfourthfifth = -) => cadence_class=midcadence (318.0/36.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfirstsecond = =) and (VosHarmonyfourth >= 6) and (intervalfifth >= 4) => cadence_class=midcadence (113.0/14.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (intervalfifth >= 0) and (beatstrengthfirst <= 0.5) and (beatstrengthfifth >= 0.5) and (beatstrengthfourth <= 0.25) and (scaledegreefourth <= 2) => cadence_class=midcadence (68.0/5.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfifth <= 1) and (IOIbeatfractionthird >= 1.333333) and (beatstrengthfirst <= 0.5) and (beatcount <= 2) => cadence_class=midcadence (90.0/5.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (VosHarmonyfourth >= 4) and (contourfirst = -) and (scaledegreefourth <= 1) => cadence_class=midcadence (96.0/11.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfourthfifth = =) and (intervaldirthirdfourth = +) and (beatstrengthfifth >= 1) and (VosHarmonysecond >= 2) and (intervalfourth <= 1) => cadence_class=midcadence (53.0/4.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (contourfirst = -) and (intervaldirthirdfourth = +) and (VosCenterGravitythird = True) => cadence_class=midcadence (304.0/67.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfifth <= 1) and (contourfirst = -) and (intervalfifth >= -1) and (scaledegreefirst >= 4) and (intervaldirsecondthird = =) => cadence_class=midcadence (133.0/24.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (intervalthird <= -2) and (contourfirst = -) and (beatstrengthfirstsecond = +) => cadence_class=midcadence (97.0/15.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfifth <= 1) and (VosHarmonyfirst <= 3) and (intervalfourth >= 0) and (VosHarmonyfirst >= 2) => cadence_class=midcadence (577.0/215.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfourthfifth = =) and (midipitchthird <= 69) and (VosHarmonyfirst <= 3) and (VosHarmonyfourth >= 5) => cadence_class=midcadence (90.0/22.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (VosHarmonyfourth >= 4) and (beatstrengthfirst <= 0.25) and (IOIbeatfractionsecondthird = -) and (meternumerator >= 4) => cadence_class=midcadence (55.0/1.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (contourthird = -) and (IOIbeatfractionfirstsecond = +) and (beatstrengthfourth <= 0.25) and (IOIbeatfractionthird >= 0.666667) => cadence_class=midcadence (65.0/16.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (VosHarmonyfourth >= 4) and (IOIbeatfractionfirst <= 0.666667) and (contoursecond = -) and (midipitchthird <= 66) and (VosCenterGravityfifth = False) => cadence_class=midcadence (47.0/8.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (contoursecond = -) and (VosHarmonyfourth <= 0) and (scaledegreesecond >= 5) and (beatstrengthfourth <= 0.25) => cadence_class=midcadence (54.0/2.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (intervalthird <= 1) and (intervalfifth >= 0) and (intervaldirsecondthird = +) and (IOIbeatfractionfirst <= 0.5) and (scaledegreefirst >= 4) => cadence_class=midcadence (63.0/8.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfifth <= 0.75) and (contourthird = -) and (beatstrengthsecond >= 1) and (beatstrengthfourth <= 0.25) and (contourreversal = False) => cadence_class=midcadence (89.0/17.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (intervalfifth >= 0) and (VosHarmonyfourth >= 4) and (contourfirst = -) and (IOIbeatfractionfirstsecond = =) and (midipitchsecond <= 69) => cadence_class=midcadence (38.0/6.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (intervalfifth >= 0) and (scaledegreesecond >= 3) and (intervaldirthirdfourth = +) and (beatstrengthsecond >= 1) => cadence_class=midcadence (77.0/27.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (VosHarmonyfourth >= 4) and (contourfirst = -) and (intervaldirsecondthird = =) and (beatstrengthfifth >= 1) => cadence_class=midcadence (74.0/22.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfifth <= 0.75) and (VosHarmonyfourth >= 4) and (midipitchsecond >= 71) and (beatstrengthfourth >= 0.5) and (scaledegreefifth <= 3) and (diatonicpitchfirst >= 31) => cadence_class=midcadence (52.0/9.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfifth <= 0.75) and (beatcount <= 2) and (beatstrengthfirst <= 0.25) and (beatstrengthfourthfifth = +) => cadence_class=midcadence (170.0/54.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (IOIbeatfractionfifth <= 1.5) and (VosHarmonyfirstsecond = +) and (ambitus <= 1) => cadence_class=midcadence (160.0/59.0)
(completesmeasuresong = True) and (IOIbeatfractionsecondthird = +) and (VosHarmonyfourth >= 4) and (IOIbeatfractionfirstsecond = =) and (beatstrengthfourth >= 1) => cadence_class=midcadence (77.0/29.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (VosHarmonyfirst <= 3) and (diatonicpitchfourth <= 28) and (ambitus <= 1) and (VosHarmonyfirstsecond = +) => cadence_class=midcadence (45.0/13.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (VosHarmonyfirst <= 4) and (diatonicpitchthird <= 30) and (diatonicpitchsecond >= 30) and (beatstrengthsecond <= 0.25) and (VosHarmonyfirstsecond = +) and (diatonicpitchthird >= 30) and (scaledegreefourth >= 3) => cadence_class=midcadence (45.0/8.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (VosHarmonyfirst <= 4) and (VosHarmonyfourth >= 5) and (scaledegreefirst <= 4) and (ambitus <= 2) => cadence_class=midcadence (170.0/67.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfifth <= 0.75) and (intervalthird <= -2) and (beatcount <= 2) and (scaledegreethird >= 5) => cadence_class=midcadence (68.0/28.0)
(completesmeasuresong = True) and (IOIbeatfractionthirdfourth = -) and (intervalthird <= -2) and (scaledegreefourth <= 1) => cadence_class=midcadence (77.0/31.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (VosHarmonyfourth >= 4) and (intervalfourth <= -4) and (diatonicpitchfourth <= 25) and (onthebeatfourth = False) => cadence_class=midcadence (65.0/19.0)
(IOIbeatfractionsecondthird = +) and (IOIbeatfractionthird >= 1.5) and (beatstrengththird <= 0.5) and (intervalfourth >= 0) => cadence_class=midcadence (379.0/122.0)
(completesmeasuresong = True) and (IOIbeatfractionthird >= 0.666667) and (IOIbeatfractionfifth <= 1.5) and (contourfirst = -) and (VosHarmonyfourth >= 4) and (beatstrengthfirst <= 0.25) => cadence_class=midcadence (133.0/47.0)
(completesmeasuresong = True) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (midipitchfirst >= 68) and (scaledegreefourth <= 3) and (scaledegreefourth >= 3) and (VosHarmonyfirst <= 2) => cadence_class=midcadence (126.0/60.0)"""

In [25]:
# JRip rule set stored as plain text, one rule per line (a later cell splits the
# selected rule set on '\n'). Every rule in this set predicts
# cadence_class=midcadence.
# NOTE(review): judging by the name this is the rule set for the chorale data
# with pitch+rhythm features, and the trailing "(a/b)" pair presumably is the
# covered/misclassified count reported by the rule learner -- confirm.
jrip_rules_chor_pitchrhythm = """(completesmeasuresong = True) and (intervaldirthirdfourth = +) and (IOIbeatfractionfourthfifth = =) and (IOIbeatfractionfirstsecond = +) and (beatstrengththirdfourth = -) => cadence_class=midcadence (257.0/10.0)
(completesmeasuresong = True) and (IOIbeatfractionthird >= 2) and (IOIbeatfractionfourthfifth = =) and (beatstrengthsecond <= 0.25) => cadence_class=midcadence (368.0/4.0)
(completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (contourfourth = +) and (contourfirst = -) => cadence_class=midcadence (255.0/42.0)
(completesmeasuresong = True) and (VosHarmonyfourth <= 0) and (IOIbeatfractionfifth <= 1) and (beatstrengthfourth <= 0.25) => cadence_class=midcadence (320.0/79.0)
(completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (IOIbeatfractionfirstsecond = +) and (diatonicpitchthird <= 30) and (contoursecond = -) => cadence_class=midcadence (47.0/3.0)
(completesmeasuresong = True) and (intervalfourth >= 0) and (beatstrengththird >= 0.5) and (IOIbeatfractionfourthfifth = =) and (VosHarmonyfifth <= 0) and (VosCenterGravityfourth = False) => cadence_class=midcadence (40.0/3.0)
(IOIbeatfractionthird >= 2) and (intervaldirthirdfourth = +) and (beatstrengthfirstsecond = +) and (beatcount >= 4) => cadence_class=midcadence (137.0/5.0)
(IOIbeatfractionthird >= 2) and (intervaldirthirdfourth = +) and (VosHarmonyfourth >= 4) => cadence_class=midcadence (50.0/6.0)
(completesmeasuresong = True) and (VosHarmonyfourth >= 4) and (intervaldirfirstsecond = -) and (intervalthird <= 1) => cadence_class=midcadence (144.0/67.0)
(IOIbeatfractionthird >= 3) and (IOIbeatfractionfourthfifth = =) => cadence_class=midcadence (40.0/4.0)
(IOIbeatfractionthird >= 3) and (intervalfourth >= 0) => cadence_class=midcadence (21.0/6.0)
(completesmeasuresong = True) and (beatstrengththirdfourth = -) and (IOIbeatfractionfirstsecond = +) and (beatstrengthsecond >= 0.5) => cadence_class=midcadence (73.0/20.0)"""

Choose a rule set and the accompanying dataset

In [26]:
# Pick ONE rule set together with a private copy of its matching dataset.
# Exactly one of the three pairs below should be active.
jrip_rules, dataset = jrip_rules_mtc_pitchrhythm, pgrams_sel_mtc.copy()

#jrip_rules, dataset = jrip_rules_essen_pitchrhythm, pgrams_sel_essen.copy()

#jrip_rules, dataset = jrip_rules_chor_pitchrhythm, pgrams_sel_chor.copy()

Show the rules

In [27]:
# Turn the selected rule set (one rule per line) into a list of rule strings.
# The isinstance guard makes the cell safe to re-run: once jrip_rules is
# already a list, calling .split() on it would raise AttributeError.
if isinstance(jrip_rules, str):
    jrip_rules = jrip_rules.split('\n')

In [28]:
# Pretty-print every rule, preceded by its index in the rule list.
for rule_no, rule_text in enumerate(jrip_rules):
    print(f"Rule {rule_no}:")
    pprint_rule(f"  {rule_text}")

Rule 0:
  (IOIbeatfractionthirdfourth = -) and
  (completesmeasuresong = True) and
  (IOIbeatfractionthird >= 1.25) and
  (meternumerator >= 4) and
  (IOIbeatfractionfirst <= 0.666667) => cadence_class=midcadence (739.0/54.0)
Rule 1:
  (IOIbeatfractionthirdfourth = -) and
  (completesmeasuresong = True) and
  (IOIbeatfractionthird >= 1) and
  (IOIbeatfractionsecondthird = +) and
  (beatstrengthfourth >= 1) => cadence_class=midcadence (705.0/88.0)
Rule 2:
  (IOIbeatfractionthirdfourth = -) and
  (completesmeasuresong = True) and
  (IOIbeatfractionthird >= 1.25) and
  (IOIbeatfractionfifth <= 1.5) and
  (VosHarmonyfourth >= 4) and
  (intervalsecond <= 0) and
  (diatonicpitchthird <= 30) => cadence_class=midcadence (272.0/15.0)
Rule 3:
  (IOIbeatfractionthirdfourth = -) and
  (completesmeasuresong = True) and
  (beatstrengthfirst <= 0.5) and
  (IOIbeatfractionthird >= 1.333333) and
  (meternumerator >= 4) and
  (beatstrengthsecond <= 0.25) => cadence_class=midcadence (136.0/14.0)
Rule 4:


Function to remove all objects that match any rule in the rule set, leaving only the objects that are not covered by any rule.

In [29]:
def apply_all_jrip_rules(df, rules):
    """Return the rows of ``df`` that are left after filtering with every rule.

    Each rule is converted with ``JRIP2pandaquery(rule, invert=True)`` and the
    resulting query is applied to the frame remaining from the previous rule.

    Parameters:
        df: DataFrame to filter (the frame itself is not modified here;
            ``query`` produces a new frame each round).
        rules: iterable of JRip rule strings.

    Returns:
        The filtered DataFrame; ``df`` itself when ``rules`` is empty.
    """
    remaining = df
    for rule in rules:
        inverted_query = JRIP2pandaquery(rule, invert=True)
        remaining = remaining.query(inverted_query)
    return remaining

Function to annotate the objects in case a rule applies

In [30]:
def annotate_jrip_rules(df, rules, name):
    """Annotate, IN PLACE, which rule applies to each object of ``df``.

    Column ``name`` is (re)created and set to -1 everywhere, then filled with
    the index of a matching rule. Rules are processed from last to first, so
    when several rules match an object the lowest rule index is what remains.
    Objects keeping -1 are matched by no rule.

    TODO: Only annotate a rule if current value == -1 (the reverse iteration
    order is the solution for now).

    Parameters:
        df: DataFrame to annotate (modified in place).
        rules: iterable of JRip rule strings.
        name: name of the annotation column.

    Returns:
        The same DataFrame object ``df``.
    """
    df.loc[:, name] = -1  # erase previous or create new series
    numbered_rules = list(enumerate(rules))
    for rule_no, rule_text in numbered_rules[::-1]:
        matches = df.eval(JRIP2pandaquery(rule_text)).values
        df.loc[matches, name] = rule_no
    return df

Do it

In [31]:
# Add a 'rule' column holding the index of the matching rule (-1: no rule).
# This modifies `dataset` in place, so it has to run before the selections
# below for them to carry the 'rule' column as well.
annotate_jrip_rules(dataset, jrip_rules, 'rule')

# All objects that are not covered by any rule
notcovered = apply_all_jrip_rules(dataset, jrip_rules)

# All objects labelled as mid-cadence
cad = dataset[dataset['cadence_class'] == "midcadence"]
#cad = notcovered[notcovered['cadence_class'] == "midcadence"]

Let's have a look

In [32]:
# Show the annotated frame; the newly added 'rule' column appears last.
dataset

Unnamed: 0,ix0_0,ix0_1,ix1_0,ix1_1,ix2_0,ix2_1,ix3_0,ix3_1,ix4_0,ix4_1,...,informationcontentfirstthird,informationcontentfirstfourth,informationcontentfirstfifth,informationcontentsecondthird,informationcontentsecondfourth,informationcontentsecondfifth,informationcontentthirdfourth,informationcontentthirdfifth,informationcontentfourthfifth,rule
NLB015294_01_000,0,1,1,2,2,3,3,4,4,5,...,-,-,-,-,-,-,-,+,+,-1
NLB015294_01_001,1,2,2,3,3,4,4,5,5,6,...,-,-,-,-,+,-,+,-,-,-1
NLB015294_01_002,2,3,3,4,4,5,5,6,6,7,...,+,-,+,+,-,+,-,+,+,-1
NLB015294_01_003,3,4,4,5,5,6,6,7,7,8,...,-,+,-,-,+,-,+,-,-,-1
NLB015294_01_004,4,5,5,6,6,7,7,8,8,9,...,+,-,+,+,-,+,-,+,+,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NLB075958_01_062,62,63,63,64,64,65,65,66,66,67,...,+,-,+,+,-,+,-,-,+,-1
NLB075958_01_063,63,64,64,65,65,66,66,67,67,68,...,-,+,+,-,-,-,+,+,-,-1
NLB075958_01_064,64,65,65,66,66,67,67,68,68,69,...,-,-,-,+,+,+,-,-,+,-1
NLB075958_01_065,65,66,66,67,67,68,68,69,69,70,...,+,+,+,-,-,-,+,-,-,-1


We compute precision, recall, and F1 (check)

In [33]:
def posneg(c_class, rule):
    """Classify one object as 'FP', 'TP', 'TN' or 'FN'.

    Parameters:
        c_class: the object's cadence class; 'nocadence' is the negative class.
        rule: index of the rule annotated for the object, -1 if none.

    Returns:
        'FP' / 'TP' when a rule is annotated (rule != -1) and the class is
        'nocadence' / anything else; 'TN' / 'FN' likewise when no rule is
        annotated; 'UNK' if the class compares neither equal nor unequal to
        'nocadence'.
    """
    flagged = rule != -1
    if c_class == 'nocadence':
        return 'FP' if flagged else 'TN'
    if c_class != 'nocadence':
        return 'TP' if flagged else 'FN'
    # Fallback kept from the original conditional chain.
    return 'UNK'

In [34]:
# Needed to compute FP, FN, TP, TN.
# Keep the objects that are matched by some rule plus all objects whose class
# is not "nocadence"; objects that are neither are left out.
hits_misses = dataset.query('rule != -1 | cadence_class != "nocadence"')

# Label every remaining object and store the label as the second column.
errors = list(map(posneg,
                  hits_misses['cadence_class'].values,
                  hits_misses['rule'].values))
hits_misses.insert(1, 'posneg', errors)

In [35]:
# Tally the labels. Objects outside hits_misses were never labelled, so TN is
# derived from the size of the full dataset.
c = Counter(errors)
total = len(dataset)
TP, FP, FN = c['TP'], c['FP'], c['FN']
TN = total - TP - FP - FN

In [36]:
# Precision and recall of the rule set. When a denominator is zero (no object
# matched by any rule / no cadence in the data) the score is reported as 0.0
# instead of raising ZeroDivisionError.
prec = TP / (TP + FP) if (TP + FP) else 0.0
rec  = TP / (TP + FN) if (TP + FN) else 0.0
# F1: harmonic mean of precision and recall.
f1 = 2 * prec * rec / (prec + rec) if (prec + rec) else 0.0

In [37]:
# Report the confusion counts, then precision and recall.
for label, count in (("TP: ", TP), ("FP: ", FP), ("FN: ", FN), ("TN: ", TN), ("Total: ", total)):
    print(label, count)
print(prec, rec)

TP:  4283
FP:  1247
FN:  2771
TN:  62609
Total:  70910
0.774502712477396 0.6071732350439467


Store the annotations to disc in a temporary file

In [38]:
# Write the annotation columns (without the index) to the temporary CSV file.
hits_misses.to_csv(
    'tmp.csv',
    columns=['songid', 'ix0_0', 'ix2_1', 'rule', 'cadence_class'],
    index=False,
)

Any special wishes?

In [39]:
# Explicit list of song ids; the visualisation call below has a commented-out
# nlbids=wanted_songids argument for using it.
wanted_songids = ['chor047-sop']

Generate visualisations of rule predictions

In [42]:
# Generate the visualisations from the annotations in 'tmp.csv' (written by an
# earlier cell).
# NOTE(review): the second argument is hard-coded independently of the rule
# set / dataset chosen earlier in the notebook; keep the uncommented name in
# sync with that choice. Presumably it names the output directory (the
# clean-up cell below looks for *.ly files under directories with these
# names) -- confirm against viztrigrams.
# NOTE(review): random_song_selection=True with no visible seed; the selection
# may differ between runs -- confirm.
viztrigrams(
    'tmp.csv',
    'ismir2020_mtc_pitchrhythm',
    #'ismir2020_essen_pitchrhythm',
    #'ismir2020_chor_pitchrhythm',
    max_number=50,
    random_song_selection=True,
    #nlbids=wanted_songids
)

Do some adjustments and repairs to the generated lilypond files

In [None]:
from pathlib import Path
path = Path('.')

# Lines appended to every generated LilyPond file.
staff_size_line = "#(set-global-staff-size 14)"
layout_line = "\\layout { indent = 0\\cm}"

# add staff size
for directory in ['ismir2020_mtc_pitchrhythm','ismir2020_chor_pitchrhythm','ismir2020_essen_pitchrhythm']:
    for e in (path/directory).rglob('*.ly'):
        # Skip files that already carry the staff-size line, so that re-running
        # this cell does not append the same settings again and again.
        with open(e, 'r') as f:
            already_patched = staff_size_line in f.read()
        if already_patched:
            continue
        with open(e, 'a') as f:
            f.write("\n" + staff_size_line + "\n")
            f.write(layout_line + "\n")

#For chor music21 puts \autoBeamOff in ly source
#This causes lyrics to shift in case of beamed notes!
#Remove it
for e in (path/'ismir2020_chor_pitchrhythm').rglob('*.ly'):
    with open(e, 'r') as f:
        lines = f.readlines()
    with open(e, 'w') as f:
        for line in lines:
            # Replace the whole line by a LilyPond comment; this is stable
            # under re-runs (the replacement line maps to itself).
            if 'autoBeamOff' in line:
                line = '% removed \\autoBeamOff\n'
            f.write(line)