## Demo notebook for FoNN similarity search tools

In [1]:
# imports

import os

from FoNN.similarity_search import TFIDFSimilarity, IncipitAndCadenceSimilarity, MotifSimilarity

In [2]:
# Set the input corpus path. This is a relative path, so it is resolved against the
# notebook's current working directory; it is passed as 'corpus_path' to every search class below.
mtc_ann_corpus_path = '../mtc_ann_corpus'

In [3]:
# Initialize a MotifSimilarity class instance to run 'motif' method similarity searches.
# Args:
# n -- length of the patterns under examination (number of elements). This must correspond to the pattern length output by ngram_pattern_extraction_demo.py.
# corpus_path -- path to the input corpus, as defined in the cell above.
# level -- the appropriate input data representation level for the input data under investigation. Can be 'note' (note-level), 'accent' (accent-level), or 'duration_weighted' (duration-weighted note-level).
# feature -- the musical feature under investigation. For a list of the 16 available features extracted by FoNN's ingest pipeline, see NgramPatternCorpus.FEATURES or ./README.md.
motif = MotifSimilarity(n=8, corpus_path=mtc_ann_corpus_path, level='duration_weighted', feature='diatonic_scale_degree')
# Select the query tune (the tune id corresponds to the input filename, excluding the filetype suffix).
motif.query_tune='NLB015569_01'
# Run the standard single-weighted implementation (recommended).
motif.run_similarity_search(weighting='single')
# Run the alternate double-weighted implementation.
# NOTE(review): this call is kept as the last bare expression in the cell, so any value it returns is what the notebook displays as the cell result — confirm before reordering.
motif.run_similarity_search(weighting='double')

Loading titles...
Done.
Loading patterns...
Done.
Loading pattern occurrences matrix...
Done.
Loading pattern TF-IDF matrix...
Done.
Running single-weighted 'motif' similarity search...
Median: 0.1789622807553229
Max: 0.1789622807553229
Median absolute deviation: 0.0
Threshold: 0.1789622807553229
21 search term patterns extracted:
21 exact matches detected
7 very similar patterns detected.
28 similar patterns detected.
Final single-weighted 'motif' results:
              score  normalized_score
title                                
NLB072311_01    8.5             1.000
NLB070089_01    6.5             0.733
NLB070732_01    4.0             0.400
NLB070096_01    3.0             0.267
NLB070053_01    3.0             0.267
NLB073486_01    3.0             0.267
NLB073339_01    2.5             0.200
NLB075167_01    2.5             0.200
NLB072553_01    2.5             0.200
NLB074104_01    2.0             0.133
NLB074769_02    2.0             0.133
NLB075532_01    2.0             0.133
NLB073

Unnamed: 0_level_0,score,normalized_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1
NLB072311_01,1.521,1.0
NLB070089_01,1.163,0.733
NLB070732_01,0.716,0.4
NLB070096_01,0.537,0.267
NLB070053_01,0.537,0.267
NLB073486_01,0.537,0.267
NLB073339_01,0.447,0.2
NLB075167_01,0.447,0.2
NLB072553_01,0.447,0.2
NLB074104_01,0.358,0.133


In [4]:
# Initialize an IncipitAndCadenceSimilarity class instance to run 'incipit and cadence' similarity searches.
# Args (corpus_path, level, feature) are as described for MotifSimilarity above; 'n' does not need to be specified for this class.
incipit = IncipitAndCadenceSimilarity(corpus_path=mtc_ann_corpus_path, level='duration_weighted', feature='diatonic_scale_degree')
# Select the query tune (same tune id as used for the 'motif' search above).
incipit.query_tune='NLB015569_01'
# Run the standard Levenshtein distance implementation (recommended).
incipit.run_similarity_search(edit_dist_metric='levenshtein')
# Run the alternate Hamming distance implementation.
incipit.run_similarity_search(edit_dist_metric='hamming')
# Run the experimental weighted Hamming distance implementation.
incipit.run_similarity_search(edit_dist_metric='weighted_hamming')

Loading titles...
Done.
Loading patterns...
Done.
Extracting incipit and cadence subsequences from feature sequence data...
Done.
Running 'incipit and cadence' similarity search, using Levenshtein distance metric...


Pandas Apply:   0%|          | 0/360 [00:00<?, ?it/s]

              Levenshtein distance
NLB138219_01                     8
NLB072886_01                     9
NLB071666_01                    10
NLB072886_02                    10
NLB072299_01                    12
Running 'incipit and cadence' similarity search, using Hamming distance metric...
              Hamming distance
NLB070089_01          0.302979
NLB072311_01          0.363525
NLB072946_01          0.424316
NLB075635_01          0.515137
NLB071957_03          0.515137
Running 'incipit and cadence' similarity search, using Weighted Hamming distance metric...
              Custom-weighted Hamming distance
NLB138219_01                               8.5
NLB071666_01                              10.5
NLB071441_01                              10.5
NLB072886_01                              11.0
NLB072299_01                              12.5


In [5]:
# Initialize a TFIDFSimilarity class instance to run the 'TF-IDF' similarity search.
# It is constructed here with corpus_path, level and feature only; 'n' is not passed.
tfidf = TFIDFSimilarity(corpus_path=mtc_ann_corpus_path, level='duration_weighted', feature='diatonic_scale_degree')
# Select the query tune (same tune id as used in the searches above).
tfidf.query_tune='NLB015569_01'
# Run the search; it is called with no arguments.
tfidf.run_similarity_search()

Loading titles...
Done.
Loading patterns...
Done.
Loading TF-IDF Cosine similarity matrix...
Done.
Running TF-IDF similarity search...
              Cosine similarity
NLB070089_01           0.107788
NLB074769_02           0.092102
NLB072311_01           0.085999
NLB072883_01           0.083984
NLB072559_01           0.078979


In [6]:
# The outputs of all searches above can be found in subdirectories of the '../mtc_ann_corpus/similarity_results/' directory.