## Demo notebook for FoNN feature sequence extraction (accessed via FoNN.feature_extraction_tools)

Imports

In [1]:
# import required classes from local FoNN module
from FoNN.feature_sequence_extraction_tools import Corpus, Tune

Step 1: Load test tune from input corpus; extract root and primary feature sequence data

In [2]:
# local path to MTC-ANN corpus
corpus_path = '../mtc_ann_corpus/midi'
# test file
in_file = 'NLB015569_01.mid'

# single-tune MIDI ingest test
#  initialize Tune class object
test_tune = Tune(corpus_path + '/' + in_file)
# print title (extracted from filename)
print(f"Title: {test_tune.title}")
# extract root of input music document in diatonic and chromatic pitch class formats
test_tune.extract_root()
# extract primary feature sequence data
test_tune.extract_primary_feature_sequences()
print("Note-level primary feature sequence data:")
print(test_tune.feat_seq.head())

Title: NLB015569_01
Note-level primary feature sequence data:
   midi_note_num  diatonic_note_num  chromatic_pitch_class     onset  \
0             67                 33                      7  0.000000   
1             66                 32                      6  0.669922   
2             67                 33                      7  1.330078   
3             69                 34                      9  2.000000   
4             69                 34                      9  4.000000   

   duration  velocity  
0  0.669922        67  
1  0.669922        58  
2  0.669922        60  
3  2.000000        63  
4  1.000000        63  


Step 2. Repeat for entire corpus

In [3]:
# initialize corpus object
mtc_ann_corpus = Corpus(corpus_path)
# Corpus.setup_corpus_iteratively() populates corpus tune titles, extracts primary feature sequence data and root for all MIDI files in input dir.
mtc_ann_corpus.setup_corpus_iteratively()
# Print sample output:
print(f"Title: {mtc_ann_corpus.tunes[0].title}")
print("Note-level feature sequence output")
print(mtc_ann_corpus.tunes[0].feat_seq.head())
print(f"Corpus contains {len(mtc_ann_corpus.tunes)} tunes.")

Extracting primary feature sequence data from MIDI: 100%|██████████| 360/360 [00:02<00:00, 136.28it/s]

Title: NLB070078_01
Note-level feature sequence output
   midi_note_num  diatonic_note_num  chromatic_pitch_class  onset  duration  \
0             74                 37                      2    0.0       2.0   
1             74                 37                      2    2.0       3.0   
2             74                 37                      2    5.0       1.0   
3             76                 38                      4    6.0       2.0   
4             74                 37                      2    8.0       2.0   

   velocity  
0        65  
1        69  
2        57  
3        67  
4        61  
Corpus contains 360 tunes.





Step 3. Extract secondary feature sequence data

In [4]:
# Add relative chromatic & diatonic pitch:
# TODO: Re-instate the two methods below!
mtc_ann_corpus.extract_relative_chromatic_pitch_seqs()
mtc_ann_corpus.extract_relative_diatonic_pitch_seqs()
# Add chromatic & diatonic scale degrees:
mtc_ann_corpus.extract_chromatic_scale_degree_seqs()
mtc_ann_corpus.extract_diatonic_scale_degree_seqs()
# Add chromatic & diatonic intervals:
mtc_ann_corpus.extract_chromatic_intervals()
mtc_ann_corpus.extract_diatonic_intervals()
# Add Parsons code (simple contour)
mtc_ann_corpus.extract_parsons_codes()
print(mtc_ann_corpus.tunes[0].title)
# Print sample output:
print(f"Title: {mtc_ann_corpus.tunes[0].title}")
print("Note-level feature sequence output")
print(mtc_ann_corpus.tunes[0].feat_seq.head())
print(f"Corpus contains {len(mtc_ann_corpus.tunes)} tunes.")

Calculating relative chromatic pitch sequences: 100%|██████████| 360/360 [00:00<00:00, 1754.42it/s]
Calculating relative diatonic pitch sequences: 100%|██████████| 360/360 [00:00<00:00, 1895.21it/s]
Calculating chromatic scale degree sequences: 100%|██████████| 360/360 [00:00<00:00, 1860.46it/s]
Calculating diatonic scale degree sequences: 100%|██████████| 360/360 [00:00<00:00, 1496.56it/s]
Calculating chromatic interval sequences: 100%|██████████| 360/360 [00:00<00:00, 1381.00it/s]
Calculating diatonic interval sequences: 100%|██████████| 360/360 [00:00<00:00, 1365.44it/s]
Calculating cumulative Parsons code sequences: 100%|██████████| 360/360 [00:01<00:00, 249.32it/s]

NLB070078_01
Title: NLB070078_01
Note-level feature sequence output
   midi_note_num  diatonic_note_num  chromatic_pitch_class  onset  duration  \
0             74                 37                      2    0.0       2.0   
1             74                 37                      2    2.0       3.0   
2             74                 37                      2    5.0       1.0   
3             76                 38                      4    6.0       2.0   
4             74                 37                      2    8.0       2.0   

   velocity  relative_chromatic_pitch  relative_diatonic_pitch  \
0        65                        67                       32   
1        69                        67                       32   
2        57                        67                       32   
3        67                        69                       33   
4        61                        67                       32   

   chromatic_scale_degree  diatonic_scale_degree  chromatic_




Step 4. Filter sequences to create 'accent-level' representation

In [5]:
# This step is not applied to the MTC-ANN corpus as it was conceived specifically for the study of dance tune melodies.
# The shorter song melodies in MTC-ANN are not necessarily suited to analysis at this higher level of granularity.

 # To apply accent-level filtering, please call Corpus.filter_feat_seq_accents() method as instructed below:
 # For corpora originating in ABC Notation format: Corpus.filter_feat_seq_accents(self, thresh=80, by='velocity')
 # For corpora originating in MIDI format: Corpus.filter_feat_seq_accents(self, thresh=0.5, by='beat strength')

Step 5. Add Parsons code sequence data

In [6]:
# NOTE: If accent-level sequence filtering is applied, Parsons code must be calculated after filtration to ensure accuracy of the accent-level output sequences.

# Add Parsons code (simple contour)
mtc_ann_corpus.extract_parsons_codes()
print(mtc_ann_corpus.tunes[0].title)
# Print sample output:
print(f"Title: {mtc_ann_corpus.tunes[0].title}")
print("Note-level feature sequence output")
print(mtc_ann_corpus.tunes[0].feat_seq.head())
print(f"Corpus contains {len(mtc_ann_corpus.tunes)} tunes.")

Calculating cumulative Parsons code sequences: 100%|██████████| 360/360 [00:01<00:00, 321.78it/s]


NLB070078_01
Title: NLB070078_01
Note-level feature sequence output
   midi_note_num  diatonic_note_num  chromatic_pitch_class  onset  duration  \
0             74                 37                      2    0.0       2.0   
1             74                 37                      2    2.0       3.0   
2             74                 37                      2    5.0       1.0   
3             76                 38                      4    6.0       2.0   
4             74                 37                      2    8.0       2.0   

   velocity  relative_chromatic_pitch  relative_diatonic_pitch  \
0        65                        67                       32   
1        69                        67                       32   
2        57                        67                       32   
3        67                        69                       33   
4        61                        67                       32   

   chromatic_scale_degree  diatonic_scale_degree  chromatic_

Step 6: Apply duration-weighting to feature sequence data

In [7]:
# select all features for input into duration-weighting process
features = [col for col in mtc_ann_corpus.tunes[0].feat_seq.columns]
features.remove('duration')
print("Input features for duration weighting:")
for feat in features:
    print(feat)

Input features for duration weighting:
midi_note_num
diatonic_note_num
chromatic_pitch_class
onset
velocity
relative_chromatic_pitch
relative_diatonic_pitch
chromatic_scale_degree
diatonic_scale_degree
chromatic_interval
diatonic_interval
parsons_code
parsons_cumsum


In [8]:
# Run duration-weighting
mtc_ann_corpus.extract_duration_weighted_feat_seqs(features=features)
# Print sample output:
print(f"Title: {mtc_ann_corpus.tunes[0].title}")
print("Note-level feature sequence output")
print(mtc_ann_corpus.tunes[0].feat_seq.head())
print(f"Duration-weighted corpus contains {len(mtc_ann_corpus.tunes)} tunes.")

Calculating duration-weighted feature sequences: 100%|██████████| 360/360 [00:00<00:00, 422.47it/s]

Title: NLB070078_01
Note-level feature sequence output
   midi_note_num  diatonic_note_num  chromatic_pitch_class  onset  duration  \
0             74                 37                      2    0.0       2.0   
1             74                 37                      2    2.0       3.0   
2             74                 37                      2    5.0       1.0   
3             76                 38                      4    6.0       2.0   
4             74                 37                      2    8.0       2.0   

   velocity  relative_chromatic_pitch  relative_diatonic_pitch  \
0        65                        67                       32   
1        69                        67                       32   
2        57                        67                       32   
3        67                        69                       33   
4        61                        67                       32   

   chromatic_scale_degree  diatonic_scale_degree  chromatic_interval  \
0




Step 7. Write output data to file

In [9]:
# set outpath
mtc_ann_corpus.csv_outpath = '../mtc_ann_corpus/feature_sequence_data'
# write output to csv files
mtc_ann_corpus.save_feat_seq_data_to_csv()

Saving feature sequence data to csv: 100%|██████████| 360/360 [00:00<00:00, 534.45it/s]
