## Small-scale study on Tosca

In [1]:
# Step 1: Preprocessing of chord sequences: only a minimal label clean-up is needed in this case
# Step 2: Transposition to the Cmaj key
# Step 3: Encoding of sequences
# Step 4: Computation of the ngrams
# Step 5: Computation of the harmonic similarity
# Step 6: New hsim_map + (optional) updating the graph

In [2]:
import sys
import json
import joblib

import pandas as pd

from ChordalPy.Transposers import transpose

sys.path.insert(0,'src/')

In [3]:
# Metadata table of the sonar datasets.
sonar_meta = pd.read_csv("setup/sonar_datasets_meta.csv")

# Raw annotations of the arias under study.
with open("extra/arias.json", "rb") as arias_file:
    arias_raw = json.load(arias_file)

# NOTE(review): joblib files are pickle-based -- only load trusted artefacts.
sonar_brps = joblib.load("setup/sonar_ngrams_global.joblib")
encoding_bundle = joblib.load("setup/sonar_encoding_bundle.joblib")

# Encoder/decoder object used below to tokenise and decode chords.
encdec = encoding_bundle["encoder_decoder"]

### Step 1: Preprocessing chord sequences
For this example, we do not need to remove consecutive repetitions, as none occur. We only keep the chord labels and map the two `min7(b5)` chords to plain `min7`.

In [7]:
# Label substitutions applied to the raw chord annotations.
chord_map = {
    "E:min7(b5)": "E:min7",
    "F:min7(b5)": "F:min7"
}

# For every track, keep only the first field of each chord annotation (the
# label) and apply the substitutions above; unmapped labels pass through.
chord_pproc = {
    aria: [chord_map.get(entry[0], entry[0]) for entry in annotation["chord"]]
    for aria, annotation in arias_raw.items()
}

### Step 2: Transposition to the C(maj) key

In [8]:
chord_transp = {}

for aria, labels in chord_pproc.items():
    # Use the first key annotation of the track; the part of its label
    # before ":" is passed to `transpose` (tonic-level transposition only).
    first_key = arias_raw[aria]["key"][0]
    tonic = first_key[0].split(":")[0]
    # One transposed chord per input chord, in the original order.
    chord_transp[aria] = [transpose(label, tonic) for label in labels]


### Step 3: Encoding of chord sequences

In [9]:
# Map every transposed chord to its token index via the encoder/decoder.
chord_encoded = {}

for track_name, chords in chord_transp.items():
    chord_encoded[track_name] = []
    for chord in chords:  # iterating over chords
        try:
            # `chord_hash` rather than `hash`: do not shadow the builtin in
            # the notebook's global namespace.
            chord_hash = encdec._compute_chord_hash(chord)
            token = encdec.hash_to_index[chord_hash]
        except Exception:
            # Best-effort encoding: report and skip chords that cannot be
            # encoded, but let KeyboardInterrupt/SystemExit propagate.
            print(f"Could not encode: {chord}")
        else:
            chord_encoded[track_name].append(token)

### Step 4: Computation of ngrams

In [10]:
from ngrams_lib import extract_ngrams

# Accumulates the result of `extract_ngrams` over all tracks.
# NOTE(review): presumably keyed by track name -- confirm in ngrams_lib.
recurring_patterns = {}

for aria, tokens in chord_encoded.items():
    aria_ngrams = extract_ngrams(aria, tokens, n_start=3)
    recurring_patterns.update(aria_ngrams)

### Step 5: Computing the harmonic similarity

In [11]:
from harmonic_lib import ngram_hsim

# One (initially empty) match table per track found in the ngram step.
hsim_map_mini = {track_id: {} for track_id in recurring_patterns}

for track_name, track_brps in recurring_patterns.items():
    track_matches = hsim_map_mini[track_name]
    for sonar_entry in sonar_brps:
        # Each entry is read as a one-item mapping: track id -> ngram bag.
        sonar_track, sonar_bag = list(sonar_entry.items())[0]
        # Similarity score plus the patterns returned alongside it.
        hsim, shared_patterns = ngram_hsim(track_brps, sonar_bag)
        # Decode the token indices of every returned pattern.
        decoded_patterns = [
            [encdec.decode_event(idx) for idx in pattern]
            for pattern in shared_patterns
        ]
        if hsim > 0.:  # store non-trivial matches only
            track_matches[sonar_track] = hsim, decoded_patterns

In [12]:
# Show the result: track name -> {sonar track id: (hsim, decoded patterns)}.
hsim_map_mini

{'E lucevan le stelle': {'isophonics_61': (0.24561403508771928,
   [['Eb', 'G:7', 'C:(b3,5)']]),
  'isophonics_152': (0.22435897435897434, [['Ab:maj6', 'Bb', 'Eb']]),
  'isophonics_164': (0.2549019607843137, [['Eb', 'G:7', 'C:(b3,5)']]),
  'isophonics_191': (0.22916666666666666, [['Eb', 'G:7', 'C:(b3,5)']])},
 "Vissi D'Arte": {'isophonics_144': (0.5606060606060606,
   [['C:(b3,5)', 'Bb', 'G:min', 'F:min']])}}

In [13]:
# Index the metadata by dataset id: id -> {column name: value}.
sonar_map = sonar_meta.set_index("id").to_dict("index")

# One line per matched sonar track, in the order stored in the map.
match_line = "{}:\tTitle: {}; Artist: {}"

for sonar_matches in hsim_map_mini.values():
    for sonar_match in sonar_matches:
        match_meta = sonar_map[sonar_match]
        print(match_line.format(
            sonar_match, match_meta["title"], match_meta["artist"]))

isophonics_61:	Title: 12_-_Wait; Artist: The Beatles
isophonics_152:	Title: 06_-_Tigerfest; Artist: Zweieck
isophonics_164:	Title: 15_-_Es_Wird_Alles_Wieder_Gut,_Herr_Professor; Artist: Zweieck
isophonics_191:	Title: 03_-_I'm_Only_Sleeping; Artist: The Beatles
isophonics_144:	Title: 12_-_Jakob_Und_Marie; Artist: Zweieck
