In [185]:
names = [
    'Wolfgang Amadeus Mozart',
    'Joseph Haydn',
    'Maurice Ravel',
    'Claude Debussy',
    'Thomas Tallis',
    'William Byrd',
]

name_pairs = [
    ('Wolfgang Amadeus Mozart', 'Joseph Haydn'),
    ('Maurice Ravel', 'Claude Debussy'),
    ('Thomas Tallis', 'William Byrd'),
]

embeddings_path = '../data/embeddings/*.h5'

In [186]:
from difflib import SequenceMatcher
import glob
import re

import h5py
import numpy as np
import pandas as pd
from scipy.spatial import distance

In [187]:
all_composers = [(i, *c) for i, c in enumerate(pd.read_csv('../data/composers.csv', sep='|').values)]

In [188]:
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()

def name_to_composer_id(name):
    composer = max(all_composers, key=lambda c: similar(c[1], name))
    composer_id = composer[0]
    print('Assuming {}: born {}; died {}; composer_id: {}'.format(composer[1], composer[2], composer[3], composer[0]))
    return composer_id

id_pairs = [(name_to_composer_id(a), name_to_composer_id(b)) for a, b in name_pairs]

Assuming Wolfgang Amadeus Mozart: born 1756; died 1791; composer_id: 2489
Assuming Joseph Haydn: born 1732; died 1809; composer_id: 1747
Assuming Maurice Ravel: born 1875; died 1937; composer_id: 2784
Assuming Claude Debussy: born 1862; died 1918; composer_id: 1136
Assuming Thomas Tallis: born 1505; died 1585; composer_id: 3204
Assuming William Byrd: born 1540; died 1623; composer_id: 857


In [189]:
def path_to_epoch(path):
    return int(re.search(r'\-(\d*)\-\d*', path).groups()[0])


def path_to_embedding(path):
    with h5py.File(path, 'r') as f:
        return f.get('composer_embeddings').value

In [190]:
embeddings_by_epoch = {path_to_epoch(path): path_to_embedding(path) for path in glob.glob(embeddings_path)}

In [191]:
distances_per_pair = []

for id_a, id_b in id_pairs:
    distances_by_epoch = []
    for epoch, embeddings in embeddings_by_epoch.items():
        d = distance.cosine(embeddings[id_a], embeddings[id_b])
        distances_by_epoch.append((epoch, d))
    distances_per_pair.append(sorted(distances_by_epoch, key=lambda tup: tup[0]))

In [192]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.palettes import Plasma4

output_notebook()

x_axis = [epoch for epoch, _ in distances_per_pair[0]]

plot = figure(title='Similarity by epoch',
              x_axis_label='Epochs',
              y_axis_label='Similarity')

for distances_by_epoch, (name_a, name_b), color in zip(distances_per_pair, name_pairs, Plasma4):
    distances = [d for _, d in distances_by_epoch]
    plot.line(x_axis, 1 - np.array(distances), legend='{} & {}'.format(name_a, name_b), line_width=2, line_color=color)

plot.plot_height = 500
plot.plot_width = 800
plot.legend.location = 'bottom_right'

show(plot)

In [193]:
import itertools

mozart_pairs = [(name_to_composer_id(names[0]), name_to_composer_id(n)) for n in names[1:]]

distances_per_pair = []

for id_a, id_b in mozart_pairs:
    distances_by_epoch = []
    for epoch, embeddings in embeddings_by_epoch.items():
        d = distance.cosine(embeddings[id_a], embeddings[id_b])
        distances_by_epoch.append((epoch, d))
    distances_per_pair.append(sorted(distances_by_epoch, key=lambda tup: tup[0]))

Assuming Wolfgang Amadeus Mozart: born 1756; died 1791; composer_id: 2489
Assuming Joseph Haydn: born 1732; died 1809; composer_id: 1747
Assuming Wolfgang Amadeus Mozart: born 1756; died 1791; composer_id: 2489
Assuming Maurice Ravel: born 1875; died 1937; composer_id: 2784
Assuming Wolfgang Amadeus Mozart: born 1756; died 1791; composer_id: 2489
Assuming Claude Debussy: born 1862; died 1918; composer_id: 1136
Assuming Wolfgang Amadeus Mozart: born 1756; died 1791; composer_id: 2489
Assuming Thomas Tallis: born 1505; died 1585; composer_id: 3204
Assuming Wolfgang Amadeus Mozart: born 1756; died 1791; composer_id: 2489
Assuming William Byrd: born 1540; died 1623; composer_id: 857


In [196]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.palettes import Plasma6

output_notebook()

x_axis = [epoch for epoch, _ in distances_per_pair[0]]

plot = figure(title='Similarity to Mozart by epoch',
              x_axis_label='Epochs',
              y_axis_label='Similarity')

for distances_by_epoch, name, color in zip(distances_per_pair, names[1:], Plasma6):
    distances = [d for _, d in distances_by_epoch]
    plot.line(x_axis, 1 - np.array(distances), legend=name, line_width=2, line_color=color)

plot.plot_height = 500
plot.plot_width = 800
plot.legend.location = 'top_left'

show(plot)