In [1]:
# Composers to look up. Spellings need not be exact: each name is resolved to the
# best-matching entry of the composer table by name_to_composer_id further down.
target_composer_names = [
    'Wolfgang Amadeus Mozart',
    'Bejamin Britten',
]

# HDF5 file holding the composer embeddings (opened by path_to_embedding).
embeddings_path = '../data/embeddings/composer-embeddings-c2v-dbow-5000-10000.h5'

In [2]:
from difflib import SequenceMatcher
import glob
import re

import h5py
import numpy as np
import pandas as pd
import scipy
from scipy.spatial import distance

  return f(*args, **kwds)
  return f(*args, **kwds)


In [3]:
# One tuple per row of the pipe-separated composer table, with the row's position
# prepended; that position is what the rest of the notebook uses as composer_id.
# NOTE(review): judging by later usage the columns are (name, born, died, ..., url)
# -- confirm against the CSV.
all_composers = [
    (row_index, *row)
    for row_index, row in enumerate(
        pd.read_csv('../data/composers.csv', sep='|').values
    )
]

In [4]:
def similar(a, b):
    """Return the difflib similarity ratio between sequences ``a`` and ``b``.

    The result is a float in [0, 1]; identical sequences score 1.0 and
    sequences with nothing in common score 0.0. No junk filter is applied.
    """
    matcher = SequenceMatcher(a=a, b=b)
    return matcher.ratio()

def name_to_composer_id(name):
    """Resolve a (possibly misspelled) composer name to its composer id.

    Every entry of ``all_composers`` is scored against ``name`` with
    ``similar`` and the highest-scoring entry wins (the first one on ties).
    The chosen entry is printed so the reader can check the assumption.

    Returns the first element of the winning tuple, i.e. its composer id.
    """
    def name_similarity(candidate):
        # Element 1 of each tuple is the composer's name.
        return similar(candidate[1], name)

    best_match = max(all_composers, key=name_similarity)
    print('Assuming {}: born {}; died {}; composer_id: {}'.format(
        best_match[1], best_match[2], best_match[3], best_match[0]))
    return best_match[0]

# Resolve every requested name, in order; each lookup prints the match it assumed.
target_ids = list(map(name_to_composer_id, target_composer_names))

Assuming Wolfgang Amadeus Mozart: born 1756; died 1791; composer_id: 2489
Assuming Benjamin Britten: born 1913; died 1976; composer_id: 774


In [5]:
def path_to_embedding(path):
    """Load the ``doc_embeddings`` dataset from an HDF5 file into memory.

    Parameters
    ----------
    path : str
        Path of the HDF5 file to open (read-only).

    Returns
    -------
    numpy.ndarray
        The full contents of the ``doc_embeddings`` dataset.

    Raises
    ------
    KeyError
        If the file has no ``doc_embeddings`` dataset.
    """
    with h5py.File(path, 'r') as f:
        # `Dataset.value` was deprecated and then removed in h5py 3.0; indexing
        # with an empty tuple is the supported way to read the whole dataset.
        # Direct item access (rather than `.get`) makes a missing dataset fail
        # with a clear KeyError instead of an AttributeError on None.
        return f['doc_embeddings'][()]

In [6]:
# Read the embedding matrix into memory.
# NOTE(review): rows are later indexed by composer_id, so row order presumably
# matches the composer table -- confirm.
embeddings = path_to_embedding(path=embeddings_path)

In [7]:
# Pairwise cosine distance between every pair of embedding rows.
distances = distance.cdist(embeddings, embeddings, 'cosine')

# For each row, the column indices ordered from nearest to farthest.
closest = np.argsort(distances, axis=1)

In [8]:
# For each requested composer, print the name and last column (the URL in the
# recorded output) of its five nearest neighbours in embedding space.
for t_id, t_name in zip(target_ids, target_composer_names):
    print('Most similar to {}:'.format(t_name))
    ranked = closest[t_id]
    # Exclude the target explicitly instead of assuming it sorts to position 0:
    # duplicate embeddings or floating-point noise can place another composer
    # ahead of it, which would list the target as its own neighbour.
    nearest = ranked[ranked != t_id][:5]
    for c_id in nearest:
        composer = all_composers[c_id]
        print((composer[1], composer[-1]))

Most similar to Wolfgang Amadeus Mozart:
('Ludwig van Beethoven', 'https://en.wikipedia.org/wiki/Ludwig_van_Beethoven')
('Joseph Haydn', 'https://en.wikipedia.org/wiki/Joseph_Haydn')
('Leopold Mozart', 'https://en.wikipedia.org/wiki/Leopold_Mozart')
('Antonio Salieri', 'https://en.wikipedia.org/wiki/Antonio_Salieri')
('Christoph Willibald Gluck', 'https://en.wikipedia.org/wiki/Christoph_Willibald_Gluck')
Most similar to Bejamin Britten:
('Michael Tippett', 'https://en.wikipedia.org/wiki/Michael_Tippett')
('William Walton', 'https://en.wikipedia.org/wiki/William_Walton')
('Ralph Vaughan Williams', 'https://en.wikipedia.org/wiki/Ralph_Vaughan_Williams')
('Gustav Holst', 'https://en.wikipedia.org/wiki/Gustav_Holst')
('Arthur Bliss', 'https://en.wikipedia.org/wiki/Arthur_Bliss')
