-
-
Notifications
You must be signed in to change notification settings - Fork 20
/
annoy.py
72 lines (57 loc) · 2.58 KB
/
annoy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import requests
import ujson
from troi import Element, Recording, PipelineError
"""
-> annoy similarity (timbre - similar sounding)
-> filter by similar artists (dataset name -> credit id -> similar artists)
-> later, add in collab filtering similar artists
-> recordings from a given year/range
"""
# Metric names accepted by AnnoyLookupElement. The chosen metric is appended
# to the similarity endpoint URL when the lookup is performed.
VALID_METRICS = [
    'mfccs',
    'mfccsw',
    'gfccs',
    'gfccsw',
    'key',
    'bpm',
    'onsetrate',
    'moods',
    'instruments',
    'dortmund',
    'rosamerica',
    'tzanetakis',
]
class AnnoyLookupElement(Element):
    """
    Given a recording MBID, lookup tracks that are similar given some
    criteria (e.g. mfccs, gfccs, etc).
    """

    SERVER_URL = "https://acousticbrainz.org/api/v1/similarity/"

    def __init__(self, metric, mbid):
        """
        The given recording mbid is the source track that will be looked up
        in the annoy index using the passed metric.

        The metric is matched case-insensitively against VALID_METRICS and
        stored in lower case. Raises PipelineError if it is not valid.
        """
        super().__init__()
        self.mbid = mbid

        # Normalise once, so the value that was validated is the very same
        # value that read() later puts into the request URL.
        normalized_metric = metric.lower()
        if normalized_metric not in VALID_METRICS:
            raise PipelineError("metric %s is not valid. Must be one of %s" % (metric, ', '.join(VALID_METRICS)))
        self.metric = normalized_metric

    def outputs(self):
        """Return the entity types this element produces: a list of Recording."""
        return [Recording]

    def read(self, inputs):
        """
        Query the similarity endpoint for self.mbid using self.metric and
        return the similar tracks as a list of Recording objects.

        Each Recording carries an 'acousticbrainz' dict with the metric, the
        source mbid, the reported distance and the offset, plus a note that
        describes the relation. The inputs argument is not used.

        Raises PipelineError if the HTTP status is not 200, if the body is
        not valid JSON, or if the response holds no entry for self.mbid.
        """
        self.debug("read for %s/%s" % (self.metric, self.mbid))

        url = self.SERVER_URL + self.metric + "/"
        self.debug(f"url: {url}")

        # recording_ids format is mbid:offset;mbid:offset
        response = requests.get(url, params={'remove_dups': 'all',
                                             'recording_ids': self.mbid + ":0",
                                             'n_neighbours': 1000})
        if response.status_code != 200:
            raise PipelineError("Cannot fetch annoy similarities from AcousticBrainz: HTTP code %d" % response.status_code)

        try:
            results = ujson.loads(response.text)
        except ValueError as err:
            raise PipelineError("Cannot fetch annoy similarities from AcousticBrainz: Invalid JSON returned: " + str(err)) from err

        # The request above asks for offset 0 of self.mbid, so the rows are
        # read from results[mbid]["0"]. Report a missing/odd-shaped entry as
        # a PipelineError like every other failure of this lookup.
        try:
            rows = results[self.mbid]["0"]
        except (KeyError, TypeError) as err:
            raise PipelineError("Cannot fetch annoy similarities from AcousticBrainz: "
                                "no results for recording %s in response" % self.mbid) from err

        entities = []
        for row in rows:
            recording = Recording(mbid=row['recording_mbid'],
                                  acousticbrainz={
                                      'metric': self.metric,
                                      'similarity_from': self.mbid,
                                      'similarity': row['distance'],
                                      'offset': row['offset']
                                  })
            recording.add_note("Related to %s with metric %s" % (self.mbid, self.metric))
            entities.append(recording)

        self.debug("read %d recordings" % len(entities))

        return entities