From 47dfe7481f1cc60c7e2dea8516fc6ed29ea2d26e Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 11 Apr 2022 15:51:24 +0200 Subject: [PATCH 1/2] Remove AB related stuff. Move YIM stuff to interal folder. --- troi/acousticbrainz/__init__.py | 0 troi/acousticbrainz/annoy.py | 72 ------------------ troi/acousticbrainz/bpm_lookup.py | 70 ------------------ troi/acousticbrainz/mood_lookup.py | 70 ------------------ troi/acousticbrainz/tests/test_annoy.py | 74 ------------------- troi/internal/README.md | 3 + .../top_discoveries_for_year.py | 0 .../top_missed_recordings_for_year.py | 0 ...new_recordings_you_listened_to_for_year.py | 0 .../top_recordings_for_year.py | 0 .../top_sitewide_recordings_for_year.py | 0 .../{patches => internal}/yim_patch_runner.py | 0 troi/patches/ab_similar_recordings.py | 57 -------------- troi/tests/test_utils.py | 9 ++- 14 files changed, 8 insertions(+), 347 deletions(-) delete mode 100644 troi/acousticbrainz/__init__.py delete mode 100644 troi/acousticbrainz/annoy.py delete mode 100644 troi/acousticbrainz/bpm_lookup.py delete mode 100644 troi/acousticbrainz/mood_lookup.py delete mode 100644 troi/acousticbrainz/tests/test_annoy.py create mode 100644 troi/internal/README.md rename troi/{patches => internal}/top_discoveries_for_year.py (100%) rename troi/{patches => internal}/top_missed_recordings_for_year.py (100%) rename troi/{patches => internal}/top_new_recordings_you_listened_to_for_year.py (100%) rename troi/{patches => internal}/top_recordings_for_year.py (100%) rename troi/{patches => internal}/top_sitewide_recordings_for_year.py (100%) rename troi/{patches => internal}/yim_patch_runner.py (100%) delete mode 100755 troi/patches/ab_similar_recordings.py diff --git a/troi/acousticbrainz/__init__.py b/troi/acousticbrainz/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/troi/acousticbrainz/annoy.py b/troi/acousticbrainz/annoy.py deleted file mode 100644 index 5ccc1f93..00000000 --- a/troi/acousticbrainz/annoy.py +++ /dev/null @@ -1,72 +0,0 @@ -import requests -import ujson - -from troi import Element, Recording, PipelineError - -""" --> annoy similarity (timbre - similar sounding) --> filter by similar artists (dataset name -> credit id -> similar artists) - -> later, add in collab filtering similar artists --> recordings from a given year/range -""" - -VALID_METRICS = ['mfccs', 'mfccsw', 'gfccs', 'gfccsw', 'key', 'bpm', 'onsetrate', 'moods', - 'instruments', 'dortmund', 'rosamerica', 'tzanetakis'] - - -class AnnoyLookupElement(Element): - """ - Given an recording MBID, lookup tracks that are similar given some - criteria (e.g. mfccs, gfccs, etc). - """ - - SERVER_URL = "https://acousticbrainz.org/api/v1/similarity/" - - def __init__(self, metric, mbid): - """ - The given recording mbid is the source track that will be looked up - in the annoy index using the passed metric. - """ - super().__init__() - self.mbid = mbid - - if metric.lower() not in VALID_METRICS: - raise PipelineError("metric %s is not valid. Must be one of %s" % (metric, '.'.join(VALID_METRICS))) - self.metric = metric - - def outputs(self): - return [Recording] - - def read(self, inputs): - self.debug("read for %s/%s" % (self.metric, self.mbid)) - - url = self.SERVER_URL + self.metric + "/" - self.debug(f"url: {url}") - # recording_ids format is mbid:offset;mbid:offset - r = requests.get(url, params={'remove_dups': 'all', - 'recording_ids': self.mbid + ":0", - 'n_neighbours': 1000}) - if r.status_code != 200: - raise PipelineError("Cannot fetch annoy similarities from AcousticBrainz: HTTP code %d" % r.status_code) - - try: - results = ujson.loads(r.text) - except ValueError as err: - raise PipelineError("Cannot fetch annoy similarities from AcousticBrainz: Invalid JSON returned: " + str(err)) - - entities = [] - for row in results[self.mbid]["0"]: - r = Recording(mbid=row['recording_mbid'], - acousticbrainz={ - 'metric': self.metric, - 'similarity_from': self.mbid, - 'similarity': row['distance'], - 'offset': row['offset'] - } - ) - r.add_note("Related to %s with metric %s" % (self.mbid, self.metric)) - entities.append(r) - - self.debug("read %d recordings" % len(entities)) - - return entities diff --git a/troi/acousticbrainz/bpm_lookup.py b/troi/acousticbrainz/bpm_lookup.py deleted file mode 100644 index 9eb67573..00000000 --- a/troi/acousticbrainz/bpm_lookup.py +++ /dev/null @@ -1,70 +0,0 @@ -import re -import sys -import uuid -from urllib.parse import quote - -import requests -import ujson - -from troi import Element, Artist, Recording, PipelineError - -class BPMLookupElement(Element): - ''' - Look up musicbrainz earliest release year for a list of recordings, based on artist credit name and recording name. - - By default items that are not found in the year lookup are not returned by this element. Pass - skip_not_found=False to init to keep tracks that failed the year lookup. - - ''' - - SERVER_URL = "https://bono.metabrainz.org/bpm-key-lookup/json" - - def __init__(self, skip_not_found=True): - Element.__init__(self) - self.skip_not_found = skip_not_found - - @staticmethod - def inputs(): - return [ Recording ] - - @staticmethod - def outputs(): - return [ Recording ] - - def read(self, inputs): - - recordings = inputs[0] - if not recordings: - return [] - - data = [] - for r in recordings: - data.append({ '[recording_mbid]': r.mbid }) - - r = requests.post(self.SERVER_URL, json=data) - if r.status_code != 200: - raise PipelineError("Cannot fetch recording BPM from datasets: HTTP code %d" % r.status_code) - - try: - rows = ujson.loads(r.text) - except ValueError as err: - raise PipelineError("Cannot fetch recording BPM from datasets: " + str(err)) - - mbid_index = {} - for row in rows: - mbid_index[row['recording_mbid']] = row['bpm'] - - output = [] - for r in recordings: - try: - r.acousticbrainz["bpm"] = mbid_index[r.mbid] - except KeyError: - if self.skip_not_found: - self.debug("recording (%s) not found, skipping." % (r.mbid)) - else: - output.append(r) - continue - - output.append(r) - - return output diff --git a/troi/acousticbrainz/mood_lookup.py b/troi/acousticbrainz/mood_lookup.py deleted file mode 100644 index fe7abe2d..00000000 --- a/troi/acousticbrainz/mood_lookup.py +++ /dev/null @@ -1,70 +0,0 @@ -import requests - -from troi import Element, Artist, PipelineError, Recording - - -def chunks(lst, n): - """ Break a list into roughly equally spaced chunks """ - for i in range(0, len(lst), n): - yield lst[i:i + n] - -class MoodLookupElement(Element): - ''' - Lookup moods and store them in the acousticbrainz element of the Recordings: - - Once loaded, the acousticbrainz dict shuld contain a moods key with the following sub-keys: - mood_acoustic - aggressive - electronic - happy - party - relaxed - sad - - Each with a float value from 0->1.0 - ''' - - SERVER_URL = "http://acousticbrainz.org/api/v1/high-level?recording_ids=" - - def __init__(self, skip_not_found=True): - Element.__init__(self) - self.skip_not_found = skip_not_found - - @staticmethod - def inputs(): - return [ Recording ] - - @staticmethod - def outputs(): - return [ Recording ] - - - def read(self, inputs): - - recordings = inputs[0] - if not recordings: - return [] - - max_items_per_call = 25 - output = [] - for rec_chunk in chunks(recordings, max_items_per_call): - mbids = [ r.mbid for r in rec_chunk ] - r = requests.post(self.SERVER_URL + ";".join(mbids)) - if r.status_code != 200: - raise PipelineError("Cannot fetch moods from AcousticBrainz: HTTP code %d" % r.status_code) - - data = r.json() - for r in recordings: - if r.mbid not in data: - if not self.skip_not_found: - output.append(r) - continue - - moods = {} - for mood in ("acoustic", "aggressive", "electronic", "happy", "party", "relaxed", "sad"): - moods["mood_" + mood] = data[r.mbid]["0"]['highlevel']["mood_" + mood]["all"][mood] - - r.acousticbrainz["moods"] = moods - output.append(r) - - return output diff --git a/troi/acousticbrainz/tests/test_annoy.py b/troi/acousticbrainz/tests/test_annoy.py deleted file mode 100644 index 7e65c4e4..00000000 --- a/troi/acousticbrainz/tests/test_annoy.py +++ /dev/null @@ -1,74 +0,0 @@ -import json -import unittest -import unittest.mock - -import troi -import troi.acousticbrainz.annoy -from troi import PipelineError - -return_json = { - "8f8cc91f-0bca-4351-90d4-ef334ac0a0cf": { - "0": [ - { - "distance": 0.00010826535435626283, - "offset": 0, - "recording_mbid": "7b3ecb51-919b-494d-8085-47e3390dd212" - }, - { - "distance": 0.3184245228767395, - "offset": 0, - "recording_mbid": "724335d8-4ae6-4b2d-8be5-056944b8132d" - }, - { - "distance": 0.3630277216434479, - "offset": 0, - "recording_mbid": "d4b46b96-ab56-4f99-a59e-bab590deed8f" - }, - { - "distance": 0.3671037256717682, - "offset": 0, - "recording_mbid": "441a879f-4b8c-4d52-8a6f-e289bef2fd83" - }, - { - "distance": 0.3676069676876068, - "offset": 0, - "recording_mbid": "b26b7d9f-dd39-4b74-89f0-a534d6bbf556" - }, - { - "distance": 0.385963499546051, - "offset": 0, - "recording_mbid": "c1d750f3-93ea-4e0e-85f0-8baa8548c97a" - }, - ] - } -} - - -class TestAnnoyLookupElement(unittest.TestCase): - - @unittest.mock.patch('requests.get') - def test_read(self, req): - - mock = unittest.mock.MagicMock() - mock.status_code = 200 - mock.text = json.dumps(return_json) - req.return_value = mock - e = troi.acousticbrainz.annoy.AnnoyLookupElement("mfccs", "8f8cc91f-0bca-4351-90d4-ef334ac0a0cf") - - entities = e.read([[]]) - req.assert_called_with(e.SERVER_URL + "mfccs/", params={ - "remove_dups": "all", "recording_ids": "8f8cc91f-0bca-4351-90d4-ef334ac0a0cf:0", 'n_neighbours': 1000 - }) - - assert len(entities) == 6 - assert entities[0].acousticbrainz == { - "metric": "mfccs", - "similarity_from": "8f8cc91f-0bca-4351-90d4-ef334ac0a0cf", - "similarity": 0.00010826535435626283, - "offset": 0, - } - assert entities[0].mbid == "7b3ecb51-919b-494d-8085-47e3390dd212" - - def test_invalid_metric(self): - with self.assertRaises(PipelineError): - troi.acousticbrainz.annoy.AnnoyLookupElement("foo", "8f8cc91f-0bca-4351-90d4-ef334ac0a0cf") diff --git a/troi/internal/README.md b/troi/internal/README.md new file mode 100644 index 00000000..20ace366 --- /dev/null +++ b/troi/internal/README.md @@ -0,0 +1,3 @@ +This directory contains patches that we use to create the Year in Music reviews at the end of the year. +Given the limited use, they've been moved away from the other patches in order to not confuse the normal +flow of the other patches diff --git a/troi/patches/top_discoveries_for_year.py b/troi/internal/top_discoveries_for_year.py similarity index 100% rename from troi/patches/top_discoveries_for_year.py rename to troi/internal/top_discoveries_for_year.py diff --git a/troi/patches/top_missed_recordings_for_year.py b/troi/internal/top_missed_recordings_for_year.py similarity index 100% rename from troi/patches/top_missed_recordings_for_year.py rename to troi/internal/top_missed_recordings_for_year.py diff --git a/troi/patches/top_new_recordings_you_listened_to_for_year.py b/troi/internal/top_new_recordings_you_listened_to_for_year.py similarity index 100% rename from troi/patches/top_new_recordings_you_listened_to_for_year.py rename to troi/internal/top_new_recordings_you_listened_to_for_year.py diff --git a/troi/patches/top_recordings_for_year.py b/troi/internal/top_recordings_for_year.py similarity index 100% rename from troi/patches/top_recordings_for_year.py rename to troi/internal/top_recordings_for_year.py diff --git a/troi/patches/top_sitewide_recordings_for_year.py b/troi/internal/top_sitewide_recordings_for_year.py similarity index 100% rename from troi/patches/top_sitewide_recordings_for_year.py rename to troi/internal/top_sitewide_recordings_for_year.py diff --git a/troi/patches/yim_patch_runner.py b/troi/internal/yim_patch_runner.py similarity index 100% rename from troi/patches/yim_patch_runner.py rename to troi/internal/yim_patch_runner.py diff --git a/troi/patches/ab_similar_recordings.py b/troi/patches/ab_similar_recordings.py deleted file mode 100755 index 00afcdac..00000000 --- a/troi/patches/ab_similar_recordings.py +++ /dev/null @@ -1,57 +0,0 @@ -import click - -import troi -import troi.patch -import troi.filters -import troi.utils -import troi.playlist -import troi.listenbrainz.recs -import troi.musicbrainz.recording_lookup -from troi.acousticbrainz import annoy - - -@click.group() -def cli(): - pass - - -class ABSimilarRecordingsPatch(troi.patch.Patch): - - @staticmethod - @cli.command(no_args_is_help=True) - @click.argument('recording_id') - @click.argument('similarity_type') - def parse_args(**kwargs): - """ - Find acoustically similar recordings from AcousticBrainz. - - \b - RECORDING_ID: A musicbrainz recording ID to find similar recordings to - SIMILARITY_TYPE: an annoy similarity type to use when finding similar recordings - """ - return kwargs - - @staticmethod - def slug(): - return "ab-similar-recordings" - - @staticmethod - def description(): - return "Find acoustically similar recordings from AcousticBrainz" - - def create(self, inputs, patch_args): - recording_id = inputs['recording_id'] - similarity_type = inputs['similarity_type'] - - annoy_element = annoy.AnnoyLookupElement(similarity_type, recording_id) - - r_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement() - r_lookup.set_sources(annoy_element) - - dedup_filter = troi.filters.DuplicateRecordingArtistCreditFilterElement() - dedup_filter.set_sources(r_lookup) - - pl_maker = troi.playlist.PlaylistMakerElement("Annoy test playlist", "Annoy test playlist", patch_slug=self.slug(), max_num_recordings=50) - pl_maker.set_sources(dedup_filter) - - return pl_maker diff --git a/troi/tests/test_utils.py b/troi/tests/test_utils.py index e036adbc..7ea4c272 100644 --- a/troi/tests/test_utils.py +++ b/troi/tests/test_utils.py @@ -9,11 +9,12 @@ class TestPatches(unittest.TestCase): def test_discover_patches(self): patches = discover_patches() - assert len(patches) == 12 + assert len(patches) == 5 assert "daily-jams" in patches assert "area-random-recordings" in patches - assert "ab-similar-recordings" in patches assert "weekly-flashback-jams" in patches + assert "playlist-from-mbids" in patches + assert "world-trip" in patches - assert issubclass(patches['daily-jams'], Patch) - assert issubclass(patches['area-random-recordings'], Patch) + for p in patches: + assert issubclass(patches[p], Patch) From dc1b7f9d45883fc40ea115ab1c3129564bccbaa9 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 11 Apr 2022 17:18:52 +0200 Subject: [PATCH 2/2] Fix mapping test --- troi/musicbrainz/mbid_mapping.py | 1 + troi/musicbrainz/tests/test_mbid_mapping.py | 27 ++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/troi/musicbrainz/mbid_mapping.py b/troi/musicbrainz/mbid_mapping.py index bee8823f..7d9128be 100644 --- a/troi/musicbrainz/mbid_mapping.py +++ b/troi/musicbrainz/mbid_mapping.py @@ -48,6 +48,7 @@ def read(self, inputs): r.add_note("recording mbid %s overwritten by mbid_lookup" % (r.mbid)) r.mbid = row['recording_mbid'] r.name = row['recording_name'] + r.year = row['year'] if r.artist.artist_credit_id: r.artist.add_note("artist_credit_id %d overwritten by mbid_lookup" % (r.artist.artist_credit_id)) diff --git a/troi/musicbrainz/tests/test_mbid_mapping.py b/troi/musicbrainz/tests/test_mbid_mapping.py index 8d2adf78..d508bdfb 100644 --- a/troi/musicbrainz/tests/test_mbid_mapping.py +++ b/troi/musicbrainz/tests/test_mbid_mapping.py @@ -15,19 +15,33 @@ "recording_mbid": "97e69767-5d34-4c97-b36a-f3b2b1ef9dae", "recording_name": "Trigger Hippie", "release_mbid": "9db51cd6-38f6-3b42-8ad5-559963d68f35", - "release_name": "Who Can You Trust?" + "release_name": "Who Can You Trust?", + "year": 1996 } ] + +def mocked_requests_post(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + e = troi.musicbrainz.mbid_mapping.MBIDMappingLookupElement() + if args[0] == e.SERVER_URL: + return MockResponse(return_data, 200) + + return MockResponse(None, 404) + + class TestMBIDMapping(unittest.TestCase): - @unittest.mock.patch('requests.post') + @unittest.mock.patch('requests.post', side_effect=mocked_requests_post) def test_read(self, req): - mock = unittest.mock.MagicMock() - mock.status_code = 200 - mock.text = json.dumps(return_data) - req.return_value = mock e = troi.musicbrainz.mbid_mapping.MBIDMappingLookupElement() r = [ troi.Recording("trigger hippie", artist=troi.Artist("morcheeba")) ] @@ -41,6 +55,7 @@ def test_read(self, req): assert entities[0].release.name == "Who Can You Trust?" assert entities[0].mbid == "97e69767-5d34-4c97-b36a-f3b2b1ef9dae" assert entities[0].name == "Trigger Hippie" + assert entities[0].year == 1996 @unittest.mock.patch('requests.post')