From 78f926cb3832e34b253df8378a73d78237fce138 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Sun, 10 Nov 2019 14:33:12 +0000 Subject: [PATCH 1/5] Safe import config --- listenbrainz/db/stats.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/listenbrainz/db/stats.py b/listenbrainz/db/stats.py index d6a8817100..2ebd0360cf 100644 --- a/listenbrainz/db/stats.py +++ b/listenbrainz/db/stats.py @@ -25,7 +25,8 @@ import ujson from listenbrainz import db -from listenbrainz import config +from listenbrainz.utils import safely_import_config +safely_import_config() def insert_user_stats(user_id, artists, recordings, releases, artist_count, yearmonth): """Inserts user stats calculated from Spark into the database. From d613e3c9a81a2ae1ef163b8b00bfb1d565893291 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Sun, 10 Nov 2019 14:37:10 +0000 Subject: [PATCH 2/5] Add a log --- listenbrainz/spark/spark_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/listenbrainz/spark/spark_reader.py b/listenbrainz/spark/spark_reader.py index 926626da55..dbdf07fbbe 100644 --- a/listenbrainz/spark/spark_reader.py +++ b/listenbrainz/spark/spark_reader.py @@ -71,6 +71,7 @@ def start(self): queue=current_app.config['SPARK_RESULT_QUEUE'], callback_function=self.callback, ) + print("Spark consumer started!") current_app.logger.info('Spark consumer started!') try: self.incoming_ch.start_consuming() From 1192e92c1ab257277e525e970dfc4b4f0fc4463c Mon Sep 17 00:00:00 2001 From: Param Singh Date: Sun, 10 Nov 2019 14:46:08 +0000 Subject: [PATCH 3/5] Only account for all time stats now We'll have to look into the best way to store monthly or other stats later, but it seems simple enough --- listenbrainz/db/stats.py | 13 ++++--------- listenbrainz/spark/spark_reader.py | 3 +-- listenbrainz/webserver/views/user.py | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/listenbrainz/db/stats.py b/listenbrainz/db/stats.py index 2ebd0360cf..2d5b085b85 100644 --- a/listenbrainz/db/stats.py +++ b/listenbrainz/db/stats.py @@ -28,7 +28,7 @@ from listenbrainz.utils import safely_import_config safely_import_config() -def insert_user_stats(user_id, artists, recordings, releases, artist_count, yearmonth): +def insert_user_stats(user_id, artists, recordings, releases, artist_count): """Inserts user stats calculated from Spark into the database. If stats are already present for some user, they are updated to the new @@ -39,30 +39,25 @@ def insert_user_stats(user_id, artists, recordings, releases, artist_count, year recordings (dict): the top recordings listened to by the user releases (dict): the top releases listened to by the user artist_count (int): the total number of artists listened to by the user - yearmonth (str): a string representing the month in which the stats were calculated, - for example '2019-01' """ artist_stats = { 'count': artist_count, - 'top_month': { + 'all_time': { 'artists': artists, - 'month': yearmonth, } } recording_stats = { - 'top_month': { + 'all_time': { 'recordings': recordings, - 'month': yearmonth, }, } release_stats = { - 'top_month': { + 'all_time': { 'releases': releases, - 'month': yearmonth, } } diff --git a/listenbrainz/spark/spark_reader.py b/listenbrainz/spark/spark_reader.py index dbdf07fbbe..21757a8da8 100644 --- a/listenbrainz/spark/spark_reader.py +++ b/listenbrainz/spark/spark_reader.py @@ -45,8 +45,7 @@ def callback(self, ch, method, properties, body): recordings = metadata['recordings'] releases = metadata['releases'] artist_count = metadata['artists']['artist_count'] - yearmonth = metadata['yearmonth'] - db_stats.insert_user_stats(user['id'], artists, recordings, releases, artist_count, yearmonth) + db_stats.insert_user_stats(user['id'], artists, recordings, releases, artist_count) current_app.logger.info("data for {} published".format(username)) while True: diff --git a/listenbrainz/webserver/views/user.py b/listenbrainz/webserver/views/user.py index 51cadeef1c..37d8921cf2 100644 --- a/listenbrainz/webserver/views/user.py +++ b/listenbrainz/webserver/views/user.py @@ -189,7 +189,7 @@ def artists(user_name): flash.error(msg) return redirect(url_for('user.profile', user_name=user_name)) - top_artists = data.get('artist', {}).get('top_month', {}).get('artists', []) + top_artists = data.get('artist', {}).get('all_time', {}).get('artists', []) return render_template( "user/artists.html", From 5b3d020e46f88583506994b4b8e8c500f887f577 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Sun, 10 Nov 2019 14:48:31 +0000 Subject: [PATCH 4/5] Remove print --- listenbrainz/spark/spark_reader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/listenbrainz/spark/spark_reader.py b/listenbrainz/spark/spark_reader.py index 21757a8da8..59d84191ce 100644 --- a/listenbrainz/spark/spark_reader.py +++ b/listenbrainz/spark/spark_reader.py @@ -70,7 +70,6 @@ def start(self): queue=current_app.config['SPARK_RESULT_QUEUE'], callback_function=self.callback, ) - print("Spark consumer started!") current_app.logger.info('Spark consumer started!') try: self.incoming_ch.start_consuming() From b0b76a60f80c4fb258bf8d8ddc6d5ec1a7de3ae4 Mon Sep 17 00:00:00 2001 From: Param Singh Date: Sun, 10 Nov 2019 15:02:09 +0000 Subject: [PATCH 5/5] Only store all time stats for now Also, fix another config bug --- listenbrainz/db/stats.py | 10 +++++----- listenbrainz/db/tests/test_stats.py | 20 +++++++++---------- listenbrainz/db/tests/test_user.py | 1 - listenbrainz/webserver/views/profile.py | 4 ++-- .../webserver/views/test/test_user.py | 3 --- listenbrainz/webserver/views/user.py | 2 +- 6 files changed, 17 insertions(+), 23 deletions(-) diff --git a/listenbrainz/db/stats.py b/listenbrainz/db/stats.py index 2d5b085b85..9df5b26cbb 100644 --- a/listenbrainz/db/stats.py +++ b/listenbrainz/db/stats.py @@ -25,8 +25,6 @@ import ujson from listenbrainz import db -from listenbrainz.utils import safely_import_config -safely_import_config() def insert_user_stats(user_id, artists, recordings, releases, artist_count): """Inserts user stats calculated from Spark into the database. @@ -170,12 +168,14 @@ def get_all_user_stats(user_id): return get_user_stats(user_id, 'artist, recording, release') -def valid_stats_exist(user_id): +def valid_stats_exist(user_id, days): """ Returns True if statistics for a user have been calculated in - the last week, and are present in the db + the last X days (where x is passed to the function), and are present in the db Args: user_id (int): the row ID of the user + days (int): the number of days in which stats should have been calculated + to consider them valid Returns: bool value signifying if valid stats exist for the user in the db @@ -189,7 +189,7 @@ def valid_stats_exist(user_id): AND last_updated >= NOW() - INTERVAL ':x days' """), { 'user_id': user_id, - 'x': config.STATS_CALCULATION_INTERVAL, + 'x': days, }) row = result.fetchone() return True if row is not None else False diff --git a/listenbrainz/db/tests/test_stats.py b/listenbrainz/db/tests/test_stats.py index f3e0b4047b..d658633626 100644 --- a/listenbrainz/db/tests/test_stats.py +++ b/listenbrainz/db/tests/test_stats.py @@ -29,14 +29,13 @@ def test_insert_user_stats(self): recordings=recordings, releases=releases, artist_count=2, - yearmonth='2019-01', ) result = db_stats.get_all_user_stats(user_id=self.user['id']) - self.assertListEqual(result['artist']['top_month']['artists'], artists) + self.assertListEqual(result['artist']['all_time']['artists'], artists) self.assertEqual(result['artist']['count'], 2) - self.assertListEqual(result['release']['top_month']['releases'], releases) - self.assertListEqual(result['recording']['top_month']['recordings'], recordings) + self.assertListEqual(result['release']['all_time']['releases'], releases) + self.assertListEqual(result['recording']['all_time']['recordings'], recordings) self.assertGreater(int(result['last_updated'].strftime('%s')), 0) def insert_test_data(self): @@ -55,7 +54,6 @@ def insert_test_data(self): recordings=recordings, releases=releases, artist_count=2, - yearmonth='2019-01', ) return { @@ -71,7 +69,7 @@ def test_get_user_stats(self): self.assertEqual(data['artist']['count'], 2) data = db_stats.get_user_stats(self.user['id'], 'recording') - self.assertListEqual(data['recording']['top_month']['recordings'], data_inserted['user_recordings']) + self.assertListEqual(data['recording']['all_time']['recordings'], data_inserted['user_recordings']) def test_get_user_artists(self): data_inserted = self.insert_test_data() @@ -81,14 +79,14 @@ def test_get_user_artists(self): def test_get_all_user_stats(self): data_inserted = self.insert_test_data() result = db_stats.get_all_user_stats(self.user['id']) - self.assertListEqual(result['artist']['top_month']['artists'], data_inserted['user_artists']) + self.assertListEqual(result['artist']['all_time']['artists'], data_inserted['user_artists']) self.assertEqual(result['artist']['count'], 2) - self.assertListEqual(result['release']['top_month']['releases'], data_inserted['user_releases']) - self.assertListEqual(result['recording']['top_month']['recordings'], data_inserted['user_recordings']) + self.assertListEqual(result['release']['all_time']['releases'], data_inserted['user_releases']) + self.assertListEqual(result['recording']['all_time']['recordings'], data_inserted['user_recordings']) self.assertGreater(int(result['last_updated'].strftime('%s')), 0) def test_valid_stats_exist(self): - self.assertFalse(db_stats.valid_stats_exist(self.user['id'])) + self.assertFalse(db_stats.valid_stats_exist(self.user['id'], 7)) self.insert_test_data() - self.assertTrue(db_stats.valid_stats_exist(self.user['id'])) + self.assertTrue(db_stats.valid_stats_exist(self.user['id'], 7)) diff --git a/listenbrainz/db/tests/test_user.py b/listenbrainz/db/tests/test_user.py index 256d22c295..03592854a2 100644 --- a/listenbrainz/db/tests/test_user.py +++ b/listenbrainz/db/tests/test_user.py @@ -135,7 +135,6 @@ def test_delete(self): recordings={}, releases={}, artist_count=2, - yearmonth='2019-01', ) user_stats = db_stats.get_all_user_stats(user_id) self.assertIsNotNone(user_stats) diff --git a/listenbrainz/webserver/views/profile.py b/listenbrainz/webserver/views/profile.py index 734ea9899f..94421d3583 100644 --- a/listenbrainz/webserver/views/profile.py +++ b/listenbrainz/webserver/views/profile.py @@ -91,7 +91,7 @@ def info(): # check if user is in stats calculation queue or if valid stats already exist in_stats_queue = _redis.redis.get(construct_stats_queue_key(current_user.musicbrainz_id)) == 'queued' try: - stats_exist = db_stats.valid_stats_exist(current_user.id) + stats_exist = db_stats.valid_stats_exist(current_user.id, current_app.config['STATS_CALCULATION_INTERVAL']) except DatabaseException: stats_exist = False @@ -170,7 +170,7 @@ def request_stats(): status = _redis.redis.get(construct_stats_queue_key(current_user.musicbrainz_id)) == 'queued' if status == 'queued': flash.info('You have already been added to the stats calculation queue! Please check back later.') - elif db_stats.valid_stats_exist(current_user.id): + elif db_stats.valid_stats_exist(current_user.id, current_app.config['STATS_CALCULATION_INTERVAL']): flash.info('Your stats were calculated in the most recent stats calculation interval,' ' please wait until the next interval! We calculate new statistics every Monday at 00:00 UTC.') else: diff --git a/listenbrainz/webserver/views/test/test_user.py b/listenbrainz/webserver/views/test/test_user.py index 04cb302140..1d48f1166b 100644 --- a/listenbrainz/webserver/views/test/test_user.py +++ b/listenbrainz/webserver/views/test/test_user.py @@ -71,7 +71,6 @@ def test_user_page(self): recordings={}, releases={}, artist_count=2, - yearmonth='2019-01', ) response = self.client.get(url_for('user.profile', user_name=self.user.musicbrainz_id)) self.assert200(response) @@ -161,7 +160,6 @@ def test_top_artists(self): recordings={}, releases={}, artist_count=2, - yearmonth='2019-01', ) r = self.client.get(url_for('user.artists', user_name=self.user.musicbrainz_id)) @@ -174,7 +172,6 @@ def test_top_artists(self): recordings={}, releases={}, artist_count=2, - yearmonth='2019-01', ) r = self.client.get(url_for('user.artists', user_name=self.user.musicbrainz_id)) diff --git a/listenbrainz/webserver/views/user.py b/listenbrainz/webserver/views/user.py index 37d8921cf2..e6e7cc9df4 100644 --- a/listenbrainz/webserver/views/user.py +++ b/listenbrainz/webserver/views/user.py @@ -16,7 +16,7 @@ from listenbrainz.webserver.influx_connection import _influx from listenbrainz.webserver.views.api_tools import publish_data_to_queue import time -from datetime import datetime +from datetime import datetime from werkzeug.exceptions import NotFound, BadRequest, RequestEntityTooLarge, InternalServerError LISTENS_PER_PAGE = 25