Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LB-221: Only calculate stats of users who haven't been done recently #268

Merged
merged 2 commits into from
Oct 31, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 27 additions & 8 deletions listenbrainz/db/tests/test_user.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-
import time

import listenbrainz.db.user as db_user
import sqlalchemy
import time
import ujson

import listenbrainz.db.user as db_user
from listenbrainz import db
from listenbrainz.db.testing import DatabaseTestCase

Expand Down Expand Up @@ -71,12 +72,11 @@ def test_increase_latest_import(self):
user = db_user.get_by_mb_id(user['musicbrainz_id'])
self.assertEqual(val, int(user['latest_import'].strftime('%s')))

def test_get_recently_logged_in_users(self):
"""Tests getting recently logged in users"""
def test_get_users_with_uncalculated_stats(self):

# create two users, set one's last_login
# to a very old value and one's last_login
# to now and then call get_recently_logged_in_users
# to now and then call the function
user1 = db_user.get_or_create('recentuser1')
with db.engine.connect() as connection:
connection.execute(sqlalchemy.text("""
Expand All @@ -97,9 +97,28 @@ def test_get_recently_logged_in_users(self):
'musicbrainz_id': 'recentuser2'
})

recent_users = db_user.get_recently_logged_in_users()
self.assertEqual(len(recent_users), 1)
self.assertEqual(recent_users[0]['musicbrainz_id'], 'recentuser2')
users_with_uncalculated_stats = db_user.get_users_with_uncalculated_stats()
self.assertEqual(len(users_with_uncalculated_stats), 1)
self.assertEqual(users_with_uncalculated_stats[0]['musicbrainz_id'], 'recentuser2')


# now if we've calculated the stats for user2 recently (just now)
# then the function shouldn't return user2

# put some data in the stats table for user2
with db.engine.connect() as connection:
connection.execute(sqlalchemy.text("""
INSERT INTO statistics.user (user_id, artist, release, recording, last_updated)
VALUES (:user_id, :artist, :release, :recording, NOW())
"""), {
'user_id': user2['id'],
'artist': ujson.dumps({}),
'release': ujson.dumps({}),
'recording': ujson.dumps({}),
})

users_with_uncalculated_stats = db_user.get_users_with_uncalculated_stats()
self.assertListEqual(users_with_uncalculated_stats, [])

def test_reset_latest_import(self):
user = db_user.get_or_create('resetlatestimportuser')
Expand Down
19 changes: 11 additions & 8 deletions listenbrainz/db/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,16 +232,19 @@ def reset_latest_import(musicbrainz_id):
update_latest_import(musicbrainz_id, 0)


def get_recently_logged_in_users():
"""Returns a list of users who have logged-in in the
last config.STATS_CALCULATION_LOGIN_TIME days
"""
def get_users_with_uncalculated_stats():

with db.engine.connect() as connection:
result = connection.execute(sqlalchemy.text("""
SELECT {columns}
FROM "user"
WHERE last_login >= NOW() - INTERVAL ':x days'
SELECT public."user".musicbrainz_id
FROM public."user"
LEFT JOIN statistics.user
ON public."user".id = statistics.user.user_id
WHERE public."user".last_login >= NOW() - INTERVAL ':x days'
AND (statistics.user.last_updated IS NULL OR statistics.user.last_updated < NOW() - INTERVAL ':y days')
""".format(columns=','.join(USER_GET_COLUMNS))), {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

columns is no longer used, right?

'x': config.STATS_CALCULATION_LOGIN_TIME
'x': config.STATS_CALCULATION_LOGIN_TIME,
'y': config.STATS_CALCULATION_INTERVAL,
})

return [dict(row) for row in result]
8 changes: 6 additions & 2 deletions listenbrainz/stats/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import listenbrainz.stats.user as stats_user
import time

from listenbrainz import db
from listenbrainz import config
from listenbrainz import db
from listenbrainz import stats


def calculate_user_stats():
for user in db_user.get_recently_logged_in_users():
"""Get the users we need to calculate our statistics for and calculate their stats.
"""

for user in db_user.get_users_with_uncalculated_stats():
recordings = stats_user.get_top_recordings(musicbrainz_id=user['musicbrainz_id'])
artists = stats_user.get_top_artists(musicbrainz_id=user['musicbrainz_id'])
releases = stats_user.get_top_releases(musicbrainz_id=user['musicbrainz_id'])
Expand All @@ -24,6 +27,7 @@ def calculate_user_stats():
artist_count=artist_count
)


def calculate_stats():
calculate_user_stats()

Expand Down