From d9999172a83bad33741270eda8639147cccf9506 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Mon, 3 Jul 2017 18:01:03 +0530
Subject: [PATCH 01/20] Refactor scheduler code to remove unneeded code

Also add some docstrings.
---
 listenbrainz/webserver/scheduler.py | 69 +++++------------------------
 1 file changed, 12 insertions(+), 57 deletions(-)

diff --git a/listenbrainz/webserver/scheduler.py b/listenbrainz/webserver/scheduler.py
index 349e6b43ed..0f78333657 100644
--- a/listenbrainz/webserver/scheduler.py
+++ b/listenbrainz/webserver/scheduler.py
@@ -1,74 +1,29 @@
-from sqlalchemy import create_engine, text
-from sqlalchemy.pool import NullPool
-import sqlalchemy.exc
-from apscheduler.schedulers.background import BackgroundScheduler
+""" This module contains code that triggers jobs we want to run
+    on regular intervals.
+"""
 import logging
+from apscheduler.schedulers.background import BackgroundScheduler
 
-# Create a cron job to clean postgres database
 class ScheduledJobs():
     """ Schedule the scripts that need to be run at scheduled intervals """
 
     def __init__(self, conf):
+        self.log = logging.getLogger(__name__)
+        logging.basicConfig()
+        self.log.setLevel(logging.INFO)
+
         self.conf = conf
         self.scheduler = BackgroundScheduler()
         self.add_jobs()
         self.run()
 
     def run(self):
+        """ Start the scheduler but stop on KeyboardInterrupts and such"""
         try:
             self.scheduler.start()
         except (KeyboardInterrupt, SystemExit):
-            self.shutdown()
+            self.scheduler.shutdown()
 
     def add_jobs(self):
-        args = {}
-        if 'MAX_POSTGRES_LISTEN_HISTORY' in self.conf:
-            args['max_days'] = int(self.conf['MAX_POSTGRES_LISTEN_HISTORY'])
-
-        self.scheduler.add_job(self._clean_postgres, 'interval', hours=24, \
-                kwargs=args)
-
-    def _clean_postgres(self, max_days=90):
-        """ Clean all the listens that are older than a set no of days
-            Default: 90 days
-        """
-
-        # If max days is set to a negative number, don't throw anything out
-        if max_days < 0:
-            return
-
-        seconds = max_days*24*3600
-        engine = create_engine(self.conf['SQLALCHEMY_DATABASE_URI'], poolclass=NullPool)
-        connection = engine.connect()
-        # query = """
-        #          WITH max_table as (
-        #              SELECT user_id, max(extract(epoch from ts)) - %s as mx
-        #              FROM listens
-        #              GROUP BY user_id
-        #          )
-        #          DELETE FROM listens
-        #          WHERE extract(epoch from ts) < (SELECT mx
-        #                                          FROM max_table
-        #                                          WHERE max_table.user_id = listens.user_id)
-        #         RETURNING *
-        #         """
-
-        query = """
-                DELETE FROM listen
-                WHERE id in (
-                    SELECT id FROM listen
-                    JOIN (
-                        SELECT user_id, extract(epoch from max(ts)) as max
-                        FROM listens
-                        GROUP BY user_id
-                    ) max_table on listens.user_id = max_table.user_id AND extract(epoch from listens.ts) <= max_table.max - %s
-                ) RETURNING *
-                """
-
-        deleted = connection.execute(query % (seconds))
-        log = logging.getLogger(__name__)
-        log.info('(Scheduled Job) CleanPostgres: ' + str(len(deleted.fetchall())) + " records deleted successfully")
-        connection.close()
-
-    def shutdown(self):
-        self.scheduler.shutdown()
+        """ Add the jobs that need to be run to the scheduler """
+        pass

From c3404f3b65e738d23070acd968c8b6f8b622c54d Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Wed, 2 Aug 2017 22:25:04 +0530
Subject: [PATCH 02/20] Add function for getting recently logged in users

---
 listenbrainz/db/tests/test_user.py | 38 ++++++++++++++++++++++++++----
 listenbrainz/db/user.py            | 22 ++++++++++++++---
 2 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/listenbrainz/db/tests/test_user.py b/listenbrainz/db/tests/test_user.py
index 0664d71a2c..df295e1678 100644
--- a/listenbrainz/db/tests/test_user.py
+++ b/listenbrainz/db/tests/test_user.py
@@ -1,10 +1,9 @@
 # -*- coding: utf-8 -*-
-from listenbrainz.db.testing import DatabaseTestCase
-import listenbrainz.db.user as db_user
-from listenbrainz import db
 import time
 import sqlalchemy
-
+from listenbrainz import db
+from listenbrainz.db.testing import DatabaseTestCase
+import listenbrainz.db.user as db_user
 
 class UserTestCase(DatabaseTestCase):
 
@@ -59,3 +58,34 @@ def test_update_latest_import(self):
         db_user.update_latest_import(user['musicbrainz_id'], val)
         user = db_user.get_by_mb_id(user['musicbrainz_id'])
         self.assertEqual(val, int(user['latest_import'].strftime('%s')))
+
+    def test_get_recently_logged_in_users(self):
+        """Tests getting recently logged in users"""
+
+        # create two users, set one's last_login
+        # to a very old value and one's last_login
+        # to now and then call get_recently_logged_in_users
+        user1 = db_user.get_or_create('recentuser1')
+        with db.engine.connect() as connection:
+            connection.execute(sqlalchemy.text("""
+                UPDATE "user"
+                   SET last_login = to_timestamp(0)
+                 WHERE musicbrainz_id = :musicbrainz_id
+                """), {
+                    'musicbrainz_id': 'recentuser1'
+                })
+
+        user2 = db_user.get_or_create('recentuser2')
+        with db.engine.connect() as connection:
+            connection.execute(sqlalchemy.text("""
+                UPDATE "user"
+                   SET last_login = NOW()
+                 WHERE musicbrainz_id = :musicbrainz_id
+                """), {
+                    'musicbrainz_id': 'recentuser2'
+                })
+
+        recent_users = db_user.get_recently_logged_in_users()
+        self.assertEqual(len(recent_users), 1)
+        self.assertEqual(recent_users[0]['musicbrainz_id'], 'recentuser2')
+
diff --git a/listenbrainz/db/user.py b/listenbrainz/db/user.py
index 4648e8c3fa..57cfd5838b 100644
--- a/listenbrainz/db/user.py
+++ b/listenbrainz/db/user.py
@@ -1,10 +1,10 @@
-
-from listenbrainz import db
 import uuid
 import sqlalchemy
-from listenbrainz.db.exceptions import DatabaseException
 import logging
 import time
+from listenbrainz import db
+from listenbrainz.db.exceptions import DatabaseException
+from listenbrainz import config
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -213,3 +213,19 @@ def update_latest_import(musicbrainz_id, ts):
             except sqlalchemy.exc.ProgrammingError as e:
                 logger.error(e)
                 raise DatabaseException
+
+
+def get_recently_logged_in_users():
+    """Returns a list of users who have logged-in in the last X days, X is
+       defined in the config.
+    """
+    with db.engine.connect() as connection:
+        result = connection.execute(sqlalchemy.text("""
+                SELECT {columns}
+                  FROM "user"
+                 WHERE last_login >= NOW() - INTERVAL ':x days'
+                """.format(columns=','.join(USER_GET_COLUMNS))), {
+                    #TODO(param): think about the name of this variable in config
+                    'x': config.STATS_CALCULATE_LOGIN
+                })
+        return [dict(row) for row in result]

From cade177f172aa8c524ed9ab72255ee5463972195 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Thu, 3 Aug 2017 22:56:42 +0530
Subject: [PATCH 03/20] Add stats calculation variable to config.py.sample

---
 listenbrainz/config.py.sample | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/listenbrainz/config.py.sample b/listenbrainz/config.py.sample
index c3391ca29a..dc7705a101 100644
--- a/listenbrainz/config.py.sample
+++ b/listenbrainz/config.py.sample
@@ -55,6 +55,10 @@ BIGQUERY_TABLE_ID = "listen"
 # Stats
 STATS_ENTITY_LIMIT = 100 # the number of entities to calculate at max with BQ
 
+# Stats
+STATS_CALCULATE_LOGIN = 7
+
+
 # Max time in seconds after which the playing_now stream will expire.
 PLAYING_NOW_MAX_DURATION = 10 * 60
 

From 6a4ff2d03a9173c231c630e67dd3ef58ecc1245f Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Thu, 3 Aug 2017 23:36:30 +0530
Subject: [PATCH 04/20] Write code for calculation of user stats

---
 listenbrainz/stats/calculate.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 listenbrainz/stats/calculate.py

diff --git a/listenbrainz/stats/calculate.py b/listenbrainz/stats/calculate.py
new file mode 100644
index 0000000000..c48ed0e9c2
--- /dev/null
+++ b/listenbrainz/stats/calculate.py
@@ -0,0 +1,21 @@
+import listenbrainz.stats.user as stats_user
+import listenbrainz.db.user as db_user
+import listenbrainz.db.stats as db_stats
+
+
+def calculate_user_stats():
+    for user in get_recently_logged_in_users():
+        recordings = stats_user.get_top_recordings(musicbrainz_id=user['musicbrainz_id'])
+        artists    = stats_user.get_top_artists(musicbrainz_id=user['musicbrainz_id'])
+        releases   = stats_user.get_top_releases(musicbrainz_id=user['musicbrainz_id'])
+
+        db_stats.insert_user_stats(
+            user_id=user['id'],
+            artists=artists,
+            recordings=recordings,
+            releases=releases,
+        )
+
+def calculate():
+    pass
+

From fe850d30f76c194d991db7d6c2f17fd13eb912c6 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Thu, 3 Aug 2017 23:42:35 +0530
Subject: [PATCH 05/20] Make last_updated columns in stats tables NOT NULL

---
 admin/sql/create_tables.sql                      |  8 ++++----
 .../2017-08-03-make-stats-updated-not-null.sql   | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)
 create mode 100644 admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql

diff --git a/admin/sql/create_tables.sql b/admin/sql/create_tables.sql
index 70efd3e207..4e318efaaa 100644
--- a/admin/sql/create_tables.sql
+++ b/admin/sql/create_tables.sql
@@ -49,7 +49,7 @@ CREATE TABLE statistics.user (
     artists                 JSONB,
     releases                JSONB,
     recordings              JSONB,
-    last_updated            TIMESTAMP WITH TIME ZONE
+    last_updated            TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 );
 
 CREATE TABLE statistics.artist (
@@ -60,7 +60,7 @@ CREATE TABLE statistics.artist (
     recordings              JSONB,
     users                   JSONB,
     listen_count            JSONB,
-    last_updated            TIMESTAMP WITH TIME ZONE
+    last_updated            TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 );
 ALTER TABLE statistics.artist ADD CONSTRAINT artist_stats_msid_uniq UNIQUE (msid);
 
@@ -71,7 +71,7 @@ CREATE TABLE statistics.release (
     recordings              JSONB,
     users                   JSONB,
     listen_count            JSONB,
-    last_updated            TIMESTAMP WITH TIME ZONE
+    last_updated            TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 );
 ALTER TABLE statistics.release ADD CONSTRAINT release_stats_msid_uniq UNIQUE (msid);
 
@@ -81,7 +81,7 @@ CREATE TABLE statistics.recording (
     name                    VARCHAR,
     users_all_time          JSONB,
     listen_count            JSONB,
-    last_updated            TIMESTAMP WITH TIME ZONE
+    last_updated            TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 
 );
 ALTER TABLE statistics.recording ADD CONSTRAINT recording_stats_msid_uniq UNIQUE (msid);
diff --git a/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql b/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql
new file mode 100644
index 0000000000..3a345bdba4
--- /dev/null
+++ b/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql
@@ -0,0 +1,16 @@
+BEGIN;
+
+ALTER TABLE statistics.user ALTER COLUMN last_updated SET NOT NULL;
+ALTER TABLE statistics.user ALTER COLUMN last_updated SET DEFAULT NOW();
+
+ALTER TABLE statistics.artist ALTER COLUMN last_updated SET NOT NULL;
+ALTER TABLE statistics.artist ALTER COLUMN last_updated SET DEFAULT NOW();
+
+
+ALTER TABLE statistics.release ALTER COLUMN last_updated SET NOT NULL;
+ALTER TABLE statistics.release ALTER COLUMN last_updated SET DEFAULT NOW();
+
+ALTER TABLE statistics.recording ALTER COLUMN last_updated SET NOT NULL;
+ALTER TABLE statistics.recording ALTER COLUMN last_updated SET DEFAULT NOW();
+
+COMMIT;

From 3b4606cd835d66ceb9ffbb2ba9db49340a890314 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Fri, 4 Aug 2017 00:41:08 +0530
Subject: [PATCH 06/20] Comment to think about

---
 admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql b/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql
index 3a345bdba4..2d82281a16 100644
--- a/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql
+++ b/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql
@@ -1,5 +1,8 @@
 BEGIN;
 
+-- XXX(param): What should values that are null already be set to here now?
+-- timestamp 0 or NOW() ?
+
 ALTER TABLE statistics.user ALTER COLUMN last_updated SET NOT NULL;
 ALTER TABLE statistics.user ALTER COLUMN last_updated SET DEFAULT NOW();
 

From 8b6f3ec92cd860dab1dc9d810f733ee768a32438 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Fri, 4 Aug 2017 00:41:41 +0530
Subject: [PATCH 07/20] Add code for inserting calculated stats into db

---
 listenbrainz/db/stats.py                      | 47 +++++++++++++++++++
 listenbrainz/db/tests/test_stats.py           | 47 +++++++++++++++++++
 listenbrainz/testdata/user_top_artists.json   | 14 ++++++
 .../testdata/user_top_recordings.json         | 14 ++++++
 listenbrainz/testdata/user_top_releases.json  | 14 ++++++
 5 files changed, 136 insertions(+)
 create mode 100644 listenbrainz/db/stats.py
 create mode 100644 listenbrainz/db/tests/test_stats.py
 create mode 100644 listenbrainz/testdata/user_top_artists.json
 create mode 100644 listenbrainz/testdata/user_top_recordings.json
 create mode 100644 listenbrainz/testdata/user_top_releases.json

diff --git a/listenbrainz/db/stats.py b/listenbrainz/db/stats.py
new file mode 100644
index 0000000000..5b9df1f2b1
--- /dev/null
+++ b/listenbrainz/db/stats.py
@@ -0,0 +1,47 @@
+"""This module contains functions to insert and retrieve statistics
+   calculated from Google BigQuery into the database.
+"""
+
+import sqlalchemy
+import ujson
+from listenbrainz import db
+
+
+# XXX(param): think about the names of these variables
+# should they be artist_stats and so on instead?
+# Note: names are used in tests and stats/calculate.py also
+
+def insert_user_stats(user_id, artists, recordings, releases):
+    # XXX(param): should this name be upsert_user_stats?
+
+    with db.engine.connect() as connection:
+        connection.execute(sqlalchemy.text("""
+            INSERT INTO statistics.user (user_id, artists, recordings, releases)
+                 VALUES (:user_id, :artists, :recordings, :releases)
+            ON CONFLICT (user_id)
+          DO UPDATE SET artists = :artists,
+                        recordings = :recordings,
+                        releases = :releases,
+                        last_updated = NOW()
+            """), {
+                'user_id': user_id,
+                'artists': ujson.dumps(artists),
+                'recordings': ujson.dumps(recordings),
+                'releases': ujson.dumps(releases)
+            }
+        )
+
+
+def get_user_stats(user_id):
+
+    with db.engine.connect() as connection:
+        result = connection.execute(sqlalchemy.text("""
+            SELECT user_id, artists, releases, recordings
+              FROM statistics.user
+             WHERE user_id = :user_id
+            """), {
+                'user_id': user_id
+            }
+        )
+        row = result.fetchone()
+        return dict(row) if row else None
diff --git a/listenbrainz/db/tests/test_stats.py b/listenbrainz/db/tests/test_stats.py
new file mode 100644
index 0000000000..a97c54f52f
--- /dev/null
+++ b/listenbrainz/db/tests/test_stats.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+import time
+import uuid
+import json
+import os
+import listenbrainz.db.user as db_user
+import listenbrainz.db.stats as db_stats
+from listenbrainz.db.testing import DatabaseTestCase
+
+
+class StatsDatabaseTestCase(DatabaseTestCase):
+
+    TEST_DATA_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'testdata')
+
+    def setUp(self):
+        DatabaseTestCase.setUp(self)
+        self.user = db_user.get_or_create('stats_user')
+
+    def path_to_data_file(self, filename):
+        # XXX(param): we have this function in IntegrationTestCase also,
+        # maybe find some way to share it
+        # ListenBrainzTestCase?
+       return os.path.join(StatsDatabaseTestCase.TEST_DATA_PATH, filename)
+
+    def test_insert_user_stats(self):
+
+        with open(self.path_to_data_file('user_top_artists.json')) as f:
+            artists = json.load(f)
+        with open(self.path_to_data_file('user_top_releases.json')) as f:
+            releases = json.load(f)
+        with open(self.path_to_data_file('user_top_recordings.json')) as f:
+            recordings = json.load(f)
+
+
+        db_stats.insert_user_stats(
+            user_id=self.user['id'],
+            artists=artists,
+            recordings=recordings,
+            releases=releases
+        )
+
+        # TODO(param): test the last_updated values too
+        result = db_stats.get_user_stats(user_id=self.user['id'])
+        self.assertDictEqual(result['artists'], artists)
+        self.assertDictEqual(result['releases'], releases)
+        self.assertDictEqual(result['recordings'], recordings)
+
diff --git a/listenbrainz/testdata/user_top_artists.json b/listenbrainz/testdata/user_top_artists.json
new file mode 100644
index 0000000000..6dfdc2e4f5
--- /dev/null
+++ b/listenbrainz/testdata/user_top_artists.json
@@ -0,0 +1,14 @@
+{
+    "all_time": [
+        {
+            "listen_count": 230,
+            "artist_msid": "ed6c388e-ea65-431f-8be5-4959239d8c65",
+            "name": "Kanye West"
+        },
+        {
+            "listen_count": 229,
+            "artist_msid": "80ca06c7-07fb-4b3a-a315-ad584cc8eeb0",
+            "name": "Frank Ocean"
+        }
+    ]
+}
diff --git a/listenbrainz/testdata/user_top_recordings.json b/listenbrainz/testdata/user_top_recordings.json
new file mode 100644
index 0000000000..2101d15058
--- /dev/null
+++ b/listenbrainz/testdata/user_top_recordings.json
@@ -0,0 +1,14 @@
+{
+    "all_time": [
+        {
+            "listen_count": 230,
+            "artist_msid": "59f4d2c6-ca76-4dc7-80de-c3fef6b51211",
+            "name": "Fade"
+        },
+        {
+            "listen_count": 229,
+            "artist_msid": "c2375e2e-e98a-4c6b-a139-3aae0a831ebf",
+            "name": "Nikes"
+        }
+    ]
+}
diff --git a/listenbrainz/testdata/user_top_releases.json b/listenbrainz/testdata/user_top_releases.json
new file mode 100644
index 0000000000..c0306d6282
--- /dev/null
+++ b/listenbrainz/testdata/user_top_releases.json
@@ -0,0 +1,14 @@
+{
+    "all_time": [
+        {
+            "listen_count": 230,
+            "artist_msid": "19eadc02-1f64-48fe-bb4c-33732b96f7b1",
+            "name": "The Life Of Pablo"
+        },
+        {
+            "listen_count": 229,
+            "artist_msid": "155fa8f8-ef04-471d-ab14-dd21838338d7",
+            "name": "Blonde"
+        }
+    ]
+}

From 0a9a3f438b60ece5b89d6bf0a6ac30c8ec06d9b1 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Fri, 4 Aug 2017 01:18:33 +0530
Subject: [PATCH 08/20] First cut of adding jobs to scheduler

---
 listenbrainz/config.py.sample       | 3 ++-
 listenbrainz/stats/calculate.py     | 4 ++--
 listenbrainz/webserver/scheduler.py | 7 ++++---
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/listenbrainz/config.py.sample b/listenbrainz/config.py.sample
index dc7705a101..aa5f18e030 100644
--- a/listenbrainz/config.py.sample
+++ b/listenbrainz/config.py.sample
@@ -56,7 +56,8 @@ BIGQUERY_TABLE_ID = "listen"
 STATS_ENTITY_LIMIT = 100 # the number of entities to calculate at max with BQ
 
 # Stats
-STATS_CALCULATE_LOGIN = 7
+STATS_CALCULATE_LOGIN = 30
+STATS_CALCULATION_INTERVAL = 7
 
 
 # Max time in seconds after which the playing_now stream will expire.
diff --git a/listenbrainz/stats/calculate.py b/listenbrainz/stats/calculate.py
index c48ed0e9c2..9dd4059895 100644
--- a/listenbrainz/stats/calculate.py
+++ b/listenbrainz/stats/calculate.py
@@ -16,6 +16,6 @@ def calculate_user_stats():
             releases=releases,
         )
 
-def calculate():
-    pass
+def calculate_stats():
+    calculate_user_stats()
 
diff --git a/listenbrainz/webserver/scheduler.py b/listenbrainz/webserver/scheduler.py
index 0f78333657..c2f43af4f1 100644
--- a/listenbrainz/webserver/scheduler.py
+++ b/listenbrainz/webserver/scheduler.py
@@ -3,9 +3,10 @@
 """
 import logging
 from apscheduler.schedulers.background import BackgroundScheduler
+from listenbrainz.stats.calculate import calculate_stats
 
 class ScheduledJobs():
-    """ Schedule the scripts that need to be run at scheduled intervals """
+    """Schedule the scripts that need to be run at regular intervals """
 
     def __init__(self, conf):
         self.log = logging.getLogger(__name__)
@@ -25,5 +26,5 @@ def run(self):
             self.scheduler.shutdown()
 
     def add_jobs(self):
-        """ Add the jobs that need to be run to the scheduler """
-        pass
+        """Add the jobs that need to be run to the scheduler"""
+        self.scheduler.add_job(calculate_stats, 'interval', days=conf['STATS_CALCULATION_INTERVAL'])

From 12528243d0d4392504d0143bb6c96fb19d6de820 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Tue, 8 Aug 2017 13:46:57 +0530
Subject: [PATCH 09/20] Add a script that can be run to manually calculate
 stats

Also fix a couple bugs in the code
---
 listenbrainz/stats/calculate.py     | 14 +++++++++++++-
 listenbrainz/webserver/scheduler.py |  2 +-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/listenbrainz/stats/calculate.py b/listenbrainz/stats/calculate.py
index 9dd4059895..b4d9e6c428 100644
--- a/listenbrainz/stats/calculate.py
+++ b/listenbrainz/stats/calculate.py
@@ -1,10 +1,13 @@
 import listenbrainz.stats.user as stats_user
 import listenbrainz.db.user as db_user
 import listenbrainz.db.stats as db_stats
+from listenbrainz import db
+from listenbrainz import config
+from listenbrainz import stats
 
 
 def calculate_user_stats():
-    for user in get_recently_logged_in_users():
+    for user in db_user.get_recently_logged_in_users():
         recordings = stats_user.get_top_recordings(musicbrainz_id=user['musicbrainz_id'])
         artists    = stats_user.get_top_artists(musicbrainz_id=user['musicbrainz_id'])
         releases   = stats_user.get_top_releases(musicbrainz_id=user['musicbrainz_id'])
@@ -19,3 +22,12 @@ def calculate_user_stats():
 def calculate_stats():
     calculate_user_stats()
 
+if __name__ == '__main__':
+    print('Connecting to Google BigQuery...')
+    stats.init_bigquery_connection()
+    print('Connecting to database...')
+    db.init_db_connection(config.SQLALCHEMY_DATABASE_URI)
+    print('Connected!')
+    print('Calculating statistics using Google BigQuery...')
+    calculate_stats()
+    print('Calculations done!')
diff --git a/listenbrainz/webserver/scheduler.py b/listenbrainz/webserver/scheduler.py
index c2f43af4f1..dc592229b9 100644
--- a/listenbrainz/webserver/scheduler.py
+++ b/listenbrainz/webserver/scheduler.py
@@ -27,4 +27,4 @@ def run(self):
 
     def add_jobs(self):
         """Add the jobs that need to be run to the scheduler"""
-        self.scheduler.add_job(calculate_stats, 'interval', days=conf['STATS_CALCULATION_INTERVAL'])
+        self.scheduler.add_job(calculate_stats, 'interval', days=self.conf['STATS_CALCULATION_INTERVAL'])

From f91488f7661e6ee266a3873acb9af02938913c9d Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Tue, 8 Aug 2017 22:46:46 +0530
Subject: [PATCH 10/20] Show artist count on user page

Ignore the styling for now, this is more of a proof-of-concept.
---
 listenbrainz/webserver/templates/user/profile.html | 2 ++
 listenbrainz/webserver/views/user.py               | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/listenbrainz/webserver/templates/user/profile.html b/listenbrainz/webserver/templates/user/profile.html
index 9e5fd0ee62..8c9ccb0bdd 100644
--- a/listenbrainz/webserver/templates/user/profile.html
+++ b/listenbrainz/webserver/templates/user/profile.html
@@ -17,6 +17,8 @@ <h3> Listen Count: {{ listen_count }}  </h3>
       We were not able to get listen count for {{ user.musicbrainz_id }} due to an error. Please reload to try again.
     {% endif %}
 
+	<h3> Artist Count: {{ artist_count }} </h3>
+
     <h3>Recent listens</h3>
 
     {% if not listens %}
diff --git a/listenbrainz/webserver/views/user.py b/listenbrainz/webserver/views/user.py
index a7f623494d..0a859f727a 100644
--- a/listenbrainz/webserver/views/user.py
+++ b/listenbrainz/webserver/views/user.py
@@ -1,4 +1,3 @@
-
 from flask import Blueprint, render_template, request, url_for, Response, redirect, flash, current_app, jsonify
 from flask_login import current_user, login_required
 from werkzeug.exceptions import NotFound, BadRequest, RequestEntityTooLarge, InternalServerError
@@ -9,6 +8,7 @@
 from listenbrainz import webserver
 from listenbrainz.webserver import flash
 import listenbrainz.db.user as db_user
+import listenbrainz.db.stats as db_stats
 import listenbrainz.config as config
 from listenbrainz.db.exceptions import DatabaseException
 from flask import make_response
@@ -152,6 +152,8 @@ def profile(user_name):
             }
             listens.insert(0, listen)
 
+    user_stats = db_stats.get_user_stats(user.id)
+
     return render_template(
         "user/profile.html",
         user=user,
@@ -161,6 +163,7 @@ def profile(user_name):
         spotify_uri=_get_spotify_uri_for_listens(listens),
         have_listen_count=have_listen_count,
         listen_count=format(int(listen_count), ",d"),
+        artist_count=format(len(user_stats.get("artists", [])), ",d")
     )
 
 

From addea60fb5dd43fc831d5ec5ee45d15ae6bc866d Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Thu, 17 Aug 2017 16:56:26 +0530
Subject: [PATCH 11/20] Remove extra line that came from rebase

---
 listenbrainz/config.py.sample | 2 --
 1 file changed, 2 deletions(-)

diff --git a/listenbrainz/config.py.sample b/listenbrainz/config.py.sample
index aa5f18e030..0675af5abc 100644
--- a/listenbrainz/config.py.sample
+++ b/listenbrainz/config.py.sample
@@ -54,8 +54,6 @@ BIGQUERY_TABLE_ID = "listen"
 
 # Stats
 STATS_ENTITY_LIMIT = 100 # the number of entities to calculate at max with BQ
-
-# Stats
 STATS_CALCULATE_LOGIN = 30
 STATS_CALCULATION_INTERVAL = 7
 

From b9e6120692cc688b3d8e1867918911350893a66e Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Fri, 18 Aug 2017 01:06:43 +0530
Subject: [PATCH 12/20] Add different stat function for getting artist_count

---
 listenbrainz/db/stats.py             | 14 +++++++---
 listenbrainz/db/tests/test_stats.py  |  6 +++--
 listenbrainz/stats/calculate.py      |  2 ++
 listenbrainz/stats/user.py           | 40 +++++++++++++++++++++++++++-
 listenbrainz/webserver/views/user.py |  2 ++
 5 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/listenbrainz/db/stats.py b/listenbrainz/db/stats.py
index 5b9df1f2b1..8f591e8056 100644
--- a/listenbrainz/db/stats.py
+++ b/listenbrainz/db/stats.py
@@ -7,13 +7,21 @@
 from listenbrainz import db
 
 
-# XXX(param): think about the names of these variables
+# XXX(param): think about the names of these stats variables
 # should they be artist_stats and so on instead?
 # Note: names are used in tests and stats/calculate.py also
 
-def insert_user_stats(user_id, artists, recordings, releases):
+def insert_user_stats(user_id, artists, recordings, releases, artist_count):
     # XXX(param): should this name be upsert_user_stats?
 
+    # put all artist stats into one dict which will then be inserted
+    # into the artist column of the stats.user table
+    # XXX(param): This makes the schema of the stats very variable though.
+    artist_stats = {
+        'count': artist_count,
+        'all_time': artists
+    }
+
     with db.engine.connect() as connection:
         connection.execute(sqlalchemy.text("""
             INSERT INTO statistics.user (user_id, artists, recordings, releases)
@@ -25,7 +33,7 @@ def insert_user_stats(user_id, artists, recordings, releases):
                         last_updated = NOW()
             """), {
                 'user_id': user_id,
-                'artists': ujson.dumps(artists),
+                'artists': ujson.dumps(artist_stats),
                 'recordings': ujson.dumps(recordings),
                 'releases': ujson.dumps(releases)
             }
diff --git a/listenbrainz/db/tests/test_stats.py b/listenbrainz/db/tests/test_stats.py
index a97c54f52f..e9532f5501 100644
--- a/listenbrainz/db/tests/test_stats.py
+++ b/listenbrainz/db/tests/test_stats.py
@@ -36,12 +36,14 @@ def test_insert_user_stats(self):
             user_id=self.user['id'],
             artists=artists,
             recordings=recordings,
-            releases=releases
+            releases=releases,
+            artist_count=2,
         )
 
         # TODO(param): test the last_updated values too
         result = db_stats.get_user_stats(user_id=self.user['id'])
-        self.assertDictEqual(result['artists'], artists)
+        self.assertDictEqual(result['artists']['all_time'], artists)
+        self.assertEqual(result['artists']['count'], 2)
         self.assertDictEqual(result['releases'], releases)
         self.assertDictEqual(result['recordings'], recordings)
 
diff --git a/listenbrainz/stats/calculate.py b/listenbrainz/stats/calculate.py
index b4d9e6c428..094e7a8003 100644
--- a/listenbrainz/stats/calculate.py
+++ b/listenbrainz/stats/calculate.py
@@ -11,12 +11,14 @@ def calculate_user_stats():
         recordings = stats_user.get_top_recordings(musicbrainz_id=user['musicbrainz_id'])
         artists    = stats_user.get_top_artists(musicbrainz_id=user['musicbrainz_id'])
         releases   = stats_user.get_top_releases(musicbrainz_id=user['musicbrainz_id'])
+        artist_count = stats_user.get_artist_count(musicbrainz_id=user['musicbrainz_id'])
 
         db_stats.insert_user_stats(
             user_id=user['id'],
             artists=artists,
             recordings=recordings,
             releases=releases,
+            artist_count=artist_count
         )
 
 def calculate_stats():
diff --git a/listenbrainz/stats/user.py b/listenbrainz/stats/user.py
index 3ef04acc7b..a2e9619c7a 100644
--- a/listenbrainz/stats/user.py
+++ b/listenbrainz/stats/user.py
@@ -133,11 +133,49 @@ def get_top_artists(musicbrainz_id, time_interval=None):
 
     return stats.run_query(query, parameters)
 
+
+def get_artist_count(musicbrainz_id, time_interval=None):
+    """ Get artist count for user with given MusicBrainz ID over a particular period of time 
+        
+        Args: musicbrainz_id (str): the MusicBrainz ID of the user
+              time_interval  (str): the time interval over which artist count should be returned
+                                    (defaults to all time)
+
+        Returns: artist_count (int): total number of artists listened to by the user in that
+                                     period of time
+    """
+
+    filter_clause = ""
+    if time_interval:
+        filter_clause = "AND listened_at >= TIMESTAMP_SUB(CURRENT_TIME(), INTERVAL {})".format(time_interval)
+
+    query = """SELECT COUNT(DISTINCT(artist_msid)) as artist_count
+                 FROM {dataset_id}.{table_id}
+                WHERE user_name = @musicbrainz_id
+                {time_filter_clause}
+            """.format(
+                    dataset_id=config.BIGQUERY_DATASET_ID,
+                    table_id=config.BIGQUERY_TABLE_ID,
+                    time_filter_clause=filter_clause,
+                )
+
+    parameters = [
+        {
+            'name': 'musicbrainz_id',
+            'type': 'STRING',
+            'value': musicbrainz_id,
+        }
+    ]
+
+    return stats.run_query(query, parameters)['listen_count']
+
+
+
 def get_top_releases(musicbrainz_id, time_interval=None):
     """ Get top releases for user with given MusicBrainz ID over a particular period of time
 
         Args: musicbrainz_id (str): the MusicBrainz ID of the user
-              time_interval  (str): the time interval over which top artists should be returned
+              time_interval  (str): the time interval over which top releases should be returned
                                     (defaults to all time)
 
         Returns: A sorted list of dicts with the following structure
diff --git a/listenbrainz/webserver/views/user.py b/listenbrainz/webserver/views/user.py
index 0a859f727a..a7eea8b000 100644
--- a/listenbrainz/webserver/views/user.py
+++ b/listenbrainz/webserver/views/user.py
@@ -153,6 +153,8 @@ def profile(user_name):
             listens.insert(0, listen)
 
     user_stats = db_stats.get_user_stats(user.id)
+    if not user_stats:
+        user_stats = {}
 
     return render_template(
         "user/profile.html",

From 7362d80ad18175cfe420ad59415e9363d777c61d Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Tue, 22 Aug 2017 14:34:58 +0530
Subject: [PATCH 13/20] Set NULL values of last_updated in stat tables to 0

---
 .../sql/updates/2017-08-03-make-stats-updated-not-null.sql  | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql b/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql
index 2d82281a16..9a1b30c8b8 100644
--- a/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql
+++ b/admin/sql/updates/2017-08-03-make-stats-updated-not-null.sql
@@ -1,7 +1,9 @@
 BEGIN;
 
--- XXX(param): What should values that are null already be set to here now?
--- timestamp 0 or NOW() ?
+UPDATE statistics.user SET last_updated = to_timestamp(0) WHERE last_updated IS NULL;
+UPDATE statistics.artist SET last_updated = to_timestamp(0) WHERE last_updated IS NULL;
+UPDATE statistics.release SET last_updated = to_timestamp(0) WHERE last_updated IS NULL;
+UPDATE statistics.recording SET last_updated = to_timestamp(0) WHERE last_updated IS NULL;
 
 ALTER TABLE statistics.user ALTER COLUMN last_updated SET NOT NULL;
 ALTER TABLE statistics.user ALTER COLUMN last_updated SET DEFAULT NOW();

From 5b6ff06f6c3dcf339a00390a32a0d7767b4a1012 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Wed, 23 Aug 2017 00:38:34 +0530
Subject: [PATCH 14/20] Show stats in a table on user page

---
 listenbrainz/stats/user.py                    |  2 +-
 .../webserver/templates/user/profile.html     | 28 +++++++++++++++----
 listenbrainz/webserver/views/user.py          |  2 +-
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/listenbrainz/stats/user.py b/listenbrainz/stats/user.py
index a2e9619c7a..f575560373 100644
--- a/listenbrainz/stats/user.py
+++ b/listenbrainz/stats/user.py
@@ -167,7 +167,7 @@ def get_artist_count(musicbrainz_id, time_interval=None):
         }
     ]
 
-    return stats.run_query(query, parameters)['listen_count']
+    return stats.run_query(query, parameters)[0]['artist_count']
 
 
 
diff --git a/listenbrainz/webserver/templates/user/profile.html b/listenbrainz/webserver/templates/user/profile.html
index 8c9ccb0bdd..78f110945d 100644
--- a/listenbrainz/webserver/templates/user/profile.html
+++ b/listenbrainz/webserver/templates/user/profile.html
@@ -11,13 +11,29 @@ <h2 class="page-title">{{ user.musicbrainz_id }}</h2>
       <b><a href="https://musicbrainz.org/user/{{ user.musicbrainz_id }}">See profile on MusicBrainz</a></b>
     </div>
 
-    {% if have_listen_count %}
-      <h3> Listen Count: {{ listen_count }}  </h3>
-    {% else %}
-      We were not able to get listen count for {{ user.musicbrainz_id }} due to an error. Please reload to try again.
-    {% endif %}
 
-	<h3> Artist Count: {{ artist_count }} </h3>
+	<h3> Statistics </h3>
+
+	<div class="row">
+	  <div class="col-md-8">
+		<table class="table table-border table-condensed table-striped">
+	      <tbody>
+		  {% if listen_count %}
+			<tr>
+			  <td>Listen count</td>
+			  <td>{{ listen_count }}</td>
+			</tr>
+		  {% endif %}
+
+		  {% if artist_count %}
+			<tr>
+			  <td>Artist count</td>
+			  <td>{{ artist_count }}</td>
+			</tr>
+		  {% endif %}
+		</table>
+	  </div>
+	</div>
 
     <h3>Recent listens</h3>
 
diff --git a/listenbrainz/webserver/views/user.py b/listenbrainz/webserver/views/user.py
index a7eea8b000..000748d3f4 100644
--- a/listenbrainz/webserver/views/user.py
+++ b/listenbrainz/webserver/views/user.py
@@ -165,7 +165,7 @@ def profile(user_name):
         spotify_uri=_get_spotify_uri_for_listens(listens),
         have_listen_count=have_listen_count,
         listen_count=format(int(listen_count), ",d"),
-        artist_count=format(len(user_stats.get("artists", [])), ",d")
+        artist_count=format(int(user_stats['artists']['count']), ",d")
     )
 
 

From f6eacfda2402c489b4527f4388abe7efbf3bb260 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Wed, 23 Aug 2017 01:06:30 +0530
Subject: [PATCH 15/20] Fix error if stats are not calculated for user

---
 listenbrainz/webserver/views/user.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/listenbrainz/webserver/views/user.py b/listenbrainz/webserver/views/user.py
index 000748d3f4..e0689b2d6e 100644
--- a/listenbrainz/webserver/views/user.py
+++ b/listenbrainz/webserver/views/user.py
@@ -153,8 +153,10 @@ def profile(user_name):
             listens.insert(0, listen)
 
     user_stats = db_stats.get_user_stats(user.id)
-    if not user_stats:
-        user_stats = {}
+    try:
+        artist_count = int(user_stats['artists']['count'])
+    except (KeyError, TypeError):
+        artist_count = 0
 
     return render_template(
         "user/profile.html",
@@ -165,7 +167,7 @@ def profile(user_name):
         spotify_uri=_get_spotify_uri_for_listens(listens),
         have_listen_count=have_listen_count,
         listen_count=format(int(listen_count), ",d"),
-        artist_count=format(int(user_stats['artists']['count']), ",d")
+        artist_count=format(artist_count, ",d")
     )
 
 

From 3b9b04385d1bfdd0712ac5ff7973695a0b694287 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Fri, 1 Sep 2017 13:03:25 +0530
Subject: [PATCH 16/20] Change indentation to spaces in profile.html

---
 .../webserver/templates/user/profile.html     | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/listenbrainz/webserver/templates/user/profile.html b/listenbrainz/webserver/templates/user/profile.html
index 78f110945d..c46f0f92ea 100644
--- a/listenbrainz/webserver/templates/user/profile.html
+++ b/listenbrainz/webserver/templates/user/profile.html
@@ -12,28 +12,28 @@ <h2 class="page-title">{{ user.musicbrainz_id }}</h2>
     </div>
 
 
-	<h3> Statistics </h3>
+  <h3> Statistics </h3>
 
-	<div class="row">
-	  <div class="col-md-8">
-		<table class="table table-border table-condensed table-striped">
-	      <tbody>
-		  {% if listen_count %}
-			<tr>
-			  <td>Listen count</td>
-			  <td>{{ listen_count }}</td>
-			</tr>
-		  {% endif %}
+  <div class="row">
+    <div class="col-md-8">
+    <table class="table table-border table-condensed table-striped">
+        <tbody>
+      {% if listen_count %}
+      <tr>
+        <td>Listen count</td>
+        <td>{{ listen_count }}</td>
+      </tr>
+      {% endif %}
 
-		  {% if artist_count %}
-			<tr>
-			  <td>Artist count</td>
-			  <td>{{ artist_count }}</td>
-			</tr>
-		  {% endif %}
-		</table>
-	  </div>
-	</div>
+      {% if artist_count %}
+      <tr>
+        <td>Artist count</td>
+        <td>{{ artist_count }}</td>
+      </tr>
+      {% endif %}
+    </table>
+    </div>
+  </div>
 
     <h3>Recent listens</h3>
 

From 4eb5b5c6dbfda3578ca201a0589c2cbb96928ea5 Mon Sep 17 00:00:00 2001
From: Param Singh <paramsingh258@gmail.com>
Date: Fri, 1 Sep 2017 15:22:58 +0530
Subject: [PATCH 17/20] Address TODO comments and add docstrings to new
 functions

Also improve tests and variable names a bit
---
 listenbrainz/config.py.sample       |  4 ++--
 listenbrainz/db/stats.py            | 32 ++++++++++++++++++++++-------
 listenbrainz/db/tests/test_stats.py |  7 +------
 listenbrainz/db/user.py             |  3 +--
 4 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/listenbrainz/config.py.sample b/listenbrainz/config.py.sample
index 0675af5abc..62ac99a271 100644
--- a/listenbrainz/config.py.sample
+++ b/listenbrainz/config.py.sample
@@ -54,8 +54,8 @@ BIGQUERY_TABLE_ID = "listen"
 
 # Stats
 STATS_ENTITY_LIMIT = 100 # the number of entities to calculate at max with BQ
-STATS_CALCULATE_LOGIN = 30
-STATS_CALCULATION_INTERVAL = 7
+STATS_CALCULATION_LOGIN_TIME = 30 # users must have logged in to LB in the past 30 days for stats to be calculated
+STATS_CALCULATION_INTERVAL = 7 # stats are calculated every 7 days
 
 
 # Max time in seconds after which the playing_now stream will expire.
diff --git a/listenbrainz/db/stats.py b/listenbrainz/db/stats.py
index 8f591e8056..649217f8b1 100644
--- a/listenbrainz/db/stats.py
+++ b/listenbrainz/db/stats.py
@@ -7,16 +7,21 @@
 from listenbrainz import db
 
 
-# XXX(param): think about the names of these stats variables
-# should they be artist_stats and so on instead?
-# Note: names are used in tests and stats/calculate.py also
-
 def insert_user_stats(user_id, artists, recordings, releases, artist_count):
-    # XXX(param): should this name be upsert_user_stats?
+    """Inserts user stats calculated from Google BigQuery into the database.
+
+       If stats are already present for some user, they are updated to the new
+       values passed.
+
+       Args: user_id (int): the row id of the user,
+             artists (dict): the top artists listened to by the user
+             recordings (dict): the top recordings listened to by the user
+             releases (dict): the top releases listened to by the user
+             artist_count (int): the total number of artists listened to by the user
+    """
 
     # put all artist stats into one dict which will then be inserted
     # into the artist column of the stats.user table
-    # XXX(param): This makes the schema of the stats very variable though.
     artist_stats = {
         'count': artist_count,
         'all_time': artists
@@ -41,10 +46,23 @@ def insert_user_stats(user_id, artists, recordings, releases, artist_count):
 
 
 def get_user_stats(user_id):
+    """Get user stats for user with given ID.
+
+        Args: user_id (int): the row ID of the user in the DB
+
+        Returns: A dict of the following format
+                 {
+                    "user_id" (int): the id of the user
+                    "artists" (dict): artist stats for the user
+                    "releases" (dict) : release stats for the user
+                    "recordings" (dict): recording stats for the user
+                    "last_updated" (datetime): timestamp when the stats were last updated
+                 }
+    """
 
     with db.engine.connect() as connection:
         result = connection.execute(sqlalchemy.text("""
-            SELECT user_id, artists, releases, recordings
+            SELECT user_id, artists, releases, recordings, last_updated
               FROM statistics.user
              WHERE user_id = :user_id
             """), {
diff --git a/listenbrainz/db/tests/test_stats.py b/listenbrainz/db/tests/test_stats.py
index e9532f5501..32c2382bc7 100644
--- a/listenbrainz/db/tests/test_stats.py
+++ b/listenbrainz/db/tests/test_stats.py
@@ -1,6 +1,4 @@
 # -*- coding: utf-8 -*-
-import time
-import uuid
 import json
 import os
 import listenbrainz.db.user as db_user
@@ -17,9 +15,6 @@ def setUp(self):
         self.user = db_user.get_or_create('stats_user')
 
     def path_to_data_file(self, filename):
-        # XXX(param): we have this function in IntegrationTestCase also,
-        # maybe find some way to share it
-        # ListenBrainzTestCase?
        return os.path.join(StatsDatabaseTestCase.TEST_DATA_PATH, filename)
 
     def test_insert_user_stats(self):
@@ -40,10 +35,10 @@ def test_insert_user_stats(self):
             artist_count=2,
         )
 
-        # TODO(param): test the last_updated values too
         result = db_stats.get_user_stats(user_id=self.user['id'])
         self.assertDictEqual(result['artists']['all_time'], artists)
         self.assertEqual(result['artists']['count'], 2)
         self.assertDictEqual(result['releases'], releases)
         self.assertDictEqual(result['recordings'], recordings)
+        self.assertGreater(int(result['last_updated'].strftime('%s')), 0)
 
diff --git a/listenbrainz/db/user.py b/listenbrainz/db/user.py
index 57cfd5838b..d0c63a56e0 100644
--- a/listenbrainz/db/user.py
+++ b/listenbrainz/db/user.py
@@ -225,7 +225,6 @@ def get_recently_logged_in_users():
                   FROM "user"
                  WHERE last_login >= NOW() - INTERVAL ':x days'
                 """.format(columns=','.join(USER_GET_COLUMNS))), {
-                    #TODO(param): think about the name of this variable in config
-                    'x': config.STATS_CALCULATE_LOGIN
+                    'x': config.STATS_CALCULATION_LOGIN_TIME
                 })
         return [dict(row) for row in result]

From 54e80b68449ac54fdaaf29376ee3f0cfefe4ff87 Mon Sep 17 00:00:00 2001
From: Param Singh <param@metabrainz.org>
Date: Thu, 5 Oct 2017 21:48:13 +0530
Subject: [PATCH 18/20] Don't align equals signs

---
 listenbrainz/stats/calculate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/listenbrainz/stats/calculate.py b/listenbrainz/stats/calculate.py
index 094e7a8003..13502c5baa 100644
--- a/listenbrainz/stats/calculate.py
+++ b/listenbrainz/stats/calculate.py
@@ -9,8 +9,8 @@
 def calculate_user_stats():
     for user in db_user.get_recently_logged_in_users():
         recordings = stats_user.get_top_recordings(musicbrainz_id=user['musicbrainz_id'])
-        artists    = stats_user.get_top_artists(musicbrainz_id=user['musicbrainz_id'])
-        releases   = stats_user.get_top_releases(musicbrainz_id=user['musicbrainz_id'])
+        artists = stats_user.get_top_artists(musicbrainz_id=user['musicbrainz_id'])
+        releases = stats_user.get_top_releases(musicbrainz_id=user['musicbrainz_id'])
         artist_count = stats_user.get_artist_count(musicbrainz_id=user['musicbrainz_id'])
 
         db_stats.insert_user_stats(

From 18de1960ce9dd5f29c49ce4352d829c0f3b19f55 Mon Sep 17 00:00:00 2001
From: Param Singh <param@metabrainz.org>
Date: Thu, 5 Oct 2017 22:21:48 +0530
Subject: [PATCH 19/20] Change docstring to better english

---
 listenbrainz/db/user.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/listenbrainz/db/user.py b/listenbrainz/db/user.py
index d0c63a56e0..a147af63a9 100644
--- a/listenbrainz/db/user.py
+++ b/listenbrainz/db/user.py
@@ -216,8 +216,8 @@ def update_latest_import(musicbrainz_id, ts):
 
 
 def get_recently_logged_in_users():
-    """Returns a list of users who have logged-in in the last X days, X is
-       defined in the config.
+    """Returns a list of users who have logged-in in the
+    last config.STATS_CALCULATION_LOGIN_TIME days
     """
     with db.engine.connect() as connection:
         result = connection.execute(sqlalchemy.text("""

From 650d079cbc6c3441b93ddf3ed2cd4742d13cbd2e Mon Sep 17 00:00:00 2001
From: Param Singh <param@metabrainz.org>
Date: Thu, 5 Oct 2017 22:55:35 +0530
Subject: [PATCH 20/20] Put the bigquery initialization code into a module

This way, both bigquery-writer and the stats stuff can use the
same initialization code.
---
 .../bigquery-writer/bigquery-writer.py        | 18 ++++------
 listenbrainz/bigquery.py                      | 33 +++++++++++++++++++
 listenbrainz/stats/__init__.py                | 18 ++--------
 listenbrainz/webserver/scheduler.py           |  4 ++-
 4 files changed, 44 insertions(+), 29 deletions(-)
 create mode 100644 listenbrainz/bigquery.py

diff --git a/listenbrainz/bigquery-writer/bigquery-writer.py b/listenbrainz/bigquery-writer/bigquery-writer.py
index 6689f916c8..2133dfc940 100755
--- a/listenbrainz/bigquery-writer/bigquery-writer.py
+++ b/listenbrainz/bigquery-writer/bigquery-writer.py
@@ -12,10 +12,11 @@
 
 from googleapiclient import discovery
 from googleapiclient.errors import HttpError
+from listenbrainz.bigquery import create_bigquery_object
+from listenbrainz.bigquery import NoCredentialsVariableException, NoCredentialsFileException
 from oauth2client.client import GoogleCredentials
 
 REPORT_FREQUENCY = 5000
-APP_CREDENTIALS_FILE = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
 ERROR_RETRY_DELAY = 3 # number of seconds to wait until retrying an operation
 DUMP_JSON_WITH_ERRORS = True
 
@@ -153,18 +154,11 @@ def start(self):
             sleep(66666)
             return
 
-        if not APP_CREDENTIALS_FILE:
-            self.log.error("BiqQueryWriter not started, the GOOGLE_APPLICATION_CREDENTIALS env var is not defined.")
+        try:
+            self.bigquery = create_bigquery_object()
+        except (NoCredentialsFileException, NoCredentialsVariableException):
+            self.log.error("Credential File not present or invalid! Sleeping...")
             sleep(1000)
-            return
-
-        if not os.path.exists(APP_CREDENTIALS_FILE):
-            self.log.error("BiqQueryWriter not started, %s is missing." % APP_CREDENTIALS_FILE)
-            sleep(1000)
-            return
-
-        credentials = GoogleCredentials.get_application_default()
-        self.bigquery = discovery.build('bigquery', 'v2', credentials=credentials)
 
         while True:
             try:
diff --git a/listenbrainz/bigquery.py b/listenbrainz/bigquery.py
new file mode 100644
index 0000000000..a6add0e5f1
--- /dev/null
+++ b/listenbrainz/bigquery.py
@@ -0,0 +1,33 @@
+import os
+from googleapiclient import discovery
+import googleapiclient
+from oauth2client.client import GoogleCredentials
+
+APP_CREDENTIALS_FILE = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
+
+def create_bigquery_object():
+    """ Initiates the connection to Google BigQuery. Returns a BigQuery object. """
+
+    if not APP_CREDENTIALS_FILE:
+        logger.error("The GOOGLE_APPLICATIONS_CREDENTIALS variable is undefined, cannot connect to BigQuery")
+        raise NoCredentialsVariableException
+
+    if not os.path.exists(APP_CREDENTIALS_FILE):
+        logger.error("The BigQuery credentials file does not exist, cannot connect to BigQuery")
+        raise NoCredentialsFileException
+
+    credentials = GoogleCredentials.get_application_default()
+    return discovery.build('bigquery', 'v2', credentials=credentials)
+
+
+# Exceptions
+class BigQueryException(Exception):
+    pass
+
+
+class NoCredentialsVariableException(BigQueryException):
+    pass
+
+
+class NoCredentialsFileException(BigQueryException):
+    pass
diff --git a/listenbrainz/stats/__init__.py b/listenbrainz/stats/__init__.py
index 98c3ffd9de..3cce86f510 100644
--- a/listenbrainz/stats/__init__.py
+++ b/listenbrainz/stats/__init__.py
@@ -1,9 +1,6 @@
-from googleapiclient import discovery
-import googleapiclient
-from oauth2client.client import GoogleCredentials
 import os
 import logging
-from listenbrainz.stats.exceptions import NoCredentialsVariableException, NoCredentialsFileException
+from listenbrainz.bigquery import create_bigquery_object
 import listenbrainz.config as config
 import time
 
@@ -12,25 +9,14 @@
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
-APP_CREDENTIALS_FILE = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
-
 bigquery = None
 
 
 def init_bigquery_connection():
     """ Initiates the connection to Google BigQuery """
 
-    if not APP_CREDENTIALS_FILE:
-        logger.error("The GOOGLE_APPLICATIONS_CREDENTIALS variable is undefined, cannot connect to BigQuery")
-        raise NoCredentialsVariableException
-
-    if not os.path.exists(APP_CREDENTIALS_FILE):
-        logger.error("The BigQuery credentials file does not exist, cannot connect to BigQuery")
-        raise NoCredentialsFileException
-
     global bigquery
-    credentials = GoogleCredentials.get_application_default()
-    bigquery = discovery.build('bigquery', 'v2', credentials=credentials)
+    bigquery = create_bigquery_object()
 
 
 def get_parameters_dict(parameters):
diff --git a/listenbrainz/webserver/scheduler.py b/listenbrainz/webserver/scheduler.py
index dc592229b9..6a58c82382 100644
--- a/listenbrainz/webserver/scheduler.py
+++ b/listenbrainz/webserver/scheduler.py
@@ -1,9 +1,10 @@
 """ This module contains code that triggers jobs we want to run
     on regular intervals.
 """
-import logging
 from apscheduler.schedulers.background import BackgroundScheduler
+from listenbrainz import stats
 from listenbrainz.stats.calculate import calculate_stats
+import logging
 
 class ScheduledJobs():
     """Schedule the scripts that need to be run at regular intervals """
@@ -15,6 +16,7 @@ def __init__(self, conf):
 
         self.conf = conf
         self.scheduler = BackgroundScheduler()
+        stats.init_bigquery_connection()
         self.add_jobs()
         self.run()