Skip to content

Commit

Permalink
Simpler and more reliable metrics computation (#54)
Browse files Browse the repository at this point in the history
  • Loading branch information
abulte committed May 15, 2018
1 parent 96a6f2a commit 7accfc4
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 131 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
## 1.1.1 (2018-03-15)

- Handle multiple resources for same url [#49](https://github.com/opendatateam/udata-piwik/pull/49)
- Simpler and more reliable metrics computation [#54](https://github.com/opendatateam/udata-piwik/pull/54)

## 1.1.0 (2018-03-13)

Expand Down
41 changes: 38 additions & 3 deletions tests/test_piwik.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

from datetime import date
from datetime import date, datetime, timedelta

from udata import frontend, settings
from udata.app import create_app
Expand All @@ -19,6 +19,7 @@
from udata.tests.plugin import drop_db

from udata_piwik.counter import counter
from udata_piwik.metrics import upsert_metric_for_day

from .conftest import PiwikSettings
from .client import visit, has_data, reset, download
Expand All @@ -41,7 +42,7 @@ def app(request):

@pytest.fixture(scope='module')
def dataset_resource():
resource = ResourceFactory()
resource = ResourceFactory(url='http://schéma.org')
dataset = DatasetFactory(resources=[resource])
# 2x visit
visit(dataset)
Expand All @@ -52,6 +53,22 @@ def dataset_resource():
return dataset, resource


@pytest.fixture(scope='module')
def dataset_resource_w_previous_data():
    """Build a dataset whose single resource already has daily metrics.

    Daily metrics are upserted for the resource for yesterday (5 for each
    key) and for the day before (10 for each key). Then one visit of the
    dataset, one download of the resource and one download with
    ``latest=True`` are triggered.

    Returns the ``(dataset, resource)`` tuple.
    """
    resource = ResourceFactory()
    dataset = DatasetFactory(resources=[resource])
    metric_keys = ('nb_uniq_visitors', 'nb_hits', 'nb_visits')
    # (days ago, value stored under every metric key), most recent first
    for days_ago, amount in ((1, 5), (2, 10)):
        day = datetime.now() - timedelta(days=days_ago)
        upsert_metric_for_day(resource, day,
                              dict.fromkeys(metric_keys, amount))
    visit(dataset)
    download(resource)
    download(resource, latest=True)
    return dataset, resource


@pytest.fixture(scope='module')
def two_datasets_one_resource_url():
resource_1 = ResourceFactory(url='http://udata.world')
Expand Down Expand Up @@ -107,11 +124,14 @@ def reset_piwik():
@pytest.fixture(scope='module')
def fixtures(app, reset_piwik, dataset_resource, organization,
user, reuse, post, community_resource,
two_datasets_one_resource_url):
two_datasets_one_resource_url, dataset_resource_w_previous_data):
# wait for Piwik to be populated
assert has_data()
counter.count_for(date.today())
# count twice to ensure idempotence on one day
counter.count_for(date.today())
dataset, resource = dataset_resource
d_w_previous_data, r_w_previous_data = dataset_resource_w_previous_data
return {
'dataset': dataset,
'organization': organization,
Expand All @@ -121,6 +141,8 @@ def fixtures(app, reset_piwik, dataset_resource, organization,
'post': post,
'community_resource': community_resource,
'two_datasets_one_resource_url': two_datasets_one_resource_url,
'dataset_w_previous_data': d_w_previous_data,
'resource_w_previous_data': r_w_previous_data,
}


Expand All @@ -147,6 +169,8 @@ def test_dataset_metric(fixtures):
assert metric.date == date.today().isoformat()
assert metric.values == {'nb_hits': 2, 'nb_uniq_visitors': 1,
'nb_visits': 1}
fixtures['dataset'].reload()
assert fixtures['dataset'].metrics['views'] == 1


def test_resource_metric(fixtures):
Expand All @@ -157,6 +181,15 @@ def test_resource_metric(fixtures):
# 1 hit on permalink, 1 on url
assert metric.values == {'nb_hits': 2, 'nb_uniq_visitors': 2,
'nb_visits': 2}
fixtures['dataset'].reload()
resource = fixtures['dataset'].resources[0]
assert resource.metrics == {'views': 2}


def test_resource_metric_with_previous_data(fixtures):
fixtures['dataset_w_previous_data'].reload()
resource = fixtures['dataset_w_previous_data'].resources[0]
assert resource.metrics == {'views': 17}


def test_community_resource_metric(fixtures):
Expand All @@ -167,6 +200,8 @@ def test_community_resource_metric(fixtures):
# 1 hit on permalink, 1 on url
assert metric.values == {'nb_hits': 2, 'nb_uniq_visitors': 2,
'nb_visits': 2}
fixtures['community_resource'].reload()
assert fixtures['community_resource'].metrics['views'] == 2


def test_two_datasets_one_resource_url(fixtures):
Expand Down
7 changes: 4 additions & 3 deletions udata_piwik/download_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ def detect_by_resource_id(self, resource_id, row):
'data': row,
})
except CommunityResource.DoesNotExist:
raise Exception('No object found for resource_id %s' %
resource_id)
log.error('No object found for resource_id %s' % resource_id)

def detect_by_hashed_url(self, hashed_url, row):
found = False
Expand All @@ -95,10 +94,12 @@ def detect_by_hashed_url(self, hashed_url, row):
except CommunityResource.DoesNotExist:
pass
if not found:
raise Exception('No object found for urlhash %s' % hashed_url)
log.error('No object found for urlhash %s' % hashed_url)

def detect_download_objects(self):
for row in self.rows:
if 'url' not in row:
continue
last_url_match = re.match(LATEST_URL_REGEX, row['url'])
resource_id = last_url_match and last_url_match.group(1)
if resource_id:
Expand Down
126 changes: 1 addition & 125 deletions udata_piwik/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,110 +14,12 @@
User, Organization, Reuse, Dataset, Resource, CommunityResource
)

from .utils import is_today

KEYS = 'nb_uniq_visitors nb_hits nb_visits'.split()


log = logging.getLogger(__name__)


class HitsMetric(Metric):
name = 'nb_hits'
display_name = _('Hits')

def get_value(self):
pass


class DatasetHits(HitsMetric):
model = Dataset


class ResourceHits(HitsMetric):
model = Resource


class CommunityResourceHits(HitsMetric):
model = CommunityResource


class ReuseHits(HitsMetric):
model = Reuse


class OrganizationHits(HitsMetric):
model = Organization


class UserHits(HitsMetric):
model = User


class VisitsMetric(Metric):
name = 'nb_visits'
display_name = _('Visits')

def get_value(self):
pass


class DatasetVisits(VisitsMetric):
model = Dataset


class ResourceVisits(VisitsMetric):
model = Resource


class CommunityResourceVisits(VisitsMetric):
model = CommunityResource


class ReuseVisits(VisitsMetric):
model = Reuse


class OrganizationVisits(VisitsMetric):
model = Organization


class UserVisits(VisitsMetric):
model = User


class VisitorsMetric(Metric):
name = 'nb_uniq_visitors'
display_name = _('Visitors')

def get_value(self):
pass


class DatasetVisitors(VisitorsMetric):
model = Dataset


class ResourceVisitors(VisitorsMetric):
model = Resource


class CommunityResourceVisitors(VisitorsMetric):
model = CommunityResource


class ReuseVisitors(VisitorsMetric):
model = Reuse


class OrganizationVisitors(VisitorsMetric):
model = Organization


class UserVisitors(VisitorsMetric):
model = User


class ViewsMetric(Metric):
name = 'views'
display_name = _('Views')
Expand Down Expand Up @@ -170,7 +72,7 @@ class OrgResourcesDownloads(Metric):

def get_value(self):
ids = itertools.chain(*[
[r.id for r in d.resources] for d in
[getattr(r, 'id', None) for r in d.resources or []] for d in
(Dataset.objects(organization=self.target).only('resources') or [])
])
return int(Metrics.objects(object_id__in=ids, level='daily')
Expand Down Expand Up @@ -211,12 +113,6 @@ def upsert_metric_for_day(obj, day, data):
oid = obj.id if hasattr(obj, 'id') else obj
if not isinstance(day, basestring):
day = (day or date.today()).isoformat()

if hasattr(obj, 'metrics') and day == date.today().isoformat():
# Update object current metrics
for k in KEYS:
obj.metrics[k] = data[k]

commands = dict(('inc__values__{0}'.format(k), data[k]) for k in KEYS)
metrics = Metrics.objects(object_id=oid, level='daily', date=day)
return metrics.update_one(upsert=True, **commands)
Expand All @@ -225,26 +121,6 @@ def upsert_metric_for_day(obj, day, data):
def clear_metrics_for_day(day):
if not isinstance(day, basestring):
day = (day or date.today()).isoformat()

if is_today(day):
commands = dict(('set__metrics__{0}'.format(k), 0) for k in KEYS)
for model in Organization, Reuse, User:
try:
model.objects.update(**commands)
except Exception:
log.exception('Unable to clean %s', model.__name__)
for dataset in Dataset.objects:
dcommands = commands.copy()
for i, _r in enumerate(dataset.resources):
dcommands.update({
'set__resources__{0}__metrics__{1}'.format(i, k): 0
for k in KEYS
})
try:
dataset.update(**dcommands)
except Exception:
log.exception('Unable to clear dataset %s', dataset.id)

commands = dict(('unset__values__{0}'.format(k), 1) for k in KEYS)
metrics = Metrics.objects(level='daily', date=day)
return metrics.update(upsert=False, **commands)

0 comments on commit 7accfc4

Please sign in to comment.