Skip to content
This repository has been archived by the owner on May 24, 2019. It is now read-only.

Commit

Permalink
Remove contributions after computing aggregates #191
Browse files Browse the repository at this point in the history
- Add new tests
- Change aggregation logic to compute in memory
- Remove ContributorQuerySet
  • Loading branch information
jaredlockhart committed Dec 3, 2015
1 parent 184479d commit 03ca681
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 166 deletions.
158 changes: 80 additions & 78 deletions leaderboard/contributors/models.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,8 @@
import operator

from bulk_update.helper import bulk_update
from django.db import models

from leaderboard.locations.models import Country


class ContributorQuerySet(models.QuerySet):
    """
    QuerySet for contributors that adds helpers for
    restricting by country and for totalling observations.
    """

    def filter_country(self, country_code):
        """
        Return only the contributors that have a contribution
        on a tile whose country carries the given ISO2 code.
        """
        lookup = {'contribution__tile__country__iso2': country_code}
        return self.filter(**lookup)

    def annotate_observations(self):
        """
        Attach an 'observations' value to every contributor: the
        sum of the observations over their contributions. Only
        contributors whose total is greater than zero are kept,
        ordered from the largest total to the smallest.
        """
        total = models.Sum('contribution__observations')
        with_totals = self.annotate(observations=total)
        positive_only = with_totals.filter(observations__gt=0)
        return positive_only.order_by('-observations')


class Contributor(models.Model):
"""
Expand All @@ -40,8 +13,6 @@ class Contributor(models.Model):
uid = models.CharField(max_length=255, default='')
name = models.CharField(max_length=255, default='')

objects = ContributorQuerySet.as_manager()

def __unicode__(self):
return self.name

Expand Down Expand Up @@ -78,58 +49,94 @@ class Meta:
ordering = ('rank',)

def __unicode__(self):
return unicode(self.id)

@staticmethod
def compute_ranks():
"""
Compute the number of observations and ranks for
each contributor for each country and globally.
"""
# When country is None, compute the global ranks
countries = [None] + list(Country.objects.all())
return u'country: {} contributor: {} observations: {} rank: {}'.format(
self.id,
self.country_id,
self.contributor_id,
self.observations,
self.rank,
)

@classmethod
def _compute_ranks(cls, contributions):
# Fold `contributions` into the ContributorRank rows, re-rank every
# country (plus the global list, keyed by country_id None), persist the
# rows and finally delete the contributions that were consumed.
#
# `contributions` is iterated twice below (once to aggregate, once to
# collect ids), so it has to be a re-iterable collection such as a list.
# Each item is read for `.id`, `.contributor_id`, `.observations` and
# `.tile.country_id`.
#
# NOTE(review): this span appears to interleave the removed and the added
# side of the diff. Statements that use `countries`, `contributor`,
# `new_contributor_ranks` or `updated_contributor_ranks` look like the
# previous implementation -- confirm before reusing any of it.
# Pull the entire set of ranks into memory.
contributor_ranks = {
(rank.contributor_id, rank.country_id): rank
for rank in ContributorRank.objects.all()
}

new_contributor_ranks = []
updated_contributor_ranks = []

for country in countries:
contributors = Contributor.objects.all()

if country:
contributors = contributors.filter_country(country.iso2)

ranked_contributors = enumerate(
contributors.annotate_observations(), start=1)

for rank, contributor in ranked_contributors:

country_id = country.id if country else None
contributor_rank = contributor_ranks.get(
(contributor.id, country_id), None)

if contributor_rank:
contributor_rank.rank = rank
contributor_rank.observations = contributor.observations
updated_contributor_ranks.append(contributor_rank)
for contribution in contributions:
# Each contribution counts towards the rank in the country in which
# it was made, as well as the global rank for that contributor.
for country_id in (contribution.tile.country_id, None):
rank_key = (contribution.contributor_id, country_id)
contributor_rank = contributor_ranks.get(rank_key, None)

if contributor_rank is not None:
# This rank exists and so we can update its observation
# count.
contributor_rank.observations += contribution.observations
else:
new_contributor_ranks.append(ContributorRank(
contributor=contributor,
country=country,
rank=rank,
observations=contributor.observations,
))

# This contributor has no rank for that country, we should
# create a new one.
contributor_ranks[rank_key] = ContributorRank(
contributor_id=contribution.contributor_id,
country_id=country_id,
observations=contribution.observations,
)

# Create a list of country ids from the contribution keys.
# This saves us from needing to query the database for the country ids.
country_ids = set([
country_id for (contributor_id, country_id)
in contributor_ranks.keys()
])

# sorted() is stable, so rows with equal observations keep the order in
# which contributor_ranks.values() yields them.
for country_id in country_ids:
country_ranks = sorted(
# We have every rank for a country in memory already,
# so we can just filter and sort this dataset.
[
rank for rank in contributor_ranks.values()
if rank.country_id == country_id
],
key=operator.attrgetter('observations'),
reverse=True,
)

# Assign each contributor their new ranks for this country.
for rank, contributor_rank in enumerate(country_ranks, start=1):
contributor_rank.rank = rank

# Update the ranks which already appear in the database.
bulk_update(
updated_contributor_ranks,
[
rank for rank in contributor_ranks.values()
if rank.id is not None
],
update_fields=['rank', 'observations'],
batch_size=100,
)
ContributorRank.objects.bulk_create(new_contributor_ranks)

# Insert the new ranks which we created.
ContributorRank.objects.bulk_create(
[rank for rank in contributor_ranks.values() if rank.id is None],
)

# Remove the contributions which we used to calculate the new ranks.
# Only the ids collected from `contributions` are deleted, so rows that
# are not part of that collection are left in place.
contribution_ids = [contribution.id for contribution in contributions]
Contribution.objects.filter(id__in=contribution_ids).delete()

@classmethod
def compute_ranks(cls):
    """
    Recalculate observation totals and ranks for every
    contributor, per country as well as globally.
    """
    # Materialise the contributions up front so the computation works on
    # a fixed snapshot even if more rows arrive in the meantime.
    pending = Contribution.objects.all().select_related('tile')
    snapshot = list(pending)
    cls._compute_ranks(snapshot)


class Contribution(models.Model):
Expand All @@ -145,9 +152,4 @@ class Meta:
unique_together = ('date', 'tile', 'contributor')

def __unicode__(self):
# NOTE(review): two return statements follow, so as written the second
# one can never run. They look like the removed and the added side of the
# diff -- confirm which of the two is the current implementation.
return u'{user}-{date}-{tile}: {observations}'.format(
user=self.contributor,
date=self.date,
tile=self.tile,
observations=self.observations,
)
return unicode(self.date)
142 changes: 54 additions & 88 deletions leaderboard/contributors/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,101 +38,14 @@ class Meta:
model = Contribution


class TestContributorQuerySet(TestCase):

    def test_filter_country(self):
        # Contributors are matched through the country of the tiles they
        # contributed to.
        first_country = CountryFactory()
        second_country = CountryFactory()

        only_first = ContributorFactory()
        only_second = ContributorFactory()
        in_both = ContributorFactory()

        placements = (
            (only_first, first_country),
            (only_second, second_country),
            (in_both, first_country),
            (in_both, second_country),
        )
        for contributor, country in placements:
            ContributionFactory(
                contributor=contributor,
                tile=TileFactory(country=country),
            )

        contributors = Contributor.objects.filter_country(second_country.iso2)
        self.assertEqual(set(contributors), {only_second, in_both})

    def test_annotate_observations(self):
        # The annotation totals observations per contributor and lists the
        # largest total first.
        country = CountryFactory()

        expected_observations = []
        for total in (10, 20):
            contributor = ContributorFactory()
            for _ in range(total):
                ContributionFactory(
                    contributor=contributor,
                    tile=TileFactory(country=country),
                )
            expected_observations.append((contributor, total))
        expected_observations.reverse()

        contributor_observations = []
        for annotated in Contributor.objects.annotate_observations():
            contributor_observations.append(
                (annotated, annotated.observations))

        self.assertEqual(contributor_observations, expected_observations)

    def test_observations_annotated_and_filtered_by_country(self):
        # The same contributor totals 10 inside one country and 20 overall.
        contributor = ContributorFactory()
        countries = [CountryFactory(), CountryFactory()]

        for country in countries:
            for _ in range(10):
                ContributionFactory(
                    contributor=contributor,
                    tile=TileFactory(country=country),
                )

        in_first_country = Contributor.objects.filter_country(
            countries[0].iso2)
        annotated_contributor = in_first_country.annotate_observations().get()
        self.assertEqual(annotated_contributor.observations, 10)

        everywhere = Contributor.objects.annotate_observations()
        annotated_contributor = everywhere.get()
        self.assertEqual(annotated_contributor.observations, 20)


class TestContributorRank(TestCase):

def create_contribution(self, contributor, country):
return Contribution.objects.create(
contributor=contributor,
date=datetime.date.today(),
observations=1,
tile=TileFactory(country=country)
tile=TileFactory(country=country),
)

def setUp(self):
Expand Down Expand Up @@ -244,4 +157,57 @@ def test_new_contributions_updates_existing_ranks(self):
self.assertEqual(ContributorRank.objects.count(), 6)
rank = ContributorRank.objects.get(
contributor=self.contributor1, country=self.country1)

self.assertEqual(rank.observations, 3)

def test_compute_ranks_removes_observations_when_complete(self):
    # Contributions that were aggregated must be gone once
    # compute_ranks() has finished.
    stored = Contribution.objects
    self.assertEqual(stored.count(), 0)

    for _ in range(3):
        self.create_contribution(self.contributor1, self.country1)
    self.assertEqual(stored.count(), 3)

    ContributorRank.compute_ranks()
    self.assertEqual(stored.count(), 0)

def test_contribution_set_frozen_during_rank_computation(self):
    # Contributions created after the snapshot was taken must be neither
    # counted nor deleted by _compute_ranks().
    contributor = ContributorFactory()
    country = CountryFactory()
    stored = Contribution.objects

    self.assertEqual(stored.count(), 0)

    for _ in range(3):
        self.create_contribution(contributor, country)
    self.assertEqual(stored.count(), 3)

    snapshot = list(stored.all().select_related())
    self.assertEqual(stored.count(), 3)

    for _ in range(3):
        self.create_contribution(contributor, country)
    self.assertEqual(stored.count(), 6)

    ContributorRank._compute_ranks(snapshot)

    global_rank = ContributorRank.objects.get(
        contributor=contributor, country=None)
    self.assertEqual(global_rank.observations, 3)
    self.assertEqual(stored.count(), 3)

def test_compute_ranks_updates_existing_ranks(self):
    # A large new contribution by the second contributor must swap the
    # two global ranks.
    def global_rank(contributor):
        row = ContributorRank.objects.get(
            country=None, contributor=contributor)
        return row.rank

    self.assertEqual(global_rank(self.contributor1), 1)
    self.assertEqual(global_rank(self.contributor2), 2)

    ContributionFactory(contributor=self.contributor2, observations=10)
    ContributorRank.compute_ranks()

    self.assertEqual(global_rank(self.contributor1), 2)
    self.assertEqual(global_rank(self.contributor2), 1)

0 comments on commit 03ca681

Please sign in to comment.