Skip to content

Commit

Permalink
Make spatial completion ponderation f(level, population) with priorit…
Browse files Browse the repository at this point in the history
…y on level (#811)
  • Loading branch information
noirbizarre committed Mar 1, 2017
1 parent 5178cf8 commit 353eb1e
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
[#809](https://github.com/opendatateam/udata/pull/809)
- Fix metric update after transfer
[#810](https://github.com/opendatateam/udata/pull/810)
- Improve spatial completion ponderation (spatial zones reindexation required)
[#811](https://github.com/opendatateam/udata/pull/811)

## 1.0.3 (2017-02-21)

Expand Down
40 changes: 29 additions & 11 deletions udata/core/spatial/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,22 @@
from udata.search import ModelSearchAdapter, register
from udata.search.analysis import standard

from .models import GeoZone
from .models import GeoZone, admin_levels, ADMIN_LEVEL_MAX


__all__ = ('GeoZoneSearch', )


# Size of one ranking step: the population part of a zone weight
# is scaled to at most one PONDERATION_STEP.
PONDERATION_STEP = 10000
# Compute the population weight relative to 10⁸.
# Only 12 countries + the EU have a larger population,
# which is not significant in the ranking algorithm for countries.
# Given that ES indexes the weight as an integer,
# zones start to gain weight at MAX_POPULATION / PONDERATION_STEP,
# i.e. here starting at 10k inhabitants.
MAX_POPULATION = 1E8


def labels_for_zone(zone):
'''
Extract all known zone labels
Expand Down Expand Up @@ -43,15 +53,23 @@ class Meta:
payloads=True)

@classmethod
def compute_weight(cls, population):
    """
    Clamp a population into the weight interval [0..2147483647].

    Values already inside the interval are returned untouched, negative
    values collapse to 0 and anything else collapses to the upper bound.
    """
    upper_bound = 2147483647
    # Fast path: the population is already a valid weight.
    if 0 <= population <= upper_bound:
        return population
    # Negative placeholder (country/eh population is -99) gives no weight,
    # oversized values (World population is 6772425850) are capped.
    return 0 if population < 0 else upper_bound
def compute_weight(cls, zone):
    '''
    Give a weight to the zone according to its administrative level first
    and then its population.

    The level part is a multiple of ``10 * PONDERATION_STEP`` and the
    population part is at most ``PONDERATION_STEP``, so the result never
    exceeds ``(10 * ADMIN_LEVEL_MAX + 1) * PONDERATION_STEP``.

    :param zone: the zone to weight; its ``level`` and ``population``
                 attributes are read
    :returns: the weight as an integer
    '''
    # Each level gives a step.
    # A level missing from `admin_levels` is treated as ADMIN_LEVEL_MAX
    # and the level is floored to 1 to avoid a division by zero.
    level = max(admin_levels.get(zone.level, ADMIN_LEVEL_MAX), 1)
    # NOTE(review): if both operands are integers and the module does not
    # enable true division, this is a floor division on Python 2 - confirm.
    level_weight = (ADMIN_LEVEL_MAX / level) * 10 * PONDERATION_STEP
    # Population gives 0 <= weight <= PONDERATION_STEP
    # to rank between level steps only.
    # NB: to be really progressive, we should take the max population
    # by administrative level but it would require either too much time
    # or too much refactoring (storing the max population by level).
    # A missing (None) population counts as 0: comparing None with an
    # integer raises a TypeError on Python 3.
    population = min(max(0, zone.population or 0), MAX_POPULATION)
    population_weight = (population / MAX_POPULATION) * PONDERATION_STEP
    return int(level_weight + population_weight)

@classmethod
def is_indexable(cls, zone):
Expand All @@ -69,6 +87,6 @@ def serialize(cls, zone):
'level': zone.level,
'keys': zone.keys,
},
'weight': cls.compute_weight(zone.population),
'weight': cls.compute_weight(zone),
},
}

0 comments on commit 353eb1e

Please sign in to comment.