Skip to content

Commit

Permalink
Further optimize metapackage summary generation
Browse files Browse the repository at this point in the history
- The key to performance here is to avoid sorting versions at
  all costs, as comparing versions is still expensive in
  Python. Previously we sorted all packages by version; now
  we only sort versions after deduplicating them.
- This change also makes sorting more consistent and meaningful
  in some cases. For instance, version strings that differ
  textually but compare as equal versions are now ordered by
  their spread (number of families) first, then alphabetically.
  • Loading branch information
AMDmi3 committed Oct 11, 2018
1 parent 03187bc commit 6a899c4
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 56 deletions.
22 changes: 13 additions & 9 deletions repology/package.py
@@ -1,4 +1,4 @@
# Copyright (C) 2016-2017 Dmitry Marakasov <amdmi3@amdmi3.ru>
# Copyright (C) 2016-2018 Dmitry Marakasov <amdmi3@amdmi3.ru>
#
# This file is part of repology
#
Expand Down Expand Up @@ -62,19 +62,21 @@ def ToString(cl):

@total_ordering
class UserVisibleVersionInfo:
__slots__ = ['version', 'versionclass', 'metaorder', 'versionflags']
__slots__ = ['version', 'versionclass', 'metaorder', 'versionflags', 'numfamilies']

def __init__(self, version, versionclass, metaorder=0, versionflags=0):
def __init__(self, version, versionclass, metaorder=0, versionflags=0, numfamilies=1):
self.version = version
self.versionclass = versionclass
self.metaorder = metaorder
self.versionflags = versionflags
self.numfamilies = numfamilies

def __eq__(self, other):
return (self.metaorder == other.metaorder and
self.versionclass == other.versionclass and
self.version == other.version and
version_compare(self.version, other.version, self.versionflags, other.versionflags) == 0)
version_compare(self.version, other.version, self.versionflags, other.versionflags) == 0 and
self.numfamilies == other.numfamilies)

def __lt__(self, other):
if self.metaorder < other.metaorder:
Expand All @@ -99,13 +101,15 @@ def __lt__(self, other):
if self.versionclass > other.versionclass:
return False

return self.version < other.version
if self.numfamilies < other.numfamilies:
return True
if self.numfamilies > other.numfamilies:
return False

def flatten_legacy(self):
if self.versionclass == VersionClass.legacy:
self.versionclass = VersionClass.outdated
return self.version < other.version

return self
def __hash__(self):
return hash((self.metaorder, self.versionclass, self.version, self.numfamilies, self.versionflags))


class PackageFlags:
Expand Down
65 changes: 26 additions & 39 deletions repologyapp/metapackages.py
Expand Up @@ -20,7 +20,7 @@
import flask

from repology.database import MetapackageRequest
from repology.package import VersionClass
from repology.package import UserVisibleVersionInfo, VersionClass


class MetapackagesFilterInfo:
Expand Down Expand Up @@ -148,64 +148,51 @@ def packages_to_summary_items(packages, repo=None, maintainer=None):
if repo is not None:
maintainer = None

sumtypes = ('explicit', 'newest', 'outdated', 'ignored')

def summary_factory():
return {
sumtype: []
for sumtype in ['explicit', 'newest', 'outdated', 'ignored']
sumtype: defaultdict(set)
for sumtype in sumtypes
}

# pass1: gather packages under summaries[<effname>][<explicit|newest|outdated|ignored>]
summaries = defaultdict(summary_factory)

# pass 1: gather summaries in the intermediate format:
# dict by metapackage name -> dict by summary type (e.g. table columns) -> hash by versioninfo of sets of families
for package in packages:
target = None

versioninfo = package.get_user_visible_version()

if (repo is not None and repo == package.repo) or (maintainer is not None and maintainer in package.maintainers):
target = summaries[package.effname]['explicit']
elif package.versionclass in [VersionClass.outdated, VersionClass.legacy]:
target = summaries[package.effname]['outdated']
versioninfo.versionclass = VersionClass.outdated # we don't need to differentiate legacy and outdated here
elif package.versionclass in [VersionClass.devel, VersionClass.newest, VersionClass.unique]:
target = summaries[package.effname]['newest']
else:
target = summaries[package.effname]['ignored']

target.append(package)

# pass2: convert package lists into lists of version infos
def condense_version_families(tuples):
if not tuples:
return

current_key = tuples[0][0]
current_values = set([tuples[0][1]])

for key, value in tuples[1:]:
if key != current_key:
yield (current_key, len(current_values))
current_key = key
current_values = set()

current_values.add(value)

yield (current_key, len(current_values))

final_summaries = defaultdict(summary_factory)

for metapackagename, summary in summaries.items():
for sumtype, packages in summary.items():
final_summaries[metapackagename][sumtype] = list(
condense_version_families(
sorted(
[
(package.get_user_visible_version().flatten_legacy(), package.family)
for package in packages
],
reverse=True
)
target[versioninfo].add(package.family)

# pass 2: count families and convert to final format:
# dict by metapackage name -> dict by summary type (e.g. table columns) -> list of versioninfos (with filled numfamilies)
for summary in summaries.values():
for sumtype in sumtypes:
summary[sumtype] = sorted([
UserVisibleVersionInfo(
versioninfo.version,
versioninfo.versionclass,
versioninfo.metaorder,
versioninfo.versionflags,
len(families)
)
)
for versioninfo, families in summary[sumtype].items()
], reverse=True)

return final_summaries
return summaries


def packages_to_metapackages(*packagesets):
Expand Down
16 changes: 8 additions & 8 deletions repologyapp/templates/metapackages_table.html
Expand Up @@ -31,31 +31,31 @@

{% if repo or maintainer %}
<td>
{% for versioninfo, numfamilies in data.explicit %}
{% for versioninfo in data.explicit %}
<span class="version version-{{ versioninfo.versionclass|css_for_versionclass }}">{{ versioninfo.version }}</span>
{% if numfamilies > 1 %}<sup>{{ numfamilies }}</sup>{% endif %}
{% if versioninfo.numfamilies > 1 %}<sup>{{ versioninfo.numfamilies }}</sup>{% endif %}
{% else %}
-
{% endfor %}
</td>
{% endif %}
<td class="text-center"><b>{{ metapackage.num_families }}</b></td>
<td>
{% for versioninfo, numfamilies in data.newest %}
{% for versioninfo in data.newest %}
<span class="version version-{{ versioninfo.versionclass|css_for_versionclass }}">{{ versioninfo.version }}</span>
{%- if numfamilies > 1 %}<sup>{{ numfamilies }}</sup>{% endif %}
{%- if versioninfo.numfamilies > 1 %}<sup>{{ versioninfo.numfamilies }}</sup>{% endif %}
{% endfor %}
</td>
<td>
{% for versioninfo, numfamilies in data.outdated %}
{% for versioninfo in data.outdated %}
<span class="version version-{{ versioninfo.versionclass|css_for_versionclass }}">{{ versioninfo.version }}</span>
{%- if numfamilies > 1 %}<sup>{{ numfamilies }}</sup>{% endif %}
{%- if versioninfo.numfamilies > 1 %}<sup>{{ versioninfo.numfamilies }}</sup>{% endif %}
{% endfor %}
</td>
<td>
{% for versioninfo, numfamilies in data.ignored %}
{% for versioninfo in data.ignored %}
<span class="version version-{{ versioninfo.versionclass|css_for_versionclass }}">{{ versioninfo.version }}</span>
{%- if numfamilies > 1 %}<sup>{{ numfamilies }}</sup>{% endif %}
{%- if versioninfo.numfamilies > 1 %}<sup>{{ versioninfo.numfamilies }}</sup>{% endif %}
{% endfor %}
</td>
</tr>
Expand Down

0 comments on commit 6a899c4

Please sign in to comment.