#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Relate the clusters of a time-ordered sequence of clusterings.

Defines :class:`MajorTrack`, which stores a sequence of clusterings together
with their time points and matches clusters of neighbouring time points.
"""


class MajorTrack(object):
    r"""
    Container for a time-ordered sequence of clusterings.

    Parameters
    ===========

    clusterings: list, dict
      Sequence of clusterings.
      **If provided as a `dict`**:

      keys: float, datetime
        The time points.
      values: list, dict
        The membership list of each clustering indicating to which cluster
        a data source belongs.
    \**kwargs optional parameter:
      timepoints: list
        The time points of each clustering. Defaults to
        ``0, 1, ..., len(clusterings) - 1``. Must have the same length as
        `clusterings`.

        Note
        -----
        If `clusterings` is of type `dict` then the keys will be used as
        time points and this optional parameter is ignored, even if
        provided.

      group_matchup_method: str (default='fraction')
        Set the method to calculate the similarity between two clusters from
        different clusterings. By default the fraction of identical members
        is used, as explained in the original article.

    Raises
    ======
    TypeError
      If `clusterings` is neither a `list` nor a `dict`.
    ValueError
      If `timepoints` and `clusterings` differ in length.

    Attributes
    ==========
    timepoints: list
      The time points in ascending order.
    clusterings: list
      The clusterings, ordered like :attr:`timepoints`.
    length: int
      Number of time points.
    group_matchup_method: str
      Default similarity method used by :meth:`get_group_matchup`.
    groupings: list
      Read by :meth:`get_group_matchup` and :meth:`get_span` as
      ``self.groupings[time_index][group_index]``, each entry being a
      collection of members. NOTE(review): nothing in this file assigns
      this attribute; it must be set before calling those methods.
    group_matchup: list
      Set by :meth:`get_group_matchup`.
      Holds for each time point the tracing and mapping sets of all
      clusters. Each element is a `dict` with the keys ``'forward'`` and
      ``'backward'``. Both hold a `dict` indicating for a cluster the best
      matching cluster along with the similarity score of the particular
      relation in a `tuple`.

      Example
      -------
      .. code-block:: python

          self.group_matchup[1] = {
              'backward': {0: (0, 1.0), ...},
              #            ^   ^  ^
              #            |   |  similarity score
              #            |   cluster from previous time point
              #            cluster from current time point.
          }

    group_similarities: list
      Set by :meth:`get_group_matchup`; same layout as
      :attr:`group_matchup`, holding the ``'similarities'`` entries.

    """
    def __init__(self, clusterings, **kwargs):
        # Explicit raise instead of `assert`: asserts vanish under `-O`.
        if not isinstance(clusterings, (list, dict)):
            raise TypeError(
                'clusterings must be a list or a dict, got {0}'.format(
                    type(clusterings).__name__
                )
            )
        if isinstance(clusterings, list):
            timepoints = kwargs.pop('timepoints', None)
            if timepoints is None:
                timepoints = list(range(len(clusterings)))
            else:
                timepoints = list(timepoints)
            # zip() would silently drop the surplus entries otherwise.
            if len(timepoints) != len(clusterings):
                raise ValueError(
                    'timepoints and clusterings differ in length: '
                    '{0} != {1}'.format(len(timepoints), len(clusterings))
                )
            # sort both clusterings and timepoints according to timepoints;
            # only the time point is used as key so that clusterings never
            # get compared with each other.
            ordered = sorted(
                zip(timepoints, clusterings),
                key=lambda pair: pair[0]
            )
            # Plain lists (also for empty input), like in the dict branch.
            self.timepoints = [_tp for _tp, _ in ordered]
            self.clusterings = [_clustering for _, _clustering in ordered]
        else:
            self.timepoints = sorted(clusterings.keys())
            self.clusterings = [clusterings[_tp] for _tp in self.timepoints]
        # The other methods iterate over `range(self.length)`.
        self.length = len(self.timepoints)

        self.group_matchup_method = kwargs.get(
            'group_matchup_method',
            'fraction'
        )

    def get_group_matchup(self, matchup_method=None):
        r"""
        Determine majority relation between neighbouring snapshots.

        Between each pair of consecutive time points all groups are compared
        and matched (if possible) using `matchup_method`.

        Parameters
        ===========
        matchup_method: str (default=None)
          If provided this overwrites `self.group_matchup_method`. It
          determines the method to use when calculating similarities between
          clusters from neighbouring snapshots.

        Returns
        =======
        None: None
          Adds new attributes:

          - `self.group_matchup`: List holding for each time point a
            dictionary with 'backward'/'forward' matchups. A matchup is a
            dict indicating for each group (id) the best match. The best
            match is given by a tuple with group id and similarity score.
            The first time point has no predecessor and the last one no
            successor; there every group maps to ``(None, None)``.

            E.g.::

                self.group_matchup[1] = {
                    'backward': {0: (0, 1.0), ...},
                    'forward': {0: (1, 0.7), ...}
                }

          - `self.group_similarities`: Same layout, holding the
            ``'similarities'`` entries (``None`` at the two ends).

          Both lists stay empty if there are no time points.

        Note
        ----
        Relies on ``self.groupings`` and on
        ``self._get_group_similarities(idx, idx + 1, method=...)``, which is
        indexed as ``result[direction]['matchup']`` and
        ``result[direction]['similarities']`` with `direction` being
        ``'forward'`` or ``'backward'``.
        NOTE(review): neither is defined in this file.
        """
        if matchup_method is None:
            matchup_method = self.group_matchup_method
        self.group_matchup = []
        self.group_similarities = []
        if self.length < 1:
            # Nothing to match; avoids indexing into an empty sequence.
            return
        # The first time point has nothing to look back to.
        _first_ids = range(len(self.groupings[0]))
        self.group_matchup.append(
            {'backward': {_gid: (None, None) for _gid in _first_ids}}
        )
        self.group_similarities.append(
            {'backward': {_gid: None for _gid in _first_ids}}
        )
        for _idx in range(self.length - 1):
            _pairwise = self._get_group_similarities(
                _idx, _idx + 1,
                method=matchup_method
            )
            _forward = _pairwise['forward']
            _backward = _pairwise['backward']
            # set forward matchup/similarities for current step
            self.group_matchup[-1]['forward'] = _forward['matchup']
            self.group_similarities[-1]['forward'] = _forward['similarities']
            # create backward matchup/similarities for next step
            self.group_matchup.append({'backward': _backward['matchup']})
            self.group_similarities.append(
                {'backward': _backward['similarities']}
            )
        # The last time point has nothing to look forward to.
        _last_ids = range(len(self.groupings[-1]))
        self.group_matchup[-1]['forward'] = {
            _gid: (None, None) for _gid in _last_ids
        }
        self.group_similarities[-1]['forward'] = {
            _gid: None for _gid in _last_ids
        }

    def get_span(self, idx, span_set, get_indivs=True):
        r"""
        Create the tracer tree, i.e. the span (time forward) of a group.

        Starting at time index `idx`, collect at each following time index
        all groups sharing at least one member with the groups collected at
        the previous time index. The search stops at the first time index
        without any such group.

        Parameters
        ===========
        idx: int
          Time index at which the span starts.
        span_set: int, str, set
          The starting point of the span:

          - `int`: index of a group in ``self.groupings[idx]``.
          - `str`: a member; all groups of ``self.groupings[idx]``
            containing it are used.
          - anything else: used as is as the starting collection of
            members.
        get_indivs: bool (default=True)
          If `True` return the groups themselves, otherwise the indices of
          the groups within ``self.groupings``.

        Returns
        =======
        span_tree: dict
          Maps each reached time index to a list of groups
          (`get_indivs=True`) or to a list of indices of all groups in
          ``self.groupings[time_index]`` that overlap with the span at that
          time index (`get_indivs=False`). The key `idx` is always present;
          its list is empty if `span_set` is a `str` no group contains.
        """
        if isinstance(span_set, int):
            _seeds = [self.groupings[idx][span_set]]
        elif isinstance(span_set, str):
            # A real list: a lazy `filter` would be consumed by the union
            # below, leaving nothing to return.
            _seeds = [_grp for _grp in self.groupings[idx] if span_set in _grp]
        else:
            _seeds = [span_set]
        span_tree = {idx: _seeds}
        # `set().union(...)` accepts any iterables and an empty list.
        current_set = set().union(*_seeds)
        for _idx in range(idx + 1, self.length):
            next_contained = [
                _grp for _grp in self.groupings[_idx]
                if not current_set.isdisjoint(_grp)
            ]
            if not next_contained:
                break
            span_tree[_idx] = next_contained
            current_set = set().union(*next_contained)
        if get_indivs:
            return span_tree
        span_tree_idxs = {}
        for _idx, _groups in span_tree.items():
            _span_set = set().union(*_groups)
            span_tree_idxs[_idx] = [
                _pos for _pos, _grp in enumerate(self.groupings[_idx])
                if not _span_set.isdisjoint(_grp)
            ]
        return span_tree_idxs
Example -------- @@ -234,6 +234,7 @@ def __init__(self, clusterings, history, **kwargs): else: self.timepoints = sorted(clusterings.keys()) self.clusterings = list(clusterings[tp] for tp in self.timepoints) + assert isinstance(self.clusterings[0], set) self.length = len(self.timepoints) # now determine the slice widths self.slice_widths = kwargs.get('slice_widths', None) @@ -382,11 +383,11 @@ def combined_population( If further arguments are provided (all have to be unnamed), then the union is taken between all of these time points. - + Example ------- .. code-block:: python - + self.resident_population(2,4,5) This will return the combined population of the time points 2, 4 @@ -846,14 +847,15 @@ def get_group_matchup(self, matchup_method=None): Parameters =========== matchup_method: str (default=None) - If provided this overwrites :attr:`~.MajorTrack.group_matchup_method`. + If provided this overwrites + :attr:`~majortrack.MajorTrack.group_matchup_method`. It determines the method to use when calculating similarities between clusters from neighbouring snapshots. Returns ======= self: :class:`.MajorTrack` - with new attribute :attr:`~.MajorTrack.group_matchup`. + with new attribute :obj:`~.MajorTrack.group_matchup`. """ if matchup_method is None: @@ -1956,7 +1958,7 @@ def get_community_shrinkages(self,): None: None Adds new attributes: - - attr:`~.MajorTrack.community_shrinkages` + - :attr:`~.MajorTrack.community_shrinkages` """ # birth events are not growth events self.community_shrinkages = [[]] @@ -2073,7 +2075,7 @@ def get_auto_corrs(self, residents=True): None: None Adds new attributes: - - attr:`~.MajorTrack.community_autocorrs` + - :attr:`~.MajorTrack.community_autocorrs` """ self.community_autocorrs = {} for idx in range(1, self.length):