From a5317f42a45815d89821c9ecc1dd1279215738be Mon Sep 17 00:00:00 2001
From: Jonas I Liechti <jonas.i.liechti@protonmail.ch>
Date: Thu, 20 Feb 2020 20:16:54 +0100
Subject: [PATCH] assert each cluster is a set

---
 majortrack/.method.py.swp | Bin 0 -> 24576 bytes
 majortrack/method.py      | 215 ++++++++++++++++++++++++++++++++++++++
 majortrack/tracker.py     |  18 ++--
 3 files changed, 225 insertions(+), 8 deletions(-)
 create mode 100644 majortrack/.method.py.swp
 create mode 100644 majortrack/method.py
diff --git a/majortrack/.method.py.swp b/majortrack/.method.py.swp
new file mode 100644
index 0000000000000000000000000000000000000000..6b039d365d2fff30b1c383c30c299680a4952fc0
GIT binary patch
literal 24576
zcmeI4d#oH)9mhw?%OZj_1Ve!wdSiC)?cLpbFMZvWt7$2g)>4qR1arCU&hDPQciOo#
z<IL=Rw1V$O`-d-{KBK@tq{>4i5)FwO2}DC;3H*T~L<0tjF{F_MqJrPwnRDjM?9Sd6
z0@27!^6A{2IdjhM{LW+N_t-W@uHJe%TVGyd;5gGTR;+$y>i(fM##Lt<#va!(1G;{U
zW1CxY`26l2Td!HVU}cuKEVCl2t*jYok8W5$W|qvg>v(C++O_S{`1)3(v}U9+KC*sn
zytQHdI$6~7-95Y+29;TJkL!oN+1y*Xoc`P;{!Cn*<>8FmDtn7d3zFZ>HSn&~!0E<C
zW1}O~^?_9vvK43TephNC?{uz#Tm!iVat-7f$Tg5_AlE>yfp@D0LhnT5GwA#YQUNRF
z`8{3d6Xj=NJmBy1yY8PPKkt&~(XRW;<mV0Yyr=8F{Ft9|4dfcgHIQo{*Fdg;Tm!iV
zat-7f$Tg5_AlE>yfp<UyX3H>6Io&Y61}Ofo?EfQY7{=q^Ft`WY0d4~~fg8a$!2)oB
z18U$3umg;N72xIfqYm&h@Lg~PD1(2Xt;fMHWV;^D17I2W5ctFUa39<O9IzRz08gD}
z7{3NT1^dB1uo-LsFD*BW`@p^64sZ+D2LAb8!?+tPfbHNs@b^;<<7x1F@MACsc7wIx
z3~=NW!*~Qd0B!_La2_}T{Ox4O2Yv#60B!(RgEPP@aDqGw9s@rFCYS&#z`x-@c?f(9
z><8oEOmGyAkwah)xCERIj=&l78*n%HJlGBf!Rv6!yZ{b^hn0hd<^+3d$6((FrzQ)5
zHEY?XZ-o{Q3RBnjEm~+G9b?|~TZPOb`}O@Qt4;fE$Ezi)o=iOzaJyaZEmSr=k2|em
zsw|VolD(KU!{$uK`)_nWHk~Q^0Bic(3^`NEW3qH5sT<aIedh3mkOd+4GCM!TCNi~J
zz@uVNdPXv{nz(rCZlBSZER9a3hfR(IJ05A`5lJ<rYB;7K^6_3723WvD(inBSzdD;8
z-{;N}8qJ<gQDV`AY`qqJ%<MtUYAp;i`j@R{qr<5(8S@|GC)8Y<=3&NdJ>_XGh*~X&
z`KB|?i{d$(JJaC|E3whRYN{>LXm(%vujT%CMC;MQv|20{0j)=3h+CF39ZZf)4Q4k>
z0?8g)tt#8$I^3XZ-A8i6*l%>-)mQq=&dl{jwPZ`xzsvpay#2~3k~W5gPO`;(3rdl5
zwMQR)^oEHzITfpWfLZNmV)pLFdY&b!lC3RWi!7uopm9mBDYYJ=X4Cg#=^$JB4zmKG
zJYDmuRjcCS)CwKFBN}fvRknYm%8H}IY^1z?aF~_L<&4s5w=T%yBg1U8JR*yym-?mU
z>2fuxK`HR$C~cB@7L#0I$Fn(`x3G>yw;7h0>9kn<U9iZ4rt9;vL8a9}vyhuj^-R%f
z4YF_sUnx8YBlVQouGDAJ(zG?l9oAT6rlBgLvuIpTvP!HNrRbo{Hmbj=9+S1KW{8~t
zCo{y#kE<(X^t#HnTaaqTwOc4EzSo54p*72y=UUj1qY5pgocfEpu2QKKf7r>xs(#lY
z1qq$<7As;xcy16_4Vw?LPC#wc(=(*5mA8q9^BhVL5_qQNGq=r}t`qQPCxm*KmJ0o}
z?PyT&r_WJr*2B3K3#H~+i?^BH!e!fHZyZ)?liwN>wAQg%$5k*KVkJ?#LKy+A0%W@K
zR<UV!@a6B*)>_`n7lUGAcjb1#^^B)#)R8EpF44K5TPO_6ZxPzU6OD8|*;g)l<%39`
z>*=p3jDiK-2chXSd9u=BW@GphUr=Tca38a$Pw65R9~LuA%0ud{)hx#fYc;9WgwVrm
zXebg$wxLFaI^B@N9nQOQdC^rIEvwz;co~R3>CYicRIR5?T)xkJOi!`?qIUxpx~ys1
z&5lj4d9*I0)qI?d(ZWqmL(EVyUA1INFg>JIp=E<wxK`<dxn4+X%4V3hCPd?Ya>nfB
zoK@Y%o@)IDolsq#<+Ba$2Q!uz)WuqD@zAvFKs8A%V7Z#1iCNn5o3!WHF3nJbZbb+6
zKvyJ0qjlERi^K*(8k?W7u%XFL8mg)})9&yf>RvP};#*8?aS2Un5oonkS=)9^no1Z2
zDkUEr8rq5-!*}Pb7Pb$QwzE3zXZ4{WrJkMqQyq?ZEH;P4A`~^~VMvQ5ZcgY!e7;g?
z9=Y@khflM!X=4pAt@I*B0XgopFBo$6(hi63A$o@XoWWeBTbH}74!$m0ZnHCsH(6}H
zuEVBX9z-RK0ah9+VWXm*w#qtTyR?C>7y}npI)PtlSWbmIbIe;rs=Q;6{r@r8wVPn~
zlKrpZ^AE#*zZH;uUjgTXBe30n3l4$>Fb_To#=u$N1K@So??=Fk;A!w6_z}1nxL^z{
z2hYQve-L~f>;pDXcK%;r&)*Ba0w%z@U>SH0_Wb=|AD97Gfe(WVz?tA>*!GWsn}H3A
z;1uvj^zD1#2G9bcuVgppN3MZf1Gxrr4dfcgHSp#%KyNZVhtI=~@36o#om%K~P7bTU
z=qV;IXm>+aiK%Q)h$<?~Uz=P}+6zHe(j>JxHEAtfITugXgrM*n8miXrGfW!OSuAF|
zJE^cGUXZONU1CC_F2jnRO{*yqoE#_y-Ao>_oWeU2mOBpK`HDmF^U<SeeF-NOc_B$d
zYCP1-ZB%)DSqu}oCA80@*S72)TSG|*9g*cru7J59-2xS9z|*}qE=*T?>!z#HGg~7!
zE;&WxZjuMQ2KyW~h{IdyUebecH)0K@pD?J9Iv{NNZm+;pbTE7Rh(}{zr=coMo^5ls
z(K2Cadevy^kP$?KknA4~j=GMLQ-f*eQCdQp5<_jvhV7<pZE7+tjZm5jpXilfDB+S5
zB61)rA9b{XB2RL{NKXo!8W62;T59P5N#7&U#T5*v)@b(llbq?gl;m9toEkOMz2sS`
z9)35P@HCO2_vy55dj#y(EVnU75}pZF*D#_menLIea!+NG8M0~(XtDb>0Z&+>iMV>`
zF|jt2-i<BN>p(?WQ5eD)Xo`VoT@K*T?j~)WP8dpi-6kqsza)xKk?Vqfa?O35c)|xM
z%vdU@Twf%D^vz(1pe{KG!-?%-Vwd0Paeh%H*BkV2@(}YiaswYD<bZfo#_rlL)}Tiy
z9f+i;R_yldb<&b(@2HTOF@S`y%7MIuoCqtzKWW3u(?1XfKY?)ek{7ZN2>bsT*#AF=
zT}<}B%I|*z_Wn`u9JmF19Bcz;gGXWKFM?~q2f;buRPY+?`agpk!DjF(Z2G&v?cgSG
z0PF!4*aB99*I?H_3J!uR!6vW~jDzLiN!ay=z~{h5uo@WP0oe7Q1x+vjUWNS6fJXuK
z<uLkm2pj+uzyAcNfHx4Qe-L~hd>PDv9pGYcE;x!<{UPuTupd}}V)zule+t|KD2D$@
zuoj#Ho?HeW2ly)30!{|cB3^$NH~{Kk8@K?xAY=F6#`%jfhM%1E>TkVz)fIK_iRkvu
zN>x-l^bK_>dCp_=S$PF{Y`!p+IbGt-F?F}k4wWjC$L4$AGP4(W|Fzuz&hdXy=CS!G
zxk$VnAFnbpzn7ESsNQ7}7EaB3Dy6UVu9Q4BAL}tSy<s54H;(zoo3$pW!tBLIcMvt2
zkaomEl`SBh#h}Ys$3Dx8l3#QP4lliw!IlUQby;?*o*^0DHPi(<i)WXr3}yf|ntBL(
zD<Z|L;Uaq=(XI#?>AINNL<WHfz9Y=0ucxZ&mL1I{YD^V{wBpECWIE1F!Q5hGarOk8
z)6aJ)0PX+phnWqj32{oz;^YmuH=-CKYy0l3@&zP9ygF`~ehi;J4N^~gKA*GPPLO!q
z=y^(Apr=81E=m6fuEPW1x>SkO9!HrneNtb<2&Qs(RKkm_u3DwgBri2=x4E~fqL643
z2a-MLFvJIT>4HhNjQk`|CdHW}14hTxk)$d!WGGlm*AeUvEsDMS%;$)}Q-~C<07Q93
zu1%Z_vWZp=Wu?$95dcOpmC+(RduFV9s#oXM;44JkQSwqQ4!1c4WupukS)(ww=xK^b
zvwA&#kE++jD#evJv8W!4b{V7!gb3Z4GE@~=<Rn6r1><@2H~bv|DS%Q|<b074`P6_U
z(gd_CK=ff_82VPD6LJ}-W;p89n(JHB79|k~wIcX@@k|(cLA6pb{RL~T?E2Fcvk_EA
z$4AG?BWuRijeew1(=)mgL_v1G;Mu~52PI3T^~<_on-<BhV0P>%siKYkicv)W5VLnM
zx)4X^7eL!_bvutM3q#D9WMYWYmsp;%vXs_j@(x@}q0fp^w26X}t1ze?EG)`sLQ(;u
z_TdbYrFxSvXo6VN4wAZB!;Di(7Cb^KAwg?yYn#=jX!Oofr&2V<7d@`#E5^qWb6(7@
zc?*Nz;K+fBR8Bj6tefQOV#t>ZIdarnhH)b7QV%vO+5hVI`;WlRr~LkTum!9H|ALMG
zSMWTz2iy*>15;odSO-o6l-Iu(Yz3Er4WI~40+iQ(82l0hU>s}!tHA}}aoGG{0PBDO
zo`9`?Czt~*a1B@sUWd*9C-4xs75Lx^FacJAm*4}q6C4B~Xn-mxf|CLHJZ=TogK4lE
z41yQo19%vG2Yd-!3N8f8z^~v3xDD(B7lEU&{eK5;20qvh)`8{V3Fz}aZ~zG1qC5HV
z|EK|$aim{sG76+F^)H?KdSXSrWoMQ2oA%gRRCDg@$$dT1Pn=+Dh<DfgcNl8L>6g;u
zc5B2&27WV^x!$Pm?A@-v<<$Sq+wb_weLbFh(N*gq>m}GwQv`3L=`4GCt);+{c6TOY
zmFltH5sgiv{C5~INcGCge}@s<Qpflm2F1APNSa~7@W%g4&P-+4vZ~M9)p}}D#};?w
zfUB~q$jz?S%Q`!|9#45`GRL2<5*bekgPGDYtm&Bs661wY?wDRM<A$l+c(SSW&jetr
zA@Lnnocg_hcrNkt+xRC8Syn17Dwv7Nc9aI^9QWt(#fD&Sz1Jlpmw1_N4Pj&>tz;I7
zHQIzuFX=9eSNwP-?!9&(OvKl+atSu8R<K{-|7|CKP3)IZX85tx){}3L8pY%PsSPQN
zj_mItZP6Q|RM$U+RHIUn1)X8iSJ~Q^iCn2-a+XTJZ^Fz+S>ZEHT$&8MDd7wXJ#u7;
TX0#VK-IcDJw3GM$Ezy4fV@XcO

literal 0
HcmV?d00001

diff --git a/majortrack/method.py b/majortrack/method.py
new file mode 100644
index 0000000..419910c
--- /dev/null
+++ b/majortrack/method.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Module documentation goes here
+   and here
+   and ...
+"""
+
+
+class MajorTrack(object):
+    r"""
+
+    Parameters
+    ===========
+
+    clusterings: list, dict
+      Sequence of clusterings.
+      **If provided as a `dict`**:
+        keys: float, datetime
+          The time points.
+        values: list, dict
+          The membership list of each clustering indicating to which cluster
+          a data source belongs.
+          See :obj:`~MajorTrack.memberships` for details.
+    \**kwargs optional parameter:
+      timepoints: list
+        The time points of each clustering.
+
+        Note
+        -----
+        If `clusterings` is of type `dict` then the keys will be used as time
+          points and this optional parameter is ignored, even if provided.
+
+      group_matchup_method: str (default='fraction')
+        Set the method to calculate the similarity between two clusters from
+        different clusterings. By default the fraction of identical members is
+        used as explained in
+        `the original article <https://arxiv.org/abs/1912.04261>`_.
+
+    Attributes
+    ==========
+    group_matchup: list
+      Holds for each time point the tracing and mapping sets of all clusters.
+      Each element is a `dict` with the keys ``'forward'`` and ``'backward'``.
+      Both hold a `dict` indicating for a cluster the best matching cluster
+      along with the similarity score of the particular relation in a `tuple`.
+      
+      Example
+      -------
+      .. code-block:: python
+
+        self.group_matchup[1] = {
+            'backward': {0: (0, 1.0), ...},
+                         ^   ^  ^
+                         |   |  similarity score
+                         |   cluster from previous time point
+                         cluster from current time point.
+            }
+
+    """
+    def __init__(self, clusterings, **kwargs):
+        assert isinstance(clusterings, (list, dict))
+        if isinstance(clusterings, list):
+            self.timepoints = kwargs.pop(
+                    'timepoints', list(range(len(clusterings)))
+                    )
+            self.clusterings = clusterings
+            # sort both clusterings and timepoints according to timepoints
+            self.timepoints, self.clusterings = zip(
+                    *sorted(
+                        zip(self.timepoints, self.clusterings),
+                        key=lambda x: x[0]
+                        )
+                    )
+        else:
+            self.timepoints = sorted(clusterings.keys())
+            self.clusterings = list(clusterings[tp] for tp in self.timepoints)
+
+        self.group_matchup_method = kwargs.get(
+                    'group_matchup_method',
+                    'fraction'
+                    )
+
+    def get_group_matchup(self, matchup_method=None):
+        r"""
+        Determine majority relation between neighbouring snapshots.
+
+        Parameters
+        ===========
+        matchup_method: str (default=None)
+          If provided this overwrites `self.group_matchup_method`. It
+          determines the method to use when calculating similarities between
+          clusters from neighbouring snapshots.
+
+        Returns
+        =======
+        self: :class:`.MajorTrack`
+          with new attribute :ref:`group_matchup`.
+
+        ########
+        Between each pair of consecutive time points all groups are compared
+        and matched (if possible) using `matchup_method`.
+
+        Set:
+        ----
+        - self.group_matchup: List holding for each time point a dictionary
+            with 'backward'/'forward' matchups. A matchup is a dict indicating
+            for each group (id) the best match. The best match is given by a
+            tuple with group id and similarity score.
+            E.g.: self.group_matchup[1] = {
+                'backward': {0: (0, 1.0), ...},
+                'forward': {0: (1, 0.7), ...}
+                }
+        """
+        if matchup_method is None:
+            matchup_method = self.group_matchup_method
+        # if self.group_matchup:
+        self.group_matchup = []
+        # if self.group_similarities:
+        self.group_similarities = []
+        self.group_matchup.append(
+                {
+                    'backward': {
+                        _group_id: (None, None)
+                        for _group_id in range(len(self.groupings[0]))
+                        }
+                    }
+                )
+        self.group_similarities.append(
+                {
+                    'backward': {
+                        _group_id: None
+                        for _group_id in range(len(self.groupings[0]))
+                        }
+                    }
+                )
+        for _idx in range(self.length - 1):
+            _group_similarities = self._get_group_similarities(
+                    _idx, _idx + 1,
+                    method=matchup_method
+                    )
+            # set forward matchup/similarities for current step
+            self.group_matchup[-1][
+                    'forward'
+                    ] = _group_similarities['forward']['matchup']
+            self.group_similarities[-1][
+                    'forward'
+                    ] = _group_similarities['forward']['similarities']
+            # create backward matchup/similarities for next step
+            self.group_matchup.append(
+                    {'backward': _group_similarities[
+                        'backward'
+                        ]['matchup']}
+                    )
+            self.group_similarities.append(
+                    {'backward': _group_similarities[
+                        'backward'
+                        ]['similarities']}
+                    )
+        # complete forward matchup/similarities with None's
+        self.group_matchup[-1]['forward'] = {
+                _group_id: (None, None)
+                for _group_id in range(len(self.groupings[-1]))
+                }
+        self.group_similarities[-1]['forward'] = {
+                _group_id: None
+                for _group_id in range(len(self.groupings[-1]))
+                }
+
+    def get_span(self, idx, span_set, get_indivs=True):
+        r"""
+        Create the tracer tree.
+
+        Parameters
+        ===========
+
+        ####
+        Get the span (time forward)
+        """
+        span_tree = {}
+        if isinstance(span_set, int):
+            span_tree[idx] = [self.groupings[idx][span_set]]
+        elif isinstance(span_set, str):
+            span_tree[idx] = filter(
+                    lambda g: span_set in g,
+                    self.groupings[idx]
+                    )
+        else:
+            span_tree[idx] = [span_set]
+        current_set = set.union(*span_tree[idx])
+        for _idx in range(idx + 1, self.length):
+            next_groupings = self.groupings[_idx]
+            next_contained = [*filter(
+                    lambda grp: any([memb in current_set for memb in grp]),
+                    next_groupings
+                    )]
+            if next_contained:
+                span_tree[_idx] = next_contained
+                current_set = set.union(*next_contained)
+            else:
+                break
+        if get_indivs:
+            return span_tree
+        else:
+            span_tree_idxs = {}
+            for _idx in span_tree:
+                _span_set = set.union(*span_tree[_idx])
+                span_tree_idxs[_idx] = [
+                        i for i in range(len(self.groupings[_idx]))
+                        if any([
+                            el in _span_set
+                            for el in self.groupings[_idx][i]
+                            ])
+                        ]
+            return span_tree_idxs
diff --git a/majortrack/tracker.py b/majortrack/tracker.py
index f0655f7..60ed099 100644
--- a/majortrack/tracker.py
+++ b/majortrack/tracker.py
@@ -63,7 +63,7 @@ class MajorTrack(object):
         Determine if :obj:`~LazyList`'s should be used to store data about
         dynamic clusters or normal lists.
         Most likely you want to use normal lists.
-        
+
     .. bibliography:: ../references.bib
 
     Attributes
@@ -102,7 +102,7 @@ class MajorTrack(object):
 
     group_mappings: list(list)
       Holds for each slice a list of mapping sets. The list is ordered like
-      :attr:`~.MajorTrack.grougings`.
+      :obj:`~.MajorTrack.groupings`.
 
       Example
       --------
@@ -234,6 +234,7 @@ def __init__(self, clusterings, history, **kwargs):
         else:
             self.timepoints = sorted(clusterings.keys())
             self.clusterings = list(clusterings[tp] for tp in self.timepoints)
+        assert isinstance(self.clusterings[0], set)
         self.length = len(self.timepoints)
         # now determine the slice widths
         self.slice_widths = kwargs.get('slice_widths', None)
@@ -382,11 +383,11 @@ def combined_population(
 
         If further arguments are provided (all have to be unnamed), then the
         union is taken between all of these time points.
-            
+
         Example
         -------
         .. code-block:: python
-              
+
           self.resident_population(2,4,5)
 
         This will return the combined population of the time points 2, 4
@@ -846,14 +847,15 @@ def get_group_matchup(self, matchup_method=None):
         Parameters
         ===========
         matchup_method: str (default=None)
-          If provided this overwrites :attr:`~.MajorTrack.group_matchup_method`.
+          If provided this overwrites
+          :attr:`~majortrack.MajorTrack.group_matchup_method`.
           It determines the method to use when calculating similarities between
           clusters from neighbouring snapshots.
 
         Returns
         =======
         self: :class:`.MajorTrack`
-          with new attribute :attr:`~.MajorTrack.group_matchup`.
+          with new attribute :obj:`~.MajorTrack.group_matchup`.
 
         """
         if matchup_method is None:
@@ -1956,7 +1958,7 @@ def get_community_shrinkages(self,):
         None: None
           Adds new attributes:
 
-          - attr:`~.MajorTrack.community_shrinkages`
+          - :attr:`~.MajorTrack.community_shrinkages`
         """
         # birth events are not growth events
         self.community_shrinkages = [[]]
@@ -2073,7 +2075,7 @@ def get_auto_corrs(self, residents=True):
         None: None
           Adds new attributes:
 
-          - attr:`~.MajorTrack.community_autocorrs`
+          - :attr:`~.MajorTrack.community_autocorrs`
         """
         self.community_autocorrs = {}
         for idx in range(1, self.length):