From a5317f42a45815d89821c9ecc1dd1279215738be Mon Sep 17 00:00:00 2001 From: Jonas I Liechti Date: Thu, 20 Feb 2020 20:16:54 +0100 Subject: [PATCH] assert each cluster is a set --- majortrack/.method.py.swp | Bin 0 -> 24576 bytes majortrack/method.py | 215 ++++++++++++++++++++++++++++++++++++++ majortrack/tracker.py | 18 ++-- 3 files changed, 225 insertions(+), 8 deletions(-) create mode 100644 majortrack/.method.py.swp create mode 100644 majortrack/method.py diff --git a/majortrack/.method.py.swp b/majortrack/.method.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..6b039d365d2fff30b1c383c30c299680a4952fc0 GIT binary patch literal 24576 zcmeI4d#oH)9mhw?%OZj_1Ve!wdSiC)?cLpbFMZvWt7$2g)>4qR1arCU&hDPQciOo# z4i5)FwO2}DC;3H*T~L<0tjF{F_MqJrPwnRDjM?9Sd6 z0@27!^6A{2IdjhM{LW+N_t-W@uHJe%TVGyd;5gGTR;+$y>i(fM##Lt<#va!(1G;{U zW1CxY`26l2Td!HVU}cuKEVCl2t*jYok8W5$W|qvg>v(C++O_S{`1)3(v}U9+KC*sn zytQHdI$6~7-95Y+29;TJkL!oN+1y*Xoc`P;{!Cn*<>8FmDtn7d3zFZ>HSn&~!0Eyfp@D0LhnT5GwA#YQUNRF z`8{3d6Xj=NJmBy1yY8PPKkt&~(XRW;yfp6Io&Y61}Ofo?EfQY7{=q^Ft`WY0d4~~fg8a$!2)oB z18U$3umg;N72xIfqYm&h@Lg~PD1(2Xt;fMHWV;^D17I2W5ctFUa39<8oEOmGyAkwah)xCERIj=&l78*n%HJlGBf!Rv6!yZ{b^hn0hd<^+3d$6((FrzQ)5 zHEY?XZ-o{Q3RBnjEm~+G9b?|~TZPOb`}O@Qt4;fE$Ezi)o=iOzaJyaZEmSr=k2|em zsw|VolD(KU!{$uK`)_nWHk~Q^0Bic(3^`NEW3qH5sT_XGh*~X& z`KB|?i{d$(JJaC|E3whRYN{>LXm(%vujT%CMC;MQv|20{0j)=3h+CF39ZZf)4Q4k> z0?8g)tt#8$I^3XZ-A8i6*l%>-)mQq=&dl{jwPZ`xzsvpay#2~3k~W5gPO`;(3rdl5 zwMQR)^oEHzITfpWfLZNmV)pLFdY&b!lC3RWi!7uopm9mBDYYJ=X4Cg#=^$JB4zmKG zJYDmuRjcCS)CwKFBN}fvRknYm%8H}IY^1z?aF~_L<&4s5w=T%yBg1U8JR*yym-?mU z>2fuxK`HR$C~cB@7L#0I$Fn(`x3G>yw;7h0>9knxs(#lY z1qq$<7As;xcy16_4Vw?LPC#wc(=(*5mA8q9^BhVL5_qQNGq=r}t`qQPCxm*KmJ0o} z?PyT&r_WJr*2B3K3#H~+i?^BH!e!fHZyZ)?liwN>wAQg%$5k*KVkJ?#LKy+A0%W@K zR_`n7lUGAcjb1#^^B)#)R8EpF44K5TPO_6ZxPzU6OD8|*;g)l<%39` z>*=p3jDiK-2chXSd9u=BW@GphUr=Tca38a$Pw65R9~LuA%0ud{)hx#fYc;9WgwVrm zXebg$wxLFaI^B@N9nQOQdC^rIEvwz;co~R3>CYicRIR5?T)xkJOi!`?qIUxpx~ys1 z&5lj4d9*I0)qI?d(ZWqmL(EVyUA1INFg>JIp=EnfB zoK@Y%o@)IDolsq#<+Ba$2Q!uz)WuqD@zAvFKs8A%V7Z#1iCNn5o3!WHF3nJbZbb+6 zKvyJ0qjlERi^K*(8k?W7u%XFL8mg)})9&yf>RvP};#*8?aS2Un5oonkS=)9^no1Z2 zDkUEr8rq5-!*}Pb7Pb$QwzE3zXZ4{WrJkMqQyq?ZEH;P4A`~^~VMvQ5ZcgY!e7;g? z9=Y@khflM!X=4pAt@I*B0XgopFBo$6(hi63A$o@XoWWeBTbH}74!$m0ZnHCsH(6}H zuEVBX9z-RK0ah9+VWXm*w#qtTyR?C>7y}npI)PtlSWbmIbIe;rs=Q;6{r@r8wVPn~ zlKrpZ^AE#*zZH;uUjgTXBe30n3l4$>Fb_To#=u$N1K@So??=Fk;A!w6_z}1nxL^z{ z2hYQve-L~f>;pDXcK%;r&)*Ba0w%z@U>SH0_Wb=|AD97Gfe(WVz?tA>*!GWsn}H3A z;1uvj^zD1#2G9bcuVgppN3MZf1Gxrr4dfcgHSp#%KyNZVhtI=~@36o#om%K~P7bTU z=qV;IXm>+aiK%Q)h$JxHEAtfITugXgrM*n8miXrGfW!OSuAF| zJE^cGUXZONU1CC_F2jnRO{*yqoE#_y-Ao>_oWeU2mOBpK`HDmF^U!z#HGg~7! zE;&WxZjuMQ2KyW~h{IdyUebecH)0K@pD?J9Iv{NNZm+;pbTE7Rh(}{zr=coMo^5ls z(K2Cadevy^kP$?KknA4~j=GMLQ-f*eQCdQp5<_jvhV70Z&+>iMV>` zF|jt2-ip(?WQ5eD)Xo`VoT@K*T?j~)WP8dpi-6kqsza)xKk?Vqfa?O35c)|xM z%vdU@Twf%D^vz(1pe{KG!-?%-Vwd0Paeh%H*BkV2@(}YiaswYDbsT*#AF= zT}<}B%I|*z_Wn`u9JmF19Bcz;gGXWKFM?~q2f;buRPY+?`agpk!DjF(Z2G&v?cgSG z0PF!4*aB99*I?H_3J!uR!6vW~jDzLiN!ay=z~{h5uo@WP0oe7Q1x+vjUWNS6fJXuK zPDv9pGYcE;x!<{UPuTupd}}V)zule+t|KD2D$@ zuoj#Ho?HeW2ly)30!{|cB3^$NH~{Kk8@K?xAY=F6#`%jfhM%1E>TkVz)fIK_iRkvu zN>x-l^bK_>dCp_=S$PF{Y`!p+IbGt-F?F}k4wWjC$L4$AGP4(W|Fzuz&hdXy=CS!G zxk$VnAFnbpzn7ESsNQ7}7EaB3Dy6UVu9Q4BAL}tSyAINNLSkO9!HrneNtb<2&Qs(RKkm_u3DwgBri2=x4E~fqL643 z2a-MLFvJIT>4HhNjQk`|CdHW}14hTxk)$d!WGGlm*AeUvEsDMS%;$)}Q-~C<07Q93 zu1%Z_vWZp=Wu?$95dcOpmC+(RduFV9s#oXM;44JkQSwqQ4!1c4WupukS)(ww=xK^b zvwA&#kE++jD#evJv8W!4b{V7!gb3Z4GE@~=<@2H~bv|DS%Q|%siKYkicv)W5VLnM zx)4X^7eL!_bvutM3q#D9WMYWYmsp;%vXs_j@(x@}q0fp^w26X}t1ze?EG)`sLQ(;u z_TdbYrFxSvXo6VN4wAZB!;Di(7Cb^KAwg?yYn#=jX!Oofr&2V<7d@`#E5^qWb6(7@ zc?*Nz;K+fBR8Bj6tefQOV#t>ZIdarnhH)b7QV%vO+5hVI`;WlRr~LkTum!9H|ALMG zSMWTz2iy*>15;odSO-o6l-Iu(Yz3Er4WI~40+iQ(82l0hU>s}!tHA}}aoGG{0PBDO zo`9`?Czt~*a1B@sUWd*9C-4xs75Lx^FacJAm*4}q6C4B~Xn-mxf|CLHJZ=TogK4lE z41yQo19%vG2Yd-!3N8f8z^~v3xDD(B7lEU&{eK5;20qvh)`8{V3Fz}aZ~zG1qC5HV z|EK|$aim{sG76+F^)H?KdSXSrWoMQ2oA%gRRCDg@$$dT1Pn=+Dh6g;u zc5B2&27WV^x!$Pm?A@-v<<$Sq+wb_weLbFh(N*gq>m}GwQv`3L=`4GCt);+{c6TOY zmFltH5sgiv{C5~INcGCge}@stz;I7 zHQIzuFX=9eSNwP-?!9&(OvKl+atSu8RZEHT$&8MDd7wXJ#u7; TX0#VK-IcDJw3GM$Ezy4fV@XcO literal 0 HcmV?d00001 diff --git a/majortrack/method.py b/majortrack/method.py new file mode 100644 index 0000000..419910c --- /dev/null +++ b/majortrack/method.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Module documentation goes here + and here + and ... +""" + + +class MajorTrack(object): + r""" + + Parameters + =========== + + clusterings: list, dict + Sequence of clusterings. + **If provided as a `dict`**: + keys: float, datetime + The time points. + values: list, dict + The membership list of each clustering indicating to which cluster + a data source belongs. + See :obj:`~MajorTrack.memberships` for details. + \**kwargs optional parameter: + timepoints: list + The time points of each clustering. + + Note + ----- + If `clusterings` if of type `dict` then the keys will be used as time + points and this optional parameter is ignored, even if provided. + + group_matchup_method: str (default='fraction') + Set the method to calculate the similarity between two clusters from + different clusterings. By default the fraction of identical members is + used as explained in + `the original article '_. + + Attributes + ========== + group_matchup: list + Holds for each time point the tracing and mapping sets of all clusters. + Each element is a `dict` with the keys ``'forward'`` and ``'backward'``. + Both hold a `dict` indicating for a cluster the best matching cluster + along with the similarity score of the particular relation in a `tuple`. + + Example + ------- + :: code-block: python + + self.group_matchup[1] = { + 'backward': {0: (0, 1.0), ...}, + ^ ^ ^ + | | similarity score + | cluster from previous time point + cluster from current time point. + } + + """ + def __init__(self, clusterings, **kwargs): + assert isinstance(clusterings, (list, dict)) + if isinstance(clusterings, list): + self.timepoints = kwargs.pop( + 'timepoints', list(range(len(clusterings))) + ) + self.clusterings = clusterings + # sort both clusterings and timepoints according to timepoints + self.timepoints, self.clusterings = zip( + *sorted( + zip(self.timepoints, self.clusterings), + key=lambda x: x[0] + ) + ) + else: + self.timepoints = sorted(clusterings.keys()) + self.clusterings = list(clusterings[tp] for tp in self.timepoints) + + self.group_matchup_method = kwargs.get( + 'group_matchup_method', + 'fraction' + ) + + def get_group_matchup(self, matchup_method=None): + r""" + Determine majority relation between neighbouring snapshots. + + Parameters + =========== + matchup_method: str (default=None) + If provided this overwrites `self.group_matchup_method. It determines + the method to use when calculating similarities between clusters from + neighbouring snapshots. + + Returns + ======= + self: :class:`.MajorTrack` + with new attribute :ref:`group_matchup`. + + ######## + Between each pair of consecutive time points all groups are compared + and matched (if possible) using `matchup_method`. + + Set: + ---- + - self.group_matchup: List holding for each time point a dictionary + with 'backward'/'forward' matchups. A matchup is a dict indicating + for each group (id) the best match. The best match is given by a + tuple with group id and similarity score. + E.g.: self.group_matchup[1] = { + 'backward': {0: (0, 1.0), ...}, + 'forward': {0: (1, 0.7), ...} + } + """ + if matchup_method is None: + matchup_method = self.group_matchup_method + # if self.group_matchup: + self.group_matchup = [] + # if self.group_similarities: + self.group_similarities = [] + self.group_matchup.append( + { + 'backward': { + _group_id: (None, None) + for _group_id in range(len(self.groupings[0])) + } + } + ) + self.group_similarities.append( + { + 'backward': { + _group_id: None + for _group_id in range(len(self.groupings[0])) + } + } + ) + for _idx in range(self.length - 1): + _group_similarities = self._get_group_similarities( + _idx, _idx + 1, + method=matchup_method + ) + # set forward matchup/similarities for current step + self.group_matchup[-1][ + 'forward' + ] = _group_similarities['forward']['matchup'] + self.group_similarities[-1][ + 'forward' + ] = _group_similarities['forward']['similarities'] + # create backward matchup/similarities for next step + self.group_matchup.append( + {'backward': _group_similarities[ + 'backward' + ]['matchup']} + ) + self.group_similarities.append( + {'backward': _group_similarities[ + 'backward' + ]['similarities']} + ) + # complete forward matchup/similarites with None's + self.group_matchup[-1]['forward'] = { + _group_id: (None, None) + for _group_id in range(len(self.groupings[-1])) + } + self.group_similarities[-1]['forward'] = { + _group_id: None + for _group_id in range(len(self.groupings[-1])) + } + + def get_span(self, idx, span_set, get_indivs=True): + r""" + Create the tracer tree. + + Parameters + =========== + + #### + Get the span (time forward) + """ + span_tree = {} + if isinstance(span_set, int): + span_tree[idx] = [self.groupings[idx][span_set]] + elif isinstance(span_set, str): + span_tree[idx] = filter( + lambda g: span_set in g, + self.groupings[idx] + ) + else: + span_tree[idx] = [span_set] + current_set = set.union(*span_tree[idx]) + for _idx in range(idx + 1, self.length): + next_groupings = self.groupings[_idx] + next_contained = [*filter( + lambda grp: any([memb in current_set for memb in grp]), + next_groupings + )] + if next_contained: + span_tree[_idx] = next_contained + current_set = set.union(*next_contained) + else: + break + if get_indivs: + return span_tree + else: + span_tree_idxs = {} + for _idx in span_tree: + _span_set = set.union(*span_tree[_idx]) + span_tree_idxs[_idx] = [ + i for i in range(len(self.groupings[_idx])) + if any([ + el in _span_set + for el in self.groupings[_idx][i] + ]) + ] + return span_tree_idxs diff --git a/majortrack/tracker.py b/majortrack/tracker.py index f0655f7..60ed099 100644 --- a/majortrack/tracker.py +++ b/majortrack/tracker.py @@ -63,7 +63,7 @@ class MajorTrack(object): Determine if :obj:`~LazyList`'s should be used to store data about dynamic clusters or normal lists. Most likely you want to use normal lists. - + .. bibliography:: ../references.bib Attributes @@ -102,7 +102,7 @@ class MajorTrack(object): group_mappings: list(list) Holds for each slice a list of mapping sets. The list is ordered like - :attr:`~.MajorTrack.grougings`. + :obj:`~.MajorTrack.grougings`. Example -------- @@ -234,6 +234,7 @@ def __init__(self, clusterings, history, **kwargs): else: self.timepoints = sorted(clusterings.keys()) self.clusterings = list(clusterings[tp] for tp in self.timepoints) + assert isinstance(self.clusterings[0], set) self.length = len(self.timepoints) # now determine the slice widths self.slice_widths = kwargs.get('slice_widths', None) @@ -382,11 +383,11 @@ def combined_population( If further arguments are provided (all have to be unnamed), then the union is taken between all of these time points. - + Example ------- .. code-block:: python - + self.resident_population(2,4,5) This will return the combined population of the time points 2, 4 @@ -846,14 +847,15 @@ def get_group_matchup(self, matchup_method=None): Parameters =========== matchup_method: str (default=None) - If provided this overwrites :attr:`~.MajorTrack.group_matchup_method`. + If provided this overwrites + :attr:`~majortrack.MajorTrack.group_matchup_method`. It determines the method to use when calculating similarities between clusters from neighbouring snapshots. Returns ======= self: :class:`.MajorTrack` - with new attribute :attr:`~.MajorTrack.group_matchup`. + with new attribute :obj:`~.MajorTrack.group_matchup`. """ if matchup_method is None: @@ -1956,7 +1958,7 @@ def get_community_shrinkages(self,): None: None Adds new attributes: - - attr:`~.MajorTrack.community_shrinkages` + - :attr:`~.MajorTrack.community_shrinkages` """ # birth events are not growth events self.community_shrinkages = [[]] @@ -2073,7 +2075,7 @@ def get_auto_corrs(self, residents=True): None: None Adds new attributes: - - attr:`~.MajorTrack.community_autocorrs` + - :attr:`~.MajorTrack.community_autocorrs` """ self.community_autocorrs = {} for idx in range(1, self.length):