In [1]:
import sys,os
sys.path.append(os.path.expanduser('~/caryocar'))

In [2]:
import scipy
import networkx
import itertools
from collections import Counter

# Collectors coworking Networks

In [3]:
class CoworkingNetwork(networkx.Graph):
    """
    Class for coworking networks. Extends networkx Graph class.
    
    Each clique is a group of names that appear together once. Every pair of 
    distinct names within a clique becomes an edge. Nodes and edges carry a 
    'count' attribute: the number of cliques in which the name (or the pair) 
    occurs.
    
    Parameters
    ----------
    data : optional
        Graph data forwarded to the networkx Graph constructor. It is only used
        when `cliques` is None; in that case no 'count' attributes are set.
    
    cliques : iterable
        An iterable of iterables containing names used to compose cliques 
        in the network.
        
    namesMap (optional) : caryocar.NamesMap.
        A caryocar NamesMap object for normalizing nodes names.
    
    Notes
    -----
    A name repeated inside one clique is counted once for that clique, so no 
    self-loops are created. 
    Calling the constructor without arguments yields an empty graph.
    
    Examples
    --------
    >>> collectors = [ ['a','b','c'], ['d','e'], ['a','c'] ]
    >>> cwn = CoworkingNetwork(cliques=collectors)
    
    >>> cwn.nodes(data=True)
    { 'a': {'count': 2}, 
      'b': {'count': 1}, 
      'c': {'count': 2}, 
      'd': {'count': 1}, 
      'e': {'count': 1} }    
      
    >>> cwn.edges(data=True)
    [ ('a', 'b', {'count': 1}), 
      ('a', 'c', {'count': 2}), 
      ('b', 'c', {'count': 1}), 
      ('d', 'e', {'count': 1}) ]
    """
    def __init__(self, data=None, cliques=None, namesMap=None, **attr):
        
        # stay empty when no cliques are given, so the attribute-setting
        # calls below become no-ops
        nodes_counts = Counter()
        edges_counts = Counter()
        
        if cliques is not None:
            if namesMap:
                nmap = namesMap.getMap()
                cliques = [ [ nmap[n] for n in nset ] for nset in cliques ]
            
            # prevent self-loops: keep each name once per clique
            cliques = [ list(dict.fromkeys(nset)) for nset in cliques ]
            
            nodes_counts = Counter( col for clique in cliques for col in clique )
            
            # Give every name a fixed rank and always emit an edge as
            # (lower rank, higher rank). Without a canonical orientation the
            # same tie could be counted under both (a,b) and (b,a), and only
            # one of the two partial counts would end up on the edge.
            rank = dict( (name,i) for i,name in enumerate(nodes_counts) )
            edges_counts = Counter( edge 
                                    for clique in cliques 
                                    for edge in itertools.combinations( sorted(clique, key=rank.__getitem__), r=2 ) )
            data = list(edges_counts)
        
        # The graph data is passed positionally: the name of the first
        # parameter of networkx.Graph.__init__ differs between networkx
        # releases, and a non-matching keyword would be stored as a graph
        # attribute instead of being used to build the edges.
        super().__init__(data, **attr)
        
        # insert nodes (including those with no ties) and set count attribute
        self.add_nodes_from(nodes_counts)
        networkx.set_node_attributes(self,values=nodes_counts,name='count')
        
        # set edges count attribute
        networkx.set_edge_attributes(self,values=edges_counts,name='count')

In [4]:
# Test fixture: each inner list is one clique, i.e. a group of collector names
# that appear together once. The repetitions are deliberate: the assertions in
# the cells below check the resulting node and edge 'count' attributes.
collectors = [
    # col1, col2, col3 and col4 are connected
    ['col1','col2','col3','col4'],
    ['col1','col2','col3'],
    ['col1','col2','col3'],
    ['col1','col3','col2'],
    ['col1','col2'],
    ['col1','col2'],
    ['col1','col2'],
    ['col1','col3'],
    ['col2','col3'],
    ['col2','col4'],
    ['col2','col4'],
    ['col4'],
    # col5 is isolated
    ['col5'],
    ['col5'],
    # col7 and col8 are connected
    ['col7','col8'],
    ['col7','col8'],
    # col9 would lead to self loop
    ['col9','col9'],
    ['col9','col9']
]

In [5]:
cwn=CoworkingNetwork(cliques=collectors)

#### Tests

In [6]:
# a name that never co-occurs with anyone still becomes a node
assert cwn.has_node('col5')

In [7]:
# every edge carries a 'count' attribute
assert all( count is not None for _, _, count in cwn.edges(data='count') )

# every node carries a 'count' attribute
assert all( count is not None for _, count in cwn.nodes(data='count') )

In [8]:
# the col1-col2 tie was observed 7 times
assert cwn.edges['col1','col2'].get('count') == 7

# col5 occurs twice, always alone
assert cwn.nodes['col5'].get('count') == 2
assert sum( attrs.get('count') for attrs in cwn['col5'].values() ) == 0

# col4 occurs 4 times, and its ties add up to 5
assert cwn.nodes['col4'].get('count') == 4
assert sum( attrs.get('count') for attrs in cwn['col4'].values() ) == 5

In [9]:
# connected components can be derived; networkx.connected_components yields
# one set of nodes per component (connected_component_subgraphs is no longer
# available in recent networkx releases)
[ list(component) for component in networkx.connected_components(cwn) ]

[['col3', 'col4', 'col2', 'col1'], ['col7', 'col8'], ['col5'], ['col9']]

In [10]:
# self-loops are not allowed:
## col9 must not be its own neighbour
assert 'col9' not in cwn['col9']

# col9 occurs in two cliques
assert cwn.nodes['col9'].get('count') == 2

### Using names maps

In [11]:
from caryocar.cleaning import NamesMap

# Remapping index handed to NamesMap: each key is a name variant and each
# value is the name that should replace it.
# NOTE(review): col7 -> col8 and col8 -> COL8 form a chain; the assertions in
# the cells below expect both to end up as 'COL8', so NamesMap presumably
# resolves chained remappings - confirm against caryocar.cleaning.NamesMap.
remapping = {
    'col7':'col8',
    'col8':'COL8',
    'col3':'COL_3'
}

# Names map covering every name in the fixture. The normalization function is
# the identity, so only the remapping index above is meant to alter names.
nm = NamesMap( names=[ n for clique in collectors for n in clique  ], 
               normalizationFunc=lambda x: x, 
               remappingIndex=remapping )

In [12]:
cwn = CoworkingNetwork(cliques=collectors,namesMap=nm)

#### Tests

In [13]:
cwn.edges(data=True)

EdgeDataView([('col4', 'COL_3', {'count': 1}), ('col4', 'col2', {'count': 3}), ('col4', 'col1', {'count': 1}), ('COL_3', 'col2', {'count': 5}), ('COL_3', 'col1', {'count': 5}), ('col2', 'col1', {'count': 7})])

In [14]:
# a name remapped to another one does not become a node (col7 remaps to col8)
assert 'col7' not in cwn.nodes()

# COL8 (the target of col8, and of col7 through col8) occurs 2 times
assert cwn.nodes['COL8'].get('count') == 2

In [15]:
# ties between two variants of an entity get suppressed (col7-col8, if col7 remaps to col8)
# len() counts the neighbours; summing the neighbour names would raise a
# TypeError instead of failing the assertion if any neighbour existed
assert( len(cwn['COL8'])==0 )

---

# Species-collectors Networks

In [275]:
class SpeciesCollectorsNetwork(networkx.Graph):
    """
    Class for Species-collectors networks. Extends networkx Graph class.
    
    The i-th species is linked to every collector in the i-th collectors list.
    Species nodes get the attribute bipartite=1 and collectors nodes get 
    bipartite=0. Nodes and edges carry a 'count' attribute: the number of 
    records in which the name (or the species-collector pair) occurs.
    
    Parameters
    ----------
    data : optional
        Graph data forwarded to the networkx Graph constructor. It is only used
        when species and collectors are not given; in that case no 'bipartite' 
        or 'count' attributes are set.
    
    species : List or iterable
        A list containing names of species to be associated, in order, to elements in the collectors list.
        
    collectors : List or iterable
        A list containing lists of collectors names, to be associated, in order, to elements in the species list.
        
    namesMap (optional) : caryocar.NamesMap
        A caryocar NamesMap object for normalizing nodes names.
    
    Raises
    ------
    ValueError
        If only one of species and collectors is given, if the input is not in
        the expected format or if both lists have different lengths.
    
    Notes
    -----
    For the model to be created both the species and collectors lists must have the same length.
    The ordering of both species and collectors list is important for creating bipartite edges.
    A collector repeated inside one record (for instance after names remapping) is counted
    once for that record.
    The biadjacency matrix is built on first use and then cached; it is not rebuilt if the
    graph is modified afterwards.
    
    Examples
    --------
    >>> cols=[ ['col1','col2','col3'],
               ['col1','col2'],
               ['col2','col3'],
               ['col4','col5'],
               ['col4'],
               ['col5','col4'] ]
      
    >>> spp=['sp1','sp2','sp3','sp2','sp3','sp2']
    
    >>> scn = SpeciesCollectorsNetwork( species=spp, collectors=cols )
    
    >>> scn.nodes(data=True)
    { 'sp1': {'bipartite': 1, 'count': 1}, 
      'col1': {'bipartite': 0, 'count': 2}, 
      'col2': {'bipartite': 0, 'count': 3}, 
      'col3': {'bipartite': 0, 'count': 2}, 
      'sp2': {'bipartite': 1, 'count': 3}, 
      'sp3': {'bipartite': 1, 'count': 2}, 
      'col4': {'bipartite': 0, 'count': 3}, 
      'col5': {'bipartite': 0, 'count': 2} }
      
    >>> scn.edges(data=True)
    [ ('sp1', 'col1', {'count': 1}), 
      ('sp1', 'col2', {'count': 1}), 
      ('sp1', 'col3', {'count': 1}), 
      ('col1', 'sp2', {'count': 1}), 
      ('col2', 'sp2', {'count': 1}), 
      ('col2', 'sp3', {'count': 1}), 
      ('col3', 'sp3', {'count': 1}), 
      ('sp2', 'col4', {'count': 2}), 
      ('sp2', 'col5', {'count': 2}), 
      ('sp3', 'col4', {'count': 1}) ]    
    """
    def __init__(self, data=None, species=None, collectors=None, namesMap=None, **attr):
        
        # cache for the (collectors, species, matrix) triple, filled on demand
        self._biadj_matrix = None
        
        if (species is None) != (collectors is None):
            raise ValueError("Species and collectors data must be provided together.")
        
        # stay empty when no records are given, so the attribute-setting
        # calls below become no-ops
        bipartite_flags = {}
        nodes_counts = Counter()
        edges_counts = Counter()
        
        if species is not None:
            # materialize first: the input may be a one-shot iterable, and it
            # is traversed more than once below
            species = list(species)
            collectors = list(collectors)
            self._parseInputData(species,collectors)
            
            if namesMap:
                nmap = namesMap.getMap()
                collectors = [ [ nmap[n] for n in nset ] for nset in collectors ]
            
            # a collector counts once per record, even if listed twice
            collectors = [ list(dict.fromkeys(nset)) for nset in collectors ]
            
            # build edges, always oriented as (species, collector)
            edges_counts = Counter( (sp,col) for sp,cols in zip(species,collectors) for col in cols )
            data = list(edges_counts)
            
            # collectors are written last, so they win if a name is in both sets
            bipartite_flags = dict( (n,1) for n in species )
            bipartite_flags.update( (n,0) for cols in collectors for n in cols )
            
            nodes_counts = Counter( c for cols in collectors for c in cols )
            nodes_counts.update( species )
        
        # The graph data is passed positionally: the name of the first
        # parameter of networkx.Graph.__init__ differs between networkx
        # releases, and a non-matching keyword would be stored as a graph
        # attribute instead of being used to build the edges.
        super().__init__(data, **attr)
        
        networkx.set_node_attributes( self, values=bipartite_flags, name='bipartite' )
        networkx.set_node_attributes( self, values=nodes_counts, name='count' )
        networkx.set_edge_attributes( self, values=edges_counts, name='count' ) 
    
    def _parseInputData( self, species, collectors ):
        """
        Validates the input records. Both arguments must be lists.
        
        Raises
        ------
        ValueError
            If collectors is not a list of lists of strings, if species is not 
            a list of strings or if both lists have different lengths.
        """
        # Check format: each record must be a list, holding only strings
        if not all( isinstance(lst,list) and all( isinstance(c,str) for c in lst ) 
                    for lst in collectors ):
            raise ValueError("Collectors data input must be in the format of list of lists of strings.")
        
        if not all( isinstance(sp,str) for sp in species ):
            raise ValueError("Species data input must be in the format of list of strings.")
            
        # Check lengths
        if len(species)!=len(collectors):
            raise ValueError("Species and collectors data lists have different lengths.")
        return
    
    def _buildBiadjMatrix( self, col_sp_order=None ):
        """
        Builds and caches the biadjacency matrix, weighted by the edges 'count' attribute.
        Rows are collectors and columns are species.
        
        Parameters
        ----------
        col_sp_order : tuple (collectorsIds, speciesIds), optional
            Rows and columns ordering. Defaults to both sets sorted.
        """
        if col_sp_order is None:
            col_sp_order = ( sorted(self.listCollectorsNodes()), sorted(self.listSpeciesNodes()) )
        m = networkx.bipartite.biadjacency_matrix(self,
                                                  row_order=col_sp_order[0],
                                                  column_order=col_sp_order[1],
                                                  weight='count')
        self._biadj_matrix = (*col_sp_order,m)
        
    def _getBiadjMatrix( self ):
        """
        Returns the cached (collectorsIds, speciesIds, matrix) triple, building it on first use.
        """
        if self._biadj_matrix is None:
            self._buildBiadjMatrix()
        return self._biadj_matrix
    
    def _listNodesBySet( self, bipartite, data=False ):
        """
        Lists the nodes whose 'bipartite' attribute equals the given value.
        The data argument is handled as described in listSpeciesNodes.
        """
        if data is False:
            return [ n for n,b in self.nodes(data='bipartite') if b==bipartite ]
        members = set( n for n,b in self.nodes(data='bipartite') if b==bipartite )
        return [ (n,d) for n,d in self.nodes(data=data) if n in members ]
        
    def listSpeciesNodes(self,data=False):
        """
        Lists nodes from the species set.
        
        Parameters
        ----------
        data : string or bool, default=False
            If False only nodes ids are returned.
            If True nodes ids are returned with their respective attribute dicts as (n, attrDict).
            If a string is passed (with an attribute name) then its value is returned in a 2-tuple (n, attrValue).
        
        Returns
        -------
        Either a list of tuples (n,attrDict) or (n,attrValue) where n is the node's id; or a list of nodes id's n.
        
        Note
        ----
        It is not guaranteed that the same order will be maintained in multiple calls of this function.
        """
        return self._listNodesBySet( 1, data=data )
        
    def listCollectorsNodes(self,data=False):
        """
        Lists nodes from the collectors set.
        
        Parameters
        ----------
        data : string or bool, default=False
            If False only nodes ids are returned.
            If True nodes ids are returned with their respective attribute dicts as (n, attrDict).
            If a string is passed (with an attribute name) then its value is returned in a 2-tuple (n, attrValue).
        
        Returns
        -------
        Either a list of tuples (n,attrDict) or (n,attrValue) where n is the node's id; or a list of nodes id's n.
        
        Note
        ----
        It is not guaranteed that the same order will be maintained in multiple calls of this function.
        """
        return self._listNodesBySet( 0, data=data )
    
    def getSpeciesBag( self, collector ):
        """
        Parameters
        ----------
        collector : string
          The id of the collector from which to derive the species bag vector.
          
        Returns
        -------
        A tuple (spIds, vector), where the first element is a list containing all species names and
        the second is the vector containing their counts.
        The species bag vector is stored as a 1xn SciPy sparse matrix.
        
        Raises
        ------
        ValueError
          If the collector is not a collectors node of the network.
        """
        colList, spList, m = self._getBiadjMatrix()
        i = colList.index(collector)
        # Indexing with a list keeps the result two-dimensional (1xn) and works
        # for both SciPy sparse matrices and sparse arrays (which lack getrow).
        vector = m[[i], :]
        return (spList, vector)
    
    def getInterestVector( self, species ):
        """
        Parameters
        ----------
        species : string
          The id of the species from which to derive the interest vector.
          
        Returns
        -------
        A tuple (colIds, vector), where the first element is a list containing all collectors names and
        the second is the vector containing their counts.
        The interest vector is stored as a 1xn SciPy sparse matrix.
        
        Raises
        ------
        ValueError
          If the species is not a species node of the network.
        """
        colList, spList, m = self._getBiadjMatrix()
        i = spList.index(species)
        # Species are the columns: take the column as nx1 and transpose it to 1xn.
        vector = m[:, [i]].transpose()
        return (colList,vector)



In [276]:
# Test fixture: one collectors list per record. Record i pairs the species
# spp[i] with every collector in cols[i], so both lists have the same length.
cols=[['col1','col2','col3'],
      ['col1','col2'],
      ['col2','col3'],
      ['col4','col5'],
      ['col4'],
      ['col5','col4']]

# species of each record, in the same order as cols
spp=['sp1','sp2','sp3','sp2','sp3','sp2']

In [277]:
scn = SpeciesCollectorsNetwork(species=spp,collectors=cols,)

#### Tests

In [278]:
# Collectors and species input data
def _raises_value_error(species, collectors):
    """Tells whether building a SpeciesCollectorsNetwork from the input raises a ValueError."""
    try:
        SpeciesCollectorsNetwork(species=species,collectors=collectors)
    except ValueError:
        return True
    return False

# Collectors input data must be list of lists of strings
# 1
assert( _raises_value_error( species=['sp1','sp2','sp3'], 
                             collectors=['col1','col2','col3'] ) )

# 2
assert( _raises_value_error( species=['sp1','sp2','sp3'], 
                             collectors=[[['col1','col2','col3']]] ) )

# Species input data must be list of strings
assert( _raises_value_error( species=[['sp1'],'sp2','sp3'], 
                             collectors=[['col1'],['col2'],['col3']] ) )

# Correct case
assert( SpeciesCollectorsNetwork( species=['sp1','sp2','sp3'], 
                                  collectors=[['col1'],['col2'],['col3']] ) )

# Species and collectors input data lists must have the same lengths
#1 (correct case)
assert( SpeciesCollectorsNetwork( species=['sp1','sp2','sp3'], 
                                  collectors=[['col1', 'col4'],['col2'],['col3']] ) )

#2 (more species than collectors lists)
assert( _raises_value_error( species=['sp1','sp2','sp3'], 
                             collectors=[['col1', 'col4'],['col2']] ) )

#3 (fewer species than collectors lists)
assert( _raises_value_error( species=['sp1'], 
                             collectors=[['col1', 'col4'],['col2']] ) )

In [None]:
# Species a

In [211]:
scn.getSpeciesBag('col1')[1].todense()

matrix([[1, 1, 0]], dtype=int64)

In [176]:
scn.edges(data=True)

EdgeDataView([('sp1', 'col1', {'count': 1}), ('sp1', 'col2', {'count': 1}), ('sp1', 'col3', {'count': 1}), ('col1', 'sp2', {'count': 1}), ('col2', 'sp2', {'count': 1}), ('col2', 'sp3', {'count': 1}), ('col3', 'sp3', {'count': 1}), ('sp2', 'col4', {'count': 2}), ('sp2', 'col5', {'count': 2}), ('sp3', 'col4', {'count': 1})])

In [164]:
scn.listSpeciesNodes(data=False)

['sp1', 'sp2', 'sp3']

In [170]:
scn.listCollectorsNodes(data='counta')

[('col1', None),
 ('col2', None),
 ('col3', None),
 ('col4', None),
 ('col5', None)]

In [165]:
list(scn.nodes(data=False))

['sp1', 'col1', 'col2', 'col3', 'sp2', 'sp3', 'col4', 'col5']

In [122]:
list(scn.nodes(data='bipartite'))

[('sp1', 1),
 ('col1', 0),
 ('col2', 0),
 ('col3', 0),
 ('sp2', 1),
 ('sp3', 1),
 ('col4', 0),
 ('col5', 0)]

In [137]:
list(scn.nodes(data=False))

['sp1', 'col1', 'col2', 'col3', 'sp2', 'sp3', 'col4', 'col5']

In [124]:
scn.nodes & {scn.nodes(data='bipartite)}

SyntaxError: EOL while scanning string literal (<ipython-input-124-02c2b496f3e8>, line 1)

In [64]:
# the two node sets of the bipartite graph, as a tuple of sets
networkx.bipartite.sets(scn)

({'sp1', 'sp2', 'sp3'}, {'col1', 'col2', 'col3', 'col4', 'col5'})

In [93]:
scn.edges

EdgeView([('sp1', 'col1'), ('sp1', 'col2'), ('sp1', 'col3'), ('col1', 'sp2'), ('col2', 'sp2'), ('col2', 'sp3'), ('col3', 'sp3'), ('sp2', 'col4'), ('sp2', 'col5'), ('sp3', 'col4')])

In [104]:
networkx.bipartite.sets(scn)[0]

{'sp1', 'sp2', 'sp3'}

In [86]:
{n for n, d in scn.nodes(data=True) if d['bipartite']==0}

{'col1', 'col2', 'col3', 'col4', 'col5'}

In [71]:
scn.nodes(data=True)

NodeDataView({'sp1': {'bipartite': 1, 'count': 1}, 'col1': {'bipartite': 0, 'count': 2}, 'col2': {'bipartite': 0, 'count': 3}, 'col3': {'bipartite': 0, 'count': 2}, 'sp2': {'bipartite': 1, 'count': 3}, 'sp3': {'bipartite': 1, 'count': 2}, 'col4': {'bipartite': 0, 'count': 3}, 'col5': {'bipartite': 0, 'count': 2}})

In [59]:
scn.getSpeciesBag('col4')[1].todense()

matrix([[0, 2, 1]], dtype=int64)

In [43]:
print(scn._biadj_matrix[0])
print(scn._biadj_matrix[1])

['col1', 'col2', 'col3', 'col4', 'col5']
['sp1', 'sp2', 'sp3']


In [41]:
scn._biadj_matrix[2].todense()

matrix([[1, 1, 0],
        [1, 1, 1],
        [1, 0, 1],
        [0, 2, 1],
        [0, 2, 0]], dtype=int64)

In [49]:
# the method is named getInterestVector; SpeciesCollectorsNetwork defines no getInterest
scn.getInterestVector('sp2')[1].todense()

matrix([[1, 1, 0, 2, 2]], dtype=int64)

In [52]:
scn.getSpeciesBag('col4')[1].todense()

matrix([[0, 2, 1]], dtype=int64)

In [26]:
scn.getSpeciesBag('col4')[1].todense()

matrix([[0, 1, 1]], dtype=int64)

In [None]:
networkx.bipartite.biadjacency_matrix()

In [451]:
scn.nodes(data=True)

NodeDataView({'sp1': {'bipartite': 1}, 'col1': {'bipartite': 0}, 'col2': {'bipartite': 0}, 'col3': {'bipartite': 0}, 'sp2': {'bipartite': 1}, 'sp3': {'bipartite': 1}, 'col4': {'bipartite': 0}, 'col5': {'bipartite': 0}, 'col6': {'bipartite': 0}})

In [None]:
CoworkingNetwork()