In [1]:
import sys,os
sys.path.append(os.path.expanduser('~/caryocar'))

In [None]:
import numpy
import networkx
import itertools
from collections import Counter

# Collectors coworking Networks

In [312]:
class CoworkingNetwork(networkx.Graph):
    """
    Class for coworking networks. Extends networkx Graph class.
    
    Parameters
    ----------
    cliques : iterable
        An iterable of iterables containing names used to compose cliques 
        in the network.
        
    namesMap : NamesMap
        A NamesMap object for normalizing nodes names.
    
    Examples
    --------
    >>> collectors = [ ['a','b','c'], ['d','e'], ['a','c'] ]
    >>> cwn = CoworkingNetwork(cliques=collectors)
    
    >>> cwn.nodes(data=True)
    { 'a': {'count': 2}, 
      'b': {'count': 1}, 
      'c': {'count': 2}, 
      'd': {'count': 1}, 
      'e': {'count': 1} }    
      
    >>> cwn.edges(data=True)
    [ ('a', 'b', {'count': 1}), 
      ('a', 'c', {'count': 2}), 
      ('b', 'c', {'count': 1}), 
      ('d', 'e', {'count': 1}) ]
      

    """
    def __init__(self, data=None, cliques=None, namesMap=None, **attr):
       
        if cliques is not None:
            if namesMap:
                nmap = namesMap.getMap()
                cliques = [ [ nmap[n] for n in nset ] for nset in cliques ]
            
            # prevent self-loops
            cliques = [ list(set(nset)) for nset in cliques ]
            
            edgesLists = map( lambda n: itertools.combinations(n,r=2), cliques )
            data = [ edge for edgesList in edgesLists for edge in edgesList ]
            
        super().__init__(data=data,**attr)
    
        # insert nodes and set count attribute
        nodes_counts = Counter( col for clique in cliques for col in clique )
        nodes = nodes_counts.keys()
        
        self.add_nodes_from(nodes)
        networkx.set_node_attributes(self,nodes_counts,'count')
        
        # set edges count attribute
        edges = data
        edges_counts = Counter(edges)
        networkx.set_edge_attributes(self,edges_counts,'count')

In [372]:
collectors = [
    # col1, col2, col3 and col4 are connected
    ['col1','col2','col3','col4'],
    ['col1','col2','col3'],
    ['col1','col2','col3'],
    ['col1','col3','col2'],
    ['col1','col2'],
    ['col1','col2'],
    ['col1','col2'],
    ['col1','col3'],
    ['col2','col3'],
    ['col2','col4'],
    ['col2','col4'],
    ['col4'],
    # col5 is isolated
    ['col5'],
    ['col5'],
    # col7 and col8 are connected
    ['col7','col8'],
    ['col7','col8'],
    # col9 would lead to self loop
    ['col9','col9'],
    ['col9','col9']
]

In [373]:
cwn=CoworkingNetwork(cliques=collectors)

#### Tests

In [374]:
# nodes with no connections are also included in the network
assert( 'col5' in cwn.nodes() )

In [375]:
# all edges have a 'count' attribute
assert( all( data.get('count') is not None for u,v,data in cwn.edges(data=True) ) )

# all nodes have a 'count' attribute
assert( all( data.get('count') is not None for u,data in cwn.nodes(data=True)) )

In [376]:
# col1 holds 7 ties with col2
assert(cwn['col1']['col2'].get('count')==7)

# col5 appears 2 times but holds no ties with anyone
assert( cwn.nodes(data=True)['col5'].get('count')==2 )
assert( sum(v.get('count') for k,v in cwn['col5'].items() )==0 )

# col4 appears 4 times but holds 5 ties
assert( cwn.nodes(data=True)['col4'].get('count')==4 )
assert( sum( v.get('count') for k,v in cwn['col4'].items() )==5 )

In [377]:
# connected components subgraphs can be derived
[ list(sg.nodes()) for sg in list(networkx.connected_component_subgraphs(cwn)) ]

[['col1', 'col3', 'col4', 'col2'], ['col7', 'col8'], ['col5'], ['col9']]

In [383]:
# self-loops are not allowed: 
## make sure col9 is not linked to itself
assert( cwn['col9'].get('col9') is None )

# col9 appears twice
assert( cwn.nodes(data=True)['col9'].get('count')==2 )

### Using names maps

In [297]:
from caryocar.cleaning import NamesMap

remapping = {
    'col7':'col8',
    'col8':'COL8',
    'col3':'COL_3'
}

nm = NamesMap( names=[ n for clique in collectors for n in clique  ], 
               normalizationFunc=lambda x: x, 
               remappingIndex=remapping )

In [298]:
cwn = CoworkingNetwork(cliques=collectors,namesMap=nm)

#### Tests

In [301]:
cwn.edges(data=True)

EdgeDataView([('col1', 'col2', {'count': 7}), ('col1', 'COL_3', {'count': 5}), ('col1', 'col4', {'count': 1}), ('col2', 'COL_3', {'count': 1}), ('col2', 'col4', {'count': 3}), ('COL_3', 'col4', {'count': 1}), ('COL8', 'COL8', {'count': 2})])

In [None]:
# col1 becomes col2 and therefore, COL2

---

In [None]:
class SpeciesCollectorsNetwork(networkx.Graph):
    """
    Class for Species-collectors networks
    
    Attributes
    ----------
    _biadj_matrix : (colList, spList, m), where m is a scipy sparse matrix
    
    Parameters
    ----------
    
    A dataframe with two columns: an atomized collectors names 
    """
    def __init__(self, data=None, species=None, collectorsNames=None, weighted=False, namesMap=None, **attr):
        
        self._biadj_matrix = None
        
        set_bipartite_attr=False
        if species is not None and collectorsNames is not None:
            if namesMap:
                nmap = namesMap.getMap()
                collectorsNames = [ [ nmap[n] for n in nset ] for nset in collectorsNames ]
            
            # build edges
            if len(species)==len(collectorsNames):
                species = list(species)
                collectorsNames = list(collectorsNames)
                
                data = [ (sp,col) for i,sp in enumerate(species) for col in collectorsNames[i] ]
                set_bipartite_attr=True

        super().__init__(data=data,**attr)
        
        if set_bipartite_attr:
            networkx.set_node_attributes( self, 'bipartite', dict( (n,1) for n in species) )
            networkx.set_node_attributes( self, 'bipartite', dict( (n,0) for cols in collectorsNames for n in cols) )
            
        if weighted:
            edges = data
            edges_weights = Counter(edges)

            for (u,v),w in edges_weights.items():
                try:
                    self[u][v]['weight'] += w
                except:
                    self[u][v]['weight'] = w    
    
    def _buildBiadjMatrix( self, col_sp_order=None ):
        col_sp_order=(sorted(self.getCollectorsNodes()),sorted(self.getSpeciesNodes())) if col_sp_order is None else col_sp_order
        m = networkx.bipartite.biadjacency_matrix(self,row_order=col_sp_order[0],column_order=col_sp_order[1])
        self._biadj_matrix = (*col_sp_order,m)
                    
    def getSpeciesNodes(self,data=False):
        return [ (n,d) if data==True else n for n,d in self.nodes(data=True) if d['bipartite']==1 ]
        
    def getCollectorsNodes(self,data=False):
        return [ (n,d) if data==True else n for n,d in self.nodes(data=True) if d['bipartite']==0 ]
    
    def getSpeciesBag( self, collectorName ):
        """
        Parameters
        ----------
        
        Returns
        -------
        A tuple (spIds, vector), where the first element is a list containing all species names and
        the second is the vector containing their counts.
        """
        if self._biadj_matrix is None:
            self._buildBiadjMatrix()
            
        colList, spList, m = self._biadj_matrix
        i = colList.index(collectorName)
        vector = m.getrow(i)
        return (spList, vector)
    
    def getInterest( self, speciesName ):
        """
        Returns
        -------
        The same as the getSpeciesBag method
        """
        if self._biadj_matrix is None:
            self._buildBiadjMatrix()
        
        colList, spList, m = self._biadj_matrix
        m = m.transpose()
        i = spList.index(speciesName)
        vector = m.getrow(i)
        return (colList,vector)

