Import Libraries

In [78]:
from pyvis.network import Network
import networkx as nx
import random
from igraph import Graph

## Create Helper Functions

### Create paper function

In [79]:
# recursive function that will traverse the nodes
def createPaper(network, authors, probStop):
    '''
    Will take network, list of authors, and probStop as input
    '''
    currAuthorID = authors[-1]
    newNeighbors = set(network.neighbors(currAuthorID)).difference(set(authors))

    # base condition: stop at node if probStop hit or there are no new neighbors to traverse
    if random.random() < probStop or len(newNeighbors) == 0:
        return
    
    # create list reprsenting probabilities for the neighboring nodes of the current coauthor
    probs = []
    for neighbor in newNeighbors:
        nData = network.get_edge_data(currAuthorID, neighbor)
        probs.extend([neighbor] * nData["weight"])

    # Select coauthor from neighbors probabilities list
    coauthorID = random.choice(probs)

    # update all edges of coauthors to this new author
    for author in authors:
        # if there is not an edge, create one
        if not network.has_edge(author, coauthorID) and author != coauthorID:
            network.add_edge(author, coauthorID, weight=0, width=1)
        newWeight = network.get_edge_data(author, coauthorID)["weight"] + 1
        #network.update(edges=[ (author, coauthorID, {"weight": newWeight, "width": newWeight//2}) ])
        network.update(edges=[ (author, coauthorID, {"weight": newWeight}) ])

    # call function recursively with coauthor
    authors.append(coauthorID)
    createPaper(network, authors, probStop)


### Split community function

In [80]:
def splitCommunity(network, nodes):
    '''
    Function will take the networkx network as input and the list of nodes in the community
        It will then test if it should split the community or not
    Returns nothing, will update the network
    '''
    # split into two communities
    subGraph = network.subgraph(nodes)
    newGraph = Graph.from_networkx(subGraph)

    # create subgraph and split
    clusters = newGraph.community_leading_eigenvector(clusters=2)

    # recreate network
    nt = Network()
    # populates the nodes and edges data structures
    nt.from_nx(subGraph)
    nt.show('docs/models/testsub.html')

    # compare unweighted modularity of new communities to the initial, return if there should not be change in community structure
    # Q: modularity of just the partition or the whole graph with the new partition?
    if newGraph.modularity(set(subGraph.nodes())) > clusters.modularity:
        return

    # update the colors and group name of the nodes in the new community
    # must know all the groups and community names and pick different ones
    for node in clusters[0]:
        network.update(nodes=[(node, {"label": "g1", "color": "green"})])
    for node in clusters[1]:
        network.update(nodes=[(node, {"label": "g2", "color": "red"})])
    # update the papers? Need a data structure of the papers
    

Define initial parameters

In [81]:
# define time steps
timeSteps = 30

# Probabilities
# probability that you generate new author
probNewAuthor = 0.5
# probability that you stop at a given node
probStop = 0.7

# define fields
fieldColors = {"CS": "blue", 
                "Math": "green", 
                "Physics": "red"}
fields = list(fieldColors.keys())

# define initial scholars, will be in form (id, scholarField, color)
scholarField = random.choice(fields)
nodeID = 0


Create Model

In [82]:
network = nx.Graph()
scholarField = random.choice(fields)
network.add_node(nodeID, label=scholarField, color=fieldColors[scholarField])
# go through time steps, add new scholar and paper at each step
for i in range(1, timeSteps):

    # Choose first author, either new scholar or random choice
    currNodes = list(network.nodes())
    authors = [random.choice(currNodes)]

    # with probability, add new author to network set as main author with a coauthor
    if random.random() < probNewAuthor:
        # generate author and field
        scholarField = random.choice(fields)
        nodeID += 1
        author = nodeID
        network.add_node(author, label=scholarField, color=fieldColors[scholarField])

        # generate random coauthor from currNodes, which doesn't have the new node added in
        coauthorID = random.choice(currNodes)
        network.add_edge(author, coauthorID, weight=1, width=1)

        # update authors list
        authors = [author, coauthorID]

    # Add new paper, calling function
    createPaper(network, authors, probStop)

Display Network

In [83]:
nt = Network()
# populates the nodes and edges data structures
nt.from_nx(network)
print(network.edges.data())
nt.show('docs/models/modularity.html')

[(0, 1, {'weight': 2, 'width': 1}), (0, 5, {'weight': 3, 'width': 1}), (0, 8, {'weight': 1, 'width': 1}), (0, 18, {'weight': 1, 'width': 1}), (1, 2, {'weight': 3, 'width': 1}), (1, 3, {'weight': 3, 'width': 1}), (1, 6, {'weight': 1, 'width': 1}), (1, 11, {'weight': 1, 'width': 1}), (1, 14, {'weight': 1, 'width': 1}), (1, 18, {'weight': 1, 'width': 1}), (1, 5, {'weight': 1, 'width': 1}), (2, 4, {'weight': 2, 'width': 1}), (2, 6, {'weight': 1, 'width': 1}), (2, 7, {'weight': 1, 'width': 1}), (2, 9, {'weight': 1, 'width': 1}), (2, 10, {'weight': 2, 'width': 1}), (2, 11, {'weight': 2, 'width': 1}), (2, 12, {'weight': 1, 'width': 1}), (2, 15, {'weight': 1, 'width': 1}), (2, 16, {'weight': 1, 'width': 1}), (4, 10, {'weight': 1, 'width': 1}), (5, 18, {'weight': 1, 'width': 1}), (11, 16, {'weight': 2, 'width': 1}), (12, 13, {'weight': 1, 'width': 1}), (16, 17, {'weight': 1, 'width': 1})]


### Calculate modularity using iGraph

In [84]:
newGraph = Graph.from_networkx(network)
clusters = newGraph.community_leading_eigenvector(clusters=2)
# check if modularity of clusters is greater than whole
print(clusters)

# loop and update cluster colors
for node in clusters[0]:
    network.update(nodes=[(node, {"label": "g1", "color": "green"})])
for node in clusters[1]:
    network.update(nodes=[(node, {"label": "g2", "color": "red"})])
print(set(network.nodes()))
print(f'test mod1: {newGraph.modularity(set(network.nodes()))}')
print(f'test mod2: {clusters.modularity}')
splitCommunity(network, [0, 1, 3, 4, 5, 9, 10, 12])

Clustering with 19 elements and 2 clusters
[0] 0, 1, 3, 5, 8, 14, 18
[1] 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}
test mod1: -0.09600000000000003
test mod2: 0.3672


In [85]:
# recreate network
nt = Network()
# populates the nodes and edges data structures
nt.from_nx(network)
print(network.edges.data())
nt.show('docs/models/modularity.html')

[(0, 1, {'weight': 2, 'width': 1}), (0, 5, {'weight': 3, 'width': 1}), (0, 8, {'weight': 1, 'width': 1}), (0, 18, {'weight': 1, 'width': 1}), (1, 2, {'weight': 3, 'width': 1}), (1, 3, {'weight': 3, 'width': 1}), (1, 6, {'weight': 1, 'width': 1}), (1, 11, {'weight': 1, 'width': 1}), (1, 14, {'weight': 1, 'width': 1}), (1, 18, {'weight': 1, 'width': 1}), (1, 5, {'weight': 1, 'width': 1}), (2, 4, {'weight': 2, 'width': 1}), (2, 6, {'weight': 1, 'width': 1}), (2, 7, {'weight': 1, 'width': 1}), (2, 9, {'weight': 1, 'width': 1}), (2, 10, {'weight': 2, 'width': 1}), (2, 11, {'weight': 2, 'width': 1}), (2, 12, {'weight': 1, 'width': 1}), (2, 15, {'weight': 1, 'width': 1}), (2, 16, {'weight': 1, 'width': 1}), (4, 10, {'weight': 1, 'width': 1}), (5, 18, {'weight': 1, 'width': 1}), (11, 16, {'weight': 2, 'width': 1}), (12, 13, {'weight': 1, 'width': 1}), (16, 17, {'weight': 1, 'width': 1})]
