In [51]:
%%capture
!pip install networkx
!pip install pandas

In [53]:
import json
import networkx as nx
import pandas as pd

# A. Temporal Graphs

In [37]:
def create_graph(graph_data: list, target_year: int, weighted: bool = False):
    """Build the undirected co-authorship graph for a single year.

    Args:
        graph_data: Iterable of ``[author1, author2, year]`` records.
        target_year: Only records whose year equals this value become edges.
        weighted: When True, each edge carries a ``"weight"`` attribute equal to
            the number of matching records for that author pair. When False,
            edges are stored without a weight attribute.

    Returns:
        An ``nx.Graph`` containing one edge per distinct author pair that has
        at least one record in ``target_year``.
    """
    graph = nx.Graph()
    for author1, author2, year in graph_data:
        if year != target_year:
            continue
        if not weighted:
            # Re-adding an existing edge is a no-op, so duplicates collapse.
            graph.add_edge(author1, author2)
        elif graph.has_edge(author1, author2):
            # Repeated pair in the same year: count one more collaboration.
            graph[author1][author2]["weight"] += 1
        else:
            graph.add_edge(author1, author2, weight=1)
    return graph
        

In [33]:
def load_json(file_path: str):
    """Read and parse the JSON document stored at ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded value produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pin UTF-8 so non-ASCII text decodes the same way regardless of the
    # platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)

In [71]:
# Load ./test.json and build a graph from its records whose year equals 1.
test_data = load_json("./test.json")
test_graph = create_graph(test_data, 1)

In [34]:
# Load the DBLP co-authorship records that the graphs below are built from.
graph_data = load_json("./tmp_dblp_coauthorship.json")

In [36]:
# Show the first record to confirm the [author, author, year] layout.
print(graph_data[0])

['Alin Deutsch', 'Mary F. Fernandez', 1998]


In [62]:
# Co-authorship graph restricted to records dated 2005.
dblp2005 = create_graph(graph_data, 2005)

In [63]:
# Co-authorship graph restricted to records dated 2006.
dblp2006 = create_graph(graph_data, 2006)

In [64]:
# Same 2005 records as dblp2005, this time requesting the weighted variant.
dblp2005w = create_graph(graph_data, 2005, weighted=True)

In [65]:
# Build the size summary in a single DataFrame construction so the count
# columns get integer dtypes instead of the object dtype that results from
# appending rows to an empty frame.
table = pd.DataFrame(
    [
        (name, graph.number_of_nodes(), graph.number_of_edges())
        for name, graph in (
            ("dblp2005", dblp2005),
            ("dblp2006", dblp2006),
            ("dblp2005w", dblp2005w),
        )
    ],
    columns=["Graph Name", "Node Count", "Edge Count"],
)

print(table)

  Graph Name  Node Count  Edge Count
0   dblp2005      180227      403026
1   dblp2006      201298      465988
2  dblp2005w      180227      403026


## Giant Connected Components

In [66]:
def giant_connected_component(graph):
    """Return the largest connected component of ``graph`` as a standalone graph.

    Args:
        graph: Graph accepted by ``nx.connected_components``.

    Returns:
        A copy of the subgraph induced by the connected component with the
        most nodes (the first such component if several tie). If ``graph`` has
        no components, the copy of an empty induced subgraph is returned.
    """
    # default=() keeps max() from raising ValueError when there are no components.
    giant_cc_nodeset = max(nx.connected_components(graph), key=len, default=())
    # subgraph() yields a filtered view backed by the full graph; copying gives
    # an independent graph that no longer references or filters the original.
    return graph.subgraph(giant_cc_nodeset).copy()

In [67]:
# Rebind dblp2005 to its largest connected component; from here on the name no longer refers to the full 2005 graph.
dblp2005 = giant_connected_component(dblp2005)

In [68]:
# Rebind dblp2006 to its largest connected component; from here on the name no longer refers to the full 2006 graph.
dblp2006 = giant_connected_component(dblp2006)

In [69]:
# Rebind dblp2005w to its largest connected component; from here on the name no longer refers to the full graph.
dblp2005w = giant_connected_component(dblp2005w)

## Report

In [70]:
# Build the component-size summary in a single DataFrame construction so the
# count columns get integer dtypes instead of the object dtype that results
# from appending rows to an empty frame.
table = pd.DataFrame(
    [
        (name, graph.number_of_nodes(), graph.number_of_edges())
        for name, graph in (
            ("dblp2005", dblp2005),
            ("dblp2006", dblp2006),
            ("dblp2005w", dblp2005w),
        )
    ],
    columns=["Graph Name", "GCC Node Count", "GCC Edge Count"],
)

print(table)

  Graph Name  GCC Node Count  GCC Edge Count
0   dblp2005          106943          300043
1   dblp2006          123808          356968
2  dblp2005w          106943          300043


# B. Node and Edge Importance in Graphs

In [None]:
def report_node_importance(graph, top_n=50):
    """Print the ``top_n`` nodes of ``graph`` with the highest PageRank score.

    Args:
        graph: Graph passed to ``nx.pagerank``.
        top_n: Maximum number of rows to print. Fewer rows are printed when the
            graph has fewer nodes; zero or negative values print an empty table.

    Returns:
        None. The table is written to stdout.
    """
    pagerank = nx.pagerank(graph)
    # Highest score first: the "top" nodes are the most important ones.
    ranked_nodes = sorted(pagerank, key=pagerank.get, reverse=True)

    # Slicing clamps to the number of nodes; max() keeps a negative top_n from
    # being read as "all but the last k".
    rows = [(node, pagerank[node]) for node in ranked_nodes[:max(top_n, 0)]]
    node_importance_table = pd.DataFrame(rows, columns=["Author name", "Pagerank score"])

    print(f"Node importance:\n{node_importance_table}")

def report_edge_importance(graph, top_n=20, normalized=False):
    """Print the ``top_n`` edges of ``graph`` with the highest edge betweenness.

    Args:
        graph: Graph passed to ``nx.edge_betweenness_centrality``.
        top_n: Maximum number of rows to print. Fewer rows are printed when the
            graph has fewer edges; zero or negative values print an empty table.
        normalized: Forwarded to ``nx.edge_betweenness_centrality``.

    Returns:
        None. The table is written to stdout.
    """
    # NOTE(review): no ``weight`` argument is passed here, so whether edge
    # weights influence the scores depends on the networkx default - confirm
    # this is what is wanted for the weighted graph.
    betweenness = nx.edge_betweenness_centrality(graph, normalized=normalized)
    # Highest score first: the "top" edges are the most important ones.
    ranked_edges = sorted(betweenness, key=betweenness.get, reverse=True)

    # Slicing clamps to the number of edges; max() keeps a negative top_n from
    # being read as "all but the last k".
    rows = [
        (edge[0], edge[1], betweenness[edge])
        for edge in ranked_edges[:max(top_n, 0)]
    ]
    edge_importance_table = pd.DataFrame(rows, columns=["Author 1", "Author 2", "Betweeness score"])

    print(f"Edge importance:\n{edge_importance_table}")
    

## Report

In [None]:
# PageRank and edge-betweenness reports for dblp2005 (rebound earlier to its largest connected component).
print("Graph: dblp2005")
report_node_importance(dblp2005)
report_edge_importance(dblp2005)

In [None]:
# PageRank and edge-betweenness reports for dblp2006 (rebound earlier to its largest connected component).
print("Graph: dblp2006")
report_node_importance(dblp2006)
report_edge_importance(dblp2006)

In [None]:
# PageRank and edge-betweenness reports for dblp2005w (rebound earlier to its largest connected component).
print("Graph: dblp2005w")
report_node_importance(dblp2005w)
report_edge_importance(dblp2005w)

# C. Link Prediction in Graphs