In [1]:
import pandas as pd

from Framework import UtilFuncs
from Framework.NodeContainer import NodeContainer
from StandfordCitationNetworkAnalysis.CitationNetworkBuilder import CitationNetworkBuilder
from StandfordCitationNetworkAnalysis.CoauthorNetworkBuilder import CoauthorNetworkBuilder

import networkx as nx

In [2]:
# Load the cleaned citation data set from disk.
# NOTE(review): PickleRead presumably unpickles the file; unpickling can execute
# arbitrary code, so only point this at a trusted, locally produced file.
data_container :NodeContainer = UtilFuncs.PickleRead("./Data/CitationCleaned.dat")
citation_builder = CitationNetworkBuilder()
# GenGraphs' result is consumed below as a mapping of year -> graph
# (presumably one snapshot per year; confirm in CitationNetworkBuilder).
citgraph_dict = citation_builder.GenGraphs(data_container)

load:./Data/CitationCleaned.dat


In [3]:

def BuildGraphStatistics( graph_dict ):
    """Summarise every graph snapshot in ``graph_dict`` as one DataFrame row.

    Parameters
    ----------
    graph_dict : mapping
        Maps a key (reported in the 'Year' column) to a directed networkx
        graph. Rows are emitted in the mapping's iteration order.

    Returns
    -------
    pandas.DataFrame
        One row per entry with the columns:

        * 'Year' - the mapping key.
        * 'Size' - number of nodes in the graph.
        * 'Average Degree' - mean in-degree, i.e. the sum of all node
          in-degrees divided by the node count.
        * 'Average Clustering Coefficient' - ``nx.average_clustering(graph)``.

        For a graph without nodes both averages are undefined and are
        reported as NaN instead of raising ``ZeroDivisionError``.
    """
    column_names = [
        'Year',
        'Size',
        'Average Degree',
        'Average Clustering Coefficient',
    ]
    undefined = float("nan")

    # Average shortest path length is intentionally not part of the summary.
    rows = []
    for year in graph_dict:
        graph :nx.DiGraph = graph_dict[year]
        node_num = graph.number_of_nodes()

        if node_num == 0:
            # Both averages divide by the node count; an empty snapshot
            # therefore has no defined value for either of them.
            avg_indegree = undefined
            avg_clustering = undefined
        else:
            # graph.in_degree yields (node, in_degree) pairs.
            total_indegree = sum(indegree for _, indegree in graph.in_degree)
            avg_indegree = total_indegree / node_num
            avg_clustering = nx.average_clustering(graph)

        rows.append((year, node_num, avg_indegree, avg_clustering))

    # Build the frame in a single call; with no rows this still yields an
    # empty frame carrying all four columns.
    return pd.DataFrame(rows, columns=column_names)


In [4]:
# Per-year size, mean in-degree and clustering table for the citation graphs.
cit_df = BuildGraphStatistics(citgraph_dict)

In [5]:
# Plain-text dump of the citation-network statistics (one row per year).
print(cit_df)

    Year   Size  Average Degree  Average Clustering Coefficient
0   1992   1122        0.566845                        0.024924
1   1993   2841        1.654347                        0.064983
2   1994   4924        2.615556                        0.092311
3   1995   7077        3.974848                        0.118828
4   1996   9543        5.565126                        0.133781
5   1997  12123        7.235090                        0.142680
6   1998  14765        8.476871                        0.148330
7   1999  17484        9.557767                        0.151704
8   2000  20534       10.614347                        0.153493
9   2001  23587       11.525968                        0.154398
10  2002  26793       12.465233                        0.156670
11  2003  27770       12.704609                        0.156925


In [6]:
# Reload the same cleaned data file that the citation cell reads.
# NOTE(review): presumably reloaded so the co-author builder starts from a fresh
# container; confirm whether GenGraphs mutates its input before removing this.
# NOTE(review): PickleRead presumably unpickles the file; only use trusted files.
data_container :NodeContainer = UtilFuncs.PickleRead("./Data/CitationCleaned.dat")
coauthor_builder = CoauthorNetworkBuilder()
# Mapping of year -> co-author graph, consumed by BuildGraphStatistics below.
coauthorgraph_dict = coauthor_builder.GenGraphs(data_container)
# Same per-year statistics as for the citation graphs; 'Average Degree' is the
# mean in-degree computed from graph.in_degree.
coauthor_df = BuildGraphStatistics(coauthorgraph_dict)

load:./Data/CitationCleaned.dat


In [7]:
# Plain-text dump of the co-author-network statistics (one row per year).
print(coauthor_df)

    Year   Size  Average Degree  Average Clustering Coefficient
0   1992   1338        0.657698                        0.026681
1   1993   2666        0.726932                        0.031752
2   1994   4059        0.782951                        0.032664
3   1995   5443        0.837590                        0.034689
4   1996   6940        0.886023                        0.035411
5   1997   8269        0.931310                        0.034235
6   1998   9671        0.968256                        0.033857
7   1999  11033        1.003444                        0.034593
8   2000  13055        1.003064                        0.031068
9   2001  14880        1.011895                        0.029120
10  2002  16181        1.060070                        0.032426
11  2003  16490        1.075076                        0.033565
