In [1]:
%load_ext autoreload
%autoreload 2
from mesh_import import mesh

  from tqdm.autonotebook import tqdm


In [2]:
import igraph as ig
from CwnGraph import CwnBase, CwnSense
from tqdm.autonotebook import tqdm
import numpy as np
import pandas as pd

In [3]:
# "cns" sub-directory of the mesh data directory; the pickled graphs are read
# from here and the per-vertex measure CSVs are written back here.
cns_dir = mesh.get_data_dir() / "cns"
# One pickle file per graph variant (a, b, c).
graph_a_path = cns_dir / "cwn_sense_graph_a.pkl"
graph_b_path = cns_dir / "cwn_sense_graph_b.pkl"
graph_c_path = cns_dir / "cwn_sense_graph_c.pkl"

In [4]:
# Read the three pickled graphs in A, B, C order with a single unpacking assignment.
# NOTE(review): unpickling runs arbitrary code — only load files from a trusted source.
Ga, Gb, Gc = (
    ig.read(pickle_path, format="pickle")
    for pickle_path in (graph_a_path, graph_b_path, graph_c_path)
)

In [23]:
# simplify() is called for its effect on the graph object itself (the return
# value is not assigned), so every later cell works with the simplified Gc/Gb.
# Presumably this drops multi-edges and self-loops (igraph default) — TODO confirm.
# NOTE(review): Ga is not simplified here — confirm that is intentional.
Gc.simplify()
Gb.simplify()

<igraph.Graph at 0x24e2912b5e0>

In [6]:
# Print a banner followed by igraph's textual summary for graphs A and C.
for banner, graph in (("---- Graph A ----", Ga), ("---- Graph C ----", Gc)):
    print(banner)
    print(graph.summary())

---- Graph A ----
IGRAPH UN-- 28827 3122 -- 
+ attr: name (v), rel_type (e)
---- Graph C ----
IGRAPH UN-- 28827 25371 -- 
+ attr: name (v)


## Graph A Avg Distance

In [8]:
# Accumulate the total length (psum) and the number (pcount) of finite shortest
# paths in Graph A. Each vertex is paired only with lower-indexed vertices, so
# every unordered pair is visited exactly once.
psum, pcount = 0, 0
for vertex in tqdm(Ga.vs):
    lower_indices = list(range(vertex.index))
    distances_row = Ga.shortest_paths(vertex, lower_indices)[0]
    # Unreachable targets come back as inf and are left out of the average.
    reachable = [dist for dist in distances_row if dist != float('inf')]
    psum += sum(reachable)
    pcount += len(reachable)

HBox(children=(FloatProgress(value=0.0, max=28827.0), HTML(value='')))




In [9]:
# Mean finite shortest-path length in Graph A, from the psum/pcount totals of
# the preceding Graph A loop. psum/pcount are reset by the later distance
# cells, so this must run before them.
Ga_avg_distance = psum/pcount

## Graph B Avg Distance

In [39]:
# Average finite shortest-path length over unordered vertex pairs of Graph B.
# Each vertex is paired only with lower-indexed vertices so every pair is
# counted once; unreachable pairs (distance == inf) are excluded.
psum = 0
pcount = 0
# Iterate Gb's own vertices. The loop previously walked Ga.vs, which only
# produced the right indices because Ga and Gb have the same vertex count.
for v in tqdm(Gb.vs):
    us = list(range(v.index))
    plen = [x for x in Gb.shortest_paths(v, us)[0] if x != float('inf')]
    psum += sum(plen)
    pcount += len(plen)
Gb_avg_distance = psum/pcount

HBox(children=(FloatProgress(value=0.0, max=28827.0), HTML(value='')))




## Graph C Avg Distance

In [10]:
# Totals for Graph C: psum is the summed length and pcount the number of finite
# shortest paths, visiting each unordered vertex pair once (target index < source index).
psum = pcount = 0
for node in tqdm(Gc.vs):
    path_lengths = Gc.shortest_paths(node, list(range(node.index)))[0]
    # Drop inf entries, i.e. targets that cannot be reached from this node.
    finite_lengths = [length for length in path_lengths if length != float('inf')]
    psum, pcount = psum + sum(finite_lengths), pcount + len(finite_lengths)

HBox(children=(FloatProgress(value=0.0, max=28827.0), HTML(value='')))




In [11]:
# Mean finite shortest-path length in Graph C, from the psum/pcount totals of
# the preceding Graph C loop (the same names are reused by every distance cell).
Gc_avg_distance = psum/pcount

## Structural summarization

In [20]:
import pandas as pd
def summarize_graph(G, avg_distance):
    """Collect whole-graph structural statistics into a pandas Series.

    Parameters
    ----------
    G : graph object
        Must provide ``degree``, ``cliques``, ``components``, ``vcount``,
        ``ecount``, ``diameter``, ``transitivity_undirected``,
        ``transitivity_local_undirected`` and ``assortativity_degree``
        (an igraph ``Graph`` in this notebook).
    avg_distance : float
        Average shortest-path length computed by the caller; stored as-is
        under "Avg distance".

    Returns
    -------
    pandas.Series
        One entry per statistic, keyed by a human-readable label.
    """
    deg = G.degree()
    # Only cliques with at least three vertices are enumerated.
    cliques = G.cliques(min=3)
    compos = G.components()
    res = pd.Series({
        "nV": G.vcount(),
        "nE": G.ecount(),
        "Avg Degree": np.mean(deg),
        "Max Degree": np.max(deg),
        "Diameter": G.diameter(),
        "Avg distance": avg_distance,
        "Global clustering coeff.": G.transitivity_undirected(),
        # nanmean skips vertices whose local coefficient is NaN.
        "Avg local clustering coeff.": np.nanmean(G.transitivity_local_undirected()),
        "Degree assortativity": G.assortativity_degree(),
        # default=0: a graph without any clique of size >= 3 reports 0
        # instead of raising ValueError from max() on an empty sequence.
        "Largest clique size": max((len(x) for x in cliques), default=0),
        # Components consisting of a single vertex.
        "Isolated nodes": sum(1 for x in compos if len(x) == 1),
        "Largest component": max(len(x) for x in compos)
    })
    return res

In [21]:
# Structural summary of Graph A; needs Ga_avg_distance from the distance section.
ga_prop = summarize_graph(Ga, Ga_avg_distance)

In [40]:
# Structural summary of Graph B; needs Gb_avg_distance from the distance section.
gb_prop = summarize_graph(Gb, Gb_avg_distance)

In [30]:
# Structural summary of Graph C; needs Gc_avg_distance from the distance section.
gc_prop = summarize_graph(Gc, Gc_avg_distance)

In [31]:
# Hard-coded reference statistics for WordNet, using the same labels as
# summarize_graph so the series line up in the comparison table.
# Presumably transcribed from Veremyev et al. (2019) — verify against the paper.
# NOTE(review): "Isolated nodes" (35000) is larger than "nV" (32611), and
# "Largest component" equals "nV" — confirm what these figures refer to.
wn_prop = pd.Series({
    "nV": 32611, 
    "nE": 119463,
    "Avg Degree": 7.32,
    "Max Degree": 152,
    "Diameter": 23, 
    "Avg distance": 6.89,
    "Global clustering coeff.": 0.36,
    "Avg local clustering coeff.": 0.62,
    "Degree assortativity": 0.26,
    "Largest clique size": 34,
    "Isolated nodes": 35000,
    "Largest component": 32611
})

* Veremyev, A., Semenov, A., Pasiliao, E. L., & Boginski, V. (2019). Graph-based exploration and clustering analysis of semantic spaces. Applied Network Science, 4(1), 104.
https://appliednetsci.springeropen.com/articles/10.1007/s41109-019-0228-y

In [41]:
from IPython.display import HTML

# One column per graph plus the WordNet reference, rendered as an HTML table.
comparison_columns = {
    "Graph_A": ga_prop,
    "Graph_B": gb_prop,
    "Graph_C": gc_prop,
    "WordNet(Veremyev,2019)": wn_prop,
}
comparison_frame = pd.DataFrame(comparison_columns)
HTML(comparison_frame.to_html())

Unnamed: 0,Graph_A,Graph_B,Graph_C,"WordNet(Veremyev,2019)"
nV,28827.0,28827.0,28827.0,32611.0
nE,3122.0,14838.0,25371.0,119463.0
Avg Degree,0.216602,1.029452,1.760225,7.32
Max Degree,214.0,37.0,214.0,152.0
Diameter,22.0,11.0,25.0,23.0
Avg distance,9.656632,1.624989,8.762392,6.89
Global clustering coeff.,0.002857,0.904604,0.480762,0.36
Avg local clustering coeff.,0.050679,0.866859,0.754835,0.62
Degree assortativity,-0.201782,0.872689,0.02966,0.26
Largest clique size,4.0,20.0,21.0,34.0


## Graph centralities

In [33]:
def compute_local_measures(G, suffix):
    """Build a per-vertex table of centrality and neighbourhood measures.

    Parameters
    ----------
    G : graph object
        Must provide ``vcount``, ``closeness``, ``betweenness``, ``degree``,
        ``transitivity_local_undirected``, ``components`` and ``vs["name"]``
        (an igraph ``Graph`` in this notebook).
    suffix : str
        Appended to every column name so tables from several graphs can be
        told apart (e.g. ``"A"`` gives ``closenessA``).

    Returns
    -------
    pandas.DataFrame
        Indexed by vertex name, with columns closeness, betweenness, degree,
        trans (local transitivity) and ncompo (size of the connected component
        containing the vertex), each carrying ``suffix``.
    """
    nV = G.vcount()

    # Size of the component each vertex belongs to, from one components() call
    # instead of one subcomponent() search per vertex. mode="weak" ignores edge
    # direction, matching subcomponent()'s default; for undirected graphs the
    # two are the same thing.
    ncompo = [0] * nV
    for members in G.components(mode="weak"):
        for vertex_index in members:
            ncompo[vertex_index] = len(members)

    frame = pd.DataFrame(dict(
        closeness = G.closeness(normalized=True),
        # Divide by the number of unordered vertex pairs that exclude the
        # vertex itself, (nV-1)(nV-2)/2.
        betweenness = np.array(G.betweenness())/((nV-1)*(nV-2)/2),
        degree = G.degree(),
        trans = G.transitivity_local_undirected(),
        ncompo = ncompo
    ))
    frame.index = G.vs["name"]
    frame.columns = [x+suffix for x in frame.columns]
    return frame

In [34]:
# Per-vertex measure tables for the three graphs; the suffix tags each table's
# columns with the graph letter.
Ga_measures, Gb_measures, Gc_measures = (
    compute_local_measures(graph, letter)
    for graph, letter in ((Ga, "A"), (Gb, "B"), (Gc, "C"))
)

  closeness = G.closeness(normalized=True),


In [35]:
# Display the per-vertex measures for Graph A.
Ga_measures

Unnamed: 0,closenessA,betweennessA,degreeA,transA,ncompoA
03000101,0.000035,0.0,0,,1
03000102,0.000035,0.0,0,,1
03000201,0.000035,0.0,0,,1
03000202,0.000035,0.0,0,,1
03000203,0.000035,0.0,0,,1
...,...,...,...,...,...
14609902,0.000035,0.0,0,,1
14610001,0.000035,0.0,0,,1
14610101,0.000035,0.0,0,,1
14610201,0.000035,0.0,0,,1


In [36]:
# Display the per-vertex measures for Graph B.
Gb_measures

Unnamed: 0,closenessB,betweennessB,degreeB,transB,ncompoB
03000101,0.000035,0.0,0,,1
03000102,0.000035,0.0,0,,1
03000201,0.000035,0.0,0,,1
03000202,0.000035,0.0,0,,1
03000203,0.000035,0.0,0,,1
...,...,...,...,...,...
14609902,0.000035,0.0,0,,1
14610001,0.000035,0.0,1,,2
14610101,0.000035,0.0,0,,1
14610201,0.000035,0.0,0,,1


In [37]:
# Display the per-vertex measures for Graph C.
Gc_measures

Unnamed: 0,closenessC,betweennessC,degreeC,transC,ncompoC
03000101,0.000035,0.000000e+00,1,,21
03000102,0.000035,0.000000e+00,0,,1
03000201,0.000035,0.000000e+00,0,,1
03000202,0.000035,0.000000e+00,0,,1
03000203,0.000035,0.000000e+00,0,,1
...,...,...,...,...,...
14609902,0.000035,0.000000e+00,0,,1
14610001,0.000035,2.407001e-09,2,0.0,3
14610101,0.000035,0.000000e+00,1,,3
14610201,0.000035,0.000000e+00,1,,3


In [38]:
# Write each measure table to its own CSV in cns_dir, keeping the vertex-name index.
for measures, csv_name in (
    (Ga_measures, "Ga_measures.csv"),
    (Gb_measures, "Gb_measures.csv"),
    (Gc_measures, "Gc_measures.csv"),
):
    measures.to_csv(cns_dir / csv_name, index=True)