In [16]:
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import community
import pickle 
import pandas as pd

## Todos

TODO: Print who the members of each community are in the various algorithms

TODO: LSA / Semantic analysis

TODO: Add diagnostics showing how long some tasks take to run

TODO: Recreate a graph, maybe in GEPHI, using whatever communinty structure we end up with



## Load Graph
Read in the edge list, which contains all relevant information: source author -> target author, number of times cited

In [17]:
# Use tiny_edge_list.csv and small_edge_list.csv
# G = nx.read_weighted_edgelist("./tiny_edge_list.csv", delimiter=",")
G_directed = nx.read_weighted_edgelist("./tiny_edge_list.csv", delimiter=",",create_using= nx.DiGraph())
G = nx.read_weighted_edgelist("./tiny_edge_list.csv", delimiter=",")

In [18]:
# To convert files to graphml 
# nx.write_graphml(G_directed, "small_edge_list.graphml")

In [19]:
print(nx.info(G_directed))

Name: 
Type: DiGraph
Number of nodes: 179
Number of edges: 228
Average in degree:   1.2737
Average out degree:   1.2737


## Various node orderings

In [20]:
# Is it weighted?
degreeTable = pd.DataFrame(G_directed.in_degree, columns = ["Name","In Degree"])
sd = degreeTable.sort_values(by=['In Degree'], ascending=False)
sd.head(10)

Unnamed: 0,Name,In Degree
122,FINK E,90
35,CAIRNS D,58
86,DAHLSTROM D,40
1,BRUZINA R,34
81,CAPOBIANCO R,6
0,ALVIN J,0
116,TANZER M,0
117,TRATTER A,0
118,WALSH P,0
119,WRATHALL M,0


In [21]:
degreeTable = pd.DataFrame(G_directed.out_degree, columns = ["Name","Out Degree"])
sd = degreeTable.sort_values(by=['Out Degree'], ascending=False)
sd.head(10)

Unnamed: 0,Name,Out Degree
17,HOMAN C,5
23,MORAN D,3
1,BRUZINA R,3
6,DASTUR F,3
43,CHERNAVIN G,3
66,LOZAR J,3
9,GIUBIATO G,3
19,INVERSO H,3
15,HIMANKA J,3
0,ALVIN J,2


In [22]:
# Not sure if results differ between G and G_directed...
h,a=nx.hits(G)

In [23]:
authTable =  pd.DataFrame.from_dict(a,orient='index', columns=["Authority"])
sd = authTable.sort_values(by=['Authority'], ascending=False)
sd.head(20)

Unnamed: 0,Authority
FINK E,0.081331
BRUZINA R,0.077549
CAIRNS D,0.024191
DEPRAZ N,0.021869
CHERNAVIN G,0.019397
HIMANKA J,0.018945
DASTUR F,0.018945
SMYTH B,0.018323
BERTOLINI S,0.017619
GIUBILATO G,0.017412


In [24]:
hubTable =  pd.DataFrame.from_dict(h,orient='index', columns=["Hubitude"])
sd = hubTable.sort_values(by=['Hubitude'], ascending=False)
sd.head(20)

Unnamed: 0,Hubitude
FINK E,0.081331
BRUZINA R,0.077549
CAIRNS D,0.024191
DEPRAZ N,0.021869
CHERNAVIN G,0.019397
HIMANKA J,0.018945
DASTUR F,0.018945
SMYTH B,0.018323
BERTOLINI S,0.017619
GIUBILATO G,0.017412


## Community Detection

See https://networkx.github.io/documentation/stable/reference/algorithms/community.html

### Helper Methods

In [170]:
# Print out the number of members of each communinty and sub-community.  
# Note that results can a generator, in which case after running this once the generator is "unwound" and must be recreated
def conmunity_stats(results, level = 2):
    for l1 in results:
        print("Level 1:", len(l1), "members")
        print_community(l1)
        if level > 1:
            for l2 in l1:
                print("Level 2:", len(l2),"members")
                print_community(l2, hasmembers=False) # TODO: Not working

In [162]:
def print_community(community, hasmembers=True):
    print(len(community))
    if(hasmembers):
        for comm in community:
            print(comm)
    else:
        print(community)

### Girvan Newman

Takes a long time...

In [163]:
cg_gn = community.girvan_newman(G)

In [164]:
top_level = next(cg_gn)

In [167]:
next_level = next(cg_gn)

StopIteration: 

In [None]:
# sorted(map(sorted, next_level))

In [None]:
# Save communites
# pickle.dump(next_level, open("gn_communities.obj", "wb"))

In [None]:
# Testing re-open
# cg_gn = pickle.load(open("gn_communities.obj", "rb"))

In [None]:
sample = conmunity_stats(cg_gn,1)

### K Clique Communities

In [168]:
cg_kc = community.k_clique_communities(G,3)

In [169]:
conmunity_stats(cg_kc,2)

Level 1: 26 members
26
CAIRNS D
PLOTKA W
HENRY M
BRUZINA R
HOMAN C
IKEDA Y
ALVIN J
INVERSO H
GRONDIN V
SMYTH B
BERTOLINI S
LORELLE P
HIMANKA J
DASTUR F
BADGER L
DUPONT C
DEPRAZ N
MORAN D
ROGGERO J
GIUBILATO G
BANHAM G
NIEL L
FINK E
GIUBIATO G
BEHNKE E
HOBBS D
Level 2: 8 members
8
CAIRNS D
Level 2: 8 members
8
PLOTKA W
Level 2: 7 members
7
HENRY M
Level 2: 9 members
9
BRUZINA R
Level 2: 7 members
7
HOMAN C
Level 2: 7 members
7
IKEDA Y
Level 2: 7 members
7
ALVIN J
Level 2: 9 members
9
INVERSO H
Level 2: 9 members
9
GRONDIN V
Level 2: 7 members
7
SMYTH B
Level 2: 11 members
11
BERTOLINI S
Level 2: 9 members
9
LORELLE P
Level 2: 9 members
9
HIMANKA J
Level 2: 8 members
8
DASTUR F
Level 2: 8 members
8
BADGER L
Level 2: 8 members
8
DUPONT C
Level 2: 8 members
8
DEPRAZ N
Level 2: 7 members
7
MORAN D
Level 2: 9 members
9
ROGGERO J
Level 2: 11 members
11
GIUBILATO G
Level 2: 8 members
8
BANHAM G
Level 2: 6 members
6
NIEL L
Level 2: 6 members
6
FINK E
Level 2: 10 members
10
GIUBIATO G
Level 2: 8

### Label propagation

In [171]:
cg_label = community.label_propagation_communities(G)

In [172]:
conmunity_stats(cg_label,1)

Level 1: 36 members
36
OVERGAARD S
DAHLSTROM D
SCHALOW F
BERTORELLO A
ENGELLAND C
ZOLLER D
ANDERSEN N
KINKAID J
WALSH P
CROWE B
BURCH M
O R
TRATTER A
ALTOBRANDO A
WRATHALL M
ALTMAN M
FAGENBLAT M
MCMANUS D
TANZER M
LANGBEHN C
DE L
BROGAN M
FERRER R
FLAMARIQUE L
RADINKOVIC Z
ADRIAN J
AVERCHI M
STUNKEL K
MORAN D
CALABRESSE C
O L
SERON D
BROWNLEE T
FERENCZ F
SCULT A
DOYON M
Level 1: 101 members
101
ELDRIDGE P
BLECHA I
GRUNER S
FORSTER W
TERZI P
MASI F
ARGUELLES F
HART J
BRUZINA R
FEYAERTS J
GHITTI J
HOGENOVA A
LOZAR J
CIBULKA J
GIUBIATO G
AGGLETON S
BJORK U
LIBERATI N
IKEDA Y
CHERNAVIN G
GRONDIN V
HART K
IJSSELING S
GUERRERO J
LEASK I
DETISTOVA A
JACOBS H
ALLOA E
HOMAN C
WEBER J
GIRARDI L
HUSSERL E
ASCARATE L
CORA G
BESOLI S
BECKMANN Z
FINK E
JOHNSON F
BARKER J
LORIES D
CRISTIN R
KINGWELL M
ALDEA A
GUILLEN G
GRONDIN J
DE P
INVERSO H
CAMILLERI S
HEINAMAA S
LEE E
BERTOLINI S
COPILAS E
GARCIA E
GROS A
GIUBILATO G
DAVIDSON S
DECLERCK G
GHANOTAKIS G
SHESTOVA E
VECINO M
LOSONCZ M
ALVIN J
CAVALLA

### Bipartition

In [173]:
cg_kg = community.kernighan_lin_bisection(G)

In [174]:
conmunity_stats(cg_kg,1)

Level 1: 89 members
89
ELDRIDGE P
BLECHA I
GRUNER S
FIELD J
FORSTER W
ARGUELLES F
GONNELLA S
FEYAERTS J
GHITTI J
HOGENOVA A
LOZAR J
CIBULKA J
BJORK U
AGGLETON S
LIBERATI N
IKEDA Y
HART K
GRONDIN V
CHERNAVIN G
IJSSELING S
GUERRERO J
LEASK I
DETISTOVA A
JACOBS H
ALLOA E
SHEEHAN T
GIRARDI L
HUSSERL E
ASCARATE L
CORA G
BESOLI S
BECKMANN Z
FINK E
JOHNSON F
BARKER J
LORIES D
CRISTIN R
KINGWELL M
GUILLEN G
CAPOBIANCO R
ALDEA A
DE P
INVERSO H
HEINAMAA S
CAMILLERI S
BERTOLINI S
COPILAS E
GARCIA E
GROS A
GIUBILATO G
DAVIDSON S
CROWE B
DECLERCK G
GHANOTAKIS G
LOSONCZ M
CAVALLARO M
LIEBSCH B
JONKUS D
HIMANKA J
CAI W
KENNEDY T
COSTELLOE T
GARRIDO J
BANHAM G
KOCHLER H
LAWLOR L
HENRY M
CABRERA C
BRENNAN E
DE N
ARANDATORRES C
CHRISTENSEN J
DE W
LELAND D
HOPKINS B
CROWELL S
HOBBS D
CERBONE D
BOROBIA J
ELLISTON F
LAHBIB O
JANER D
LUFT S
LOZANO A
BADGER L
DASTUR F
DEPRAZ N
KISIEL T
MARBACH E
Level 1: 90 members
90
DAHLSTROM D
ZOLLER D
HOLMES R
ANDERSEN N
KINKAID J
TERZI P
MASI F
ROGGERO J
HART J
THEODORO

### Greedy

In [61]:
cg_greedy = community.greedy_modularity_communities(G)

In [62]:
conmunity_stats(cg_greedy,1)

Level 1 58
Level 1 43
Level 1 39
Level 1 32
Level 1 7


### Fluid

In [175]:
cg_fluid = community.asyn_fluidc(G,5)

In [176]:
conmunity_stats(cg_fluid,1)

Level 1: 1 members
1
LEE N
Level 1: 38 members
38
HOLMES R
BRISART R
SOBERANO R
INVERSO H
IJSSELING S
HOPKINS B
MOONEY T
LEASK I
KRISTJANSSON K
HUGHES D
CAIRNS D
NEMETH T
BEGOUT B
HAUSER K
BRUDZINSKA J
EDIE J
ROPERO N
PINTOS P
RABANAQUE L
MICHEL B
BOJANIC P
ZIPPEL N
FLYNN T
HUSSERL E
SOKOLOWSKI R
SEPP H
DASTUR F
CORA G
JARAN F
BESOLI S
DONOHOE J
MORAN D
NI L
KWAN T
EMBREE L
GIUBIATO G
BEHNKE E
DOYON M
Level 1: 65 members
65
BARBER M
BARKER J
JOHNSON F
HENRY M
LORIES D
ELDRIDGE P
BRENNAN E
AGGLETON S
CRISTIN R
DE N
BJORK U
GUILLEN G
LIBERATI N
BLECHA I
ALDEA A
ARANDATORRES C
CHRISTENSEN J
KINGWELL M
DE P
CAMILLERI S
DE W
GRUNER S
LELAND D
HEINAMAA S
HART K
FORSTER W
COPILAS E
GARCIA E
GUERRERO J
ARGUELLES F
CIBULKA J
DETISTOVA A
CROWELL S
DAVIDSON S
DECLERCK G
GHANOTAKIS G
ALLOA E
MARBACH E
CERBONE D
LOSONCZ M
BOROBIA J
ELLISTON F
GHITTI J
JANER D
LAHBIB O
CAVALLARO M
LOZANO A
LIEBSCH B
GIRARDI L
HOGENOVA A
LUFT S
ASCARATE L
JONKUS D
CAI W
BADGER L
LOZAR J
COSTELLOE T
BECKMANN Z
KISIEL 

### LFR 
Failed

In [85]:
# n = 250
# tau1 = 3
# tau2 = 1.5
# mu = 0.1
# LFR = community.LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10)
# cg_LFR = {frozenset(LFR.nodes[v]['community']) for v in G}

## Draw graph

In [None]:
# nx.draw(G, node_size = 10, with_labels=True, font_size = 12)