# Graph basics and centrality measures with NetworkX

In [22]:
import numpy as np
import pandas as pd
import networkx as nx
import scipy as sp

In [25]:
# A graph is described by its vertices plus the direction and weight of each
# edge. Every row of this edge list is one (source, target, weight) record.
# Each connection is listed in both directions (e.g. 1->2 and 2->1), so the
# same table can be loaded either as an undirected or as a directed graph.
edges = pd.DataFrame(
    {
        'sources': [1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5],
        'targets': [2, 4, 5, 3, 1, 2, 5, 1, 5, 1, 3, 4],
        'weights': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    }
)

# Undirected graph: the two rows describing one connection collapse into a
# single edge. The weight column is attached to each edge under the key
# 'weights'.
G = nx.from_pandas_edgelist(
    edges,
    source='sources',
    target='targets',
    edge_attr='weights',
)

# Directed alternative: swap in the call below to keep edge direction.
# Note that nx.connected_components (used further down) is only defined for
# undirected graphs; a DiGraph needs nx.strongly_connected_components or
# nx.weakly_connected_components instead.
# G = nx.from_pandas_edgelist(edges,source='sources',target='targets',edge_attr='weights',create_using=nx.DiGraph)

# Adjacency matrix.
# networkx looks up the edge attribute 'weight' by default, but this graph
# stores it as 'weights'. Name the key explicitly; otherwise every edge is
# reported as 1 no matter what weight it carries.
adj_df = nx.to_pandas_adjacency(G, weight='weights')
print(adj_df)

# Degree
# undirected graph: the number of edges incident to a node
# directed graph: has in-degree and out-degree; the total degree is their sum
print(nx.degree(G))

# Subgraph: a graph formed from a subset of the nodes and edges.

# Connected vs. disconnected graph: a graph is connected when every pair of
# nodes is joined by some path.

# Connected components

# Undirected graph: a connected component is a maximal connected subgraph,
# i.e. one that cannot be enlarged without losing connectivity.
# A connected graph has exactly one connected component.
print(list(nx.connected_components(G)))

# Directed graph connected components
# strong: every pair of nodes is mutually reachable following edge directions
# weak: every pair of nodes is reachable once edge directions are ignored

     1    2    4    5    3
1  0.0  1.0  1.0  1.0  0.0
2  1.0  0.0  0.0  0.0  1.0
4  1.0  0.0  0.0  1.0  0.0
5  1.0  0.0  1.0  0.0  1.0
3  0.0  1.0  0.0  1.0  0.0
[(1, 3), (2, 2), (4, 2), (5, 3), (3, 2)]
[{1, 2, 3, 4, 5}]


In [29]:
# Diameter: the greatest shortest-path distance between any pair of nodes.
# It is only defined for a connected graph.
print(nx.diameter(G))

# Degree centrality: a node's degree divided by N - 1, where N is the number
# of nodes in the graph.
print(nx.degree_centrality(G))

# Eigenvector centrality: the eigenvector of the adjacency matrix that belongs
# to its largest eigenvalue.
# In A x = λ x the matrix A acts on x as a linear transformation that merely
# rescales it: λ is the stretch factor and x is the direction being stretched.
# np.linalg.eig returns the eigenvectors as the columns of `eigvecs`, each one
# determined only up to sign, so compare magnitudes against the networkx
# result printed at the end of this cell.
matrix = adj_df.to_numpy()
eigvals, eigvecs = np.linalg.eig(matrix)
print("特征值：", eigvals)  # label reads "Eigenvalues:"
print("特征向量：", eigvecs)  # label reads "Eigenvectors:"

print(nx.eigenvector_centrality(G))

2
{1: 0.75, 2: 0.5, 4: 0.5, 5: 0.75, 3: 0.5}
特征值： [ 2.48119430e+00 -2.00000000e+00 -1.17008649e+00 -1.92682876e-16
  6.88892183e-01]
特征向量： [[-5.29899099e-01 -5.00000000e-01 -4.32486630e-01 -5.00000000e-01
  -1.79338395e-01]
 [-3.57751240e-01  5.00000000e-01  1.99294651e-01 -5.00000000e-01
   5.76450945e-01]
 [-4.27132287e-01 -6.95843372e-16  7.39238740e-01 -1.97119045e-16
  -5.20657368e-01]
 [-5.29899099e-01  5.00000000e-01 -4.32486630e-01  5.00000000e-01
  -1.79338395e-01]
 [-3.57751240e-01 -5.00000000e-01  1.99294651e-01  5.00000000e-01
   5.76450945e-01]]
{1: 0.5298988890761731, 2: 0.35775191431708964, 4: 0.4271316779596084, 5: 0.5298988890761731, 3: 0.35775191431708964}


In [30]:
# Node-importance measures, evaluated and printed in the order listed.
# Every measure is called with only the graph, i.e. with networkx defaults.
centrality_measures = (
    # Betweenness: the proportion of all-pairs shortest paths that pass
    # through a node.
    nx.betweenness_centrality,
    # Closeness: (N - 1) divided by the sum of shortest-path distances from a
    # node to every other node, i.e. the reciprocal of its mean distance.
    nx.closeness_centrality,
    # PageRank: the probability of arriving at each node by randomly
    # following edges.
    nx.pagerank,
    # HITS (Hubs and Authorities): returns a (hubs, authorities) pair of
    # score dicts. Hubs point to many authorities, and authorities are
    # pointed to by many hubs.
    nx.hits,
)
for measure in centrality_measures:
    print(measure(G))


{1: 0.25, 2: 0.08333333333333333, 4: 0.0, 5: 0.25, 3: 0.08333333333333333}
{1: 0.8, 2: 0.6666666666666666, 4: 0.6666666666666666, 5: 0.8, 3: 0.6666666666666666}
{1: 0.24369622576678, 2: 0.17225629712058643, 4: 0.16809495422526696, 5: 0.24369622576678, 3: 0.1722562971205864}
({1: 0.24059715204600782, 2: 0.16243456471667697, 4: 0.19393656647463042, 5: 0.24059715204600782, 3: 0.16243456471667694}, {1: 0.2405971520460078, 2: 0.16243456471667694, 4: 0.19393656647463048, 5: 0.2405971520460078, 3: 0.162434564716677})
