# Python社会网络分析初步

社会网络通常被抽象化为数学上的图，一个**图（graph）**由一组**节点（node，也称顶点 vertex）**和一组**边（edge）**组成。每条边表示两个节点之间的连接。如果两个节点通过一条边连接，则它们互为**邻居（neighbor）**，一个节点的**度（degree）**是其邻居的数量。

NetworkX是研究图和网络的一个常用模块，包含了大量图论算法，在分析复杂网络方面非常高效。先安装该模块：

In [ ]:
!pip install networkx

### 示例：寻找最短路径

In [ ]:
import networkx as nx

# Small undirected example graph on nodes a-d; each triple is
# (endpoint, endpoint, weight) and the value is stored under the
# 'weight' edge attribute.
g = nx.Graph()
g.add_weighted_edges_from([
    ('a', 'b', 0.1),
    ('b', 'c', 1.5),
    ('a', 'c', 1.0),
    ('c', 'd', 2.2),
])

In [ ]:
nx.draw_networkx(g)

In [ ]:
# No weight given: every edge counts as one hop, so the two-hop route b-c-d is shortest.
nx.shortest_path(g, 'b', 'd')

In [ ]:
# Using the 'weight' attribute as edge cost: b-a-c-d (0.1 + 1.0 + 2.2 = 3.3)
# is cheaper than b-c-d (1.5 + 2.2 = 3.7), even though it has more hops.
nx.shortest_path(g, 'b', 'd', weight='weight')

### 加入节点

In [ ]:
g = nx.Graph()

In [ ]:
g.add_node(1)

In [ ]:
g.add_nodes_from([2, 3])

In [ ]:
nx.draw(g)

### 生成图

In [ ]:
h = nx.path_graph(5)
h.nodes

In [ ]:
nx.draw(h)

### 从其他图添加节点（不保留边）

In [ ]:
g.add_nodes_from(h)

In [ ]:
nx.draw(g)

### 移除节点

In [ ]:
g.remove_nodes_from([0,1])

In [ ]:
nx.draw(g)

### 节点可以是各种对象

In [ ]:
import math
g = nx.Graph()
g.add_node('string')
g.add_node(math.cos)  # cosine function
# Any hashable object can be a node, including a file object. Open it in a
# context manager so the handle is closed rather than leaked for the rest of
# the session; the (closed) file object remains a node of the graph.
with open('temp.txt', 'w') as f:  # file handle
    g.add_node(f)

In [ ]:
g.nodes()

### 添加边的方法

In [ ]:
g = nx.Graph()
g.add_edge(1, 2)
e = (2, 3)
g.add_edge(*e)

In [ ]:
nx.draw(g)

In [ ]:
g.add_edges_from([(1, 2), (1, 3)])

In [ ]:
g.add_edges_from(h.edges())

In [ ]:
nx.draw(g)

In [ ]:
# Edge list for a path over consecutive integers: (0, 1), (1, 2), ..., (9, 10).
el = list(zip(range(10), range(1, 11)))

In [ ]:
g = nx.Graph()
g.add_edges_from(el)

In [ ]:
nx.draw(g)

In [ ]:
g.remove_edge(1,2)

In [ ]:
nx.draw(g)

### 获取图的属性

In [ ]:
g.number_of_nodes()

In [ ]:
g.number_of_edges()

In [ ]:
g.nodes()

In [ ]:
g.edges()

### 获取某节点的邻居

In [ ]:
list(g.neighbors(3))

### 获取某节点的度（邻居数量）

In [ ]:
# Degree of one specific node, matching the section heading and the
# neighbors(3) example above. Calling g.degree() with no argument instead
# returns a view of (node, degree) pairs for every node.
g.degree(3)

### 添加节点或边时，可以任意加入自定义的属性

In [ ]:
# Arbitrary keyword arguments are stored as attributes of the node.
g.add_node(1, time='10am', date='924', year='1998')
# `Graph.node` was removed in NetworkX 2.4; `Graph.nodes[n]` is the supported
# way to reach a node's attribute dict.
g.nodes[1]['date']

In [ ]:
# Full attribute dict of node 1 (`Graph.node` was removed in NetworkX 2.4; use `Graph.nodes`).
g.nodes[1]

In [ ]:
g.add_edge(1, 2, weight=4.0)

In [ ]:
g[1][2]['weight']

In [ ]:
g[1][2]

### 节点和边的迭代

In [ ]:
# The degree view iterates as (node, degree) pairs, in node order.
for node, deg in g.degree():
    print(node, deg)

In [ ]:
g.edges(data=True)

In [ ]:
g.add_edge(1, 3, weight=2.5)
g.add_edge(1, 2, weight=1.5)
# data=True yields (u, v, attribute-dict) triples; .get() prints None for
# any edge that carries no 'weight' attribute instead of raising KeyError.
for n1, n2, attr in g.edges(data=True):
    print(n1, n2, attr.get('weight'))

## 有向图

In [ ]:
dg = nx.DiGraph()
dg.add_weighted_edges_from([(1, 4, 0.5), (3, 1, 0.75)])
nx.draw_networkx(dg)

In [ ]:
# Weighted out-degree: sum of weights on edges leaving node 1 (only 1->4, so 0.5).
dg.out_degree(1,weight='weight')

In [ ]:
# Weighted total degree: incoming plus outgoing weights (3->1 is 0.75, 1->4 is 0.5, so 1.25).
dg.degree(1, weight='weight')

In [ ]:
list(dg.successors(1))

In [ ]:
list(dg.predecessors(1))

### 有向图转换为无向图

In [ ]:
dg = nx.DiGraph()
dg.add_edges_from([(1,2),(2,3),(3,2)])
nx.draw(dg)

In [ ]:
nx.draw(dg.to_undirected())

### 生成经典的图模型

In [ ]:
nx.draw(nx.complete_graph(10))

In [ ]:
nx.draw(nx.complete_bipartite_graph(3, 5))

In [ ]:
nx.draw(nx.barbell_graph(10, 10))

In [ ]:
nx.draw(nx.lollipop_graph(10, 20))

#### Erdős–Rényi随机图

In [ ]:
nx.draw(nx.erdos_renyi_graph(100, 0.15))

#### Barabási–Albert无标度网络

In [ ]:
g = nx.barabasi_albert_graph(1000, 5)

In [ ]:
nx.draw(g)

In [ ]:
import seaborn as sns
# Histogram of node degrees. `sns.distplot` is deprecated since seaborn 0.11
# and removed in later releases; `sns.histplot` is its histogram replacement.
sns.histplot([d for _, d in g.degree()], kde=False)

#### Watts–Strogatz小世界图

In [ ]:
g = nx.watts_strogatz_graph(100, 8, 0.1)

In [ ]:
nx.draw(g)

In [ ]:
nx.draw_circular(g)

## 分析Twitter数据集

### 读取文件，构造关注网络（有向图）

[https://snap.stanford.edu/data/ego-Twitter.html](https://snap.stanford.edu/data/ego-Twitter.html)

In [ ]:
dG = nx.read_edgelist('twitter_combined.txt',create_using=nx.DiGraph)

In [ ]:
dG.number_of_edges()

### 构造互粉网络（无向图）

In [ ]:
# A mutual-follow edge requires both u->v and v->u in the follow graph.
# Plain to_undirected() would link two users whenever either one follows the
# other; reciprocal=True keeps only edges present in both directions.
uG = dG.to_undirected(reciprocal=True)

In [ ]:
uG.number_of_nodes()

In [ ]:
uG.number_of_edges()

### 互粉网络的度分布

In [ ]:
degrees = [d for n,d in uG.degree() if d > 100]

In [ ]:
# `sns.distplot` is deprecated since seaborn 0.11 and removed in later releases; use `sns.histplot`.
sns.histplot(degrees, bins=200, kde=False)

### 平均度

In [ ]:
import numpy as np
# Average over every node in the graph. The `degrees` list built for the
# histogram only keeps nodes with degree > 100, so its mean would greatly
# overstate the network's average degree.
np.mean([d for _, d in uG.degree()])

### 计算强连通分量个数

In [ ]:
nx.number_strongly_connected_components(dG)

### 计算弱连通分量个数

In [ ]:
nx.number_weakly_connected_components(dG)

### 计算集聚系数

In [ ]:
# Pick one node uniformly at random. `Graph.node` was removed in NetworkX 2.4,
# and indexing a plain list returns the original node object rather than a
# NumPy-converted copy of it.
candidates = list(uG.nodes)
node = candidates[np.random.randint(len(candidates))]
# Local clustering coefficient of that single node.
nx.clustering(uG, node)

In [ ]:
nx.average_clustering(uG)

### 计算节点中心性

In [ ]:
# `nx.connected_component_subgraphs` was removed in NetworkX 2.4. Build the
# subgraph explicitly, and pick the largest connected component rather than
# whichever one happens to be generated first. `.copy()` makes cG an
# independent graph instead of a read-only view of uG.
cG = uG.subgraph(max(nx.connected_components(uG), key=len)).copy()

In [ ]:
cG.order()

#### 中介中心性

In [ ]:
nx.betweenness_centrality(cG)

#### 接近中心性

In [ ]:
nx.closeness_centrality(cG)

#### 特征向量中心性

In [ ]:
nx.eigenvector_centrality_numpy(cG)

### 找到中心性最大的节点

In [ ]:
# Rank the nodes of cG by eigenvector centrality, highest score first.
eic = nx.eigenvector_centrality_numpy(cG)
s_nodes = sorted(eic.items(),key=lambda x:x[1],reverse=True)  # list of (node, score) pairs

In [ ]:
s_nodes[:10]

### 用gephi进行可视化

[https://gephi.org/](https://gephi.org/)

In [ ]:
# Export in GEXF format for Gephi. The extension must be `.gexf` (the
# original `.gefx` was a typo) so that Gephi recognises the file type.
nx.write_gexf(uG, 'twitter.gexf')