# Analysis of Structural Properties

In [3]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import nxviz as nv
from collections import Counter

%matplotlib inline

## 1. Node degree, network average degree, degree distribution

In [9]:
# Read the Facebook edge list from disk into graph G
# (read_edgelist is called with its default settings).
G = nx.read_edgelist("../datafiles/social/facebook/fb.txt")

In [10]:
# degree sequence of G: one entry per node, node labels are discarded
degrees = [k for _, k in G.degree()]

In [11]:
# Summary statistics of the degree sequence:
#   kmin - minimum degree
#   kmax - maximum degree
#   kavg - average degree of the network
kmin, kmax, kavg = np.min(degrees), np.max(degrees), np.mean(degrees)

**Degree distribution**: helps us understand connectivity trends in networks and how edges are distributed among nodes (does everyone have a similar number of connections, or are there hubs, i.e. nodes with a significantly higher number of connections?)

In [12]:
def degree_distr(net):
    """Return the degree histogram of a network.

    Parameters
    ----------
    net : graph-like
        Object whose ``degree()`` call yields ``(node, degree)`` pairs
        (anything ``dict()`` accepts).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(k, 2)``. Each row is ``(degree, number of nodes
        with that degree)``; rows are sorted by increasing degree. A network
        without nodes yields an empty array of shape ``(0, 2)``.
    """
    degrees = dict(net.degree())
    hist = sorted(Counter(degrees.values()).items(), key=lambda pair: pair[0])
    if not hist:
        # np.array([]) would be 1-D (shape (0,)); keep the two-column layout
        # so callers can always slice hist[:, 0] and hist[:, 1].
        return np.empty((0, 2), dtype=int)
    return np.array(hist)

## 2. Paths on Networks: average path length, shortest path, longest path

In [20]:
# NOTE: this rebinds G to a new, empty graph, replacing the graph that was
# read from the edge-list file earlier; every later cell that uses G operates
# on this two-node toy graph.
G=nx.Graph()
# add a single edge Mary-Steven carrying the edge attribute weight=5
G.add_edge('Mary','Steven', weight=5)

In [21]:
# average shortest-path length in the graph; weight=None ignores the 'weight'
# edge attribute, so every edge counts as a single hop
nx.average_shortest_path_length(G, weight=None)

1.0

## 3. Clustering coefficient, triangles

In [22]:
# number of triangles each node takes part in (dictionary keyed by node)
nx.triangles(G)

{'Mary': 0, 'Steven': 0}

In [23]:
# clustering coefficient of a single node (here 'Mary')
nx.clustering(G, 'Mary')

0

In [24]:
# clustering coefficient of every node (returns a dictionary keyed by node)
nx.clustering(G)

{'Mary': 0, 'Steven': 0}

In [25]:
# clustering coefficient of the network: mean of the per-node clustering
# coefficients (nodes with coefficient 0 are included in the mean)
avg_clust = nx.average_clustering(G)
print(avg_clust)

0.0


## 4. Centrality measures

In [26]:
# degree centrality of every node (dictionary keyed by node)
nx.degree_centrality(G)

{'Mary': 1.0, 'Steven': 1.0}

In [27]:
# betweenness centrality of every node (dictionary keyed by node)
nx.betweenness_centrality(G)

{'Mary': 0.0, 'Steven': 0.0}

In [28]:
# closeness centrality of every node (dictionary keyed by node)
nx.closeness_centrality(G)

{'Mary': 1.0, 'Steven': 1.0}

In [29]:
# eigenvector centrality of every node (dictionary keyed by node)
nx.eigenvector_centrality(G)

{'Mary': 0.7071067811865476, 'Steven': 0.7071067811865476}

## 5. Components

In [30]:
# checks whether the network is connected (returns a boolean)
nx.is_connected(G)

True

In [31]:
# number of connected components in the network
nx.number_connected_components(G)

1

In [32]:
# set of nodes that lie in the same connected component as 'Mary'
nx.node_connected_component(G, 'Mary')

{'Mary', 'Steven'}

## 6. Assortativity

* Pearson correlation coefficient in the range [-1, 1]
* social networks are highly assortative (homophily): high degree nodes connect to other high degree nodes
* technological networks are disassortative: high degree nodes connect to low degree nodes

Assortativity computed based on:
* degree

In [34]:
# Barabasi-Albert (scale-free) network with 50 nodes; each new node attaches
# with 3 edges. The fixed seed makes the generated graph, and every statistic
# computed from it, reproducible across kernel restarts.
ba = nx.barabasi_albert_graph(50, 3, seed=42)

In [35]:
# degree assortativity coefficient of the Barabasi-Albert network
nx.degree_assortativity_coefficient(ba)

-0.14488106841772236