In [17]:
from numpy  import *
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from scipy.stats import pearsonr
import math

# Hamsterster

In [2]:
# If the data file has only two columns, use this:
G= G=nx.read_edgelist("data/hamsterster.txt", nodetype=int)

In [3]:
G = G.to_undirected()
G.remove_edges_from(nx.selfloop_edges(G))
Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
G = G.subgraph(Gcc[0])
G = nx.convert_node_labels_to_integers(G, first_label=0)
N = len(G)
M = G.number_of_edges()
print('Number of nodes:', N)
print('Number of edges:', M)

Number of nodes: 1788
Number of edges: 12476


In [4]:
if nx.is_connected(G) == True:
    l = nx.average_shortest_path_length(G)
    print("Average shortest path length:", "%3.4f"%l)
else:
    print("The graph has more than one connected component")

d = nx.diameter(G)
print('Network diameter:', d)

Average shortest path length: 3.4526
Network diameter: 14


# USairport500

In [5]:
# If the data file has only two columns, use this:
G= G=nx.read_edgelist("data/USairport500.txt", nodetype=int)

In [6]:
# If the data file has only two columns, use this:
G= G=nx.read_edgelist("data/USairport500.txt", nodetype=int)
G = G.to_undirected()
G.remove_edges_from(nx.selfloop_edges(G))
Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
G = G.subgraph(Gcc[0])
G = nx.convert_node_labels_to_integers(G, first_label=0)
N = len(G)
M = G.number_of_edges()
print('Number of nodes:', N)
print('Number of edges:', M)

Number of nodes: 500
Number of edges: 2980


In [7]:
if nx.is_connected(G) == True:
    l = nx.average_shortest_path_length(G)
    print("Average shortest path length:", "%3.4f"%l)
else:
    print("The graph has more than one connected component")

d = nx.diameter(G)
print('Network diameter:', d)

Average shortest path length: 2.9910
Network diameter: 7


In [8]:
E = nx.global_efficiency(G)
print('Network efficiency', E)
leff = nx.local_efficiency(G)
print('The average local efficiency of the network:', leff)

Network efficiency 0.3713796736325798
The average local efficiency of the network: 0.6866344366650847


# Advogato

In [9]:
# If the data file has only two columns, use this:
G= G=nx.read_edgelist("data/Advogato.txt", nodetype=int)
G = G.to_undirected()
G.remove_edges_from(nx.selfloop_edges(G))
Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
G = G.subgraph(Gcc[0])
G = nx.convert_node_labels_to_integers(G, first_label=0)
r=nx.degree_assortativity_coefficient(G)
print("Assortativity = ","%3.4f"%r)

Assortativity =  -0.0957


# USairport500


In [35]:
# If the data file has only two columns, use this:
G= G=nx.read_edgelist("data/USairport500.txt", nodetype=int)
G = G.to_undirected()
G.remove_edges_from(nx.selfloop_edges(G))
Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
G = G.subgraph(Gcc[0])
G = nx.convert_node_labels_to_integers(G, first_label=0)


In [36]:
shortest_path_lengths = dict(nx.all_pairs_shortest_path_length(G))
largest_component_nodes = list(G.nodes)

shortest_path_lengths_largest_component = {
    node: {k: v for k, v in lengths.items() if k in largest_component_nodes}
    for node, lengths in shortest_path_lengths.items()
}
path_lengths = [
    length
    for lengths in shortest_path_lengths_largest_component.values()
    for length in lengths.values()
]
length_counts = np.bincount(path_lengths)


total_paths = sum(length_counts)
path_probabilities = length_counts / total_paths

# Calcular a entropia de Shannon
shannon_entropy = -np.sum(path_probabilities * np.log2(path_probabilities))

print("Entropia de Shannon dos comprimentos dos menores caminhos:", shannon_entropy)

Entropia de Shannon dos comprimentos dos menores caminhos: 1.9007137451744507


# word_adjacencies

In [53]:
# Carrega a rede de palavras
G = nx.read_edgelist("data/word_adjacencies.txt")

# Encontra o maior componente conectado
largest_cc = max(nx.connected_components(G), key=len)
G = G.subgraph(largest_cc)

# Calcula o grau de cada nó
vk = dict(G.degree())

# Calcula os graus dos nós conectados por uma aresta
ki = []
kj = []
for edge in G.edges():
    ki.append(vk[edge[0]])
    kj.append(vk[edge[1]])

# Calcula a correlação de Pearson entre os graus dos nós
corr, _ = pearsonr(ki, kj)
print('Coeficiente de correlação de Pearson entre graus dos nós:', corr)

# Calcula o grau médio dos vizinhos para cada nó
knn = []
for i in G.nodes():
    aux = nx.average_neighbor_degree(G, nodes=[i])
    knn.append(float(aux[i]))
knn = np.array(knn)
print("Grau médio dos vizinhos da rede:", "%3.2f" % np.mean(knn))

# Calcula a correlação entre k e knn(k)
knnk = []
ks = []
for k in np.arange(np.min(list(vk.values())), np.max(list(vk.values())) + 1):
    aux = [vk[node] == k for node in G.nodes()]
    if len(knn[aux]) > 0:
        av_knn = np.mean(knn[aux])
        knnk.append(av_knn)
        ks.append(k)

# Calcula a correlação de Pearson entre k e knn(k)
rho = np.corrcoef(ks, knnk)[0, 1]
print('Coeficiente de correlação de Pearson entre k e knn(k):', rho)

Coeficiente de correlação de Pearson entre graus dos nós: 0.006684810197378903
Grau médio dos vizinhos da rede: 14.76
Coeficiente de correlação de Pearson entre k e knn(k): -0.710832214935246
