In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import networkx as nx
from scipy.stats import pearsonr

### Questão 1

In [2]:
hamsterster = nx.read_edgelist("hamsterster.txt", nodetype=int)
hamsterster = hamsterster.to_undirected()
hamsterster.remove_edges_from(nx.selfloop_edges(hamsterster))
Gcc = sorted(nx.connected_components(hamsterster), key=len, reverse=True)
hamsterster = hamsterster.subgraph(Gcc[0])
hamsterster = nx.convert_node_labels_to_integers(hamsterster, first_label=0)

In [3]:
if nx.is_connected(hamsterster) == True:
    l = nx.average_shortest_path_length(hamsterster)
    print("Average shortest path length:", "%3.4f"%l)
else:
    print("The graph has more than one connected component")

Average shortest path length: 3.4526


In [4]:
d = nx.diameter(hamsterster)
print('Network diameter:', d)

Network diameter: 14


### Questão 2

In [5]:
USairport = nx.read_edgelist("USairport500.txt", nodetype = int)

In [6]:
USairport = USairport.to_undirected()
USairport.remove_edges_from(nx.selfloop_edges(USairport))
Gcc = sorted(nx.connected_components(USairport), key = len, reverse = True)
USairport = USairport.subgraph(Gcc[0])
USairport = nx.convert_node_labels_to_integers(USairport, first_label = 0)

In [7]:
if nx.is_connected(USairport) == True:
    l = nx.average_shortest_path_length(USairport)
    print("Average shortest path length:", "%3.4f"%l)
else:
    print("The graph has more than one connected component")

Average shortest path length: 2.9910


In [8]:
N = len(USairport)
if nx.is_connected(USairport) == True:
    D = np.zeros(shape=(N,N)) # D is the matrix of distances
    vl = []
    for i in np.arange(0,N):
        for j in np.arange(i+1, N):
            if(i != j):
                aux = nx.shortest_path(USairport,i,j)
                dij = len(aux)-1
                D[i][j] = dij
                D[j][i] = dij
                vl.append(dij)

In [9]:
print("Variancia: ", np.sqrt(np.std(D)))

Variancia:  0.9555674829910944


### Questão 3

In [10]:
USairport = nx.read_edgelist("USairport500.txt", nodetype = int)
Gcc = sorted(nx.connected_components(USairport), key = len, reverse = True)
USairport = USairport.subgraph(Gcc[0])
USairport = nx.convert_node_labels_to_integers(USairport, first_label = 0)

In [11]:
def degree_distribution(vk):
    maxk = np.max(vk)
    mink = np.min(vk)
    kvalues= np.arange(0,maxk+1) 
    Pk = np.zeros(maxk+1) 
    for k in vk:
        Pk[k] = Pk[k] + 1
    Pk = Pk/sum(Pk) 
    return kvalues,Pk

In [12]:
def shannon_entropy(G):
    k,Pk = degree_distribution(G)
    H = 0
    for p in Pk:
        if(p > 0):
            H = H - p*math.log(p, 2)
    return H

In [13]:
shannon = shannon_entropy(vl)
print("Entropia de Shannon: ",shannon)

Entropia de Shannon:  1.883667007854659


### Questão 4

In [14]:
advogato = nx.read_edgelist("advogato.txt", nodetype = int)

In [15]:
Gcc = sorted(nx.connected_components(advogato), key = len, reverse = True)
advogato = advogato.subgraph(Gcc[0])
advogato = nx.convert_node_labels_to_integers(advogato, first_label = 0)

In [16]:
r=nx.degree_assortativity_coefficient(advogato)
print("Assortativity = ","%3.4f"%r)

Assortativity =  -0.0846


### Questão 5

In [17]:
word = nx.read_edgelist("word_adjacencies.txt", nodetype = int)

In [18]:
knn = []
for i in word.nodes():
    aux =  nx.average_neighbor_degree(word, nodes = [i])
    knn.append(float(aux[i]))
knn = np.array(knn)
print("Average degree of the neighborhood of the network:", "%3.2f"%np.mean(knn))

Average degree of the neighborhood of the network: 14.76


In [19]:
vk = dict(word.degree())
vk = list(vk.values())

In [20]:
knnk = list()
ks = list()
for k in np.arange(np.min(vk), np.max(vk)):
    aux = vk == k
    if(len(knn[aux]) > 0):
        av_knn = np.mean(knn[aux]) #average clustering among all the nodes with degree k
        knnk.append(av_knn)
        ks.append(k)

In [21]:
rho = np.corrcoef(ks, knnk)[0,1]
print('Pearson correlation coefficient:', rho)

Pearson correlation coefficient: -0.6753041480047248
