In [1]:
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule that widens the notebook's `.container` element to 95%.
display(HTML("<style>.container { width:95% !important; }</style>"))

In [3]:
import operator
import os

import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt



def save_highest_10(degree_lst, filename, out_dir='results'):
    """Write a list of (channel, degree) pairs to ``<out_dir>/<filename>.csv``.

    Parameters
    ----------
    degree_lst : sequence of 2-item sequences
        Rows to store; the first item goes to the ``channel`` column and the
        second to the ``degree`` column. The function saves whatever it is
        given -- selecting the top entries is the caller's job.
    filename : str
        Output file name without the ``.csv`` extension.
    out_dir : str, optional
        Directory the CSV is written to (default ``'results'``). It is
        created if it does not exist yet, so a fresh checkout does not fail.
    """
    # Create the output directory up front instead of failing inside to_csv.
    os.makedirs(out_dir, exist_ok=True)
    df = pd.DataFrame(degree_lst, columns=['channel', 'degree'])
    df.to_csv(os.path.join(out_dir, filename + '.csv'))


    
def global_indeces(graph_names_lst, npy_dir='../part_1/results/npy/'):
    """Compute global indices for a list of saved adjacency matrices.

    For every name in ``graph_names_lst`` the array ``<npy_dir>/<name>.npy``
    is loaded and converted to a directed graph. Two values are collected
    per graph:

    * the average clustering coefficient (``nx.average_clustering``);
    * the average shortest path length. When the graph is weakly connected
      it is computed on the whole graph; otherwise it is computed on the
      directed subgraph induced by each weakly connected component and the
      per-component values are averaged (unweighted mean).

    Parameters
    ----------
    graph_names_lst : iterable of str
        File names without the ``.npy`` extension.
    npy_dir : str, optional
        Directory containing the ``.npy`` files.

    Returns
    -------
    tuple of (list, list)
        ``(global_cc, avg_path)``, each with one entry per input name, in
        input order.
    """
    global_cc = []
    avg_path = []
    for g in graph_names_lst:
        adj = np.load(os.path.join(npy_dir, g + '.npy'))
        # from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is
        # the drop-in replacement for plain ndarray input.
        G = nx.from_numpy_array(adj, create_using=nx.DiGraph())

        # The clustering coefficient does not depend on connectivity.
        global_cc.append(nx.average_clustering(G))

        if nx.is_weakly_connected(G):
            avg_path.append(nx.average_shortest_path_length(G))
        else:
            # The weakly connected components of G have the same node sets
            # as the connected components of its undirected version, so no
            # second graph is needed. This also avoids
            # nx.connected_component_subgraphs, removed in NetworkX 2.4.
            per_component = [
                nx.average_shortest_path_length(G.subgraph(nodes))
                for nodes in nx.weakly_connected_components(G)
            ]
            avg_path.append(np.mean(per_component))

    return (global_cc, avg_path)



def global_plot(global_cc, avg_path, filename, densities=(5, 10, 20, 30, 50)):
    """Plot clustering coefficient and average path length against density.

    A new two-panel figure is created on every call. The original drew into
    the current pyplot figure, so a second call added its curves on top of
    the first call's and the saved image mixed both data sets.

    Parameters
    ----------
    global_cc : sequence of float
        Values for the left panel, one per entry of ``densities``.
    avg_path : sequence of float
        Values for the right panel, one per entry of ``densities``.
    filename : str
        Output name; the figure is saved to ``results/<filename>.png``.
    densities : sequence of numbers, optional
        X-axis values. Must have the same length as ``global_cc`` and
        ``avg_path``.
    """
    fig, (ax_cc, ax_path) = plt.subplots(1, 2)

    ax_cc.plot(densities, global_cc, 'o-')
    ax_cc.set_title('global_cc - PDC')

    ax_path.plot(densities, avg_path, 'o-')
    ax_path.set_title('avg_path - PDC')

    # The figure is left open on purpose so the notebook can still render
    # it inline after the cell finishes.
    fig.savefig('results/' + filename + '.png')
    

def topology(G, filename):
    """Save histograms of the degree, in-degree and out-degree of ``G``.

    The three histograms are drawn side by side in one 20x10 figure, which
    is written to ``results/<filename>.png``.

    Parameters
    ----------
    G : graph exposing ``degree``, ``in_degree`` and ``out_degree``
        Each attribute is iterated as (node, degree) pairs.
    filename : str
        Output file name without the ``.png`` extension.
    """
    # (attribute read from G, panel title), in left-to-right panel order.
    panels = (
        ('degree', 'Degree'),
        ('in_degree', 'In-Degree'),
        ('out_degree', 'Out-Degree'),
    )

    plt.figure(figsize=(20,10))
    for position, (attribute, heading) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        # Keep only the degree from every (node, degree) pair.
        degrees = [pair[1] for pair in list(getattr(G, attribute))]
        plt.hist(degrees)
        plt.xlabel('Degree')
        plt.ylabel('Frequency of nodes')
        plt.title(heading)

    plt.savefig('results/' + filename + '.png')

In [35]:
######## 2.1

### PDC

# Read data: load the saved adjacency matrices and build directed graphs.
# nx.from_numpy_array replaces nx.from_numpy_matrix, which was removed in
# NetworkX 3.0.
adj_eo_pdc = np.load('../part_1/results/npy/eo_pdc_20.npy')
G_eo_pdc = nx.from_numpy_array(adj_eo_pdc, create_using=nx.DiGraph())

adj_ec_pdc = np.load('../part_1/results/npy/ec_pdc_20.npy')
G_ec_pdc = nx.from_numpy_array(adj_ec_pdc, create_using=nx.DiGraph())

## GLOBAL INDICES

# Clustering Coefficient

CC_eo_pdc = nx.average_clustering(G_eo_pdc)
CC_ec_pdc = nx.average_clustering(G_ec_pdc)

# Average shortest path length
# NOTE(review): this call raises for a directed graph that is not weakly
# connected; these two graphs are assumed to be connected -- confirm.

avg_path_eo_pdc = nx.average_shortest_path_length(G_eo_pdc)
avg_path_ec_pdc = nx.average_shortest_path_length(G_ec_pdc)


## LOCAL INDICES
# Each list holds the ten (node, degree) pairs with the largest degree,
# in descending order, and is written to its own CSV file.

# degree
degree_10_eo_pdc = sorted(G_eo_pdc.degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(degree_10_eo_pdc, 'degree_eo_pdc')
degree_10_ec_pdc = sorted(G_ec_pdc.degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(degree_10_ec_pdc, 'degree_ec_pdc')

# in_degree
in_degree_10_eo_pdc = sorted(G_eo_pdc.in_degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(in_degree_10_eo_pdc, 'in_degree_eo_pdc')
in_degree_10_ec_pdc = sorted(G_ec_pdc.in_degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(in_degree_10_ec_pdc, 'in_degree_ec_pdc')

# out_degree
out_degree_10_eo_pdc = sorted(G_eo_pdc.out_degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(out_degree_10_eo_pdc, 'out_degree_eo_pdc')
out_degree_10_ec_pdc = sorted(G_ec_pdc.out_degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(out_degree_10_ec_pdc, 'out_degree_ec_pdc')



######## 2.2

######## 2.3

### DTF

# Read data: load the saved adjacency matrices and build directed graphs.
# nx.from_numpy_array replaces nx.from_numpy_matrix, which was removed in
# NetworkX 3.0.
adj_eo_dtf = np.load('../part_1/results/npy/eo_dtf_20.npy')
G_eo_dtf = nx.from_numpy_array(adj_eo_dtf, create_using=nx.DiGraph())

adj_ec_dtf = np.load('../part_1/results/npy/ec_dtf_20.npy')
G_ec_dtf = nx.from_numpy_array(adj_ec_dtf, create_using=nx.DiGraph())

## GLOBAL INDICES

# Clustering Coefficient

CC_eo_dtf = nx.average_clustering(G_eo_dtf)
CC_ec_dtf = nx.average_clustering(G_ec_dtf)

# Average shortest path length

avg_path_eo_dtf = nx.average_shortest_path_length(G_eo_dtf)
avg_path_ec_dtf = nx.average_shortest_path_length(G_ec_dtf)

# save
# Rows are the eo / ec graphs and columns are the pdc / dtf estimators.
# The original put [eo_pdc, ec_pdc] in one row under the column labels
# ['pdc', 'dtf'], so the ec_pdc value was stored in the 'dtf' column and the
# eo_dtf value in the 'pdc' column.
cc = pd.DataFrame([[CC_eo_pdc, CC_eo_dtf],
                   [CC_ec_pdc, CC_ec_dtf]],
                  index=['eo', 'ec'], columns=['pdc', 'dtf'])
avg_path = pd.DataFrame([[avg_path_eo_pdc, avg_path_eo_dtf],
                         [avg_path_ec_pdc, avg_path_ec_dtf]],
                        index=['eo', 'ec'], columns=['pdc', 'dtf'])

cc.to_csv('results/clustering_coefficient.csv')
avg_path.to_csv('results/average_shortest_path.csv')


######## 2.4

# Names of the saved PDC adjacency matrices, one per entry of the density
# list used by global_plot.
lst_eo = ['eo_pdc_05', 'eo_pdc_10', 'eo_pdc_20', 'eo_pdc_30', 'eo_pdc_50']
lst_ec = ['ec_pdc_05', 'ec_pdc_10', 'ec_pdc_20', 'ec_pdc_30', 'ec_pdc_50']

# global_indeces returns (clustering coefficients, average path lengths).
# Unpack a single call per list instead of running the whole computation
# twice just to index [0] and [1].
global_cc_eo, avg_path_eo = global_indeces(lst_eo)
global_cc_ec, avg_path_ec = global_indeces(lst_ec)

# save
global_plot(global_cc_ec, avg_path_ec, 'global_indices_ec_pdc')
global_plot(global_cc_eo, avg_path_eo, 'global_indices_eo_pdc')


######## 2.5

# Degree histograms for both PDC graphs. The 'ec' figure must be built from
# G_ec_pdc; the original passed G_eo_pdc twice, so both files showed the
# same graph.
topology(G_eo_pdc, 'topology_eo_pdc')
topology(G_ec_pdc, 'topology_ec_pdc')


######## 2.6

# Load the alternative ("alt_") PDC adjacency matrices and build directed
# graphs. nx.from_numpy_array replaces nx.from_numpy_matrix, which was
# removed in NetworkX 3.0.
adj_ec2 = np.load('../part_1/results/npy/alt_ec_pdc_20.npy')
G_ec2 = nx.from_numpy_array(adj_ec2, create_using=nx.DiGraph())
adj_eo2 = np.load('../part_1/results/npy/alt_eo_pdc_20.npy')
G_eo2 = nx.from_numpy_array(adj_eo2, create_using=nx.DiGraph())


## GLOBAL INDICES

CC_eo2 = nx.average_clustering(G_eo2)
CC_ec2 = nx.average_clustering(G_ec2)

avg_path_eo2 = nx.average_shortest_path_length(G_eo2)
avg_path_ec2 = nx.average_shortest_path_length(G_ec2)


# Side-by-side comparison with the section 2.1 PDC values: one column per
# graph, one row per index.
df = pd.DataFrame([[CC_eo_pdc, CC_eo2, CC_ec_pdc, CC_ec2],
                   [avg_path_eo_pdc, avg_path_eo2, avg_path_ec_pdc, avg_path_ec2]],
                  columns=['eo_pdc', 'alt_eo', 'ec_pdc', 'alt_ec'],
                  index=['clustering_coefficient', 'avg_shortest_path_length'])
df.to_csv('results/graph_idx_comparison.csv')

## LOCAL INDICES
# Each list holds the ten (node, degree) pairs with the largest degree,
# in descending order, and is written to its own CSV file.

# degree
degree_10_eo2 = sorted(G_eo2.degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(degree_10_eo2, 'degree_alt_eo')
degree_10_ec2 = sorted(G_ec2.degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(degree_10_ec2, 'degree_alt_ec')

# in_degree
in_degree_10_eo2 = sorted(G_eo2.in_degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(in_degree_10_eo2, 'in_degree_alt_eo')
in_degree_10_ec2 = sorted(G_ec2.in_degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(in_degree_10_ec2, 'in_degree_alt_ec')

# out_degree
out_degree_10_eo2 = sorted(G_eo2.out_degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(out_degree_10_eo2, 'out_degree_alt_eo')
out_degree_10_ec2 = sorted(G_ec2.out_degree, key=operator.itemgetter(1), reverse=True)[:10]
save_highest_10(out_degree_10_ec2, 'out_degree_alt_ec')



######## 2.7



In [14]:
####

# 1. find all the connected components
# 2. for each component, select its nodes
# compute the average shortest path length on that subgraph of the directed graph
# take the mean over all components


[1.5376984126984128, 2.804563492063492, 1.9796626984126984]