# Imports

In [1]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

from tqdm import tqdm

%matplotlib inline

# Importing & Building the Network

In [2]:
# Read the tab-separated edge list. Each row is one directed edge described by
# its source label, target label, year and integer weight.
weighted_net_df = pd.read_csv('../../datasets/in/weighted_academic_graph.txt',
                              sep='\t', names=['from', 'to', 'year', 'weight'],
                              dtype={'from': str, 'to': str, 'year': int,
                                     'weight': int})

# DataFrame.info() writes its report straight to stdout and returns None, so
# it must be called on its own rather than fed through str.format() (which
# printed a stray "None" after the report).
print()
weighted_net_df.info()
print()

# Every label that appears as a source or a target becomes a node; labels are
# mapped to consecutive integer ids in alphabetical order.
states = sorted(set(weighted_net_df['from']).union(weighted_net_df['to']))
mapping = {state: index for index, state in enumerate(states)}

# A MultiDiGraph keeps one edge per input row, parallel edges included.
weighted_network = nx.MultiDiGraph()

for state in tqdm(states, desc='ADDING NODES'):
    weighted_network.add_node(mapping[state], state=state)

# Iterate the columns by name: 'from' is a Python keyword, so itertuples()
# would expose it only under a positional alias ('_0') that breaks as soon as
# the column order changes. The total is taken from the frame itself instead
# of a hard-coded row count.
edge_rows = zip(weighted_net_df['from'], weighted_net_df['to'],
                weighted_net_df['year'], weighted_net_df['weight'])

for source, target, year, weight in tqdm(edge_rows, desc='CONNECTING NODES',
                                         total=len(weighted_net_df)):
    weighted_network.add_edge(mapping[source], mapping[target],
                              year=year, weight=weight)

ADDING NODES: 100%|██████████| 184/184 [00:00<00:00, 149825.65it/s]
CONNECTING NODES:  15%|█▍        | 14049/96185 [00:00<00:00, 140482.77it/s]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96185 entries, 0 to 96184
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   from    96185 non-null  object
 1   to      96185 non-null  object
 2   year    96185 non-null  int64 
 3   weight  96185 non-null  int64 
dtypes: int64(2), object(2)
memory usage: 2.9+ MB

None



CONNECTING NODES: 100%|██████████| 96185/96185 [00:00<00:00, 146687.63it/s]


# Number of Nodes, Edges and Selfloops

In [3]:
# Basic size of the multigraph (parallel edges are counted individually).
node_count = weighted_network.number_of_nodes()
edge_count = weighted_network.number_of_edges()
print('\nNODES: {}, EDGES: {}\n'.format(node_count, edge_count))

# A self-loop is an edge whose source and target are the same node.
selfloop_count = sum(1 for source, target in weighted_network.edges()
                     if source == target)
print('NUMBER OF SELFLOOPS: {}\n'.format(selfloop_count))


NODES: 184, EDGES: 96185

NUMBER OF SELFLOOPS: 0



# Network's representation

In [None]:
# The spring layout starts from random positions; a fixed seed makes the saved
# figure reproducible across kernel restarts.
pos = nx.spring_layout(weighted_network, k=2.5, seed=42)

# Query the in-degrees once and reuse them for both the node list and the
# node sizes, so the two sequences are guaranteed to be aligned.
in_degrees = dict(weighted_network.in_degree())

nx.draw_networkx_nodes(weighted_network, pos=pos,
                       nodelist=list(in_degrees.keys()),
                       node_size=list(in_degrees.values()),
                       alpha=0.5)
nx.draw_networkx_edges(weighted_network, pos, width=0.5, arrowsize=7,
                       alpha=0.1)

# Label every node with the 'state' attribute stored on it.
labels = {node: attrs['state']
          for node, attrs in weighted_network.nodes(data=True)}
nx.draw_networkx_labels(weighted_network, pos, labels, font_size=3,
                        font_color='#ff4d4d')

plt.title("The Nodes' sizes correspond to their In-Degrees")
plt.suptitle('Original Network')
plt.axis('off')
plt.savefig('./images/graph.pdf', format='pdf')
plt.close()

# Degree distribution

In [4]:
# Sorted degree sequences. On a MultiDiGraph every parallel edge contributes
# to the degree of its endpoints.
degree = sorted(d for n, d in weighted_network.degree())
in_degree = sorted(d for n, d in weighted_network.in_degree())
out_degree = sorted(d for n, d in weighted_network.out_degree())

print('MAX DEGREE: {}, MIN DEGREE: {}, AVERAGE DEGREE: {}\n'
      'MAX IN DEGREE:{}, MIN IN DEGREE: {}, AVERAGE IN DEGREE: {}\n'
      'MAX OUT DEGREE: {}, MIN OUT DEGREE: {}, AVERAGE OUT DEGREE: {}\n'
      .format(max(degree), min(degree), np.mean(degree), max(in_degree),
              min(in_degree), np.mean(in_degree), max(out_degree),
              min(out_degree), np.mean(out_degree)))

# One row per degree flavour: (report label, plot title, output file,
# per-node degree view, sorted degree sequence). This replaces three
# copy-pasted branches that differed only in the degree accessor.
degree_kinds = [
    ('DEGREE', 'Degree distribution', './images/degree_hist.pdf',
     weighted_network.degree(), degree),
    ('IN DEGREE', 'Incoming Degrees distribution',
     './images/indegree_hist.pdf', weighted_network.in_degree(), in_degree),
    ('OUT DEGREE', 'Outgoing Degrees distribution',
     './images/outdegree_hist.pdf', weighted_network.out_degree(),
     out_degree),
]

for d_type, title, file_name, degree_view, degree_sequence in degree_kinds:
    max_d, min_d = max(degree_sequence), min(degree_sequence)

    # Names of the nodes attaining the extreme values, in node order.
    max_nodes = [weighted_network.nodes[n]['state']
                 for n, n_degree in degree_view if n_degree == max_d]
    min_nodes = [weighted_network.nodes[n]['state']
                 for n, n_degree in degree_view if n_degree == min_d]

    # The MIN label now names the same degree type as the MAX label; it used
    # to read "MIN DEGREE" for the in- and out-degree rows as well.
    print('NODES WITH MAX {0}: {1}, NODES WITH MIN {0}: {2}'
          .format(d_type, max_nodes, min_nodes))

    # Normalised, reverse-cumulative histogram on a log scale. `density`
    # takes a boolean; the string 'True' only worked because it is truthy.
    plt.hist(degree_sequence, bins='sturges', density=True, cumulative=-1,
             log=True)
    plt.title(title)
    plt.xlabel(r'$k$')
    plt.ylabel(r'$p_k$')
    plt.savefig(file_name, format='pdf', bbox_inches='tight')
    plt.close()

MAX DEGREE: 10018, MIN DEGREE: 1, AVERAGE DEGREE: 1045.4891304347825
MAX IN DEGREE:4963, MIN IN DEGREE: 0, AVERAGE IN DEGREE: 522.7445652173913
MAX OUT DEGREE: 5055, MIN OUT DEGREE: 0, AVERAGE OUT DEGREE: 522.7445652173913

NODES WITH MAX DEGREE: ['United States'], NODES WITH MIN DEGREE: ['Aruba', 'Guinea']
NODES WITH MAX IN DEGREE: ['United States'], NODES WITH MIN DEGREE: ['Aruba']
NODES WITH MAX OUT DEGREE: ['United States'], NODES WITH MIN DEGREE: ['Guinea', 'Saint Lucia']


# CONNECTEDNESS

In [6]:
# Report how connected the directed multigraph is. Strong connectivity is
# only checked when the graph is at least weakly connected.
if nx.is_weakly_connected(weighted_network):
    if nx.is_strongly_connected(weighted_network):
        cc = nx.number_strongly_connected_components(weighted_network)

        print('\nTHE NETWORK IS STRONGLY CONNECTED WITH {} '
              'CONNECTED COMPONENT(S)'.format(cc))
    else:
        cc = nx.number_weakly_connected_components(weighted_network)

        print('\nTHE NETWORK IS WEAKLY CONNECTED WITH {} '
              'CONNECTED COMPONENT(S)'.format(cc))
else:
    # Previously a disconnected graph produced no output at all, which was
    # indistinguishable from the cell not having been run.
    cc = nx.number_weakly_connected_components(weighted_network)

    print('\nTHE NETWORK IS NOT CONNECTED: IT HAS {} '
          'WEAKLY CONNECTED COMPONENT(S)'.format(cc))


THE NETWORK IS WEAKLY CONNECTED WITH 1 CONNECTED COMPONENT(S)


# CLUSTERING COEFFICIENT

In [5]:
# Build a simple directed graph with the same nodes as the multigraph; the
# clustering coefficient below is computed on this collapsed graph.
weighted_network_digraph = nx.DiGraph()

for state in states:
    weighted_network_digraph.add_node(mapping[state], state=state)

# Collapse parallel edges in a single pass over the adjacency structure: each
# connected ordered pair becomes one DiGraph edge whose 'data' attribute lists
# the attribute dicts of all the parallel edges it replaces. This avoids
# testing every node pair and re-scanning a node's out-edges for each target.
for n_1, neighbours in weighted_network.adjacency():
    for n_2, parallel_edges in neighbours.items():
        weighted_network_digraph.add_edge(
            n_1, n_2, data=list(parallel_edges.values()))

# Fixed seed so the saved layout is reproducible across kernel restarts.
pos = nx.spring_layout(weighted_network_digraph, k=2.5, seed=42)

# Query the in-degrees once and reuse them for the node list and node sizes.
condensed_in_degrees = dict(weighted_network_digraph.in_degree())

nx.draw_networkx_nodes(weighted_network_digraph, pos,
                       nodelist=list(condensed_in_degrees.keys()),
                       node_size=list(condensed_in_degrees.values()),
                       alpha=0.6)
nx.draw_networkx_edges(weighted_network_digraph, pos, width=0.5, arrowsize=7,
                       alpha=0.1)

# Label every node with the 'state' attribute stored on it.
labels = {node: attrs['state']
          for node, attrs in weighted_network_digraph.nodes(data=True)}
nx.draw_networkx_labels(weighted_network_digraph, pos, labels, font_size=3,
                        font_color='#ff4d4d')

plt.title("The Nodes' sizes correspond to their In-Degrees")
plt.suptitle('Condensed Network')
plt.axis('off')
plt.savefig('./images/graph_condensed.pdf', format='pdf')
plt.close()

# Histogram of the per-node clustering coefficients. The dict view is turned
# into a list so matplotlib receives a plain sequence.
clustering_per_node = nx.clustering(weighted_network_digraph)

plt.hist(list(clustering_per_node.values()), bins='sturges')
plt.title('Clustering Coefficient per Node')
plt.xlabel(r'$C_k$')
plt.ylabel(r'$\# k$')
plt.savefig('./images/clustering_coefficient.pdf', format='pdf',
            bbox_inches='tight')
plt.close()

print('\nAVERAGE CLUSTERING COEFFICIENT: {}\n'
      .format(nx.average_clustering(weighted_network_digraph)))


AVERAGE CLUSTERING COEFFICIENT: 0.808497707955442



# SHORTEST PATHS

In [7]:
# All-pairs shortest paths as {source: {target: [node, ..., node]}}. The
# dict() call keeps this working whether the library returns a dictionary or
# an iterator of (source, paths) pairs.
paths = dict(nx.shortest_path(weighted_network))
avg_sp_per_node = dict()

for source, paths_from_source in paths.items():
    # Length of a path = number of edges = number of nodes - 1. The average
    # runs over every OTHER node reachable from `source`. The previous version
    # only looked at direct out-neighbours and used the node count, so every
    # node's average came out as exactly 2.0.
    lengths = [len(path) - 1
               for target, path in paths_from_source.items()
               if target != source]

    # Nodes that cannot reach any other node have no average to report.
    if lengths:
        avg_sp_per_node[source] = np.mean(lengths)

plt.hist(list(avg_sp_per_node.values()))
plt.title('Average Shortest Path per Node')
plt.xlabel(r'Length')
plt.ylabel(r'# of Shortest Paths')
plt.savefig('./images/avg_shortest_paths.pdf', format='pdf',
            bbox_inches='tight')
plt.close()

# NOTE(review): depending on the installed NetworkX version this call may
# raise NetworkXError for a directed graph that is not strongly connected;
# confirm against the version this notebook is run with.
print('AVERAGE SHORTEST PATH LENGTH: {}'
      .format(nx.average_shortest_path_length(weighted_network)))

AVERAGE SHORTEST PATH LENGTH: 1.7180743644571157
