In [None]:
# The following example tasks let you practice basic concepts of network analysis
# Import the libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

In [None]:
# The networkx package ships with several historically important networks.
# One of them is a small network depicting a karate club.
# Load it as a graph object so that we can start working with it.

karate_net = nx.karate_club_graph()

In [None]:
# Let's visualize the network.
# The basic draw function is enough for a first look.
# Extra task: check the drawing documentation (linked in the centrality
# analysis lecture), find the layout that arranges the nodes in a circle,
# and use that layout for the visualization.

# Compute the circular node positions and hand them to the generic drawer.
nx.draw(karate_net, pos=nx.circular_layout(karate_net))

In [None]:
# Check the number of nodes and the total number of edges
# Expected: 34 nodes, 78 edges
print(karate_net.number_of_nodes(), 'nodes')
print(karate_net.number_of_edges(), 'links')

In [None]:
# Do you find any attributes for nodes?
# Yes: every node carries a 'club' attribute naming one of two clubs, Mr. Hi or Officer
# data=True makes the view show each node together with its attribute dict

karate_net.nodes(data = True)

In [None]:
# What is the highest/lowest degree you can find in the network?
# Highest: one node with 17 links. Lowest: one node with a single link.
# (Degree 2 is only the most common value, shared by 11 nodes - it is not the minimum.)

# The degree view yields (node, degree) pairs; walk it once and keep the degrees.
karate_degrees = pd.Series([degree for _node, degree in karate_net.degree()])

# Answer the question directly instead of reading it off a frequency table.
print('Highest degree:', karate_degrees.max())
print('Lowest degree:', karate_degrees.min())

# value_counts() orders by frequency; sort by degree so both extremes of the
# distribution sit at the ends of the table.
karate_degrees.value_counts().sort_index()

In [None]:
# Visualize the histogram of degrees
# Degrees are integers, so draw one bar per degree value (discrete=True)
# instead of automatic continuous bins that lump several degrees together.
degree_hist_ax = sns.histplot(karate_degrees, discrete=True)

# Label the axes so the figure can be understood on its own.
degree_hist_ax.set(xlabel='Degree', ylabel='Number of nodes', title='Degree distribution of the karate club network');

In [None]:
# Calculate the average number of links a node has in the karate network
# Expected: about 4.59

# Total of all node degrees divided by the number of nodes.
karate_degrees.sum() / karate_net.number_of_nodes()

In [None]:
# Is the network connected?
# Yes
# (Evaluates to a boolean for the whole karate network.)

nx.is_connected(karate_net)

In [None]:
# What is the diameter of the network?
# 5

nx.diameter(karate_net)

In [None]:
# Filter the network to keep only the nodes whose club attribute is Officer
# How many nodes are left? 17
# Is the remaining network still connected? Yes

# Look up each node's 'club' attribute and keep the matching node ids.
officers = [
    node
    for node in karate_net.nodes()
    if karate_net.nodes[node]['club'] == 'Officer'
]
print(len(officers))

# Build the subgraph on the selected nodes and test its connectivity.
karate_officer = karate_net.subgraph(officers)
nx.is_connected(karate_officer)

In [None]:
# What is the maximum degree in the filtered network, and what is the average degree?
# Maximum is 14, average is 3.76

# The degree view yields (node, degree) pairs; keep only the degrees.
karate_officer_degrees = pd.Series(
    [degree for _node, degree in karate_officer.degree()]
)
print(karate_officer_degrees.value_counts())

# Average degree: total of the degrees divided by the number of remaining nodes.
karate_officer_degrees.sum() / karate_officer.number_of_nodes()

In [None]:
# Perform the above tasks again, now filtering the original karate net for club Mr. Hi
# Do you find a higher average degree in this case?
# 17 nodes, maximum degree 15, still connected, average degree is 4.12, higher than Officers

# Look up each node's 'club' attribute and keep the matching node ids.
hi = [
    node
    for node in karate_net.nodes()
    if karate_net.nodes[node]['club'] == 'Mr. Hi'
]
print(len(hi))
karate_hi = karate_net.subgraph(hi)
print(nx.is_connected(karate_hi))

# The degree view yields (node, degree) pairs; keep only the degrees.
karate_hi_degrees = pd.Series([degree for _node, degree in karate_hi.degree()])
print(karate_hi_degrees.value_counts())

# Average degree: total of the degrees divided by the number of remaining nodes.
karate_hi_degrees.sum() / karate_hi.number_of_nodes()

In [None]:
# Let's continue with the original network without filtering
# Calculate the five centrality measures introduced in the course
# Collect the centrality values in a dataframe as done in the lecture example

# Each measure comes back as a dict that maps a node to its score.
deg = nx.degree_centrality(karate_net)
closeness = nx.closeness_centrality(karate_net)
betweenness = nx.betweenness_centrality(karate_net)
eigenvector = nx.eigenvector_centrality(karate_net)
pagerank = nx.pagerank(karate_net)

# Build the frame in one step from the dicts themselves: the node ids become
# the row index and the columns are aligned on that key, instead of relying on
# every dict happening to list its values in the same positional order.
centrality_karate = pd.DataFrame({
    'degree_centrality': deg,
    'closeness_centrality': closeness,
    'betweenness_centrality': betweenness,
    'eigenvector_centrality': eigenvector,
    'pagerank': pagerank,
})

centrality_karate

In [None]:
# Print the top 5 most central nodes based on each centrality measure
# Do you find nodes that are at the top in all the rankings?

for col in centrality_karate.columns:
    # nlargest returns the rows in descending order of the score. When several
    # nodes share the score at the cut-off it keeps the ones that appear first
    # in the frame, so the printed list does not depend on how an unstable
    # sort happens to order tied values.
    result = centrality_karate[col].nlargest(5).index.tolist()
    print('The top 5 members based on', col, 'in the karate club are', result)

In [None]:
# Which pair of centrality measures has the highest correlation?
# corr() shows the pairwise correlation between all centrality columns
# (pandas uses the Pearson coefficient unless another method is requested).
# Ignore the diagonal, where each measure is compared with itself.

centrality_karate.corr()