In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import json
import pickle

## load the IATA data
# JSON text is UTF-8 by specification, so the encoding is pinned here instead
# of relying on the platform default (which is not UTF-8 on every system).
with open('data/IATA.json', encoding='utf-8') as f:
    IATA = json.load(f)

## load the graph network
# NOTE(security): pickle.load can execute arbitrary code while deserializing.
# Only load pickle files that were produced by a trusted source.
with open('data/graphnetwork.gpickle', 'rb') as f:
    G = pickle.load(f)

## load the pickled continent_to_reviews object
# NOTE(review): presumably a mapping from continent to reviews - confirm
# against the code that wrote this file. Same pickle trust caveat as above.
with open('data/continent_to_reviews.pkl', 'rb') as f:
    continent_to_reviews = pickle.load(f)

In [None]:
## Degree distribution of the network, with the mean degree marked
# G.degree() yields (node, degree) pairs; only the degree is needed here.
degrees = [deg for _, deg in G.degree()]

plt.hist(degrees, bins=100)

# Compute the mean once and reuse it for both the marker line and the printout.
mean_degree = sum(degrees) / len(degrees)
plt.axvline(mean_degree, color='red', linestyle='dashed', linewidth=1)

plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.title('Degree Distribution')
print("Average degree: ", mean_degree)

In [None]:
## Centrality Measures
# Closeness centrality for every node, returned as a {node: score} dict.
closeness_centrality = nx.closeness_centrality(G)
print("Done calculating closeness centrality")

# Rank the (node, score) pairs from highest to lowest score.
sorted_closeness_centrality = list(closeness_centrality.items())
sorted_closeness_centrality.sort(key=lambda pair: pair[1], reverse=True)
top_5_closeness = sorted_closeness_centrality[:5]

# Report the five best-ranked airports using the 'name' attribute stored on
# each node (node keys are presumably IATA codes - confirm against the graph).
for node, closeness in top_5_closeness:
    airport_name = G.nodes[node]['name']
    print(f"{airport_name} has a closeness centrality of {closeness:.4f}")

In [None]:
## Eigenvector centrality
# Score every node, returned as a {node: score} dict.
eigenvector_centrality = nx.eigenvector_centrality(G)

# Order the (node, score) pairs by descending score and keep the first five.
sorted_eigenvector_centrality = sorted(
    eigenvector_centrality.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
top_5_eigenvector = sorted_eigenvector_centrality[:5]

# Print each of the five airports by the 'name' attribute stored on its node.
for node, eigenvector in top_5_eigenvector:
    airport_name = G.nodes[node]['name']
    print(f"{airport_name} has an eigenvector centrality of {eigenvector:.4f}")

In [None]:
## plot the closeness centrality and eigenvector centrality
def _plot_top_airports(ranked, measure, top_n=10, save_path=None):
    """Draw a bar chart of the first ``top_n`` entries of a centrality ranking.

    Parameters
    ----------
    ranked : list of (node, score) pairs
        Ranking already sorted from highest to lowest score; only the first
        ``top_n`` entries are plotted.
    measure : str
        Capitalised measure name (e.g. 'Closeness'); used in the y-axis label
        and, lower-cased, in the title.
    top_n : int, optional
        Number of leading entries to show (default 10).
    save_path : str or None, optional
        When given, the figure is written to this path (dpi=300, tight
        bounding box) before being shown. Nothing is saved by default.
    """
    top_entries = ranked[:top_n]
    # Bars are labelled with the 'name' attribute stored on each graph node.
    airport_names = [G.nodes[node]["name"] for node, _ in top_entries]
    scores = [score for _, score in top_entries]

    plt.figure(figsize=(10, 5))
    plt.bar(airport_names, scores)
    plt.xticks(rotation=90)  # names are long; rotate so they do not overlap
    plt.ylabel(f'{measure} Centrality')
    plt.title(f'Top {top_n} airports with the highest {measure.lower()} centrality')
    if save_path is not None:
        # Save before show(): the figure may no longer be current afterwards.
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()


# Pass save_path='images/top10_airports_closeness.png' (or
# 'images/top10_airports_eigenvector.png') to also write the figure to disk.
_plot_top_airports(sorted_closeness_centrality, 'Closeness')
_plot_top_airports(sorted_eigenvector_centrality, 'Eigenvector')

In [None]:
## Create scatter plot of closeness centrality vs eigenvector centrality
# Each point must be ONE airport: (its closeness, its eigenvector score).
# The two sorted rankings are ordered independently, so zipping them by
# position would pair the i-th best closeness with the i-th best eigenvector
# score - usually values from different airports. Look both scores up per
# node in the unsorted {node: score} dicts instead.
scatter_nodes = list(closeness_centrality)
closeness_values = [closeness_centrality[n] for n in scatter_nodes]
eigenvector_values = [eigenvector_centrality[n] for n in scatter_nodes]

plt.figure(figsize=(10, 5))
plt.scatter(closeness_values, eigenvector_values)
plt.xlabel('Closeness Centrality')
plt.ylabel('Eigenvector Centrality')
plt.title('Closeness Centrality vs Eigenvector Centrality')
#plt.savefig('images/closeness_vs_eigenvector.png', dpi=300, bbox_inches='tight')
plt.show()

## 