# 🛫 EU Air Transportation Network Analysis (Improved Version)

In [ ]:
# 📦 Imports
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from collections import Counter

# 🧭 Optional: use seaborn's "whitegrid" theme and a larger default figure size
sns.set_theme(style="whitegrid")
plt.rc("figure", figsize=(12, 8))


## 📁 1. Load Data

In [ ]:
# Column names applied to the edge list, which is read without a header row.
# NOTE(review): assumes every row has exactly these three fields — confirm
# against the data file.
EDGE_COLUMNS = ["layerID", "sourceID", "targetID"]

nodes_df = pd.read_csv("EUAirTransportation_nodes.txt", sep=" ")
layers_df = pd.read_csv("EUAirTransportation_layers.txt", sep=" ")
edges_df = pd.read_csv(
    "EUAirTransportation_multiplex.edges",
    sep=" ",
    names=EDGE_COLUMNS,
)

# Preview the first rows of each table as a quick sanity check
for frame in (nodes_df, layers_df, edges_df):
    display(frame.head())


## 🌐 2. Create Directed Multiplex Network (One Layer per Airline)

In [ ]:
def _layer_digraph(layer_id):
    """Return a DiGraph with the sourceID -> targetID edges of one layer."""
    in_layer = edges_df["layerID"] == layer_id
    endpoints = edges_df.loc[in_layer, ["sourceID", "targetID"]]
    digraph = nx.DiGraph()  # edges are treated as directed flights
    digraph.add_edges_from(endpoints.to_numpy())
    return digraph


# One directed graph per entry in layers_df["nodeID"], keyed by that layer ID.
layer_graphs = {
    layer_id: _layer_digraph(layer_id) for layer_id in layers_df["nodeID"]
}


## 🔍 3. Microscale Analysis
### 🎯 In-Degree and Out-Degree (Ryanair)

In [ ]:
def get_airport_label(airport_id):
    """Return the label of an airport, falling back to its ID as a string.

    Looks up ``airport_id`` in the ``nodeID`` column of the module-level
    ``nodes_df`` and returns the ``nodeLabel`` of the first matching row.
    When no row matches, ``str(airport_id)`` is returned instead.
    """
    labels = nodes_df.loc[nodes_df["nodeID"] == airport_id, "nodeLabel"]
    if labels.empty:
        return str(airport_id)
    return labels.values[0]

# Layer ID 2 is used as the Ryanair layer throughout this section.
ryanair_graph = layer_graphs[2]
in_degrees = dict(ryanair_graph.in_degree())
out_degrees = dict(ryanair_graph.out_degree())


def _print_top_airports(degree_by_node, limit=10):
    """Print the `limit` highest-degree airports as "label: degree" lines."""
    # reverse=True keeps the sort stable, so ties stay in their original order.
    ranked = sorted(degree_by_node.items(), key=lambda pair: pair[1], reverse=True)
    for airport_id, airport_degree in ranked[:limit]:
        print(f"{get_airport_label(airport_id)}: {airport_degree}")


# Top 10 by in-degree
print("Top 10 Airports by In-Degree (Ryanair):")
_print_top_airports(in_degrees)

# Top 10 by out-degree
print("\nTop 10 Airports by Out-Degree (Ryanair):")
_print_top_airports(out_degrees)


### 🔄 Clustering Coefficient (Undirected Approximation)

In [ ]:
# Clustering is computed on the undirected copy of the Ryanair layer.
ryanair_undirected = ryanair_graph.to_undirected()
clustering = nx.clustering(ryanair_undirected)

# Average the per-node coefficients into a single figure for the layer.
per_node_cc = np.fromiter(clustering.values(), dtype=float)
avg_cc = per_node_cc.mean()
print(f"Average clustering coefficient (Ryanair): {avg_cc:.3f}")


## 🌍 4. Macroscale Analysis
### 🧱 Layer Sizes & Density

In [ ]:
# One (layer ID, node count, edge count, density) tuple per airline layer.
# Node and edge counts come from the directed graph; density is taken on its
# undirected copy.
layer_stats = [
    (
        layer_id,
        digraph.number_of_nodes(),
        digraph.number_of_edges(),
        nx.density(digraph.to_undirected()),
    )
    for layer_id, digraph in layer_graphs.items()
]

# Attach the layer metadata from layers_df and drop the duplicated key column.
layer_stats_df = (
    pd.DataFrame(layer_stats, columns=["LayerID", "NumNodes", "NumEdges", "Density"])
    .merge(layers_df, left_on="LayerID", right_on="nodeID")
    .drop(columns="nodeID")
)

# Ten largest layers by node count (last expression, so the notebook shows it).
layer_stats_df.sort_values(by="NumNodes", ascending=False).head(10)


### 🔗 Hub Airports Across Layers

In [ ]:
# Tally how often each airport appears as an edge endpoint, across all layers:
# sources are counted first, then targets are added to the same counter.
airport_counts = Counter(edges_df["sourceID"].tolist())
airport_counts.update(edges_df["targetID"].tolist())
top_airports = airport_counts.most_common(10)

print("Top 10 most used airports across all airlines:")
for node_id, n_endpoints in top_airports:
    print(f"{get_airport_label(node_id)} - {n_endpoints} flights")


## 📍 5. Geographic Network Plot (Example: EasyJet)

In [ ]:
def plot_airline_geographic(layer_id, title="Airline Network"):
    """Draw one airline layer with airports placed at (longitude, latitude).

    Args:
        layer_id: Key into the module-level ``layer_graphs`` dict.
        title: Text shown in the plot title, followed by the layer ID.

    Raises:
        KeyError: If ``layer_id`` is not a key of ``layer_graphs``.

    Airports that have no row in ``nodes_df`` cannot be positioned; they are
    left out of the drawing (with a printed note) instead of making the draw
    call fail on a node without a position.
    """
    g = layer_graphs[layer_id].to_undirected()

    # Build the nodeID -> (longitude, latitude) lookup once, rather than
    # filtering nodes_df several times per node. The first row wins when a
    # nodeID appears more than once, matching a `.values[0]` lookup.
    located = nodes_df.drop_duplicates(subset="nodeID")
    coords = dict(
        zip(located["nodeID"], zip(located["nodeLong"], located["nodeLat"]))
    )
    pos = {node: coords[node] for node in g.nodes() if node in coords}

    # Every drawn node needs a position, so restrict the graph to located ones.
    missing = g.number_of_nodes() - len(pos)
    if missing:
        print(f"Skipping {missing} airport(s) without coordinates in nodes_df.")
        g = g.subgraph(pos)

    fig, ax = plt.subplots(figsize=(14, 10))
    # draw_networkx keeps the axes visible (nx.draw switches them off, which
    # would hide the longitude/latitude labels set below).
    nx.draw_networkx(
        g,
        pos,
        ax=ax,
        node_size=30,
        edge_color="gray",
        node_color="blue",
        with_labels=False,
    )
    # The networkx drawing helpers hide ticks; turn them back on so the
    # coordinate axes are readable.
    ax.tick_params(
        axis="both",
        which="both",
        bottom=True,
        left=True,
        labelbottom=True,
        labelleft=True,
    )
    ax.set_title(f"{title} (Layer ID: {layer_id})")
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    plt.show()

# Draw the EasyJet network; layer ID 3 is used as the EasyJet layer here.
plot_airline_geographic(layer_id=3, title="EasyJet Flight Network")


### 📊 Degree Distribution for EasyJet

In [ ]:
# Degree of every airport in layer 3 (EasyJet), in node order.
degrees = [node_degree for _, node_degree in layer_graphs[3].degree()]

# Histogram with a KDE overlay; labels are set on the Axes histplot returns.
ax = sns.histplot(degrees, bins=20, kde=True)
ax.set_title("Degree Distribution for EasyJet (Layer 3)")
ax.set_xlabel("Degree")
ax.set_ylabel("Frequency")
plt.show()


## 🧠 6. Summary & Insights

- **Microscale**: We ranked airports by directed in- and out-degree and measured the average clustering coefficient (Ryanair layer) to understand airport roles.  
- **Macroscale**: Compared airlines by network size, density, and overall airport frequency.  
- **Visualization**: Improved geographic plots using actual latitude/longitude for context.  
- This version more accurately models the directed nature of flights, with better label readability.
