# ✈️ Optimized Flight Route Finder with GNN & Folium

In [1]:
!pip install pandas torch torch-geometric networkx folium haversine



In [2]:
import torch
import pandas as pd
import networkx as nx
import folium
from haversine import haversine
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

In [3]:
airports_df = pd.read_csv("normalized_airports.csv")
routes_df = pd.read_csv("https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat", header=None)
routes_df.columns = ["Airline", "Airline ID", "Source Airport", "Source Airport ID", 
                     "Destination Airport", "Destination Airport ID", "Codeshare", 
                     "Stops", "Equipment"]

In [19]:
airport_to_idx = {code: i for i, code in enumerate(airports_df["IATA"])}
idx_to_airport = {i: code for code, i in airport_to_idx.items()}
airport_to_city = dict(zip(airports_df["IATA"], airports_df["City"]))
airport_to_country = dict(zip(airports_df["IATA"], airports_df["Country"]))

valid_routes = []
for _, row in routes_df.iterrows():
    src, dst = row["Source Airport"], row["Destination Airport"]
    if src in airport_to_idx and dst in airport_to_idx:
        src_info = airports_df[airports_df["IATA"] == src].iloc[0]
        dst_info = airports_df[airports_df["IATA"] == dst].iloc[0]
        dist = haversine((src_info["Latitude"], src_info["Longitude"]),
                         (dst_info["Latitude"], dst_info["Longitude"]))
        valid_routes.append((airport_to_idx[src], airport_to_idx[dst], dist, row["Airline"]))

G = nx.DiGraph()
for src, dst, weight, airline in valid_routes:
    G.add_edge(src, dst, weight=weight, airline=airline)

edge_index = torch.tensor([[src, dst] for src, dst, _, _ in valid_routes], dtype=torch.long).T
x = torch.tensor(airports_df[["Latitude", "Longitude", "Altitude"]].values, dtype=torch.float)
data = Data(x=x, edge_index=edge_index)

In [20]:
class FlightRouteGNN(torch.nn.Module):
    def __init__(self, input_dim=3, hidden_dim=8):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, input_dim)  # match output to input dim

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return x

model = FlightRouteGNN(input_dim=3, hidden_dim=8)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = torch.nn.functional.mse_loss(out, data.x)  # Autoencoder reconstruction
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


Epoch 0, Loss: 1.7723
Epoch 10, Loss: 1.0977
Epoch 20, Loss: 0.7267
Epoch 30, Loss: 0.5034
Epoch 40, Loss: 0.3629
Epoch 50, Loss: 0.3040
Epoch 60, Loss: 0.2793
Epoch 70, Loss: 0.2564
Epoch 80, Loss: 0.2362
Epoch 90, Loss: 0.2177


In [21]:
def get_best_route(src_iata, dst_iata, max_hops=5, allowed_airlines=None, max_layover=15000):
    src = airport_to_idx.get(src_iata)
    dst = airport_to_idx.get(dst_iata)
    if src is None or dst is None:
        return "Invalid airport codes", []

    node_embeddings = model(data).detach().numpy()
    for u, v in G.edges():
        gnn_dist = torch.norm(torch.tensor(node_embeddings[u]) - torch.tensor(node_embeddings[v])).item()
        base_weight = G[u][v]['weight']
        G[u][v]['adjusted_weight'] = base_weight + gnn_dist * 0.3

    try:
        length, path = nx.single_source_dijkstra(G, src, target=dst, weight='adjusted_weight')
        hops = len(path) - 1
        if hops > max_hops:
            return "Too many hops", []

        filtered = []
        valid = True
        for u, v in zip(path[:-1], path[1:]):
            edge = G.get_edge_data(u, v)
            if allowed_airlines and edge["airline"] not in allowed_airlines:
                valid = False
                break
            if edge["weight"] > max_layover:
                valid = False
                break
            filtered.append((idx_to_airport[u], idx_to_airport[v], edge["airline"], edge["weight"]))

        return None if valid else "Route doesn't meet filter conditions", filtered if valid else []
    except nx.NetworkXNoPath:
        return "No path found", []

In [22]:
def plot_route_on_map(route):
    m = folium.Map(location=[20, 0], zoom_start=2)
    for u, v, airline, dist in route:
        src = airports_df[airports_df["IATA"] == u].iloc[0]
        dst = airports_df[airports_df["IATA"] == v].iloc[0]
        folium.Marker(location=[src["Latitude"], src["Longitude"]], popup=f"{u} ({src['City']})").add_to(m)
        folium.Marker(location=[dst["Latitude"], dst["Longitude"]], popup=f"{v} ({dst['City']})").add_to(m)
        folium.PolyLine(
            [(src["Latitude"], src["Longitude"]), (dst["Latitude"], dst["Longitude"])],
            tooltip=f"{airline} | {dist:.2f} km",
            color="blue",
        ).add_to(m)
    return m

In [23]:
error, route = get_best_route("RPR", "LAX", max_hops=5, allowed_airlines=None, max_layover=12000)
if error:
    print("Error:", error)
else:
    m = plot_route_on_map(route)
    m.save("route_map.html")
    m

In [24]:
import pandas as pd
import networkx as nx
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from haversine import haversine
import folium
from folium import PolyLine
from sklearn.metrics.pairwise import cosine_distances
import numpy as np

In [25]:
airports_df = pd.read_csv("normalized_airports.csv")
routes_df = pd.read_csv("https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat", header=None)
routes_df.columns = ["Airline", "Airline ID", "Source Airport", "Source Airport ID", 
                     "Destination Airport", "Destination Airport ID", "Codeshare", 
                     "Stops", "Equipment"]


In [26]:
airport_to_idx = {code: i for i, code in enumerate(airports_df["IATA"])}
idx_to_airport = {v: k for k, v in airport_to_idx.items()}


In [27]:
valid_routes = []
for _, row in routes_df.iterrows():
    src, dst = row["Source Airport"], row["Destination Airport"]
    if src in airport_to_idx and dst in airport_to_idx:
        src_info = airports_df[airports_df["IATA"] == src].iloc[0]
        dst_info = airports_df[airports_df["IATA"] == dst].iloc[0]
        distance = haversine((src_info["Latitude"], src_info["Longitude"]),
                             (dst_info["Latitude"], dst_info["Longitude"]))
        valid_routes.append((airport_to_idx[src], airport_to_idx[dst], distance))

In [28]:
G = nx.DiGraph()
for src, dst, weight in valid_routes:
    G.add_edge(src, dst, weight=weight)

In [29]:
data = Data(
    x=torch.tensor(airports_df[["Latitude", "Longitude", "Altitude"]].values, dtype=torch.float),
    edge_index=torch.tensor([[src, dst] for src, dst, _ in valid_routes], dtype=torch.long).T
)

In [30]:
class GCN(torch.nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.conv1 = GCNConv(in_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, out_dim)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return x

model = GCN(3, 16, 8)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [31]:
model.train()
for epoch in range(100):
    optimizer.zero_grad()
    out = model(data)
    loss = torch.nn.functional.mse_loss(out, data.x)  # Autoencoder style
    loss.backward()
    optimizer.step()
    

  loss = torch.nn.functional.mse_loss(out, data.x)  # Autoencoder style


RuntimeError: The size of tensor a (8) must match the size of tensor b (3) at non-singleton dimension 1