<div class="alert alert-block alert-info"> This notebook lists every possible directional route combination between Europe and North America, then adds airport metrics (the only indicators for the moment) so that existing Europe–North America routes can be compared with all possible routes.</div>

# Import

## lib

In [None]:
import numpy as np
import polars as pl
import pandas as pd
from polars import col as d
import glob
import os
# import math

## csv

In [None]:
# Root folder of the input datasets. Defaults to the original local path; set the
# ATSLAB_DATA_DIR environment variable to run the notebook on another machine.
# os.path.join(..., '') guarantees a trailing separator, because the loading cell
# appends file names directly to this string.
folder_path = os.path.join(os.environ.get('ATSLAB_DATA_DIR', '/home/sara/Desktop/ATSLab/data/'), '')

In [None]:
# Airport lookup table (later used for APT_CODE, LATITUDE and LONGITUDE).
df_airports_lookup_modif = pl.read_csv(f"{folder_path}df_airports_lookup_modif.csv")
# Airport metrics table (later used for REGION_ID and the per-airport indicators).
df_airports_metrics_modif = pl.read_csv(f"{folder_path}df_airports_metrics_modif.csv")
# Scheduled routes (later used for the APT_CODE_A / APT_CODE_B pairs).
# TODO: change name
df_scheduled = pl.read_parquet(f"{folder_path}scheduled_dataset_transatlantic_enhanced.parquet")

In [None]:
# Airport ratings table (APT_CODE, RATING and NB_REVIEW are used further down).
ratings_csv_path = "/home/sara/Desktop/ATSLab/data_scrapping/csv_output/20250819_output_airport_ratings.csv"
df_airports_ratings = pl.read_csv(ratings_csv_path)

# Combinations

Build every possible directional route combination between North American and European airports (each airport pair appears once per direction).

In [None]:
# One single-column frame of airport codes per region; the column is named after
# the side of the route (A or B) it will occupy in the first cross join.
# NOTE(review): REGION_ID 13 / 10 presumably denote Europe / North America — confirm.
df_eu = (
    df_airports_metrics_modif
    .filter(pl.col('REGION_ID') == 13)
    .select(pl.col('APT_CODE').alias('APT_CODE_A'))
)
df_us = (
    df_airports_metrics_modif
    .filter(pl.col('REGION_ID') == 10)
    .select(pl.col('APT_CODE').alias('APT_CODE_B'))
)

In [None]:
# Direction 1: every df_eu airport as A towards every df_us airport as B.
df1 = df_eu.join(df_us, how="cross")

# Direction 2: swap the column roles so that df_us airports are A and df_eu airports are B.
df_us_rename = df_us.select(pl.col("APT_CODE_B").alias("APT_CODE_A"))
df_eu_rename = df_eu.select(pl.col("APT_CODE_A").alias("APT_CODE_B"))
df2 = df_us_rename.join(df_eu_rename, how="cross")

# Stack both directions into a single table of directional routes.
df_apt_combinaison = pl.concat([df1, df2], how="vertical")

In [None]:
df_apt_combinaison ## 283_404 possible routes (both directions are counted)

# Add indicators

In [None]:
# Haversine great-circle distance, written as polars expressions over the
# LATITUDE_A/B and LONGITUDE_A/B columns. Nothing is evaluated here: the
# expressions are only resolved once they are used inside a DataFrame context
# (DIST_GC_KM = R * c further down).
R = 6371.0  # sphere radius in km (the distance column built from it is in km)


def _deg_to_rad(column_name):
    """Return an expression converting the given column from degrees to radians."""
    return pl.col(column_name) * np.pi / 180


lat1 = _deg_to_rad("LATITUDE_A")
lon1 = _deg_to_rad("LONGITUDE_A")
lat2 = _deg_to_rad("LATITUDE_B")
lon2 = _deg_to_rad("LONGITUDE_B")

# Angular differences between the two airports.
dlat = lat2 - lat1
dlon = lon2 - lon1

# Haversine term `a`, then the central angle `c` (in radians).
sin_sq_half_dlat = (dlat / 2).sin() ** 2
sin_sq_half_dlon = (dlon / 2).sin() ** 2
a = sin_sq_half_dlat + lat1.cos() * lat2.cos() * sin_sq_half_dlon
c = 2 * a.sqrt().arcsin()

In [None]:
# Baseline runway-length requirements. The feasibility checks further down scale
# them by 7% per 1000 ft of airport elevation before comparing with a runway length.
base_TO = 2800.0 ## m, baseline for the take-off check (scaled with the elevation of airport A)
base_LDG = 1800.0 ## m, baseline for the landing check (scaled with the elevation of airport B)

In [None]:
FT_TO_M = 0.3048  # metres per foot
ELEV_PENALTY_PER_1000FT = 0.07  # runway requirement grows by 7% per 1000 ft of elevation

# Columns pulled from each source table; every one of them gets an _A / _B suffix.
APT_METRIC_COLS = ['APT_CODE', 'OPENING_YEAR', 'CLOSING_YEAR', 'TIME_ZONE_2016', 'IS_ISLAND', 'ELEV_FT', 'NB_RUNWAYS', 'LONGEST_RUNWAY_FT', 'APT_CITY_DRIVE_DIST_KM', 'APT_CITY_DRIVE_TIME_H']
APT_RATING_COLS = ['APT_CODE', 'RATING', 'NB_REVIEW']
APT_COORD_COLS = ['APT_CODE', 'LATITUDE', 'LONGITUDE']


def _with_suffix(df, columns, suffix):
    """Select `columns` from `df` and rename each of them to `<name><suffix>`."""
    return df.select(columns).rename({name: f"{name}{suffix}" for name in columns})


def _required_runway_m(base_m, elev_ft):
    """Return the elevation-adjusted runway requirement (metres) as an expression."""
    return base_m * (1 + ELEV_PENALTY_PER_1000FT * (elev_ft / 1000.0))


# Enrich every directional route (A -> B) with per-airport metrics, ratings,
# coordinates and route-level indicators. Relies on R, c, base_TO and base_LDG
# defined in the cells above.
df_combinaison_enhanced_apt_metrics = (
    df_apt_combinaison

    ## flag the directional routes that are present in the scheduled dataset
    .join(
        df_scheduled.select('APT_CODE_A', 'APT_CODE_B').unique().with_columns(HAS_EXISTED = True),
        how = 'left', on = ['APT_CODE_A', 'APT_CODE_B'],
    )
    # routes without a match get False; only the flag column is filled
    .with_columns(d.HAS_EXISTED.fill_null(False))

    ## add apt metrics
    .join(_with_suffix(df_airports_metrics_modif, APT_METRIC_COLS, '_A'), how = 'left', on = 'APT_CODE_A')
    .join(_with_suffix(df_airports_metrics_modif, APT_METRIC_COLS, '_B'), how = 'left', on = 'APT_CODE_B')

    ## add apt rating
    # NOTE(review): assumes one ratings row per APT_CODE; duplicates would multiply routes — confirm.
    .join(_with_suffix(df_airports_ratings, APT_RATING_COLS, '_A'), how = 'left', on = 'APT_CODE_A')
    .join(_with_suffix(df_airports_ratings, APT_RATING_COLS, '_B'), how = 'left', on = 'APT_CODE_B')

    ## add indicators
    .with_columns(
        ROUTE_DRIVE_DIST_KM = d.APT_CITY_DRIVE_DIST_KM_A + d.APT_CITY_DRIVE_DIST_KM_B,
        ROUTE_DRIVE_TIME_H = d.APT_CITY_DRIVE_TIME_H_A + d.APT_CITY_DRIVE_TIME_H_B,
        ROUTE_RATING = d.RATING_A + d.RATING_B,
        TIME_ZONE_DIFF = (d.TIME_ZONE_2016_A - d.TIME_ZONE_2016_B).abs(),
    )

    ## apply log to few columns
    # NOTE(review): the log of a value <= 0 is -inf / NaN, which affects airports at or
    # below sea level and airports with zero reviews — confirm this is handled downstream.
    .with_columns(
        ELEV_LOG_FT_A = d.ELEV_FT_A.log(),
        ELEV_LOG_FT_B = d.ELEV_FT_B.log(),
        NB_REVIEW_LOG_A = d.NB_REVIEW_A.log(),
        NB_REVIEW_LOG_B = d.NB_REVIEW_B.log(),
    )

    ## join to get the latitude & longitude by airport
    .join(_with_suffix(df_airports_lookup_modif, APT_COORD_COLS, '_A'), how = 'left', on = 'APT_CODE_A')
    .join(_with_suffix(df_airports_lookup_modif, APT_COORD_COLS, '_B'), how = 'left', on = 'APT_CODE_B')

    ## compute the route distance (haversine central angle times the sphere radius)
    .with_columns(DIST_GC_KM = R * c)

    ## other columns
    # RUNWAY_M is the longest runway of the departure airport (A), in metres.
    .with_columns(RUNWAY_M = d.LONGEST_RUNWAY_FT_A * FT_TO_M)
    .with_columns(
        # take-off happens at A: runway of A against the requirement at the elevation of A
        TO_FEASIBLE = d.RUNWAY_M >= _required_runway_m(base_TO, d.ELEV_FT_A),
        # landing happens at B: it must use the runway of B (the previous version
        # compared the departure runway against the requirement at the elevation of B)
        LDG_FEASIBLE = (d.LONGEST_RUNWAY_FT_B * FT_TO_M) >= _required_runway_m(base_LDG, d.ELEV_FT_B),
    )
    .with_columns(IS_FEASIBLE = d.TO_FEASIBLE & d.LDG_FEASIBLE)
)

In [None]:
# Preview the first two rows of the enriched route table.
df_combinaison_enhanced_apt_metrics.head(2)

# Save parquet

In [None]:
# Persist the enriched route table; the relative path resolves against the
# current working directory of the kernel.
output_parquet = "df_route_combinaison_enhanced.parquet"
df_combinaison_enhanced_apt_metrics.write_parquet(output_parquet)
print("Parquet saved!")