<div class="alert alert-block alert-info"> In this notebook, I examine whether routes that have already existed differ from all possible routes, by comparing the two groups across airport metrics (international market only). </div>

# Import

## lib

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before each cell execution.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import polars as pl
import pandas as pd
from polars import col as d
import glob
import os
import plotly.express as px

## plot

In [None]:
from utils_plot import facet_distribution_plot
from utils_plot import plot_heatmap_by_group

## csv

In [None]:
# Root directory of the input data files.
# Set the ATSLAB_DATA_DIR environment variable to point the notebook at another
# location; the original local path is kept as the fallback.
# os.path.join(..., "") guarantees a trailing separator, because the path is
# concatenated directly with a file name when the parquet file is read.
folder_path = os.path.join(
    os.environ.get("ATSLAB_DATA_DIR", "/home/sara/Desktop/ATSLab/data/"),
    "",
)

In [None]:
# Read the enhanced route-combination table from the data folder.
df_route_combinaison_enhanced = pl.read_parquet(
    f"{folder_path}df_route_combinaison_enhanced.parquet"
)

## some variables

In [None]:
# Colour assigned to each boolean category value in the plots of this
# notebook: a red tone for False, a green tone for True.
dico_has_existed = dict(
    [
        (False, "#E57373"),
        (True, "#81C784"),
    ]
)

# Volume of data

In [None]:
# Row count for each HAS_EXISTED value.
df_route_combinaison_enhanced.group_by("HAS_EXISTED").agg(COUNT=pl.len())

# Histogram

## airport to city drive distance

In [None]:
# Passes the three driving-distance columns to facet_distribution_plot with
# HAS_EXISTED as the category, then sets an x-axis tick every 10 units.
(
    facet_distribution_plot(
        df_route_combinaison_enhanced.to_pandas(),
        category="HAS_EXISTED",
        value_vars=[
            "APT_CITY_DRIVE_DIST_KM_A",
            "APT_CITY_DRIVE_DIST_KM_B",
            "ROUTE_DRIVE_DIST_KM",
        ],
        title_x="distance (km)",
        dico_color=dico_has_existed,
        nbins=50,
    )
    .update_xaxes(dtick=10)
)

## airport to city drive time

In [None]:
# Passes the three driving-time columns to facet_distribution_plot with
# HAS_EXISTED as the category, then sets an x-axis tick every 0.2 units.
(
    facet_distribution_plot(
        df_route_combinaison_enhanced.to_pandas(),
        category="HAS_EXISTED",
        value_vars=[
            "APT_CITY_DRIVE_TIME_H_A",
            "APT_CITY_DRIVE_TIME_H_B",
            "ROUTE_DRIVE_TIME_H",
        ],
        title_x="time (h)",
        dico_color=dico_has_existed,
        nbins=50,
    )
    .update_xaxes(dtick=0.2)
)

## elevation

In [None]:
(
    facet_distribution_plot(df_route_combinaison_enhanced.to_pandas(), dico_color=dico_has_existed, title_x = 'elevation (ft)', category = 'HAS_EXISTED', value_vars= ['ELEV_FT_A', 'ELEV_FT_B'], nbins = 50)
    .update_xaxes(dtick = 200)

)

In [None]:
# Same elevation plot on the log-transformed columns.
# Reads from df_route_combinaison_enhanced, the only frame this notebook
# loads, so the cell runs on a fresh kernel.
# NOTE(review): assumes that frame carries ELEV_LOG_FT_A / ELEV_LOG_FT_B,
# as it does for the NB_REVIEW_LOG_* columns — confirm against the parquet schema.
(
    facet_distribution_plot(
        df_route_combinaison_enhanced.to_pandas(),
        category="HAS_EXISTED",
        value_vars=["ELEV_LOG_FT_A", "ELEV_LOG_FT_B"],
        title_x="elevation log",
        dico_color=dico_has_existed,
        nbins=30,
    )
    .update_xaxes(dtick=0.5)
)

## ratings

In [None]:
# Passes the two airport ratings and the route rating to
# facet_distribution_plot with HAS_EXISTED as the category, then sets an
# x-axis tick every 0.1 units.
(
    facet_distribution_plot(
        df_route_combinaison_enhanced.to_pandas(),
        category="HAS_EXISTED",
        value_vars=["RATING_A", "RATING_B", "ROUTE_RATING"],
        title_x="rating",
        dico_color=dico_has_existed,
        nbins=80,
    )
    .update_xaxes(dtick=0.1)
)

## time zone

In [None]:
# Passes the two airport time-zone columns and their difference to
# facet_distribution_plot with HAS_EXISTED as the category, then sets an
# x-axis tick every 1 unit.
(
    facet_distribution_plot(
        df_route_combinaison_enhanced.to_pandas(),
        category="HAS_EXISTED",
        value_vars=["TIME_ZONE_2016_A", "TIME_ZONE_2016_B", "TIME_ZONE_DIFF"],
        title_x="time zone",
        dico_color=dico_has_existed,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

## number of runways

In [None]:
# Passes the runway-count columns of both airports to
# facet_distribution_plot with HAS_EXISTED as the category, then sets an
# x-axis tick every 1 unit.
(
    facet_distribution_plot(
        df_route_combinaison_enhanced.to_pandas(),
        category="HAS_EXISTED",
        value_vars=["NB_RUNWAYS_A", "NB_RUNWAYS_B"],
        title_x="number of runways",
        dico_color=dico_has_existed,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

## number of reviews

In [None]:
# Passes the NB_REVIEW_LOG_* columns of both airports to
# facet_distribution_plot with HAS_EXISTED as the category.
# The axis title states the log scale because the plotted columns are the
# *_LOG_* ones, not raw counts (same convention as the 'elevation log' plot).
# NOTE(review): the base of the logarithm is not visible here — confirm upstream.
(
    facet_distribution_plot(
        df_route_combinaison_enhanced.to_pandas(),
        category="HAS_EXISTED",
        value_vars=["NB_REVIEW_LOG_A", "NB_REVIEW_LOG_B"],
        title_x="number of reviews (log)",
        dico_color=dico_has_existed,
        nbins=20,
    )
    .update_xaxes(dtick=1)
)

# Heatmap

## ratings

In [None]:
# RATING_A against RATING_B, one heatmap per HAS_EXISTED value
# (as produced by plot_heatmap_by_group), with ticks every 0.1 on both axes.
(
    plot_heatmap_by_group(
        df_route_combinaison_enhanced,
        x_col="RATING_A",
        y_col="RATING_B",
        group_col="HAS_EXISTED",
    )
    .update_layout(height=700)
    .update_xaxes(dtick=0.1)
    .update_yaxes(dtick=0.1)
)

## number of runways

In [None]:
# NB_RUNWAYS_A against NB_RUNWAYS_B, grouped by HAS_EXISTED,
# with ticks every 1 on both axes.
(
    plot_heatmap_by_group(
        df_route_combinaison_enhanced,
        x_col="NB_RUNWAYS_A",
        y_col="NB_RUNWAYS_B",
        group_col="HAS_EXISTED",
    )
    .update_layout(height=500)
    .update_xaxes(dtick=1)
    .update_yaxes(dtick=1)
)

## is island

In [None]:
# IS_ISLAND_A against IS_ISLAND_B, grouped by HAS_EXISTED.
plot_heatmap_by_group(
    df_route_combinaison_enhanced,
    x_col="IS_ISLAND_A",
    y_col="IS_ISLAND_B",
    group_col="HAS_EXISTED",
)

## airport to city drive distance

In [None]:
# Bin both airport-to-city driving distances: each value is floor-divided by
# bin_size and multiplied back, giving the start of its bin.
bin_size = 10

df_binned = df_route_combinaison_enhanced.with_columns(
    APT_CITY_DRIVE_DIST_KM_A_BIN=pl.col("APT_CITY_DRIVE_DIST_KM_A") // bin_size * bin_size,
    APT_CITY_DRIVE_DIST_KM_B_BIN=pl.col("APT_CITY_DRIVE_DIST_KM_B") // bin_size * bin_size,
)

In [None]:
# Binned driving distance of airport A against airport B, grouped by HAS_EXISTED.
plot_heatmap_by_group(
    df_binned,
    x_col="APT_CITY_DRIVE_DIST_KM_A_BIN",
    y_col="APT_CITY_DRIVE_DIST_KM_B_BIN",
    group_col="HAS_EXISTED",
)

## airport to city drive time

In [None]:
# Bin both airport-to-city driving times: each value is floor-divided by
# bin_size and multiplied back, then rounded to one decimal.
bin_size = 0.2

df_binned = df_route_combinaison_enhanced.with_columns(
    APT_CITY_DRIVE_TIME_H_A_BIN=(
        pl.col("APT_CITY_DRIVE_TIME_H_A") // bin_size * bin_size
    ).round(1),
    APT_CITY_DRIVE_TIME_H_B_BIN=(
        pl.col("APT_CITY_DRIVE_TIME_H_B") // bin_size * bin_size
    ).round(1),
)

In [None]:
# Binned driving time of airport A against airport B, grouped by HAS_EXISTED,
# with ticks every 0.2 on both axes (the bin width used to build df_binned).
(
    plot_heatmap_by_group(
        df_binned,
        x_col="APT_CITY_DRIVE_TIME_H_A_BIN",
        y_col="APT_CITY_DRIVE_TIME_H_B_BIN",
        group_col="HAS_EXISTED",
    )
    .update_xaxes(dtick=0.2)
    .update_yaxes(dtick=0.2)
)

## elevation

In [None]:
# Bin both airport elevations: each value is floor-divided by bin_size and
# multiplied back, giving the start of its bin.
bin_size = 500

df_binned = df_route_combinaison_enhanced.with_columns(
    ELEV_FT_A_BIN=pl.col("ELEV_FT_A") // bin_size * bin_size,
    ELEV_FT_B_BIN=pl.col("ELEV_FT_B") // bin_size * bin_size,
)

In [None]:
# Binned elevation of airport A against airport B, grouped by HAS_EXISTED.
plot_heatmap_by_group(
    df_binned,
    x_col="ELEV_FT_A_BIN",
    y_col="ELEV_FT_B_BIN",
    group_col="HAS_EXISTED",
)

## time zone

In [None]:
# TIME_ZONE_2016_A against TIME_ZONE_2016_B, grouped by HAS_EXISTED,
# with ticks every 1 on both axes.
(
    plot_heatmap_by_group(
        df_route_combinaison_enhanced,
        x_col="TIME_ZONE_2016_A",
        y_col="TIME_ZONE_2016_B",
        group_col="HAS_EXISTED",
    )
    .update_layout(height=600)
    .update_xaxes(dtick=1)
    .update_yaxes(dtick=1)
)

## number of reviews

In [None]:
# Bin both NB_REVIEW_LOG_* columns: each value is floor-divided by bin_size
# and multiplied back, then rounded to one decimal.
bin_size = 1

df_binned = df_route_combinaison_enhanced.with_columns(
    NB_REVIEW_LOG_A_BIN=(pl.col("NB_REVIEW_LOG_A") // bin_size * bin_size).round(1),
    NB_REVIEW_LOG_B_BIN=(pl.col("NB_REVIEW_LOG_B") // bin_size * bin_size).round(1),
)

In [None]:
# Binned NB_REVIEW_LOG of airport A against airport B, grouped by HAS_EXISTED.
(
    plot_heatmap_by_group(
        df_binned,
        x_col="NB_REVIEW_LOG_A_BIN",
        y_col="NB_REVIEW_LOG_B_BIN",
        group_col="HAS_EXISTED",
    )
    .update_layout(height=400)
)

# Range

In [None]:
# Great-circle distance distribution per HAS_EXISTED value, with the two range
# limits drawn as vertical lines and the band between them shaded.
MAX_LR_RANGE_KM = 7400   # x position of the "Max LR range" line (x axis is in km)
MAX_XLR_RANGE_KM = 8700  # x position of the "Max XLR range" line (x axis is in km)
RANGE_LINE_TOP = 0.064   # upper end of both vertical lines, in y-axis data units

(
    facet_distribution_plot(
        df_route_combinaison_enhanced.to_pandas(),
        category="HAS_EXISTED",
        value_vars=["DIST_GC_KM"],
        title_x="distance great circle (km)",
        dico_color=dico_has_existed,
        nbins=60,
        height=450,
    )
    .update_xaxes(dtick=200)
    .add_scatter(
        x=[MAX_XLR_RANGE_KM, MAX_XLR_RANGE_KM],
        y=[0, RANGE_LINE_TOP],
        mode="lines",
        line=dict(color="black", dash="dash", width=2),
        name="Max XLR range",
    )
    .add_scatter(
        x=[MAX_LR_RANGE_KM, MAX_LR_RANGE_KM],
        y=[0, RANGE_LINE_TOP],
        mode="lines",
        line=dict(color="black", dash="dot", width=2),
        name="Max LR range",
    )
    .add_shape(
        type="rect",
        x0=MAX_LR_RANGE_KM,
        x1=MAX_XLR_RANGE_KM,
        y0=0,
        # Explicit top edge: with yref="paper", 0 and 1 are the bottom and top
        # of the plotting area. Without y1 the height of the band is left to
        # the renderer's fallback value.
        y1=1,
        xref="x",
        yref="paper",
        line=dict(color="black", width=2),
        fillcolor="black",
        opacity=0.3,
    )
)

# TO/LDG feasibility

In [None]:
def _limit_elevation(runway_m, base_m):
    """Return ((runway_m / base_m) - 1) / 0.07 * 1000, the limit curve plotted on the elevation axis.

    NOTE(review): presumably models a 7 % runway-length penalty per 1000 ft of
    elevation over the base length — confirm with the source of the rule.
    """
    return ((runway_m / base_m) - 1) / 0.07 * 1000


base_TO = 2800.0  # m
base_LDG = 1800.0  # m

# 100 evenly spaced runway lengths spanning the observed RUNWAY_M range.
runway_lengths = df_route_combinaison_enhanced["RUNWAY_M"]
x_vals = np.linspace(runway_lengths.min(), runway_lengths.max(), 100)
y_vals_to = _limit_elevation(x_vals, base_TO)
y_vals_ldg = _limit_elevation(x_vals, base_LDG)

# Scatter of runway length against elevation coloured by IS_FEASIBLE, with the
# take-off and landing limit curves overlaid.
(
    px.scatter(
        df_route_combinaison_enhanced.to_pandas(),
        x="RUNWAY_M",
        y="ELEV_FT_A",
        color="IS_FEASIBLE",
        color_discrete_map=dico_has_existed,
        title="Take off/ landing faisability",
        labels={"RUNWAY_M": "Runway lenght (m)", "ELEV_FT_A": "Elevation (ft)"},
    )
    .add_scatter(
        x=x_vals,
        y=y_vals_to,
        mode="lines",
        line=dict(color="black", dash="dash"),
        name="Limit take off",
    )
    .add_scatter(
        x=x_vals,
        y=y_vals_ldg,
        mode="lines",
        line=dict(color="black", dash="dot"),
        name="Limit landing",
    )
    .update_yaxes(range=[-200, 8000])
    .update_xaxes(dtick=200)
)