<div class="alert alert-block alert-info"> In this notebook, I examine whether newly opened routes differ from existing routes on the international market, by comparing them across airport metrics. </div>

# Import

## lib

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import polars as pl
import pandas as pd
from polars import col as d
import glob
import os

## plot

In [None]:
from utils_plot import facet_distribution_plot
from utils_plot import plot_heatmap_by_group

## csv

In [None]:
# Root folder of the input data sets.
# It can be overridden with the ATSLAB_DATA_DIR environment variable so the notebook
# is not tied to one machine; the original location stays the default.
# os.path.join(..., '') guarantees a trailing separator, which the loading cells rely on
# because they build file names with `folder_path + "<file>"`.
folder_path = os.path.join(os.environ.get('ATSLAB_DATA_DIR', '/home/sara/Desktop/ATSLab/data/'), '')

In [None]:
# Airport metrics (CSV) and the scheduled data set (parquet), both read from folder_path.
df_airports_metrics_modif = pl.read_csv(f"{folder_path}df_airports_metrics_modif.csv")
df_scheduled = pl.read_parquet(
    f"{folder_path}scheduled_dataset_transatlantic_enhanced.parquet"
)  # TODO: change name

In [None]:
# Airport ratings produced by the scraping step (the file name is prefixed 20250819).
airport_ratings_csv = "/home/sara/Desktop/ATSLab/data_scrapping/csv_output/20250819_output_airport_ratings.csv"
df_airports_ratings = pl.read_csv(airport_ratings_csv)

## some variables

In [None]:
# Colour palettes passed as `dico_color` to the plots, keyed by category value.
_RED, _GREEN = "#E57373", "#81C784"

# Used with the IS_OPENING flag.
dico_is_opening = {False: _RED, True: _GREEN}

# Used with the HAS_ENDED_WITHOUT_COVID flag (same red/green pair).
dico_has_ended = {False: _RED, True: _GREEN}

# Used with the TAG_DURATION_OPENING labels.
dico_tag_duration_opening = dict(
    SHORT_OPENING="#FFA500",
    LONG_OPENING="#1f77b4",
    NO_OPENING="#A9A9A9",
)

# Used with the TAG_END_WITHOUT_COVID labels.
dico_tag_end = dict(
    NEW_RTE_STILL_EXISTING="#1f9e89",
    NEW_RTE_STOPPED_EXISTING="#a8d5bf",
    OLD_RTE_STILL_EXISTING="#d65f5f",
    OLD_RTE_STOPPED_EXISTING="#e3b6b6",
)

In [None]:
# Display order of the category values, keyed by the column they belong to.
category_orders = dict(
    TAG_DURATION_OPENING=["SHORT_OPENING", "LONG_OPENING", "NO_OPENING"],
    TAG_END_WITHOUT_COVID=[
        "OLD_RTE_STILL_EXISTING",
        "NEW_RTE_STILL_EXISTING",
        "OLD_RTE_STOPPED_EXISTING",
        "NEW_RTE_STOPPED_EXISTING",
    ],
)

# Data enhancing

In [None]:
# Airport-level columns copied onto each endpoint (A / B) of a route.
apt_metric_cols = ['APT_CODE', 'OPENING_YEAR', 'TIME_ZONE_2016', 'IS_ISLAND', 'ELEV_FT', 'NB_RUNWAYS', 'LONGEST_RUNWAY_FT', 'APT_CITY_DRIVE_DIST_KM', 'APT_CITY_DRIVE_TIME_H']
apt_rating_cols = ['APT_CODE', 'RATING']


def _with_side_suffix(df_apt, cols, side):
    """Return `df_apt` restricted to `cols`, each column renamed to `<col>_<side>`."""
    return df_apt.select(cols).rename({col: f"{col}_{side}" for col in cols})


def _log_or_null(expr):
    """Natural log of `expr`, null where the value is <= 0.

    A plain `.log()` turns 0 into -inf and negative values into NaN, which would
    end up in the ELEV_LOG_FT_* columns plotted further down.
    """
    return pl.when(expr > 0).then(expr.log())


df_scheduled_enhanced_apt_metrics = (
    df_scheduled
    # keep only rows whose two airports belong to different regions
    .filter(d.REGION_ID_A != d.REGION_ID_B)
    # one row per (airport A, airport B, last existing year), summing the event counters
    .group_by('APT_CODE_A', 'APT_CODE_B', 'LAST_EXISTING_YEAR')
    .agg(d.NB_OPENING_RTE.sum(), d.NB_SHORT_OPENING_RTE.sum(), d.NB_LONG_OPENING_RTE.sum(), d.NB_ENDING_RTE.sum(), d.NB_REOPENING_RTE.sum(), d.NB_PAUSE_RTE.sum())
    .with_columns(IS_OPENING = d.NB_OPENING_RTE.cast(pl.Boolean))
    # NOTE(review): the tags below test the summed counters with `== 1` / `== 0`,
    # whereas IS_OPENING is true for any non-zero sum -- confirm the sums never exceed 1.
    .with_columns(TAG_DURATION_OPENING = pl.when(d.NB_SHORT_OPENING_RTE == 1)
                                  .then(pl.lit('SHORT_OPENING'))
                                  .when(d.NB_LONG_OPENING_RTE == 1)
                                  .then(pl.lit('LONG_OPENING'))
                                  .otherwise(pl.lit('NO_OPENING'))
    )

    ## add apt metrics
    .join(_with_side_suffix(df_airports_metrics_modif, apt_metric_cols, 'A'), how = 'left', on = 'APT_CODE_A')
    .join(_with_side_suffix(df_airports_metrics_modif, apt_metric_cols, 'B'), how = 'left', on = 'APT_CODE_B')

    ## add apt rating
    .join(_with_side_suffix(df_airports_ratings, apt_rating_cols, 'A'), how = 'left', on = 'APT_CODE_A')
    .join(_with_side_suffix(df_airports_ratings, apt_rating_cols, 'B'), how = 'left', on = 'APT_CODE_B')

    ## this tag may not be useful: every recorded ending counts, whatever its year
    .with_columns(TAG_END_WITH_COVID = pl.when((d.NB_OPENING_RTE == 1) & (d.NB_ENDING_RTE == 0))
                                      .then(pl.lit('NEW_RTE_STILL_EXISTING'))
                                      .when((d.NB_OPENING_RTE == 1) & (d.NB_ENDING_RTE == 1))
                                      .then(pl.lit('NEW_RTE_STOPPED_EXISTING'))
                                      .when((d.NB_OPENING_RTE == 0) & (d.NB_ENDING_RTE == 1))
                                      .then(pl.lit('OLD_RTE_STOPPED_EXISTING'))
                                      .otherwise(pl.lit('OLD_RTE_STILL_EXISTING'))
                 )

    ## same tag, but an ending only counts as "stopped" when LAST_EXISTING_YEAR <= 2019
    .with_columns(TAG_END_WITHOUT_COVID = pl.when((d.NB_OPENING_RTE == 1) & (d.NB_ENDING_RTE == 0))
                                      .then(pl.lit('NEW_RTE_STILL_EXISTING'))
                                      .when((d.NB_OPENING_RTE == 1) & (d.NB_ENDING_RTE == 1) & (d.LAST_EXISTING_YEAR <= 2019))
                                      .then(pl.lit('NEW_RTE_STOPPED_EXISTING'))
                                      .when((d.NB_OPENING_RTE == 1) & (d.NB_ENDING_RTE == 1) & (d.LAST_EXISTING_YEAR > 2019))
                                      .then(pl.lit('NEW_RTE_STILL_EXISTING'))
                                      .when((d.NB_OPENING_RTE == 0) & (d.NB_ENDING_RTE == 1) & (d.LAST_EXISTING_YEAR <= 2019))
                                      .then(pl.lit('OLD_RTE_STOPPED_EXISTING'))
                                      .otherwise(pl.lit('OLD_RTE_STILL_EXISTING'))
                 )

    ## boolean version of TAG_END_WITHOUT_COVID: True for the two *_STOPPED_EXISTING labels
    .with_columns(HAS_ENDED_WITHOUT_COVID = d.TAG_END_WITHOUT_COVID.is_in(['OLD_RTE_STOPPED_EXISTING', 'NEW_RTE_STOPPED_EXISTING']))

    .rename({'OPENING_YEAR_A':'APT_OPENING_YEAR_A', 'OPENING_YEAR_B':'APT_OPENING_YEAR_B'})

    ## route-level metrics: sum of the values of both airports
    .with_columns(ROUTE_DRIVE_DIST_KM = d.APT_CITY_DRIVE_DIST_KM_A + d.APT_CITY_DRIVE_DIST_KM_B)
    .with_columns(ROUTE_DRIVE_TIME_H = d.APT_CITY_DRIVE_TIME_H_A + d.APT_CITY_DRIVE_TIME_H_B)
    .with_columns(ROUTE_RATING = d.RATING_A+d.RATING_B)

    ## log elevation; null (instead of -inf / NaN) for elevations <= 0
    .with_columns(ELEV_LOG_FT_A = _log_or_null(d.ELEV_FT_A))
    .with_columns(ELEV_LOG_FT_B = _log_or_null(d.ELEV_FT_B))

    ## absolute difference between the two airports' TIME_ZONE_2016 values
    .with_columns(TIME_ZONE_DIFF = (d.TIME_ZONE_2016_A - d.TIME_ZONE_2016_B).abs())
)

# Volume of data

In [None]:
# Number of rows per IS_OPENING value.
# group_by does not guarantee the order of its output, so sort for a stable display
# (as the TAG_END_* count cells already do).
(
    df_scheduled_enhanced_apt_metrics
    .group_by('IS_OPENING')
    .agg(pl.len().alias('COUNT'))
    .sort('IS_OPENING')
)

In [None]:
# Number of rows per TAG_DURATION_OPENING label.
# group_by does not guarantee the order of its output, so sort for a stable display
# (as the TAG_END_* count cells already do).
(
    df_scheduled_enhanced_apt_metrics
    .group_by('TAG_DURATION_OPENING')
    .agg(pl.len().alias('COUNT'))
    .sort('TAG_DURATION_OPENING')
)

In [None]:
# Number of rows per TAG_END_WITH_COVID label, sorted by label.
(
    df_scheduled_enhanced_apt_metrics
    .group_by('TAG_END_WITH_COVID')
    .agg(COUNT = pl.len())
    .sort('TAG_END_WITH_COVID')
)

In [None]:
# Number of rows per TAG_END_WITHOUT_COVID label, sorted by label.
(
    df_scheduled_enhanced_apt_metrics
    .group_by('TAG_END_WITHOUT_COVID')
    .agg(COUNT = pl.len())
    .sort('TAG_END_WITHOUT_COVID')
)

# Histogram

## airport to city drive distance

In [None]:
# Drive distance (airport A, airport B, and their sum per route), split by IS_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='IS_OPENING',
        value_vars=['APT_CITY_DRIVE_DIST_KM_A', 'APT_CITY_DRIVE_DIST_KM_B', 'ROUTE_DRIVE_DIST_KM'],
        title_x='distance (km)',
        dico_color=dico_is_opening,
        nbins=30,
    )
    .update_xaxes(dtick=10)
    # .update_yaxes(matches=None)
)

In [None]:
# Drive distance (airport A, airport B, and their sum per route), split by TAG_DURATION_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_DURATION_OPENING',
        value_vars=['APT_CITY_DRIVE_DIST_KM_A', 'APT_CITY_DRIVE_DIST_KM_B', 'ROUTE_DRIVE_DIST_KM'],
        title_x='distance (km)',
        dico_color=dico_tag_duration_opening,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=10)
    # .update_yaxes(matches=None)
)

In [None]:
# Drive distance (airport A, airport B, and their sum per route), split by TAG_END_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_END_WITHOUT_COVID',
        value_vars=['APT_CITY_DRIVE_DIST_KM_A', 'APT_CITY_DRIVE_DIST_KM_B', 'ROUTE_DRIVE_DIST_KM'],
        title_x='distance (km)',
        dico_color=dico_tag_end,
        order=category_orders,
        nbins=30,
        height=500,
    )
    .update_xaxes(dtick=10)
)

In [None]:
# Drive distance (airport A, airport B, and their sum per route), split by HAS_ENDED_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='HAS_ENDED_WITHOUT_COVID',
        value_vars=['APT_CITY_DRIVE_DIST_KM_A', 'APT_CITY_DRIVE_DIST_KM_B', 'ROUTE_DRIVE_DIST_KM'],
        title_x='distance (km)',
        dico_color=dico_has_ended,
        nbins=30,
    )
    .update_xaxes(dtick=10)
)

## airport to city drive time

In [None]:
# Drive time (airport A, airport B, and their sum per route), split by IS_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='IS_OPENING',
        value_vars=['APT_CITY_DRIVE_TIME_H_A', 'APT_CITY_DRIVE_TIME_H_B', 'ROUTE_DRIVE_TIME_H'],
        title_x='time (h)',
        dico_color=dico_is_opening,
        nbins=20,
    )
    .update_xaxes(dtick=0.2)
    # .update_yaxes(matches=None)
)

In [None]:
# Drive time (airport A, airport B, and their sum per route), split by TAG_DURATION_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_DURATION_OPENING',
        value_vars=['APT_CITY_DRIVE_TIME_H_A', 'APT_CITY_DRIVE_TIME_H_B', 'ROUTE_DRIVE_TIME_H'],
        title_x='time (h)',
        dico_color=dico_tag_duration_opening,
        order=category_orders,
        nbins=20,
    )
    .update_xaxes(dtick=0.2)
    # .update_yaxes(matches=None)
)

In [None]:
# Drive time (airport A, airport B, and their sum per route), split by TAG_END_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_END_WITHOUT_COVID',
        value_vars=['APT_CITY_DRIVE_TIME_H_A', 'APT_CITY_DRIVE_TIME_H_B', 'ROUTE_DRIVE_TIME_H'],
        title_x='time (h)',
        dico_color=dico_tag_end,
        order=category_orders,
        nbins=20,
    )
    .update_xaxes(dtick=0.2)
    # .update_yaxes(matches=None)
)

In [None]:
# Drive time (airport A, airport B, and their sum per route), split by HAS_ENDED_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='HAS_ENDED_WITHOUT_COVID',
        value_vars=['APT_CITY_DRIVE_TIME_H_A', 'APT_CITY_DRIVE_TIME_H_B', 'ROUTE_DRIVE_TIME_H'],
        title_x='time (h)',
        dico_color=dico_has_ended,
        nbins=20,
    )
    .update_xaxes(dtick=0.2)
    # .update_yaxes(matches=None)
)

## elevation

In [None]:
# Airport elevation in feet (A and B), split by IS_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='IS_OPENING',
        value_vars=['ELEV_FT_A', 'ELEV_FT_B'],
        title_x='elevation (ft)',
        dico_color=dico_is_opening,
        nbins=50,
    )
    .update_xaxes(dtick=200)
)

In [None]:
# Log of the airport elevation (A and B), split by IS_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='IS_OPENING',
        value_vars=['ELEV_LOG_FT_A', 'ELEV_LOG_FT_B'],
        title_x='elevation log',
        dico_color=dico_is_opening,
        nbins=30,
    )
    .update_xaxes(dtick=0.5)
)

In [None]:
# Airport elevation in feet (A and B), split by TAG_DURATION_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_DURATION_OPENING',
        value_vars=['ELEV_FT_A', 'ELEV_FT_B'],
        title_x='elevation (ft)',
        dico_color=dico_tag_duration_opening,
        order=category_orders,
        nbins=50,
    )
    .update_xaxes(dtick=200)
)

In [None]:
# Log of the airport elevation (A and B), split by TAG_DURATION_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_DURATION_OPENING',
        value_vars=['ELEV_LOG_FT_A', 'ELEV_LOG_FT_B'],
        title_x='elevation log',
        dico_color=dico_tag_duration_opening,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=0.5)
)

In [None]:
# Airport elevation in feet (A and B), split by TAG_END_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_END_WITHOUT_COVID',
        value_vars=['ELEV_FT_A', 'ELEV_FT_B'],
        title_x='elevation (ft)',
        dico_color=dico_tag_end,
        order=category_orders,
        nbins=50,
    )
    .update_xaxes(dtick=200)
)

In [None]:
# Log of the airport elevation (A and B), split by TAG_END_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_END_WITHOUT_COVID',
        value_vars=['ELEV_LOG_FT_A', 'ELEV_LOG_FT_B'],
        title_x='elevation log',
        dico_color=dico_tag_end,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=0.5)
)

In [None]:
# Airport elevation in feet (A and B), split by HAS_ENDED_WITHOUT_COVID.
# NOTE(review): category_orders has no entry for HAS_ENDED_WITHOUT_COVID and the
# drive-distance/time HAS_ENDED cells omit `order` -- confirm it is needed here.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='HAS_ENDED_WITHOUT_COVID',
        value_vars=['ELEV_FT_A', 'ELEV_FT_B'],
        title_x='elevation (ft)',
        dico_color=dico_has_ended,
        order=category_orders,
        nbins=50,
    )
    .update_xaxes(dtick=200)
)

In [None]:
# Log of the airport elevation (A and B), split by HAS_ENDED_WITHOUT_COVID.
# NOTE(review): category_orders has no entry for HAS_ENDED_WITHOUT_COVID and the
# drive-distance/time HAS_ENDED cells omit `order` -- confirm it is needed here.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='HAS_ENDED_WITHOUT_COVID',
        value_vars=['ELEV_LOG_FT_A', 'ELEV_LOG_FT_B'],
        title_x='elevation log',
        dico_color=dico_has_ended,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=0.5)
)

## ratings

In [None]:
# Airport rating (A, B, and their sum per route), split by IS_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='IS_OPENING',
        value_vars=['RATING_A', 'RATING_B', 'ROUTE_RATING'],
        title_x='rating',
        dico_color=dico_is_opening,
        nbins=80,
    )
    .update_xaxes(dtick=0.1)
    # .update_yaxes(matches=None)
)

In [None]:
# Airport rating (A, B, and their sum per route), split by TAG_DURATION_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_DURATION_OPENING',
        value_vars=['RATING_A', 'RATING_B', 'ROUTE_RATING'],
        title_x='rating',
        dico_color=dico_tag_duration_opening,
        order=category_orders,
        nbins=80,
    )
    .update_xaxes(dtick=0.1)
    # .update_yaxes(matches=None)
)

In [None]:
# Airport rating (A, B, and their sum per route), split by TAG_END_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_END_WITHOUT_COVID',
        value_vars=['RATING_A', 'RATING_B', 'ROUTE_RATING'],
        title_x='rating',
        dico_color=dico_tag_end,
        order=category_orders,
        nbins=80,
    )
    .update_xaxes(dtick=0.1)
    # .update_yaxes(matches=None)
)

In [None]:
# Airport rating (A, B, and their sum per route), split by HAS_ENDED_WITHOUT_COVID.
# NOTE(review): category_orders has no entry for HAS_ENDED_WITHOUT_COVID and the
# drive-distance/time HAS_ENDED cells omit `order` -- confirm it is needed here.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='HAS_ENDED_WITHOUT_COVID',
        value_vars=['RATING_A', 'RATING_B', 'ROUTE_RATING'],
        title_x='rating',
        dico_color=dico_has_ended,
        order=category_orders,
        nbins=80,
    )
    .update_xaxes(dtick=0.1)
    # .update_yaxes(matches=None)
)

## time zone

In [None]:
# Time zone of each airport and their absolute difference, split by IS_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='IS_OPENING',
        value_vars=['TIME_ZONE_2016_A', 'TIME_ZONE_2016_B', 'TIME_ZONE_DIFF'],
        title_x='time zone',
        dico_color=dico_is_opening,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

In [None]:
# Time zone of each airport and their absolute difference, split by TAG_DURATION_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_DURATION_OPENING',
        value_vars=['TIME_ZONE_2016_A', 'TIME_ZONE_2016_B', 'TIME_ZONE_DIFF'],
        title_x='time zone',
        dico_color=dico_tag_duration_opening,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

In [None]:
# Time zone of each airport and their absolute difference, split by TAG_END_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_END_WITHOUT_COVID',
        value_vars=['TIME_ZONE_2016_A', 'TIME_ZONE_2016_B', 'TIME_ZONE_DIFF'],
        title_x='time zone',
        dico_color=dico_tag_end,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

In [None]:
# Time zone of each airport and their absolute difference, split by HAS_ENDED_WITHOUT_COVID.
# NOTE(review): category_orders has no entry for HAS_ENDED_WITHOUT_COVID and the
# drive-distance/time HAS_ENDED cells omit `order` -- confirm it is needed here.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='HAS_ENDED_WITHOUT_COVID',
        value_vars=['TIME_ZONE_2016_A', 'TIME_ZONE_2016_B', 'TIME_ZONE_DIFF'],
        title_x='time zone',
        dico_color=dico_has_ended,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

## number of runways

In [None]:
# Number of runways at each airport (A and B), split by IS_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='IS_OPENING',
        value_vars=['NB_RUNWAYS_A', 'NB_RUNWAYS_B'],
        title_x='number of runways',
        dico_color=dico_is_opening,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

In [None]:
# Number of runways at each airport (A and B), split by TAG_DURATION_OPENING.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_DURATION_OPENING',
        value_vars=['NB_RUNWAYS_A', 'NB_RUNWAYS_B'],
        title_x='number of runways',
        dico_color=dico_tag_duration_opening,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

In [None]:
# Number of runways at each airport (A and B), split by TAG_END_WITHOUT_COVID.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='TAG_END_WITHOUT_COVID',
        value_vars=['NB_RUNWAYS_A', 'NB_RUNWAYS_B'],
        title_x='number of runways',
        dico_color=dico_tag_end,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

In [None]:
# Number of runways at each airport (A and B), split by HAS_ENDED_WITHOUT_COVID.
# NOTE(review): category_orders has no entry for HAS_ENDED_WITHOUT_COVID and the
# drive-distance/time HAS_ENDED cells omit `order` -- confirm it is needed here.
(
    facet_distribution_plot(
        df_scheduled_enhanced_apt_metrics.to_pandas(),
        category='HAS_ENDED_WITHOUT_COVID',
        value_vars=['NB_RUNWAYS_A', 'NB_RUNWAYS_B'],
        title_x='number of runways',
        dico_color=dico_has_ended,
        order=category_orders,
        nbins=30,
    )
    .update_xaxes(dtick=1)
)

# Heatmap

## ratings

In [None]:
# Heatmap of RATING_A against RATING_B, grouped by IS_OPENING.
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        x_col="RATING_A",
        y_col="RATING_B",
        group_col="IS_OPENING",
        # normalize=True
    )
    .update_layout(height=600)
    .update_xaxes(dtick=0.1)
    .update_yaxes(dtick=0.1)
)

In [None]:
# Heatmap of RATING_A against RATING_B, grouped by TAG_DURATION_OPENING.
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        x_col="RATING_A",
        y_col="RATING_B",
        group_col="TAG_DURATION_OPENING",
        # normalize=True
    )
    .update_layout(height=600)
    .update_xaxes(dtick=0.1)
    .update_yaxes(dtick=0.1)
)

In [None]:
# Heatmap of RATING_A against RATING_B, grouped by TAG_END_WITHOUT_COVID.
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        x_col="RATING_A",
        y_col="RATING_B",
        group_col="TAG_END_WITHOUT_COVID",
        # normalize=True
    )
    .update_layout(height=500)
    .update_xaxes(dtick=0.1)
    .update_yaxes(dtick=0.1)
    .update_annotations(font=dict(size=11))
)

## number of runways

In [None]:
# Heatmap of NB_RUNWAYS_A against NB_RUNWAYS_B, grouped by IS_OPENING.
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        x_col="NB_RUNWAYS_A",
        y_col="NB_RUNWAYS_B",
        group_col="IS_OPENING",
        # normalize=True
    )
    .update_layout(height=500)
    # .update_xaxes(dtick=0.1)
    # .update_yaxes(dtick=0.1)
)

In [None]:
# Heatmap of NB_RUNWAYS_A against NB_RUNWAYS_B, grouped by TAG_DURATION_OPENING.
# Both axes show a runway count, so the y axis gets the same unit tick spacing as
# the x axis (only the x axis was set before).
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        group_col="TAG_DURATION_OPENING",
        x_col="NB_RUNWAYS_A",
        y_col="NB_RUNWAYS_B",
        # normalize=True
    )
    .update_layout(height = 500)
    .update_xaxes(dtick = 1)
    .update_yaxes(dtick = 1)
)

In [None]:
# Heatmap of NB_RUNWAYS_A against NB_RUNWAYS_B, grouped by TAG_END_WITHOUT_COVID.
# Both axes show a runway count, so the y axis gets the same unit tick spacing as
# the x axis (only the x axis was set before).
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        group_col="TAG_END_WITHOUT_COVID",
        x_col="NB_RUNWAYS_A",
        y_col="NB_RUNWAYS_B",
        # normalize=True
    )
    .update_layout(height = 500)
    .update_xaxes(dtick = 1)
    .update_yaxes(dtick = 1)
    .update_annotations(font=dict(size=11))
)

## is island

In [None]:
# Heatmap of IS_ISLAND_A against IS_ISLAND_B, grouped by IS_OPENING.
plot_heatmap_by_group(
    df_scheduled_enhanced_apt_metrics,
    x_col="IS_ISLAND_A",
    y_col="IS_ISLAND_B",
    group_col="IS_OPENING",
)

In [None]:
# (
#     plot_heatmap_by_group(
#     df_scheduled_enhanced_apt_metrics,
#     group_col="TAG_DURATION_OPENING",
#     x_col="IS_ISLAND_A",
#     y_col="IS_ISLAND_B",
#     )
# )

In [None]:
# (
#     plot_heatmap_by_group(
#     df_scheduled_enhanced_apt_metrics,
#     group_col="TAG_END_WITHOUT_COVID",
#     x_col="IS_ISLAND_A",
#     y_col="IS_ISLAND_B",
#     )
# )

## airport to city drive distance

In [None]:
# Width (km) of the drive-distance bins used by the heatmaps of this section.
bin_size = 10

# Floor both drive distances to the lower edge of their bin.
df_binned = df_scheduled_enhanced_apt_metrics.with_columns(
    (d.APT_CITY_DRIVE_DIST_KM_A // bin_size * bin_size).alias('APT_CITY_DRIVE_DIST_KM_A_BIN'),
    (d.APT_CITY_DRIVE_DIST_KM_B // bin_size * bin_size).alias('APT_CITY_DRIVE_DIST_KM_B_BIN'),
)

In [None]:
# Heatmap of the binned drive distances (A against B), grouped by IS_OPENING.
# df_binned must come from the drive-distance binning cell of this section
# (the name is reused by later sections).
(
    plot_heatmap_by_group(
        df_binned,
        x_col="APT_CITY_DRIVE_DIST_KM_A_BIN",
        y_col="APT_CITY_DRIVE_DIST_KM_B_BIN",
        group_col="IS_OPENING",
    )
    # .update_layout(height=500)
    # .update_xaxes(dtick=10)
    # .update_yaxes(dtick=10)
)

In [None]:
# Heatmap of the binned drive distances (A against B), grouped by TAG_DURATION_OPENING.
# df_binned must come from the drive-distance binning cell of this section.
(
    plot_heatmap_by_group(
        df_binned,
        x_col="APT_CITY_DRIVE_DIST_KM_A_BIN",
        y_col="APT_CITY_DRIVE_DIST_KM_B_BIN",
        group_col="TAG_DURATION_OPENING",
    )
    # .update_layout(height=500)
    # .update_xaxes(dtick=10)
    # .update_yaxes(dtick=10)
)

In [None]:
# Heatmap of the binned drive distances (A against B), grouped by TAG_END_WITHOUT_COVID.
# df_binned must come from the drive-distance binning cell of this section.
(
    plot_heatmap_by_group(
        df_binned,
        x_col="APT_CITY_DRIVE_DIST_KM_A_BIN",
        y_col="APT_CITY_DRIVE_DIST_KM_B_BIN",
        group_col="TAG_END_WITHOUT_COVID",
    )
    # .update_layout(height=500)
    # .update_xaxes(dtick=10)
    # .update_yaxes(dtick=10)
    .update_annotations(font=dict(size=11))
)

## airport to city drive time

In [None]:
# Width (hours) of the drive-time bins used by the heatmaps of this section.
bin_size = 0.2

# Floor both drive times to the lower edge of their bin.
# The quotient is rounded before flooring: with a float bin size, a value sitting
# exactly on a boundary (e.g. 0.6 / 0.2 == 2.9999999999999996) would otherwise be
# floored into the bin below (0.4 instead of 0.6).
# The final round(1) removes float noise from the bin edges (multiples of 0.2).
df_binned = (
    df_scheduled_enhanced_apt_metrics
    .with_columns(APT_CITY_DRIVE_TIME_H_A_BIN = ((d.APT_CITY_DRIVE_TIME_H_A / bin_size).round(6).floor() * bin_size).round(1))
    .with_columns(APT_CITY_DRIVE_TIME_H_B_BIN = ((d.APT_CITY_DRIVE_TIME_H_B / bin_size).round(6).floor() * bin_size).round(1))
)

In [None]:
# Heatmap of the binned drive times (A against B), grouped by IS_OPENING.
# df_binned must come from the drive-time binning cell of this section
# (the name is reused by other sections).
(
    plot_heatmap_by_group(
        df_binned,
        x_col="APT_CITY_DRIVE_TIME_H_A_BIN",
        y_col="APT_CITY_DRIVE_TIME_H_B_BIN",
        group_col="IS_OPENING",
    )
    .update_xaxes(dtick=0.2)
    .update_yaxes(dtick=0.2)
)

In [None]:
# Heatmap of the binned drive times (A against B), grouped by TAG_DURATION_OPENING.
# df_binned must come from the drive-time binning cell of this section.
(
    plot_heatmap_by_group(
        df_binned,
        x_col="APT_CITY_DRIVE_TIME_H_A_BIN",
        y_col="APT_CITY_DRIVE_TIME_H_B_BIN",
        group_col="TAG_DURATION_OPENING",
    )
    .update_xaxes(dtick=0.2)
    .update_yaxes(dtick=0.2)
)

In [None]:
# Heatmap of the binned drive times (A against B), grouped by TAG_END_WITHOUT_COVID.
# df_binned must come from the drive-time binning cell of this section.
(
    plot_heatmap_by_group(
        df_binned,
        x_col="APT_CITY_DRIVE_TIME_H_A_BIN",
        y_col="APT_CITY_DRIVE_TIME_H_B_BIN",
        group_col="TAG_END_WITHOUT_COVID",
    )
    .update_xaxes(dtick=0.2)
    .update_yaxes(dtick=0.2)
    .update_annotations(font=dict(size=11))
)

## elevation

In [None]:
# Width (ft) of the elevation bins used by the heatmaps of this section.
bin_size = 500

# Floor both elevations to the lower edge of their bin.
df_binned = df_scheduled_enhanced_apt_metrics.with_columns(
    (d.ELEV_FT_A // bin_size * bin_size).alias('ELEV_FT_A_BIN'),
    (d.ELEV_FT_B // bin_size * bin_size).alias('ELEV_FT_B_BIN'),
)

In [None]:
# Heatmap of the binned elevations (A against B), grouped by IS_OPENING.
# df_binned must come from the elevation binning cell of this section
# (the name is reused by earlier sections).
(
    plot_heatmap_by_group(
        df_binned,
        x_col="ELEV_FT_A_BIN",
        y_col="ELEV_FT_B_BIN",
        group_col="IS_OPENING",
    )
    # .update_xaxes(dtick=500)
    # .update_yaxes(dtick=500)
)

In [None]:
# Heatmap of the binned elevations (A against B), grouped by TAG_DURATION_OPENING.
# df_binned must come from the elevation binning cell of this section.
(
    plot_heatmap_by_group(
        df_binned,
        x_col="ELEV_FT_A_BIN",
        y_col="ELEV_FT_B_BIN",
        group_col="TAG_DURATION_OPENING",
    )
    # .update_xaxes(dtick=500)
    # .update_yaxes(dtick=500)
)

In [None]:
# Heatmap of the binned elevations (A against B), grouped by TAG_END_WITHOUT_COVID.
# df_binned must come from the elevation binning cell of this section.
(
    plot_heatmap_by_group(
        df_binned,
        x_col="ELEV_FT_A_BIN",
        y_col="ELEV_FT_B_BIN",
        group_col="TAG_END_WITHOUT_COVID",
    )
    # .update_xaxes(dtick=500)
    # .update_yaxes(dtick=500)
    .update_annotations(font=dict(size=11))
)

## time zone

In [None]:
# Heatmap of TIME_ZONE_2016_A against TIME_ZONE_2016_B, grouped by IS_OPENING.
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        x_col="TIME_ZONE_2016_A",
        y_col="TIME_ZONE_2016_B",
        group_col="IS_OPENING",
        # normalize=True
    )
    .update_layout(height=600)
    .update_xaxes(dtick=1)
    .update_yaxes(dtick=1)
)

In [None]:
# Heatmap of TIME_ZONE_2016_A against TIME_ZONE_2016_B, grouped by TAG_DURATION_OPENING.
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        x_col="TIME_ZONE_2016_A",
        y_col="TIME_ZONE_2016_B",
        group_col="TAG_DURATION_OPENING",
        # normalize=True
    )
    .update_layout(height=580)
    .update_xaxes(dtick=1)
    .update_yaxes(dtick=1)
)

In [None]:
# Heatmap of TIME_ZONE_2016_A against TIME_ZONE_2016_B, grouped by TAG_END_WITHOUT_COVID.
(
    plot_heatmap_by_group(
        df_scheduled_enhanced_apt_metrics,
        x_col="TIME_ZONE_2016_A",
        y_col="TIME_ZONE_2016_B",
        group_col="TAG_END_WITHOUT_COVID",
        # normalize=True
    )
    .update_layout(height=500)
    .update_xaxes(dtick=1)
    .update_yaxes(dtick=1)
    .update_annotations(font=dict(size=11))
)

# Ideas

- Add range
- Add TO/LDG (take-off/landing) feasibility