In [None]:
import yaml
import os

def create_data_list(source_file):
    """Load the catalogue of data sources from a YAML file.

    Args:
        source_file: Path of the YAML file to read.

    Returns:
        The Python object that ``yaml.safe_load`` builds from the file content.
    """
    with open(source_file, "r") as stream:
        return yaml.safe_load(stream)

sources = create_data_list(source_file = "sources.yml")

In [None]:
urls = create_data_list("sources.yml")

In [None]:
list(urls['airports'].values())

In [None]:
import pandas as pd

# Read the first airport file (";"-separated); the columns listed below are
# forced to strings.
data = pd.read_csv(
    list(urls['airports'].values())[0],
    sep=";",
    dtype={column: "str" for column in ("ANMOIS", "APT", "APT_NOM", "APT_ZON")},
)
# ANMOIS is split by position: the first four characters, then the remainder.
data["an"] = data["ANMOIS"].str[:4]
data["mois"] = data["ANMOIS"].str[4:]


In [None]:
def clean_dataframe(df):
    """Derive year/month columns from ``ANMOIS`` and lower-case all column names.

    The input frame is left untouched: the result is built on a new DataFrame,
    so the function can be called repeatedly on the same raw frame.

    Args:
        df: DataFrame with a string column ``ANMOIS`` whose first four
            characters are the year and whose remaining characters are the
            month.

    Returns:
        A new DataFrame with two extra columns, ``an`` (year) and ``mois``
        (month without leading zeros), and every column name lower-cased.

    Raises:
        KeyError: If ``df`` has no ``ANMOIS`` column.
    """
    anmois = df["ANMOIS"]

    # `.assign` returns a new frame, so the caller's object is never mutated.
    cleaned = df.assign(
        an=anmois.str.slice(stop=4),
        # Strip leading zeros so that "01" becomes "1".
        mois=anmois.str.slice(start=4).str.replace(r'^0+', '', regex=True),
    )
    cleaned.columns = [name.lower() for name in cleaned.columns]

    return cleaned

In [None]:
def import_airport_data(list_files):
    """Read the airport CSV files and return them as one cleaned table.

    Args:
        list_files: Iterable of paths or URLs of ";"-separated CSV files.

    Returns:
        The concatenation of all files, passed through ``clean_dataframe``.
    """
    # Columns read as strings rather than left to type inference
    string_columns = ("ANMOIS", "APT", "APT_NOM", "APT_ZON")
    dtypes = {column: "str" for column in string_columns}

    frames = []
    for path in list_files:
        frames.append(pd.read_csv(path, delimiter=';', dtype=dtypes))

    return clean_dataframe(pd.concat(frames))



def import_compagnies_data(list_files):
    """Read the airline ("compagnies") CSV files and return one cleaned table.

    Args:
        list_files: Iterable of paths or URLs of ";"-separated CSV files.

    Returns:
        The concatenation of all files, passed through ``clean_dataframe``.
    """
    # Columns read as strings rather than left to type inference
    dtypes = dict.fromkeys(
        ["ANMOIS", "CIE", "CIE_NOM", "CIE_NAT", "CIE_PAYS"], "str"
    )

    frames = [
        pd.read_csv(path, delimiter=';', dtype=dtypes) for path in list_files
    ]
    combined = pd.concat(frames)

    return clean_dataframe(combined)


def import_liaisons_data(list_files):
    """Read the route ("liaisons") CSV files and return one cleaned table.

    Args:
        list_files: Iterable of paths or URLs of ";"-separated CSV files.

    Returns:
        The concatenation of all files, passed through ``clean_dataframe``.
    """
    # Columns read as strings rather than left to type inference
    string_columns = [
        "ANMOIS",
        "LSN",
        "LSN_DEP_NOM",
        "LSN_ARR_NOM",
        "LSN_SCT",
        "LSN_FSC",
    ]
    dtypes = {column: "str" for column in string_columns}

    def read_one(path):
        # One ";"-separated file -> one DataFrame
        return pd.read_csv(path, delimiter=';', dtype=dtypes)

    stacked = pd.concat([read_one(path) for path in list_files])
    return clean_dataframe(stacked)


In [None]:
# Load the three passenger tables, each with its dedicated importer.
pax_apt_all = import_airport_data([*urls['airports'].values()])
pax_cie_all = import_compagnies_data([*urls['compagnies'].values()])
pax_lsn_all = import_liaisons_data([*urls['liaisons'].values()])


In [None]:
import geopandas as gpd

airports_location = gpd.read_file(urls['geojson']['airport'])

In [None]:
import folium 
# Empty base map (default location and zoom).
m = folium.Map()

# Overlay every airport feature as a GeoJSON layer, then display the map
# as the cell output.
folium.GeoJson(airports_location).add_to(m)
m

In [None]:
import src.import_data as sid

In [None]:
pax_apt_all.head()

In [None]:
# Distinct airport codes, in order of first appearance in the table.
liste_aeroports = pax_apt_all['apt'].unique()
# The first code found is used as the default selection.
default_airport = liste_aeroports[0]
default_airport

In [None]:
import pandas as pd
import geopandas as gpd
import plotly.express as px
from plotnine import ggplot, geom_line, aes

import src.import_data as sid
from src.create_data_list import create_data_list

# Load data ----------------------------------
# `urls` maps each dataset name to a mapping of source locations.
urls = create_data_list("./sources.yml")


# NOTE(review): the airport importer is applied to all three datasets below.
# Elsewhere in this notebook the compagnies and liaisons files are read with
# import_compagnies_data / import_liaisons_data, which declare different
# string columns. Presumably `sid` exposes the same functions and they should
# be used here -- confirm against src/import_data.py.
pax_apt_all = sid.import_airport_data(urls['airports'].values())
pax_cie_all = sid.import_airport_data(urls['compagnies'].values())
pax_lsn_all = sid.import_airport_data(urls['liaisons'].values())


airports_location = gpd.read_file(
    urls['geojson']['airport']
)


liste_aeroports = pax_apt_all['apt'].unique()
default_airport = liste_aeroports[0]


# Total traffic per row: departures + transit + arrivals.
pax_apt_all['trafic'] = pax_apt_all['apt_pax_dep'] + \
  pax_apt_all['apt_pax_tr'] + \
  pax_apt_all['apt_pax_arr']

# Rows of the default airport. The 'date' column is created with `.assign`,
# which returns a new frame, instead of being written into a `.loc` selection
# of pax_apt_all (that pattern triggers pandas' SettingWithCopyWarning).
trafic_aeroports = (
  pax_apt_all
  .loc[pax_apt_all['apt'] == default_airport]
  .assign(
    # 'anmois' is "YYYYMM"; appending "01" gives the first day of the month.
    date=lambda d: pd.to_datetime(d['anmois'] + '01', format='%Y%m%d')
  )
)


# VALORISATIONS ----------------------------------------------

from src.figures import plot_airport_line

figure_plotly = plot_airport_line(trafic_aeroports, default_airport)



In [None]:
pax_apt_all.head()

In [None]:
# Selectable years and months, as strings.
YEARS_LIST = [str(year) for year in range(2018, 2023)]
# One entry per month, "1" to "12". (`list(str(range(1, 13)))` would instead
# split the text "range(1, 13)" into single characters.)
MONTHS_LIST = [str(month) for month in range(1, 13)]


def create_data_from_input(df, annee, mois):
    """Return the rows of ``df`` for one year and one month.

    Both ``annee`` and ``mois`` are converted to strings before the
    comparison, so integers and strings select the same rows.

    Args:
        df: DataFrame with string columns ``an`` (year) and ``mois`` (month).
        annee: Year to keep, e.g. ``"2018"`` or ``2018``.
        mois: Month to keep, e.g. ``"9"`` or ``9``.

    Returns:
        A DataFrame holding only the rows where ``an`` equals ``annee`` and
        ``mois`` equals ``mois``; empty when nothing matches.
    """
    annee = str(annee)
    mois = str(mois)
    return df.loc[(df["an"] == annee) & (df["mois"] == mois)]

def summary_stat_airport(data):
    """Total the passenger counts per airport, largest traffic first.

    Args:
        data: DataFrame with the columns ``apt``, ``apt_nom``,
            ``apt_pax_dep``, ``apt_pax_arr``, ``apt_pax_tr`` and ``trafic``.

    Returns:
        One row per (``apt``, ``apt_nom``) pair with the summed columns,
        sorted by descending ``trafic``; the ``apt_pax_`` prefix of the
        column names is replaced by ``pax``.
    """
    summed_columns = ["apt_pax_dep", "apt_pax_arr", "apt_pax_tr", "trafic"]

    totals = data.groupby(["apt", "apt_nom"]).agg(
        dict.fromkeys(summed_columns, "sum")
    )
    ranking = totals.sort_values("trafic", ascending=False).reset_index()

    # "apt_pax_dep" -> "paxdep", etc.
    ranking.columns = ranking.columns.str.replace("apt_pax_", "pax")
    return ranking


In [None]:
# Filter on September 2018. Only the last expression of a cell is displayed,
# so this intermediate `.head()` produces no output.
test = create_data_from_input(df = pax_apt_all, annee = "2018", mois = 9)
test.head()

# NOTE(review): the summary is computed on the full pax_apt_all table, not on
# the filtered `test` frame above -- confirm this is intended.
stats_aeroports = summary_stat_airport(pax_apt_all)
stats_aeroports.head()

In [None]:
# Display label: title-cased airport name followed by its code wrapped in
# underscores, which renders as italics once the column is formatted as markdown.
stats_aeroports['name_clean'] = stats_aeroports['apt_nom'].str.title() + " _(" + stats_aeroports['apt'] + ")_"

In [None]:
stats_aeroports.columns

In [None]:
from great_tables import GT, md

# Render the first 15 rows of the ranking as a formatted table: compact
# number formatting for the passenger columns, markdown for the airport label,
# plus a header, column labels and a source note.
(
    GT(stats_aeroports.head(15)[["name_clean", 'paxdep', 'paxarr', 'paxtr', 'trafic']])
     .fmt_number(columns= ['paxdep', 'paxarr', 'paxtr', 'trafic'], compact=True)
     .fmt_markdown(columns = "name_clean")
     .tab_header(title = md("__Statistiques de fréquentation__"), subtitle = "Classement des aéroports")
     .cols_label(
        name_clean = md("__Aéroport__"),
        paxdep = md("__Départs__"),
        paxarr = md("__Arrivée__"),
        paxtr = md("__Transit__"),
        trafic = md("__Total__")
     )
     .tab_source_note(
      source_note = md("_Source: DGAC, à partir des données sur data.gouv.fr_")
      )
)

In [None]:
month = 1
year = 2019

In [None]:
pax_apt_all

In [None]:
airports_location.columns

In [None]:
# Rows of the selected year and month ('an' and 'mois' are compared as strings).
trafic_date = pax_apt_all.loc[(pax_apt_all["an"] == str(year)) & (pax_apt_all["mois"] == str(month)), :]

# Attach each airport's geometry; with merge's default inner join, airports
# missing from either table are dropped.
trafic_aeroports = airports_location[["Code.OACI", "geometry"]].merge(trafic_date,  left_on='Code.OACI', right_on='apt')
# Traffic class from 1 (lowest third) to 3 (highest third).
trafic_aeroports['volume'] = pd.qcut(trafic_aeroports['trafic'], 3, labels=False) + 1

trafic_aeroports

In [89]:
df = pax_apt_all
trafic_date = df.loc[
        (df['mois'].astype(int) == month) & (df['an'].astype(int) == int(year))
    ]
print(trafic_date.shape)


(89, 16)


In [90]:
import folium

def map_leaflet_airport(df, airports_location, month, year):
    """Build a folium map with one coloured marker per airport for a month.

    Airports are split into up to three traffic classes (quantiles of
    ``trafic``) shown in green, blue and red from lowest to highest.

    Args:
        df: Airport traffic table with at least the columns ``anmois``,
            ``an``, ``mois``, ``apt`` and ``trafic``. It is not modified.
        airports_location: Airport locations with at least the columns
            ``Code.OACI``, ``Nom`` and ``geometry`` (presumably a
            GeoDataFrame of points: ``.x`` and ``.y`` are read from each
            geometry).
        month: Month to display; converted with ``int``.
        year: Year to display; converted with ``int``.

    Returns:
        The ``folium.Map`` holding one marker per airport found in both tables.
    """
    # Add the date on a new frame so the caller's DataFrame is left untouched.
    df = df.assign(date=pd.to_datetime(df['anmois'] + '01', format='%Y%m%d'))
    # Diagnostic: the shapes printed below trace the row count at each stage.
    print(df.shape)

    # Filter by month and year; both sides are cast to int so that strings
    # and integers select the same rows.
    trafic_date = df.loc[
        (df['mois'].astype(int) == int(month)) & (df['an'].astype(int) == int(year)), :
    ]
    print(trafic_date.shape)

    # Inner join with airport locations; on a name clash the left-hand
    # (location) column gets the "_x" suffix and the traffic column keeps its name.
    trafic_aeroports = airports_location.merge(
        trafic_date, left_on="Code.OACI", right_on="apt", suffixes=("_x", "")
    )
    print(trafic_aeroports.shape)
    trafic_aeroports['date'] = trafic_aeroports['date'].dt.strftime('%Y-%m-%d')

    palette = ['green', 'blue', 'red']  # lowest -> highest traffic class

    # duplicates='drop' merges identical quantile edges (e.g. many airports
    # with zero traffic) instead of raising; classes stay within 1..3.
    trafic_aeroports['volume'] = pd.qcut(
        trafic_aeroports['trafic'], 3, labels=False, duplicates='drop'
    ) + 1
    trafic_aeroports['color'] = trafic_aeroports['volume'].apply(lambda x: palette[x-1])

    m = folium.Map()

    # One marker per airport row
    for idx, row in trafic_aeroports.iterrows():
        coord = row['geometry']
        name = row['Nom']
        code_oaci = row['Code.OACI']
        trafic = int(row['trafic'])
        color = row['color']

        popup_content = f"{name} ({code_oaci}) : {trafic} voyageurs"

        # folium expects [latitude, longitude], i.e. [y, x]
        folium.Marker(
            location=[coord.y, coord.x],
            popup=folium.Popup(popup_content, parse_html=True),
            icon=folium.Icon(icon="plane", prefix='fa', color=color)
        ).add_to(m)

    return m

m = map_leaflet_airport(pax_apt_all, airports_location, month, year)
m

(4804, 16)
(89, 16)
(63, 36)


In [92]:
pd.to_datetime("2019-01-01").year

2019