In [None]:
import yaml
import os

def create_data_list(source_file):
    """Load the YAML catalogue of data sources.

    Parameters
    ----------
    source_file : str or path-like
        Path of the YAML file to read.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` builds from the document (a nested
        ``dict`` for a mapping document, ``None`` for an empty file).

    Raises
    ------
    OSError
        If ``source_file`` cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(source_file, "r", encoding="utf-8") as my_file:
        return yaml.safe_load(my_file)

# Parse the YAML catalogue of data sources
sources = create_data_list("sources.yml")

In [None]:
# Catalogue of data sources; the cells below index it by dataset family
urls = create_data_list(source_file="sources.yml")

In [None]:
# Display the entries listed under the "airports" key of the catalogue
list(urls["airports"].values())

In [None]:
import pandas as pd

# Read the first airport file with the code/label columns kept as text, then
# split ANMOIS into its first four characters ("an") and the remainder ("mois")
data = pd.read_csv(
    list(urls["airports"].values())[0],
    sep=";",
    dtype={"ANMOIS": "str", "APT": "str", "APT_NOM": "str", "APT_ZON": "str"},
).assign(
    an=lambda frame: frame["ANMOIS"].str.slice(stop=4),
    mois=lambda frame: frame["ANMOIS"].str.slice(start=4),
)


In [None]:
def clean_dataframe(df):
    """Add ``an``/``mois`` columns derived from ``ANMOIS`` and lower-case all column names.

    The frame is modified in place and also returned, so callers may either
    rebind the result or keep using the object they passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column named ``ANMOIS``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``an`` (first four characters of
        ``ANMOIS``), ``mois`` (remaining characters, leading zeros removed)
        and every column name lower-cased.

    Raises
    ------
    KeyError
        If ``df`` has no ``ANMOIS`` column (e.g. it was already cleaned).
    """
    df["an"] = df["ANMOIS"].str.slice(stop=4)

    # Everything after the fourth character, with leading zeros stripped
    # ("01" -> "1")
    df["mois"] = (
        df["ANMOIS"].str.slice(start=4).str.replace(r"^0+", "", regex=True)
    )

    # Rename the columns of the frame passed in, not of a notebook-level
    # global, so the function works on any frame and in a fresh kernel.
    df.columns = [name.lower() for name in df.columns]

    return df

In [None]:
def _read_and_clean(list_files, col_types):
    """Read semicolon-separated files, stack them and clean the result.

    Shared implementation behind the ``import_*_data`` functions.

    Parameters
    ----------
    list_files : iterable
        Paths or URLs accepted by ``pandas.read_csv``.
    col_types : dict
        Column name -> dtype mapping forwarded to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Result of ``clean_dataframe`` applied to the concatenated files.

    Raises
    ------
    ValueError
        If ``list_files`` is empty (raised by ``pandas.concat``).
    """
    frames = [
        pd.read_csv(file, delimiter=";", dtype=col_types)
        for file in list_files
    ]
    # Row labels of each file are kept as-is, so they may repeat across files.
    return clean_dataframe(pd.concat(frames))


def import_airport_data(list_files):
    """Import and clean the airport files.

    Parameters
    ----------
    list_files : iterable
        Paths or URLs of the semicolon-separated airport files.

    Returns
    -------
    pandas.DataFrame
        All files stacked and passed through ``clean_dataframe``.
    """
    # Columns read as text rather than letting pandas infer a dtype
    col_types = {
        "ANMOIS": "str",
        "APT": "str",
        "APT_NOM": "str",
        "APT_ZON": "str",
    }
    return _read_and_clean(list_files, col_types)


def import_compagnies_data(list_files):
    """Import and clean the airline ("compagnies") files.

    Parameters
    ----------
    list_files : iterable
        Paths or URLs of the semicolon-separated airline files.

    Returns
    -------
    pandas.DataFrame
        All files stacked and passed through ``clean_dataframe``.
    """
    # Columns read as text rather than letting pandas infer a dtype
    col_types = {
        "ANMOIS": "str",
        "CIE": "str",
        "CIE_NOM": "str",
        "CIE_NAT": "str",
        "CIE_PAYS": "str",
    }
    return _read_and_clean(list_files, col_types)


def import_liaisons_data(list_files):
    """Import and clean the route ("liaisons") files.

    Parameters
    ----------
    list_files : iterable
        Paths or URLs of the semicolon-separated route files.

    Returns
    -------
    pandas.DataFrame
        All files stacked and passed through ``clean_dataframe``.
    """
    # Columns read as text rather than letting pandas infer a dtype
    col_types = {
        "ANMOIS": "str",
        "LSN": "str",
        "LSN_DEP_NOM": "str",
        "LSN_ARR_NOM": "str",
        "LSN_SCT": "str",
        "LSN_FSC": "str",
    }
    return _read_and_clean(list_files, col_types)


In [None]:
# Load every file listed in the catalogue for each dataset family
pax_apt_all = import_airport_data(list(urls['airports'].values()))
pax_cie_all = import_compagnies_data(list(urls['compagnies'].values()))
pax_lsn_all = import_liaisons_data(list(urls['liaisons'].values()))


In [None]:
import geopandas as gpd

# Read the airport GeoJSON referenced in the sources catalogue
airports_location = gpd.read_file(urls["geojson"]["airport"])

In [None]:
import folium 
# Start from a folium map created with default settings
m = folium.Map()

# Overlay the airport features on the map as a GeoJSON layer
folium.GeoJson(airports_location).add_to(m)
# Last expression of the cell, so the notebook renders the map inline
m

In [31]:
import src.import_data as sid