***
### Import of required libraries
***

In [None]:
import glob

import pandas as pd
from tqdm.auto import tqdm

from traffic.core import Traffic
from traffic.data import airports

# NOTE: no parquet export belongs in the import cell. `t` is only created
# further down by `Traffic.from_file(...)`, so calling `t.to_parquet(...)`
# here raised a NameError when the notebook was run from top to bottom.
# The column-reduced trajectories are written right after the column
# selection step in the SAMAX processing section.

***
### Processing of SAMAX data
***

In [None]:
# Load the SAMAX trajectories into a Traffic object (presumably take-offs
# from runway 28, judging by the file name -- confirm against the source).
samax_source = (
    "/mnt/beegfs/store/MIAR/01_sources/SAMAX/02_runway_type/to_28.parquet"
)
t = Traffic.from_file(samax_source)

In [None]:
# Restrict the trajectory table to the columns listed here; every other
# column is discarded.
kept_columns = [
    "timestamp",
    "icao24",
    "latitude",
    "longitude",
    "altitude",
    "callsign",
    "flight_id",
]
t.data = t.data[kept_columns]

In [None]:
# Checkpoint: write the column-reduced trajectories to disk
columns_removed_path = (
    "/mnt/beegfs/store/krum/MT/inputs/samax_trajs_columns_removed.parquet"
)
t.to_parquet(columns_removed_path)

In [None]:
# Build the lazy per-flight pipeline: resample to 1 s, compute the distance
# to LSZH and keep only samples with distance < 30 (nm according to the
# progress-bar label below).
near_lszh = (
    t.resample("1s").distance(airports["LSZH"]).query("distance < 30")
)

# Evaluate the pipeline on 30 workers
t = near_lszh.eval(max_workers=30, desc="Resampling and reduction to 30nm")

# The helper column was only needed for the filter above; `columns=` already
# selects the axis, so no separate `axis` argument is required.
t.data.drop(columns="distance", inplace=True)

In [None]:
# Checkpoint: write the resampled, radius-reduced trajectories to disk
radius_reduced_path = (
    "/mnt/beegfs/store/krum/MT/inputs/samax_trajs_radius_reduced.parquet"
)
t.to_parquet(radius_reduced_path)

In [None]:
# Remove part before the start of the take-off roll
def after_to_roll(flight):
    """Crop a flight so that it starts at its first sample inside the box.

    The reference time ``t0`` is the earliest timestamp among the samples
    whose position lies strictly inside the fixed longitude/latitude box
    used in the query below (presumably the position where the take-off
    roll starts -- TODO confirm the coordinates).

    Args:
        flight: Object exposing a ``data`` DataFrame with ``longitude``,
            ``latitude`` and ``timestamp`` columns and an ``after(t0)``
            method.

    Returns:
        The result of ``flight.after(t0)``, or ``None`` when the flight has
        no sample inside the box or the cropping fails.
    """
    try:
        t0 = flight.data.query(
            "8.56695<longitude<8.5695 and 47.45659<latitude<47.457"
        ).timestamp.min()
        # min() over an empty selection yields NaT: there is no reference
        # time to crop at, so the flight is dropped explicitly.
        if pd.isna(t0):
            return None
        return flight.after(t0)
    except Exception:
        # Best effort: a flight that cannot be cropped is dropped. Unlike a
        # bare `except:`, this lets KeyboardInterrupt/SystemExit propagate
        # so a long-running evaluation can still be interrupted.
        return None


# Queue the crop for every flight, then evaluate it on 30 workers
lazy_crop = t.iterate_lazy().pipe(after_to_roll)
t = lazy_crop.eval(desc="processing", max_workers=30)

In [None]:
# Checkpoint: write the trajectories cropped by after_to_roll to disk
toroll_crop_path = (
    "/mnt/beegfs/store/krum/MT/inputs/samax_trajs_toroll_crop.parquet"
)
t.to_parquet(toroll_crop_path)

In [None]:
# Keep only the flights that have at least one sample with altitude > 5000
# (presumably feet -- confirm the unit of the altitude column).
above_5000 = t.query("altitude>5000")
traffic_fids = above_5000.data["flight_id"].unique()
t = t[traffic_fids]

In [None]:
# Write the final, fully processed SAMAX trajectories to disk
samax_output_path = "/mnt/beegfs/store/krum/MT/inputs/samax_trajs.parquet"
t.to_parquet(samax_output_path)

***
### Processing of FZAG mass data
***

##### Import and processing of data

In [None]:
# Location of the FZAG mass data
fzag_data_path = "/mnt/beegfs/store/MIAR/01_sources/FZAG"

# Mapping from the raw FZAG column codes to descriptive names
fzag_column_names = {
    "SDT": "date",
    "CSG": "callsign",
    "TWT": "toff_weight_kg",
    "ITY": "typecode",
}

df_departures = (
    pd.read_csv(
        f"{fzag_data_path}/df_departure.csv", sep=",", header=0, index_col=0
    )
    .rename(columns=fzag_column_names)
    .drop(columns=["REG"])
    # Parse the date column into datetime values
    .assign(date=lambda d: pd.to_datetime(d["date"]))
    # Discard rows without a take-off weight
    .loc[lambda d: d["toff_weight_kg"].notna()]
    # Airline ICAO code = first three characters of the callsign
    .assign(ICAO=lambda d: d["callsign"].str[:3])
    # Keep only the five listed airline codes; everything else (e.g. Swiss
    # Air Force or invalid codes) is dropped
    .loc[lambda d: d["ICAO"].isin(["SWR", "EDW", "DLH", "ADR", "BEL"])]
    # Drop the rare typecodes
    .loc[lambda d: ~d["typecode"].isin(["CRJ7", "CRJX", "B734"])]
)

# Save the processed FZAG data
df_departures.to_parquet("/mnt/beegfs/store/krum/MT/inputs/df_mass.parquet")