***
### Import of required libraries
***

In [14]:
import glob

import pandas as pd
from tqdm.auto import tqdm

from traffic.core import Traffic
from traffic.data import airports

***
### Processing of SAMAX data
***

In [None]:
# Path to folder containing the daily SAMAX files
samax_data_path = "/store/MIAR/01_sources/SAMAX/01_raw"

# Mapping of raw SAMAX column headers to the short column names used below.
# Only the columns listed here are read from the csv files.
cols2keep = {
    "Time [ms since 1.1.1970]": "timestamp",
    "WGS84 lat Float '[-]dd.dddddd' [deg]": "latitude",
    "WGS84 lon Float '[-]ddd.ddddd' [deg]": "longitude",
    "I081/090 Mode C - Float [ft]": "altitude",
    "NC-139 BDS50 Register GS - Int [kt]": "groundspeed",
    "NC-131 Aircraft ID (Downlinked Callsign) - String": "callsign",
    # Only needed for the direction/runway filter below; dropped afterwards
    "NC-046 Target Direction - Int [ |1..6] (1=ARR/2=DEP/3=LOC/4=TRANS/5=MOV/6=UNK)": "direction",
    "NC-052 Allocated RWY - String": "RWY",
}

# The filter below compares these two columns against string literals, so they
# are read as text. Left to type inference, all-digit values would be parsed as
# numbers and would never compare equal to '1', '2', '14' or '28'.
filter_dtypes = {
    raw_name: str
    for raw_name, short_name in cols2keep.items()
    if short_name in ("direction", "RWY")
}

# List of daily csv files in the SAMAX folder (searched recursively)
all_csv_files = glob.glob(f"{samax_data_path}/**/*.csv", recursive=True)

# One filtered dataframe per daily file that could be read
samax_frames = []

# Read every csv file, keep only the columns defined in cols2keep and keep the
# rows with direction 1 (ARR per the column header) on runway 14 or direction 2
# (DEP) on runway 28. The direction and RWY columns are removed afterwards.
# NOTE(review): an earlier comment described this step as "takeoffs on runway
# 28" only, but the query also keeps arrivals on runway 14 -- confirm intent.
for csv_file in tqdm(all_csv_files):
    try:
        samax_frames.append(
            pd.read_csv(
                csv_file,
                sep=";",
                usecols=list(cols2keep),
                dtype=filter_dtypes,
            )
            .rename(columns=cols2keep)
            .query(
                "(direction == '1' and RWY == '14') | (direction == '2' and RWY == '28')"
            )
            .drop(columns=["direction", "RWY"])
        )
    # Best effort: report files that cannot be read or filtered and continue
    except Exception as e:
        print(f"Error with file {csv_file}: {e}")

# Fail with a clear message instead of pandas' "No objects to concatenate"
if not samax_frames:
    raise ValueError(
        f"No SAMAX csv file could be read from {samax_data_path} "
        f"({len(all_csv_files)} file(s) found)"
    )

# Concatenate all daily dataframes
print("concatenating")
df_samax = pd.concat(samax_frames, ignore_index=True)

# Interpret the timestamp column as milliseconds since the epoch and turn it
# into timezone-aware (UTC) pandas timestamps
df_samax["timestamp"] = pd.to_datetime(df_samax["timestamp"], unit="ms", utc=True)

# Wrap the dataframe in a Traffic object and build the per-flight pipeline:
# assign flight ids, resample to 1 s, compute the distance to LSZH and keep
# only the samples with distance < 20 (20 nm radius around the airport)
lszh = airports["LSZH"]
samax_pipeline = (
    Traffic(df_samax).assign_id().resample("1s").distance(lszh).query("distance < 20")
)
t = samax_pipeline.eval(max_workers=30, desc="Resampling and reduction to 20nm")

# The helper column was only needed for the radius filter
t.data.drop(columns="distance", inplace=True)


# Remove part before the start of the take-off roll
def after_to_roll(
    flight,
    lon_bounds=(8.56695, 8.5695),
    lat_bounds=(47.45659, 47.457),
):
    """Cut a flight down to the part after it first appears inside a lon/lat box.

    The earliest timestamp of all samples lying strictly inside the box is
    determined and ``flight.after`` is called with it. The default box is the
    one this notebook uses to mark the start of the take-off roll.

    Parameters
    ----------
    flight
        Object exposing a ``data`` dataframe with ``longitude``, ``latitude``
        and ``timestamp`` columns, and an ``after(timestamp)`` method.
    lon_bounds : tuple of float
        ``(min, max)`` longitude of the box, both bounds exclusive.
    lat_bounds : tuple of float
        ``(min, max)`` latitude of the box, both bounds exclusive.

    Returns
    -------
    The result of ``flight.after(t0)``, or ``None`` when no sample lies inside
    the box or when the flight cannot be processed.
    """
    try:
        lon_min, lon_max = lon_bounds
        lat_min, lat_max = lat_bounds
        points = flight.data
        inside_box = (
            (points["longitude"] > lon_min)
            & (points["longitude"] < lon_max)
            & (points["latitude"] > lat_min)
            & (points["latitude"] < lat_max)
        )
        t0 = points.loc[inside_box, "timestamp"].min()
        # min() of an empty selection is NaT: the flight never entered the box
        if pd.isna(t0):
            return None
        return flight.after(t0)
    # Best effort: a flight that cannot be processed is skipped. Only regular
    # errors are swallowed, so KeyboardInterrupt / SystemExit still propagate.
    except Exception:
        return None


# Run after_to_roll on every flight of the traffic object
trimmed_pipeline = t.iterate_lazy().pipe(after_to_roll)
t = trimmed_pipeline.eval(desc="processing", max_workers=30)

# Write the resulting traffic object to a parquet file
samax_trajs_path = "/mnt/beegfs/store/krum/MT/inputs/samax_trajs.parquet"
t.to_parquet(samax_trajs_path)

***
### Processing of FZAG mass data
***

##### Import and processing of data

In [24]:
# Loading the FZAG mass data
fzag_data_path = "/mnt/beegfs/store/MIAR/01_sources/FZAG"

# Airline designators (first three characters of the callsign) that are kept;
# every other designator is removed
kept_airlines = ["SWR", "EDW", "DLH", "ADR", "BEL"]

# Typecodes that are removed from the data set (rare types)
excluded_typecodes = ["CRJ7", "CRJX", "B734"]

# The whole preparation is one method chain, so every step works on a fresh
# dataframe. Adding a column to a frame obtained by boolean filtering, as a
# step-by-step version would do, triggers pandas' SettingWithCopyWarning.
df_departures = (
    pd.read_csv(
        f"{fzag_data_path}/df_departure.csv", sep=",", header=0, index_col=0
    )
    .rename(
        columns={
            "SDT": "date",
            "CSG": "callsign",
            "TWT": "toff_weight_kg",
            "ITY": "typecode",
        }
    )
    .drop(columns=["REG"])
    # Turn the date column into datetime objects
    .assign(date=lambda df: pd.to_datetime(df["date"]))
    # Drop rows with NaN values in the toff_weight_kg column
    .dropna(subset=["toff_weight_kg"])
    # Add column with the first three characters of the callsign
    .assign(ICAO=lambda df: df["callsign"].str.slice(0, 3))
    # Keep only the whitelisted airline designators
    .loc[lambda df: df["ICAO"].isin(kept_airlines)]
    # Remove the excluded typecodes
    .loc[lambda df: ~df["typecode"].isin(excluded_typecodes)]
)

# Save the processed FZAG data
df_departures.to_parquet("/mnt/beegfs/store/krum/MT/inputs/df_mass.parquet")