In [1]:
from traffic.core import Traffic
from preprocessing import get_complete_flights, preprocess_traffic, generate_aux_columns
import h5py
import numpy as np
import pandas as pd
import datetime
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
import itertools
from tqdm.auto import tqdm

In [2]:
def seconds_till_arrival(flights_data: pd.DataFrame):
    """Return the time remaining until arrival, in seconds, for every row.

    Args:
        flights_data: Frame with datetime-like ``arrival_time`` and
            ``timestamp`` columns.

    Returns:
        A Series, aligned with ``flights_data``'s index, holding
        ``arrival_time - timestamp`` expressed in (float) seconds.
    """
    remaining = flights_data["arrival_time"].sub(flights_data["timestamp"])
    return remaining.dt.total_seconds()

In [12]:
def build_sequential(lr, input_dims, output_dims, layerSizes, dropout_rate, activation, loss):
    """Build and compile a fully connected Keras ``Sequential`` network.

    Every entry of ``layerSizes`` contributes one hidden block made of
    ``Dense(size)`` -> ``LeakyReLU(alpha=0.05)`` -> ``Dropout(dropout_rate)``.
    The network ends in ``Dense(output_dims, activation=activation)``.

    Args:
        lr: Learning rate handed to the Adam optimizer.
        input_dims: Shape of one input sample (passed as ``Input(shape=...)``).
        output_dims: Number of units in the output layer.
        layerSizes: Iterable with the unit count of each hidden Dense layer.
        dropout_rate: Dropout rate applied after each hidden activation.
        activation: Activation of the output layer.
        loss: Loss passed to ``model.compile``.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    net = keras.Sequential()
    net.add(keras.layers.Input(shape=input_dims))

    for units in layerSizes:
        # Layers are instantiated in the order Dense, LeakyReLU, Dropout.
        hidden_block = (
            keras.layers.Dense(units),
            keras.layers.LeakyReLU(alpha=0.05),
            keras.layers.Dropout(dropout_rate),
        )
        for layer in hidden_block:
            net.add(layer)

    net.add(keras.layers.Dense(output_dims, activation=activation))

    optimizer = keras.optimizers.Adam(learning_rate=lr)
    net.compile(optimizer=optimizer, loss=loss)
    return net


In [4]:
# Scaler object persisted with joblib; it is applied to the numeric feature columns
# further down via scaler.transform.
# NOTE(review): joblib.load unpickles arbitrary objects - only load files from a trusted source.
scaler = load('../trained_models/std_scaler_reg.bin')
# HDF5 input files (the suffixes presumably encode year and month, YYMM - TODO confirm).
# NOTE(review): file_0 carries a "../data/" prefix while file_1 and file_2 are bare names
# that resolve against the current working directory - confirm this is intended.
file_0 = "../data/Frankfurt_LH_2301.h5"
file_1 = "Frankfurt_LH_2302.h5"
file_2 = "Frankfurt_LH_2303.h5"
# Only the files listed here are read; file_0 and file_1 are currently unused.
file_batch = [file_2]
def load_data_batch(file_batch, sample_fraction=1, random_state=None):
    """Load, preprocess and subsample every key of the given HDF5 files.

    Each key of each file is read with ``Traffic.from_file`` and run through
    ``preprocess_traffic``. From the second key onwards the previous key's raw
    data is prepended and preprocessing is restricted to the 24 hours that
    start at the new key's earliest ``day`` value (presumably so that flights
    spanning two keys are seen in full - TODO confirm against
    ``preprocess_traffic``). The "previous key" carries over from one file to
    the next.

    Args:
        file_batch: Iterable of HDF5 file paths, processed in order.
        sample_fraction: Fraction of rows kept per key after dropping rows
            with missing values (passed to ``DataFrame.sample(frac=...)``).
        random_state: Optional seed for reproducible sampling. ``None``
            (default) keeps pandas' unseeded behaviour.

    Returns:
        A DataFrame with the columns distance, altitude, geoaltitude,
        arrival_time, timestamp, vertical_rate, groundspeed, latitude and
        longitude, holding the sampled rows of every key.

    Raises:
        ValueError: If ``file_batch`` is empty or none of the files has a key.
    """
    columns = [
        "distance",
        "altitude",
        "geoaltitude",
        "arrival_time",
        "timestamp",
        "vertical_rate",
        "groundspeed",
        "latitude",
        "longitude",
    ]
    # One shared RandomState: successive keys draw different samples, yet the
    # whole run is reproducible for a given seed.
    rng = None if random_state is None else np.random.RandomState(random_state)

    sampled_frames = []
    old_flights = None
    for file in file_batch:
        # h5py is only needed to list the keys; close the handle before the
        # per-key reads so the file is not held open while from_file reads it.
        with h5py.File(file, 'r') as f:
            keys = list(f.keys())

        for key in tqdm(keys, desc=file):
            new_flights = Traffic.from_file(file, key=key,
                                            parse_dates=["day", "firstseen", "hour", "last_position",
                                                         "lastseen", "timestamp"]).data

            if old_flights is None:
                # Very first key of the batch: nothing to prepend.
                df_key = preprocess_traffic(new_flights)
            else:
                combined = pd.concat([old_flights, new_flights])
                start = new_flights.day.min().replace(tzinfo=None)
                end = start + datetime.timedelta(days=1)
                relevant_time = [str(start), str(end)]
                df_key = preprocess_traffic(combined, relevant_time)
                del combined  # release the doubled-up raw data early

            df_key = df_key[columns].dropna().sample(frac=sample_fraction, random_state=rng)
            sampled_frames.append(df_key)
            old_flights = new_flights

    if not sampled_frames:
        raise ValueError(
            "load_data_batch found no data: file_batch is empty or the files contain no keys"
        )

    # Concatenate once at the end instead of re-copying the growing frame per key.
    return pd.concat(sampled_frames)


In [13]:
# Location the Keras model is saved to (and loaded from, if the line below is re-enabled).
model_file = '../trained_models/model_bering'
#model = load_model(model_file)
# NOTE(review): `X` is only assigned in a later cell of this notebook, so on a fresh
# kernel (Restart & Run All) this line raises NameError - this cell should run after
# the feature-preparation cell.
# Three hidden layers (1024/512/256 units), dropout 0.2, one ReLU output unit, MAE loss.
model = build_sequential(lr=0.0001, input_dims=(X.shape[1],),output_dims=1, layerSizes=(1024, 512, 256),dropout_rate =0.2, activation="relu",loss="MAE")
model.summary()
# NOTE(review): this saves the freshly built, untrained model to model_file; any model
# already stored at that path is presumably replaced - confirm that is intended.
model.save(model_file)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 1024)              20480     
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 1024)              0         
                                                                 
 dropout (Dropout)           (None, 1024)              0         
                                                                 
 dense_5 (Dense)             (None, 512)               524800    
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 512)               0         
                                                                 
 dropout_1 (Dropout)         (None, 512)               0         
                                                                 
 dense_6 (Dense)             (None, 256)              

In [5]:
# Read the files in file_batch and pass the result straight through
# generate_aux_columns (imported from the project's preprocessing module).
df_eval = generate_aux_columns(load_data_batch(file_batch))
# Preview the first 100 rows of the resulting frame.
df_eval.head(100)

Frankfurt_LH_2303.h5:   0%|          | 0/20 [00:00<?, ?it/s]

Unnamed: 0,distance,altitude,geoaltitude,arrival_time,timestamp,vertical_rate,groundspeed,latitude,longitude,holiday,...,day_cos,bearing_sin,bearing_cos,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6
58919,1640.489790,6425.0,6675.0,2023-03-01 15:13:33+00:00,2023-03-01 12:58:44+00:00,3776.0,258.0,41.394562,-8.677202,0,...,0.512371,-0.872648,-0.488350,0,0,1,0,0,0,0
54569,221.990447,22975.0,22875.0,2023-03-01 11:14:29+00:00,2023-03-01 10:49:15+00:00,64.0,419.0,50.586594,5.557203,0,...,0.512371,-0.955777,0.294093,0,0,1,0,0,0,0
131021,340.263502,39000.0,38725.0,2023-03-01 15:27:21+00:00,2023-03-01 14:48:30+00:00,0.0,416.0,51.774902,4.565339,0,...,0.512371,-0.808002,0.589180,0,0,1,0,0,0,0
102933,8.929258,925.0,1325.0,2023-03-01 17:32:36+00:00,2023-03-01 17:31:05+00:00,-704.0,118.0,50.023773,8.441981,0,...,0.512371,-0.985180,-0.171525,0,0,1,0,0,0,0
15265,983.163389,38000.0,37525.0,2023-03-01 04:10:53+00:00,2023-03-01 02:50:48+00:00,0.0,505.0,41.191727,8.796021,0,...,0.512371,0.019711,-0.999806,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33137,52.180264,13050.0,13225.0,2023-03-01 06:11:38+00:00,2023-03-01 05:53:18+00:00,-576.0,343.0,50.319946,9.151105,0,...,0.512371,0.796698,0.604377,0,0,1,0,0,0,0
43205,90.845876,16225.0,16275.0,2023-03-01 19:21:29+00:00,2023-03-01 19:03:12+00:00,-1536.0,378.0,49.247473,8.889096,0,...,0.512371,0.258645,-0.965972,0,0,1,0,0,0,0
121388,16.858724,11000.0,11225.0,2023-03-01 19:25:57+00:00,2023-03-01 18:58:28+00:00,0.0,357.0,49.971313,8.777498,0,...,0.512371,0.900048,-0.435792,0,0,1,0,0,0,0
67980,990.825340,37000.0,37000.0,2023-03-01 06:56:10+00:00,2023-03-01 05:33:11+00:00,0.0,457.0,51.746211,-5.331497,0,...,0.512371,-0.959417,0.281991,0,0,1,0,0,0,0


In [6]:
# Shuffle the rows (frac=1 keeps every row, in random order).
df_eval = df_eval.sample(frac=1)

# Columns that are rescaled with the loaded scaler.
cols_numeric = ["distance", "altitude", "geoaltitude", "vertical_rate", "groundspeed"]

# Feature matrix: everything except the two time columns (only needed for the
# target) and the raw coordinates.
X = df_eval.drop(["arrival_time", "timestamp", "latitude", "longitude"], axis=1)
# Regression target: seconds remaining until arrival.
y = seconds_till_arrival(df_eval)

# Replace the numeric columns by their scaled values; all other columns stay untouched.
X_numeric = X[cols_numeric]
X[cols_numeric] = scaler.transform(X_numeric)

In [14]:
def batch_generator(df: pd.DataFrame, y, batchsize, with_sample_weights=False, sample_weights=None):
    """Yield consecutive row batches of ``df`` together with the matching targets.

    The frame is walked once from top to bottom in steps of ``batchsize``; the
    last batch is shorter when the row count is not a multiple of
    ``batchsize``. No empty batch is ever yielded, so an empty ``df`` produces
    no batches at all.

    Sample weights are meant to penalize errors more strongly when the
    aircraft is far away from arrival and less severely when it is nearer.

    Args:
        df: Feature frame; batches are positional row slices of it.
        y: Series of targets, positionally aligned with ``df``.
        batchsize: Maximum number of rows per batch; must be positive.
        with_sample_weights: If True, each yielded tuple carries a third
            element with the per-row sample weights.
        sample_weights: Series of weights, positionally aligned with ``df``.
            Required when ``with_sample_weights`` is True.

    Yields:
        ``(X_batch, y_batch)`` or ``(X_batch, y_batch, weight_batch)``, where
        ``X_batch`` is a DataFrame slice and the other elements are NumPy arrays.

    Raises:
        ValueError: If ``batchsize`` is not positive, or if weights are
            requested but ``sample_weights`` is None. As this is a generator,
            the error surfaces when the first batch is requested.
    """
    if batchsize <= 0:
        raise ValueError("batchsize must be a positive integer")
    if with_sample_weights and sample_weights is None:
        raise ValueError("sample_weights must be provided when with_sample_weights=True")

    size = df.shape[0]
    # range() stops before `size`, and iloc clips the final slice, so the last
    # partial batch is covered here and no trailing (empty) batch is needed.
    for start in range(0, size, batchsize):
        stop = start + batchsize
        X_batch = df.iloc[start:stop, :]
        y_batch = y.iloc[start:stop].values
        if with_sample_weights:
            sample_batch = sample_weights.iloc[start:stop].values
            yield X_batch, y_batch, sample_batch
        else:
            yield X_batch, y_batch
# Stream (features, target) batches of up to 256 rows, without sample weights.
gen = batch_generator(X, y, 256)
# NOTE(review): `gen` is a one-shot Python generator - once fit has consumed it, it is
# exhausted and has to be recreated before fitting again.
model.fit(gen)

    109/Unknown - 4s 7ms/step - loss: 5136.1411


KeyboardInterrupt



In [15]:
# Write the current state of `model` to model_file.
model.save(model_file)

INFO:tensorflow:Assets written to: ../trained_models/model_bering\assets


In [11]:
# Display the prepared feature matrix for inspection.
X

Unnamed: 0,distance,altitude,geoaltitude,vertical_rate,groundspeed,holiday,sec_sin,sec_cos,day_sin,day_cos,bearing_sin,bearing_cos,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6
29831,-0.638548,-1.247067,-1.312611,-0.307731,-0.259387,0,-0.861075,0.508478,0.914128,0.405426,-0.995308,-0.096755,0,0,1,0,0,0,0
105008,0.037724,0.421633,0.380266,0.030885,-0.249395,0,0.715185,0.698936,0.920971,0.389630,0.765358,-0.643605,0,0,0,1,0,0,0
47316,6.312349,-1.210835,-1.248729,0.611369,0.220242,0,-0.229130,0.973396,0.907014,0.421101,-0.602869,0.797841,0,1,0,0,0,0,0
200222,0.099721,0.594743,0.418196,0.442061,0.909708,0,0.988072,0.153992,0.891981,0.452072,0.106747,0.994286,0,0,0,0,0,0,1
171868,-0.654376,-1.836849,-1.839639,-0.138423,-1.528405,0,-0.714727,-0.699404,0.884068,0.467359,0.910519,0.413467,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142383,-0.649714,-1.720100,-1.771764,0.006698,-2.357763,0,-0.752271,-0.658854,0.907014,0.421101,0.915109,0.403206,0,1,0,0,0,0,0
87080,0.176128,1.063750,0.989143,0.030885,0.520010,0,0.663437,0.748232,0.891981,0.452072,0.020683,-0.999786,0,0,0,0,0,0,1
16914,3.921060,0.822201,0.729622,0.030885,1.749058,0,0.295916,0.955214,0.858764,0.512371,-0.882447,0.470412,0,0,1,0,0,0,0
23735,0.716648,0.902718,0.799493,0.030885,0.060365,0,0.561783,0.827285,0.907014,0.421101,0.951200,-0.308574,0,1,0,0,0,0,0
