In [1]:
from traffic.core import Traffic
from preprocessing import get_complete_flights, preprocess_traffic, generate_aux_columns
import h5py
import numpy as np
import pandas as pd
import datetime
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
import itertools

In [2]:
def seconds_till_arrival(flights_data: pd.DataFrame):
    """Return the remaining time until arrival, in seconds, for every row.

    Args:
        flights_data: Frame with datetime-like ``arrival_time`` and
            ``timestamp`` columns.

    Returns:
        A Series holding ``arrival_time - timestamp`` expressed as a number
        of seconds, aligned with the index of ``flights_data``.
    """
    return (flights_data["arrival_time"] - flights_data["timestamp"]).dt.total_seconds()

In [3]:
def build_sequential(lr, input_dims, output_dims, layerSizes, activation, loss):
    """Build and compile a fully connected Keras ``Sequential`` network.

    Each hidden layer is a linear ``Dense`` layer followed by a ``LeakyReLU``
    with slope 0.05; the output layer is a ``Dense`` layer using ``activation``.

    Args:
        lr: Learning rate handed to the Adam optimizer.
        input_dims: Input shape passed to ``keras.layers.Input``.
        output_dims: Number of units in the output layer.
        layerSizes: Iterable with the unit count of each hidden layer, in order.
        activation: Activation of the output layer.
        loss: Loss passed to ``model.compile``.

    Returns:
        The compiled model.
    """
    net = keras.Sequential()
    net.add(keras.layers.Input(shape=input_dims))

    # Hidden stack: Dense without activation, then an explicit LeakyReLU layer.
    for n_units in layerSizes:
        net.add(keras.layers.Dense(n_units))
        net.add(keras.layers.LeakyReLU(alpha=0.05))

    net.add(keras.layers.Dense(output_dims, activation=activation))

    optimizer = Adam(learning_rate=lr)
    net.compile(optimizer=optimizer, loss=loss)
    return net

In [4]:
# Scaler loaded from disk; it is applied to the numeric feature columns in a later cell.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
scaler = load('../trained_models/std_scaler_reg.bin')
file = "../data/Frankfurt_LH_2301.h5"

# One frame per successfully loaded key. Collecting them in a list and
# concatenating once avoids re-copying the growing frame on every iteration.
frames = []

with h5py.File(file, 'r') as f:
    # h5py is only used to enumerate the keys; the data itself is read by Traffic.
    for key in list(f.keys()):
        print(key)
        try:
            flights = Traffic.from_file(file, key=key,
                                    parse_dates=["day", "firstseen", "hour", "last_position",
                                                 "lastseen", "timestamp"])
        except Exception as exc:
            # Best effort: skip keys that cannot be loaded, but say so instead of
            # hiding the failure (and do not swallow KeyboardInterrupt/SystemExit).
            print(f"  skipped {key}: could not load ({exc!r})")
            continue

        try:
            df_flights = preprocess_traffic(flights)
        except AttributeError as exc:
            print(f"  skipped {key}: preprocessing failed ({exc!r})")
            continue

        df = df_flights[
            ["distance", "altitude", "geoaltitude", "arrival_time", "timestamp", "vertical_rate",
             "groundspeed"]].dropna()
        frames.append(df)

if not frames:
    raise ValueError(f"No key of {file} could be loaded and preprocessed")

# Reverse so the most recently loaded key comes first, matching the row order
# produced by the previous incremental pd.concat([df, df_eval]).
df_eval = pd.concat(frames[::-1])


LH_230101
LH_230103
LH_230104
LH_230105
LH_230106
LH_230107
LH_230108
LH_230109
LH_230110
LH_230111
LH_230112
LH_230113
LH_230114
LH_230115
LH_230116
LH_230117
LH_230118
LH_230119
LH_230120
LH_230121
LH_230122
LH_230123
LH_230124
LH_230125
LH_230126
LH_230127
LH_230128
LH_230129
LH_230130
LH_230131


In [9]:
# Path of the saved Keras model that is evaluated in this notebook.
model_file = '../trained_models/model_with_cycle_no_month'
model = load_model(model_file)
# Alternative kept for reference: build a fresh, untrained network instead of loading one.
# It reads X.shape, and X is only defined in a later cell, so it cannot be enabled at this position as-is.
#model = build_sequential(lr=0.0001, input_dims=(X.shape[1],),output_dims=1, layerSizes=(1024, 512, 256),activation="relu",loss="MAE")
model.summary()
# Disabled on purpose: enabling it would save the current model to model_file.
#model.save(model_file)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 1024)              18432     
                                                                 
 leaky_re_lu_13 (LeakyReLU)  (None, 1024)              0         
                                                                 
 dense_17 (Dense)            (None, 512)               524800    
                                                                 
 leaky_re_lu_14 (LeakyReLU)  (None, 512)               0         
                                                                 
 dense_18 (Dense)            (None, 256)               131328    
                                                                 
 leaky_re_lu_15 (LeakyReLU)  (None, 256)               0         
                                                                 
 dense_19 (Dense)            (None, 1)                

In [6]:
# Add the auxiliary feature columns produced by preprocessing.generate_aux_columns
# (the rendered output below shows holiday, sin/cos time encodings and weekday_* columns).
# NOTE(review): df_eval is rebound to its own transformed version, so re-running this
# cell feeds already-augmented data back into the helper; confirm that is harmless
# or re-run the loading cell first.
df_eval = generate_aux_columns(df_eval)
# Display the first 100 rows as the cell output.
df_eval.head(100)

Unnamed: 0,distance,altitude,geoaltitude,arrival_time,timestamp,vertical_rate,groundspeed,holiday,sec_sin,sec_cos,day_sin,day_cos,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6
34757,300.980610,350.0,600.0,2023-01-31 14:01:12+00:00,2023-01-31 13:25:40+00:00,192.0,119.0,0,-0.365148,-0.930950,0.508671,0.860961,0,1,0,0,0,0,0
34758,300.749698,425.0,600.0,2023-01-31 14:01:12+00:00,2023-01-31 13:25:41+00:00,192.0,119.0,0,-0.365215,-0.930923,0.508671,0.860961,0,1,0,0,0,0,0
34759,300.682789,475.0,600.0,2023-01-31 14:01:12+00:00,2023-01-31 13:25:42+00:00,2048.0,130.0,0,-0.365283,-0.930897,0.508671,0.860961,0,1,0,0,0,0,0
34760,300.619171,500.0,650.0,2023-01-31 14:01:12+00:00,2023-01-31 13:25:43+00:00,2368.0,130.0,0,-0.365351,-0.930870,0.508671,0.860961,0,1,0,0,0,0,0
34761,300.588307,525.0,700.0,2023-01-31 14:01:12+00:00,2023-01-31 13:25:44+00:00,2496.0,130.0,0,-0.365418,-0.930843,0.508671,0.860961,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34852,293.801747,5450.0,5450.0,2023-01-31 14:01:12+00:00,2023-01-31 13:27:15+00:00,3136.0,215.0,0,-0.371570,-0.928405,0.508671,0.860961,0,1,0,0,0,0,0
34853,293.712047,5500.0,5500.0,2023-01-31 14:01:12+00:00,2023-01-31 13:27:16+00:00,3200.0,217.0,0,-0.371638,-0.928378,0.508671,0.860961,0,1,0,0,0,0,0
34854,293.588607,5575.0,5575.0,2023-01-31 14:01:12+00:00,2023-01-31 13:27:17+00:00,3264.0,217.0,0,-0.371705,-0.928351,0.508671,0.860961,0,1,0,0,0,0,0
34855,293.500165,5625.0,5600.0,2023-01-31 14:01:12+00:00,2023-01-31 13:27:18+00:00,3328.0,219.0,0,-0.371773,-0.928324,0.508671,0.860961,0,1,0,0,0,0,0


In [7]:
# Target: seconds remaining until arrival for every row.
y = seconds_till_arrival(df_eval)
# Features: every column except the two datetime columns used to derive the target.
X = df_eval.drop(columns=["arrival_time", "timestamp"])
# Only these continuous columns are passed through the loaded scaler.
# NOTE(review): the column order presumably has to match the order the scaler was fitted with; confirm.
cols_numeric = ["distance", "altitude", "geoaltitude", "vertical_rate","groundspeed"]
X_numeric = X[cols_numeric]
# Overwrite the raw numeric columns of X with their scaled values.
X[cols_numeric] = scaler.transform(X_numeric)

In [10]:
def batch_generator(df: pd.DataFrame, y, batchsize, with_sample_weights = False, sample_weights=None ):
    """Yield consecutive mini-batches of features and targets.

    Rows are sliced positionally in their existing order. The last batch holds
    the remaining rows and may be shorter than ``batchsize``; no empty batch is
    ever produced, and an empty ``df`` yields nothing.

    Sample weights allow errors to be penalized more strongly when the aircraft
    is far away from arrival and less severely when it is nearer.

    Args:
        df: Feature frame.
        y: Targets with an ``.iloc`` indexer (e.g. a Series), aligned
            positionally with ``df``.
        batchsize: Positive number of rows per batch.
        with_sample_weights: When true, each yielded tuple has a third
            element with the weights of the batch.
        sample_weights: Weights with an ``.iloc`` indexer, aligned positionally
            with ``df``; required when ``with_sample_weights`` is true.

    Yields:
        ``(X_batch, y_batch)`` or ``(X_batch, y_batch, sample_batch)`` where
        ``X_batch`` is a DataFrame slice and the other items are the
        ``.values`` of the corresponding slices.

    Raises:
        ValueError: If ``batchsize`` is not positive, or if weights are
            requested but ``sample_weights`` is missing.
    """
    # A non-positive step would never advance through the frame.
    if batchsize <= 0:
        raise ValueError("batchsize must be a positive integer")
    if with_sample_weights and sample_weights is None:
        raise ValueError("sample_weights must be provided when with_sample_weights is True")

    size = df.shape[0]
    # range() stops at the last start index below size, so the final
    # (possibly partial) batch is emitted exactly once.
    for start in range(0, size, batchsize):
        stop = start + batchsize
        X_batch = df.iloc[start:stop, :]
        y_batch = y.iloc[start:stop].values
        if with_sample_weights:
            sample_batch = sample_weights.iloc[start:stop].values
            yield X_batch, y_batch, sample_batch
        else:
            yield X_batch, y_batch
# Stream the evaluation set in batches of 256 rows. The generator is single-use:
# re-create it before calling evaluate again.
gen = batch_generator(X, y, 256)
# The displayed value is the model's compiled loss on the evaluation set
# (presumably MAE in seconds, going by the commented-out build call; confirm).
model.evaluate(gen)



279.6767578125