In [1]:
from traffic.core import Traffic
from preprocessing import get_complete_flights, preprocess_traffic, generate_aux_columns
import h5py
import numpy as np
import pandas as pd
import datetime
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
import itertools
from tqdm.auto import tqdm

In [2]:
def seconds_till_arrival(flights_data: pd.DataFrame):
    """Return the remaining flight time of every row, in seconds.

    Parameters
    ----------
    flights_data : pd.DataFrame
        Frame with datetime-like ``arrival_time`` and ``timestamp`` columns.

    Returns
    -------
    pd.Series
        ``arrival_time - timestamp`` expressed as float seconds, aligned with
        the index of ``flights_data``.
    """
    remaining = flights_data["arrival_time"].sub(flights_data["timestamp"])
    return remaining.dt.total_seconds()

In [12]:
def build_sequential(lr, input_dims, output_dims, layerSizes, dropout_rate, activation, loss):
    """Build and compile a fully connected Keras ``Sequential`` model.

    Each entry of ``layerSizes`` contributes one hidden stage made of
    ``Dense(size)`` -> ``LeakyReLU(alpha=0.05)`` -> ``Dropout(dropout_rate)``.
    A final ``Dense(output_dims)`` layer with the given ``activation`` is the
    output. The model is compiled with Adam at learning rate ``lr`` and the
    given ``loss``.

    Parameters
    ----------
    lr : learning rate passed to ``keras.optimizers.Adam``.
    input_dims : shape passed to ``keras.layers.Input``.
    output_dims : number of units in the output layer.
    layerSizes : iterable of hidden-layer widths, in order.
    dropout_rate : rate passed to every ``Dropout`` layer.
    activation : activation of the output layer.
    loss : loss passed to ``model.compile``.

    Returns
    -------
    The compiled ``keras.Sequential`` model.
    """
    network = keras.Sequential()
    network.add(keras.layers.Input(shape=input_dims))

    for units in layerSizes:
        # Layers are created in the same order as they are added.
        hidden_stage = (
            keras.layers.Dense(units),
            keras.layers.LeakyReLU(alpha=0.05),
            keras.layers.Dropout(dropout_rate),
        )
        for layer in hidden_stage:
            network.add(layer)

    output_layer = keras.layers.Dense(output_dims, activation=activation)
    network.add(output_layer)

    optimizer = keras.optimizers.Adam(learning_rate=lr)
    network.compile(optimizer=optimizer, loss=loss)
    return network


In [7]:
# Pre-fitted scaler restored with joblib; applied to the numeric feature
# columns in the feature-preparation cell.
# NOTE(review): joblib.load unpickles the file — only load trusted artifacts.
scaler = load('../trained_models/std_scaler_reg.bin')
# Candidate HDF5 input files.
file_0 = "../data/Frankfurt_LH_2301.h5"
# NOTE(review): unlike file_0/file_2 this path has no "../data/" prefix, so it
# is resolved against the current working directory — confirm this is intended.
file_1 = "Frankfurt_LH_2302.h5"
file_2 = "../data/Frankfurt_LH_2303.h5"
# Files actually handed to load_data_batch (only file_1 here).
file_batch = [file_1]
# Columns kept from every preprocessed day of flight data.
_FLIGHT_COLUMNS = [
    "distance",
    "altitude",
    "geoaltitude",
    "arrival_time",
    "timestamp",
    "vertical_rate",
    "groundspeed",
    "track",
    "latitude",
    "longitude",
]


def _select_and_sample(df_day, sample_fraction):
    """Keep the model columns of one day, drop incomplete rows, then subsample."""
    return df_day[_FLIGHT_COLUMNS].dropna().sample(frac=sample_fraction)


def load_data_batch(file_batch, sample_fraction=1):
    """Load, preprocess and subsample the flight data of several HDF5 files.

    Every key of every file is read with ``Traffic.from_file`` and treated as
    one chunk ("day"). The very first chunk is preprocessed on its own. Every
    later chunk is concatenated with the chunk read just before it, and
    ``preprocess_traffic`` is restricted to the 24 hours starting at the new
    chunk's earliest ``day`` value. The "previous chunk" carries over from one
    file to the next.

    Parameters
    ----------
    file_batch : iterable of str
        Paths of the HDF5 files to read, in order.
    sample_fraction : float, default 1
        Fraction of rows kept from each chunk (``DataFrame.sample(frac=...)``).
        Sampling is not seeded, so the row selection/order is random.

    Returns
    -------
    pd.DataFrame
        Concatenation of all per-chunk frames restricted to the ten feature
        columns. If ``file_batch`` is empty or no file contains any key, an
        empty frame with those columns is returned.
    """
    daily_frames = []
    old_flights = None  # raw data of the previously read chunk, if any

    for file in file_batch:
        # Only the key listing needs h5py; close the handle before
        # Traffic.from_file opens the same file again.
        with h5py.File(file, 'r') as f:
            keys = list(f.keys())

        for key in tqdm(keys, desc=file):
            new_flights = Traffic.from_file(file, key=key,
                                            parse_dates=["day", "firstseen", "hour", "last_position",
                                                         "lastseen", "timestamp"]).data

            if old_flights is None:
                df_day = preprocess_traffic(new_flights)
            else:
                # Prepend the previous chunk, then limit preprocessing to the
                # new chunk's 24-hour window.
                old_flights = pd.concat([old_flights, new_flights])
                start = new_flights.day.min().replace(tzinfo=None)
                end = start + datetime.timedelta(days=1)
                relevant_time = [str(start), str(end)]
                df_day = preprocess_traffic(old_flights, relevant_time)

            daily_frames.append(_select_and_sample(df_day, sample_fraction))
            del df_day
            old_flights = new_flights

    if not daily_frames:
        return pd.DataFrame(columns=_FLIGHT_COLUMNS)

    # A single concat avoids re-copying the accumulated frame on every chunk.
    return pd.concat(daily_frames)


In [13]:
# Location where the model is stored.
model_file = '../trained_models/model_bering'

# Alternative to building from scratch: restore the saved model instead.
# model = load_model(model_file)

# The input width is taken from the feature matrix X, so the cell that
# builds X must have been executed before this one.
model = build_sequential(
    lr=0.0001,
    input_dims=(X.shape[1],),
    output_dims=1,
    layerSizes=(1024, 512, 256),
    dropout_rate=0.2,
    activation="relu",
    loss="MAE",
)
model.summary()
model.save(model_file)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 1024)              20480     
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 1024)              0         
                                                                 
 dropout (Dropout)           (None, 1024)              0         
                                                                 
 dense_5 (Dense)             (None, 512)               524800    
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 512)               0         
                                                                 
 dropout_1 (Dropout)         (None, 512)               0         
                                                                 
 dense_6 (Dense)             (None, 256)              

In [9]:
# Load and preprocess every file in file_batch (sample_fraction defaults to 1).
df_eval = load_data_batch(file_batch)
# Rebinds df_eval to the output of generate_aux_columns (imported from the
# project's preprocessing module). Re-running this cell alone would feed the
# already-augmented frame back into generate_aux_columns.
df_eval = generate_aux_columns(df_eval)
# Preview the first 100 rows.
df_eval.head(100)

Frankfurt_LH_2302.h5:   0%|          | 0/24 [00:00<?, ?it/s]

Unnamed: 0,distance,altitude,geoaltitude,arrival_time,timestamp,vertical_rate,groundspeed,track,latitude,longitude,...,bearing_cos,track_sin,track_cos,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6
0,794.201342,36025.0,36325.0,2023-02-01 10:56:31+00:00,2023-02-01 09:36:50+00:00,64.0,459.0,42.878904,43.158068,5.739310,...,-0.957310,0.680451,0.732793,0,0,1,0,0,0,0
1,267.538137,34000.0,33525.0,2023-02-01 15:40:07+00:00,2023-02-01 14:56:36+00:00,0.0,322.0,25.769328,47.630081,8.565491,...,-1.000000,0.434749,0.900552,0,0,1,0,0,0,0
2,1715.137033,31525.0,32275.0,2023-02-01 19:16:16+00:00,2023-02-01 16:46:51+00:00,1280.0,431.0,45.000000,39.803970,-7.867465,...,-0.577195,0.707107,0.707107,0,0,1,0,0,0,0
3,4.958587,775.0,1125.0,2023-02-01 19:27:41+00:00,2023-02-01 19:26:22+00:00,-512.0,117.0,249.558203,50.055130,8.629110,...,0.392781,-0.937027,-0.349256,0,0,1,0,0,0,0
4,1191.571282,38000.0,38600.0,2023-02-01 04:20:20+00:00,2023-02-01 02:22:53+00:00,0.0,414.0,21.673001,39.614372,4.996826,...,-0.966226,0.369309,0.929307,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,5915.240554,35000.0,34600.0,2023-02-01 07:04:19+00:00,2023-02-01 01:04:18+00:00,0.0,609.0,62.004955,42.043886,-71.133536,...,0.409900,0.882988,0.469395,0,0,1,0,0,0,0
96,635.204801,36000.0,36025.0,2023-02-01 15:25:29+00:00,2023-02-01 14:26:04+00:00,64.0,458.0,59.680472,47.750061,0.591202,...,-0.350724,0.863224,0.504822,0,0,1,0,0,0,0
97,17.987046,4000.0,4250.0,2023-02-01 14:46:36+00:00,2023-02-01 14:41:06+00:00,-640.0,143.0,249.537728,50.107407,8.792749,...,0.432513,-0.936903,-0.349591,0,0,1,0,0,0,0
98,636.198239,36000.0,36100.0,2023-02-01 10:56:31+00:00,2023-02-01 09:48:56+00:00,0.0,433.0,21.113500,44.467987,6.609650,...,-0.969745,0.360217,0.932869,0,0,1,0,0,0,0


In [12]:
# Shuffle all rows (frac=1 keeps every row). No random_state is given, so the
# order differs between runs.
df_eval =df_eval.sample(frac=1)
# Regression target: seconds between each row's timestamp and arrival_time.
y = seconds_till_arrival(df_eval)
# Feature matrix: everything except the raw time, track and position columns.
X = df_eval.drop(columns=["arrival_time", "timestamp", "track", "latitude", "longitude"])
# Columns passed through the pre-fitted scaler.
# NOTE(review): assumes the scaler was fitted on exactly these columns in this
# order — confirm against the training notebook.
cols_numeric = ["distance", "altitude", "geoaltitude", "vertical_rate","groundspeed"]
X_numeric = X[cols_numeric]
X[cols_numeric] = scaler.transform(X_numeric)
# Display the scaled feature matrix.
X

Unnamed: 0,distance,altitude,geoaltitude,vertical_rate,groundspeed,holiday,sec_sin,sec_cos,day_sin,day_cos,...,bearing_cos,track_sin,track_cos,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6
1503736,-0.547639,-0.210420,-0.206651,-0.718908,-0.519186,0,-0.986008,0.166697,0.746972,0.664855,...,-0.470794,-0.822192,0.569210,0,0,0,0,0,1,0
1780326,-0.659943,-1.997881,-1.981377,-0.331918,-2.487663,0,-0.937611,0.347686,0.840618,0.541628,...,-0.242212,0.936056,0.351851,1,0,0,0,0,0,0
625010,2.301439,0.741685,0.753577,0.030885,-0.309348,0,0.046743,0.998907,0.594727,0.803928,...,-0.328020,-0.644970,0.764208,1,0,0,0,0,0,0
931112,5.277457,0.661169,0.649769,0.030885,1.539221,0,-0.024214,0.999707,0.622047,0.782980,...,0.564241,0.622272,0.782801,0,0,1,0,0,0,0
1004953,0.060613,0.580653,0.577902,0.030885,0.310172,0,0.986691,-0.162608,0.635432,0.772157,...,-0.473804,0.359757,0.933046,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371601,-0.545064,-0.305026,-0.316449,0.030885,-0.209426,0,-0.720198,-0.693769,0.580800,0.814046,...,-0.245669,0.949472,0.313853,0,0,0,0,0,0,1
245100,-0.306689,0.612859,0.615832,0.393688,0.769816,0,-0.997086,0.076284,0.566702,0.823923,...,0.290031,0.974072,-0.226236,0,0,0,0,0,1,0
1070550,-0.168961,0.582665,0.533983,0.006698,0.360133,0,0.790378,0.612620,0.648630,0.761104,...,-0.538025,-0.775693,0.631111,0,0,0,0,1,0,0
997013,0.096802,0.739672,0.667736,0.030885,-0.119495,0,-0.169278,-0.985568,0.635432,0.772157,...,0.785695,-0.636881,-0.770962,0,0,0,1,0,0,0


In [8]:
def batch_generator(df: pd.DataFrame, y, batchsize, with_sample_weights = False, sample_weights=None ):
    """Yield consecutive mini-batches of ``df`` and ``y`` in a single pass.

    Rows are sliced positionally in steps of ``batchsize``; the last batch is
    shorter when the number of rows is not a multiple of ``batchsize``. No
    empty batch is produced: an empty ``df`` yields nothing.

    Parameters
    ----------
    df : pd.DataFrame
        Feature rows.
    y : pd.Series
        Targets, positionally aligned with ``df``.
    batchsize : int
        Maximum number of rows per batch; must be positive.
    with_sample_weights : bool, default False
        When true, every yielded tuple also carries the matching slice of
        ``sample_weights``.
    sample_weights : pd.Series, optional
        Per-row weights, positionally aligned with ``df``. Required when
        ``with_sample_weights`` is true.

    Yields
    ------
    tuple
        ``(X_batch, y_batch)`` or ``(X_batch, y_batch, sample_batch)`` where
        ``X_batch`` is a DataFrame slice and the others are NumPy arrays.

    Raises
    ------
    ValueError
        If ``batchsize`` is not positive, or if sample weights are requested
        but ``sample_weights`` is ``None``. Raised on first iteration because
        this is a generator.
    """
    if batchsize <= 0:
        # A non-positive step would never advance through the frame.
        raise ValueError("batchsize must be a positive integer")
    if with_sample_weights and sample_weights is None:
        raise ValueError("sample_weights must be provided when with_sample_weights is True")

    size = df.shape[0]
    for start in range(0, size, batchsize):
        stop = start + batchsize  # iloc clips slices that run past the end
        X_batch = df.iloc[start:stop, :]
        y_batch = y.iloc[start:stop].values
        if with_sample_weights:
            sample_batch = sample_weights.iloc[start:stop].values
            yield X_batch, y_batch, sample_batch
        else:
            yield X_batch, y_batch
# Stream (X, y) to Keras in batches of 256 rows. X, y and model come from
# other cells and must already exist in the kernel; the recorded output of
# this cell is a NameError for X.
gen = batch_generator(X, y, 256)
# The generator is exhausted after a single pass over the data.
model.fit(gen)

NameError: name 'X' is not defined

In [15]:
# Write the current model to model_file (both names are bound in the
# model-construction cell).
model.save(model_file)

INFO:tensorflow:Assets written to: ../trained_models/model_bering\assets


In [11]:
# Display the current feature matrix.
# NOTE(review): the recorded output has no track_sin/track_cos columns, unlike
# the output of the feature-preparation cell — presumably from an earlier
# kernel state; re-run top to bottom to confirm.
X

Unnamed: 0,distance,altitude,geoaltitude,vertical_rate,groundspeed,holiday,sec_sin,sec_cos,day_sin,day_cos,bearing_sin,bearing_cos,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6
29831,-0.638548,-1.247067,-1.312611,-0.307731,-0.259387,0,-0.861075,0.508478,0.914128,0.405426,-0.995308,-0.096755,0,0,1,0,0,0,0
105008,0.037724,0.421633,0.380266,0.030885,-0.249395,0,0.715185,0.698936,0.920971,0.389630,0.765358,-0.643605,0,0,0,1,0,0,0
47316,6.312349,-1.210835,-1.248729,0.611369,0.220242,0,-0.229130,0.973396,0.907014,0.421101,-0.602869,0.797841,0,1,0,0,0,0,0
200222,0.099721,0.594743,0.418196,0.442061,0.909708,0,0.988072,0.153992,0.891981,0.452072,0.106747,0.994286,0,0,0,0,0,0,1
171868,-0.654376,-1.836849,-1.839639,-0.138423,-1.528405,0,-0.714727,-0.699404,0.884068,0.467359,0.910519,0.413467,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142383,-0.649714,-1.720100,-1.771764,0.006698,-2.357763,0,-0.752271,-0.658854,0.907014,0.421101,0.915109,0.403206,0,1,0,0,0,0,0
87080,0.176128,1.063750,0.989143,0.030885,0.520010,0,0.663437,0.748232,0.891981,0.452072,0.020683,-0.999786,0,0,0,0,0,0,1
16914,3.921060,0.822201,0.729622,0.030885,1.749058,0,0.295916,0.955214,0.858764,0.512371,-0.882447,0.470412,0,0,1,0,0,0,0
23735,0.716648,0.902718,0.799493,0.030885,0.060365,0,0.561783,0.827285,0.907014,0.421101,0.951200,-0.308574,0,1,0,0,0,0,0
