In [8]:
from traffic.core import Traffic
from preprocessing import get_complete_flights, preprocess_traffic, generate_aux_columns
import h5py
import numpy as np
import pandas as pd
import datetime
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
import itertools
from tqdm.auto import tqdm

In [9]:
def seconds_till_arrival(flights_data: pd.DataFrame):
    """Return, per row, the number of seconds between "timestamp" and "arrival_time".

    Parameters
    ----------
    flights_data : pd.DataFrame
        Frame with the datetime-like columns "arrival_time" and "timestamp".

    Returns
    -------
    pd.Series
        ``arrival_time - timestamp`` expressed in seconds (float), aligned
        with the index of ``flights_data``.
    """
    remaining = flights_data["arrival_time"].sub(flights_data["timestamp"])
    return remaining.dt.total_seconds()

In [10]:
def build_sequential(lr, input_dims, output_dims, layerSizes, activation, loss):
    """Assemble and compile a fully connected Keras ``Sequential`` model.

    Parameters
    ----------
    lr : learning rate handed to the Adam optimizer.
    input_dims : shape passed to the model's ``Input`` layer.
    output_dims : number of units in the final ``Dense`` layer.
    layerSizes : iterable with the unit count of each hidden ``Dense`` layer.
    activation : activation of the final ``Dense`` layer.
    loss : loss passed to ``compile``.

    Returns
    -------
    The compiled ``keras.Sequential`` model.
    """
    network = keras.Sequential()
    network.add(keras.layers.Input(shape=input_dims))

    # Hidden stack: every width contributes a linear Dense layer followed by
    # a LeakyReLU with slope 0.05 for negative inputs.
    for units in layerSizes:
        network.add(keras.layers.Dense(units))
        network.add(keras.layers.LeakyReLU(alpha=0.05))

    # The output layer is the only Dense layer with a built-in activation.
    output_layer = keras.layers.Dense(output_dims, activation=activation)
    network.add(output_layer)

    optimizer = Adam(learning_rate=lr)
    network.compile(optimizer=optimizer, loss=loss)
    return network

In [20]:
# Scaler persisted with joblib; it is applied to the numeric feature columns
# before the model is evaluated.
scaler = load('../trained_models/std_scaler_reg.bin')
# HDF5 files handed to load_data_batch, which reads every key of each file.
file_0 = "../data/Frankfurt_LH_2301.h5"
# NOTE(review): unlike file_0 this path has no "../data/" prefix — confirm
# that the file really lives in the working directory.
file_1 = "Frankfurt_LH_2302.h5"
# Files are processed in list order (file_1 first, then file_0).
file_batch = [file_1,file_0]
# Columns kept from the preprocessed flight data.
_FEATURE_COLUMNS = [
    "distance",
    "altitude",
    "geoaltitude",
    "arrival_time",
    "timestamp",
    "vertical_rate",
    "groundspeed",
]


def _select_features(df_preprocessed, sample_fraction):
    """Keep the feature columns, drop incomplete rows and draw the random sample."""
    return df_preprocessed[_FEATURE_COLUMNS].dropna().sample(frac=sample_fraction)


def load_data_batch(file_batch, sample_fraction=1):
    """Load, preprocess and sample the flight data of several HDF5 files.

    Every key of every file is read with ``Traffic.from_file``. The first
    table is preprocessed on its own. Each later table is concatenated with
    the table read just before it (also across file boundaries) and
    preprocessed with a one-day ``relevant_time`` window starting at the new
    table's earliest ``day`` value.

    Parameters
    ----------
    file_batch : iterable of str
        Paths of the HDF5 files, processed in the given order.
    sample_fraction : float, default 1
        Fraction of rows kept per table (``DataFrame.sample(frac=...)``).
        No random seed is set, so the selection differs between runs.

    Returns
    -------
    pd.DataFrame
        Concatenation of the sampled per-table frames, restricted to the
        feature columns. If ``file_batch`` is empty or the files contain no
        keys, an empty frame with those columns is returned (the previous
        implementation failed with ``UnboundLocalError`` in that case).
    """
    sampled_frames = []
    old_flights = None  # raw table of the previously processed key

    for file in file_batch:
        # Only the key names are needed from h5py; close the handle before
        # the same file is opened again through Traffic.from_file.
        with h5py.File(file, 'r') as f:
            keys = list(f.keys())

        for key in tqdm(keys, desc=file):
            new_flights = Traffic.from_file(file, key=key,
                                            parse_dates=["day", "firstseen", "hour", "last_position",
                                                         "lastseen", "timestamp"]).data

            if old_flights is None:
                # Very first table: nothing to prepend.
                preprocessed = preprocess_traffic(new_flights)
            else:
                # Prepend the previous table, then restrict preprocessing to
                # the one-day window starting at the new table's first day.
                combined = pd.concat([old_flights, new_flights])
                start = new_flights.day.min().replace(tzinfo=None)
                end = start + datetime.timedelta(days=1)
                relevant_time = [str(start), str(end)]
                preprocessed = preprocess_traffic(combined, relevant_time)
                del combined  # release the doubled-up raw data early

            sampled_frames.append(_select_features(preprocessed, sample_fraction))
            del preprocessed
            old_flights = new_flights

    if not sampled_frames:
        return pd.DataFrame(columns=_FEATURE_COLUMNS)

    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(sampled_frames)


In [21]:
# Load a previously saved Keras model from disk instead of building a new one.
model_file = '../trained_models/model_with_cycle_no_month'
model = load_model(model_file)
# Left commented out: building a new model with build_sequential. It refers
# to X, which this notebook only defines in a later cell.
#model = build_sequential(lr=0.0001, input_dims=(X.shape[1],),output_dims=1, layerSizes=(1024, 512, 256),activation="relu",loss="MAE")
model.summary()
# Left commented out: saving the model back to model_file.
#model.save(model_file)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 1024)              18432     
                                                                 
 leaky_re_lu_13 (LeakyReLU)  (None, 1024)              0         
                                                                 
 dense_17 (Dense)            (None, 512)               524800    
                                                                 
 leaky_re_lu_14 (LeakyReLU)  (None, 512)               0         
                                                                 
 dense_18 (Dense)            (None, 256)               131328    
                                                                 
 leaky_re_lu_15 (LeakyReLU)  (None, 256)               0         
                                                                 
 dense_19 (Dense)            (None, 1)                

In [22]:
# Read and preprocess every table of the files in file_batch, then add the
# auxiliary columns produced by generate_aux_columns (defined in the
# project's preprocessing module; its exact output is not visible here).
# NOTE(review): the recorded run of this cell ended in an OSError (truncated
# HDF5 file) and the following cells carry lower execution counts, so they
# presumably ran on a df_eval from an earlier execution — re-run top to bottom.
df_eval = load_data_batch(file_batch)
df_eval = generate_aux_columns(df_eval)
df_eval.head(100)

OSError: Unable to open file (truncated file: eof = 2408693130, sblock->base_addr = 0, stored_eof = 2410789530)

In [16]:
# frac=1 returns all rows in random order; no random_state is set, so the
# order differs between runs. This rebinds df_eval, so re-running the cell
# reshuffles the already shuffled frame.
df_eval =df_eval.sample(frac=1)
# Target: seconds between each row's timestamp and its arrival_time.
y = seconds_till_arrival(df_eval)
# Features: every remaining column except the two datetime columns used for y.
X = df_eval.drop(columns=["arrival_time", "timestamp"])
# Only these columns are passed through the loaded scaler; all other columns
# of X are left as they are.
cols_numeric = ["distance", "altitude", "geoaltitude", "vertical_rate","groundspeed"]
X_numeric = X[cols_numeric]
# transform() only applies the scaler's stored parameters; nothing is re-fitted here.
X[cols_numeric] = scaler.transform(X_numeric)

In [17]:
def batch_generator(df: pd.DataFrame, y, batchsize, with_sample_weights = False, sample_weights=None ):
    """Yield consecutive row batches of ``df`` together with the matching targets.

    Batches are taken in row order; the last batch is shorter when the number
    of rows is not a multiple of ``batchsize``. No empty batch is produced
    (the earlier version always emitted one extra, empty batch after the
    data was exhausted).

    Parameters
    ----------
    df : pd.DataFrame
        Feature rows; sliced positionally with ``iloc``.
    y : pd.Series
        Targets, positionally aligned with ``df``.
    batchsize : int
        Maximum number of rows per batch; must be positive.
    with_sample_weights : bool, default False
        When True, each yielded tuple carries a third element with the
        per-row weights of the batch. The original author's intent was to
        penalize errors more strongly while the aircraft is far from arrival
        and less severely when it is nearer.
    sample_weights : pd.Series, optional
        Per-row weights, positionally aligned with ``df``. Required when
        ``with_sample_weights`` is True.

    Yields
    ------
    tuple
        ``(X_batch, y_batch)`` or ``(X_batch, y_batch, weight_batch)``, where
        ``X_batch`` is a DataFrame slice and the other elements are NumPy arrays.

    Raises
    ------
    ValueError
        On first iteration, if ``batchsize`` is not positive or weights are
        requested but not supplied.
    """
    if batchsize <= 0:
        raise ValueError("batchsize must be a positive integer")
    if with_sample_weights and sample_weights is None:
        raise ValueError("sample_weights must be provided when with_sample_weights is True")

    size = df.shape[0]
    # iloc slicing clamps at the end of the data, so the final partial batch
    # needs no special handling after the loop.
    for start in range(0, size, batchsize):
        stop = start + batchsize
        X_batch = df.iloc[start:stop, :]
        y_batch = y.iloc[start:stop].values
        if with_sample_weights:
            sample_batch = sample_weights.iloc[start:stop].values
            yield X_batch, y_batch, sample_batch
        else:
            yield X_batch, y_batch
# Feed the model 256 rows at a time; no sample weights are passed.
# The generator can be consumed only once, so it is rebuilt on every run of
# this cell.
gen = batch_generator(X, y, 256)
model.evaluate(gen)

  40808/Unknown - 94s 2ms/step - loss: 326.6957

KeyboardInterrupt: 