In [1]:
import gudhi
import numpy as np
import pandas as pd

from functions.data_loading import get_filtered_data_range, get_data_range, get_flight_persistence, get_wind_direction
from functions.data_filtering import complete_flight_filter
from functions.data_processing import get_takeoff_and_landing_directions, prepare_wind_data

from sklearn.model_selection import train_test_split

from datetime import datetime
from tqdm import tqdm
from traffic.core import Traffic, Flight

In [2]:
origin = "bergen"
destination = "oslo"

unfiltered_data, file_name = get_data_range(origin, destination, datetime(year=2023, month=1, day=1), datetime(year=2024, month=1, day=1))
filtered_flights, file_name = get_filtered_data_range(unfiltered_data, file_name, complete_flight_filter(origin, destination))
persistences, _ = get_flight_persistence(filtered_flights, file_name)

directions = list(get_takeoff_and_landing_directions(filtered_flights))

In [3]:
origin = "FLESLAND"
destination = "GARDERMOEN"

origin_wind = prepare_wind_data(get_wind_direction(origin))
destination_wind = prepare_wind_data(get_wind_direction(destination))

directions = list(get_takeoff_and_landing_directions(filtered_flights))
dataset = pd.DataFrame(directions)

dataset.rename({0: "start_time",  1: "end_time", 2: "start_direction", 3: "end_direction"}, axis=1, inplace=True)
dataset['start_x'], dataset['start_y']  = np.sin(dataset['start_direction']), np.cos(dataset['start_direction'])
dataset['end_x'], dataset['end_y'] = np.sin(dataset['end_direction']), np.cos(dataset['end_direction'])
dataset['start_time'] = pd.to_datetime(dataset['start_time']).dt.round("s")
dataset['end_time'] = pd.to_datetime(dataset['end_time']).dt.round("s")

merged_start = pd.merge(origin_wind, dataset, how="right", left_on="time", right_on="start_time")
merged_end = pd.merge(destination_wind, dataset, how="right", left_on="time", right_on="end_time")

final_dataset = pd.DataFrame({
    'start_wind_x': merged_start['x'],
    'start_wind_y': merged_start['y'],
    'start_wind_speed': merged_start['wind_speed'],
    'end_wind_x': merged_end['x'],
    'end_wind_y': merged_end['y'],
    'end_wind_speed': merged_end['wind_speed']
}).to_numpy()

In [15]:
rows = []
for flight, wind in tqdm(zip(filtered_flights, final_dataset)):
    data = flight.data.copy(deep=True)
    
    unwrapped = np.unwrap(np.deg2rad(data["heading"]), period=2 * np.pi, discont=np.pi)
    unwrapped_diff = unwrapped.max() - unwrapped.min()
    
    length = (data["timestamp"].max() - data["timestamp"].min()).total_seconds()
    
    max_velocity = data["velocity"].max()
    min_velocity = data["velocity"].min()
    
    start_wind_x = wind[0]
    start_wind_y = wind[1]
    start_wind_speed = wind[2]
    end_wind_x = wind[0]
    end_wind_y = wind[1]
    end_wind_speed = wind[2]
    
    row = [unwrapped_diff, length, start_wind_x, start_wind_y, start_wind_speed, end_wind_x, end_wind_y, end_wind_speed]
    rows.append(row)

# Convert the list of rows into a NumPy array
final_array = np.array(rows)

final_array

3219it [00:05, 636.28it/s] 


array([[ 2.28846671e+00,  2.78500000e+03, -3.09385839e-01, ...,
        -3.09385839e-01, -9.50936592e-01,  4.19888889e+00],
       [ 2.71179772e+00,  2.12500000e+03,  1.24832987e-01, ...,
         1.24832987e-01,  9.92177769e-01,  5.01555556e-01],
       [ 2.23689375e+00,  2.04600000e+03, -4.12971972e-01, ...,
        -4.12971972e-01, -9.10743735e-01,  2.86350000e+00],
       ...,
       [ 2.75720188e+00,  2.22100000e+03,  9.99666402e-01, ...,
         9.99666402e-01, -2.58280005e-02,  1.50800000e+00],
       [ 2.24563463e+00,  2.15100000e+03, -4.39939170e-01, ...,
        -4.39939170e-01, -8.98027576e-01,  3.47800000e+00],
       [ 2.24450425e+00,  2.01300000e+03, -5.48293230e-01, ...,
        -5.48293230e-01, -8.36286156e-01,  3.72500000e+00]])

In [5]:
|labels = []
for tree in persistences:
    persistence = tree.persistence()

    more_than_00001 = len([x for x in persistence if  x[0] == 1 and x[1][1] - x[1][0] > 0.0001])
    labels.append(more_than_00001)
labels = np.array(labels)

In [42]:
x_train, x_val_test, y_train, y_val_test = train_test_split(final_array, labels, test_size=0.3, random_state=42)
x_val, x_test, y_val, y_test = train_test_split(x_val_test, y_val_test, test_size=0.5, random_state=42)

In [21]:
from sklearn.neural_network import MLPRegressor
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor

models = {
    "Base line" : [DummyRegressor(strategy = "mean"), {
    
    }],
    "Multi-layer Perception" : [MLPRegressor(random_state=42, max_iter = 10000), {
        "hidden_layer_sizes" : [10, 25, 50], 
    }],
    "K Nearest Neighbors" : [KNeighborsRegressor(), {
        "n_neighbors" : [5, 10, 20, 40], 
        "p": [1, 2, 3]
    }],
    "Random Forrest Regressor" : [RandomForestRegressor(random_state=42), {
        "max_depth": [2, 3], 
        "n_estimators" : [50, 100, 300]
    }],
    "Decision Tree Regressor" : [DecisionTreeRegressor(random_state=42), {
        "min_samples_split" : [2, 3, 4],
        "min_samples_leaf" : [1, 2, 3]
    }],
}

In [43]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import root_mean_squared_error

datalist = []

for i in models.keys():
    model = GridSearchCV(models[i][0], models[i][1])
    prediction = model.fit(x_train, y_train).predict(x_test)
    
    error = root_mean_squared_error(y_test, prediction)
    rounded_error = root_mean_squared_error(y_test, np.round(prediction))
    datalist.append([model.best_estimator_, error, rounded_error])

In [44]:
for i, j, k in datalist:
    print(f"{i.__repr__(): <70}: {j: .3}, {k: .3}")

DummyRegressor()                                                      :  0.222,  0.223
MLPRegressor(hidden_layer_sizes=50, max_iter=10000, random_state=42)  :  0.158,  0.144
KNeighborsRegressor(p=1)                                              :  0.211,  0.236
RandomForestRegressor(max_depth=2, random_state=42)                   :  0.0743,  0.0788
DecisionTreeRegressor(min_samples_leaf=3, random_state=42)            :  0.082,  0.0788


In [45]:
best_model = sorted(datalist, key=lambda x: x[1])[0][0]

In [46]:
x_train_test = np.concatenate([x_train, x_test])
y_train_test = np.concatenate([y_train, y_test])

prediction = best_model.fit(x_train_test, y_train_test).predict(x_val)
rounded_error = root_mean_squared_error(y_val, np.round(prediction))
print(f"{best_model.__class__.__name__}: {rounded_error: .3}")

RandomForestRegressor:  0.091
