In [1]:
%load_ext autoreload
%autoreload 2

import datetime

import numpy as np
import pandas as pd
import gudhi

from traffic.core import Traffic, Flight
from functions.data_filtering import ICAO_codes, large_gap_filter
from functions.data_processing import flight_persistence, remove_outliers_z_score, remove_outliers_dbscan
from functions.data_loading import linkage_cluster_persistances, flights_from_query

from scipy.cluster.hierarchy import fcluster

In [2]:
# Read the go-around table from CSV and parse its `time` column into datetimes.
df = pd.read_csv("../../data/go_arounds_augmented.csv", low_memory=False)
df["time"] = pd.to_datetime(df["time"])

# Study window: 2019-01-01 to 2019-03-01, timezone-aware (UTC).
# Both bounds are inclusive in the queries below.
start = datetime.datetime(2019, 1, 1, tzinfo=datetime.timezone.utc)
stop = datetime.datetime(2019, 3, 1, tzinfo=datetime.timezone.utc)

# Split the rows that fall inside the window by the boolean `has_ga` column.
with_GA = df.query("has_ga & (@start <= time <= @stop)")
without_GA = df.query("not has_ga & (@start <= time <= @stop)")

In [3]:
# Build a class-balanced query: every `has_ga` row in the window plus an
# equally sized random draw of rows without the flag.
count = with_GA.shape[0]

# Sample into a new name instead of overwriting `without_GA`: re-running this
# cell then always draws from the full set and yields the same rows in the same
# order (fixed seed), rather than re-sampling an already sampled frame.
without_GA_sample = without_GA.sample(n=count, random_state=42)
query = pd.concat([with_GA, without_GA_sample], axis=0)

# NOTE(review): with load_results=True this presumably reads cached results
# from the pickle instead of recomputing them — confirm in flights_from_query.
flights, data = flights_from_query(query, "../../data/GA_dataset.pkl", load_results=True)

100%|██████████| 10620/10620 [01:44<00:00, 101.24it/s]


In [None]:
# Drop the flights rejected by `large_gap_filter`, keeping `data` in step with
# them so that `labels[i]` still belongs to `flights[i]` after filtering.
# (Previously only `flights` was filtered, leaving `labels` longer than, and
# misaligned with, `flights` / `trees` / `paths`.)
flights = list(flights)
keep = np.array([bool(large_gap_filter(flight)) for flight in flights], dtype=bool)

if len(data) == len(flights):
    # NOTE(review): assumes row i of `data` describes flights[i] — confirm
    # against flights_from_query. Filtering both together also keeps this cell
    # idempotent: on a re-run every remaining flight passes and nothing changes.
    data = data[keep]
# If the lengths differ there is no row-wise correspondence to apply, so `data`
# is left untouched and `labels` is taken from it as before.

flights = [flight for flight, kept in zip(flights, keep) if kept]
labels = data[:, 0]

# One entry of `trees` and `paths` per remaining flight.
# NOTE(review): presumably returned in the same order as `flights` — confirm.
trees, paths = flight_persistence(flights)

In [None]:
# Pickle location handed to the linkage helper.
path = "../../data/linkage_clustering/cyclic_dataset.pkl"

# `Z` is passed to scipy's `fcluster` in the next cell, so it is expected to be
# a linkage matrix built from the persistence of `trees`.
# NOTE(review): load_results=True presumably loads a cached result from `path`
# — confirm in linkage_cluster_persistances.
Z = linkage_cluster_persistances(
    trees,
    path=path,
    dimension=1,
    load_results=True,
)

In [None]:
# Cut the linkage into at most N_CLUSTERS flat clusters.
N_CLUSTERS = 10
clustering = fcluster(Z, t=N_CLUSTERS, criterion='maxclust')

# Report the size of every cluster label that actually occurs. Counting with
# np.unique is a single vectorised pass and does not assume the labels are
# exactly 1..k (the previous loop did, and used the builtin `sum` over a NumPy
# array once per cluster).
cluster_ids, cluster_sizes = np.unique(clustering, return_counts=True)
for cluster_id, cluster_size in zip(cluster_ids, cluster_sizes):
    print(f"{cluster_id}: {cluster_size}")

In [None]:
from matplotlib import pyplot as plt

# Cluster label to inspect.
ind = 8

# Pick one member of that cluster. The position 4 is the same literal used by
# the persistence-diagram cell below; raises IndexError if the cluster has
# fewer than five members.
cluster_members = np.where(clustering == ind)[0]
flight = flights[cluster_members[4]]
before = flight.data[["longitude", "latitude"]].to_numpy()

# Each remove_outliers_* result is used to index its input array.
# NOTE(review): presumably a boolean mask or index array of points to keep —
# confirm in functions.data_processing.
dbscan_paths = before[remove_outliers_dbscan(before, 10)]
z_score_paths = before[remove_outliers_z_score(before)]
both_methods_paths = dbscan_paths[remove_outliers_z_score(dbscan_paths)]

path_filters = [
    (before, "before"),
    (z_score_paths, "z-score"),
    (dbscan_paths, "dbscan"),
    (both_methods_paths, "dbscan + z-score"),
]

# Two panels per row, as many rows as needed. squeeze=False keeps `axs` 2-D
# even when there is a single row, so the iteration below always works.
n_cols = 2
n_rows = (len(path_filters) + n_cols - 1) // n_cols
fig, axs = plt.subplots(
    n_rows,
    n_cols,
    sharex=True,
    sharey=True,
    squeeze=False,
    figsize=(10, 10),
)

# `axs.flat` walks the grid row by row. The loop variable is deliberately not
# called `path`, which would overwrite the pickle path string defined earlier
# in the notebook.
for ax, (coords, name) in zip(axs.flat, path_filters):
    ax.scatter(coords[:, 0], coords[:, 1])  # x: longitude, y: latitude
    ax.set_title(name)

In [None]:
# Position (within trees/paths) of the fifth member of cluster `ind`, computed
# once and reused for both lookups.
# NOTE(review): presumably the same flight as plotted in the previous cell,
# assuming trees/paths are ordered like `flights` — confirm.
selected = np.where(clustering == ind)[0][4]
tree = trees[selected]
points = paths[selected]

# Draw the persistence diagram of the selected tree.
gudhi.plot_persistence_diagram(tree.persistence())

In [None]:
# Scatter the selected entry of `paths`: column 1 on x, column 0 on y.
# Uses the explicit Figure/Axes interface so that `fig` stays a Figure handle;
# `plt.scatter` returns a PathCollection, which the old code bound to `fig`.
fig, ax = plt.subplots()
ax.scatter(points[:, 1], points[:, 0])
plt.show()