In [53]:
import datetime
import gudhi
import pickle
import os

import numpy as np
import pandas as pd

from tqdm.auto import tqdm

from traffic.data import opensky
from traffic.core import Traffic
from functions.data_filtering import ICAO_codes
from functions.data_processing import flight_pers

from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

In [54]:
# Read the go-around data set and parse its "time" column into datetimes.
df = pd.read_csv("data/go_arounds_augmented.csv", low_memory=False).assign(
    time=lambda frame: pd.to_datetime(frame["time"])
)

# Study window (UTC); both bounds are inclusive in the queries below.
start = datetime.datetime(2019, 1, 1, tzinfo=datetime.timezone.utc)
stop = datetime.datetime(2019, 1, 10, tzinfo=datetime.timezone.utc)

# Rows inside the window, split on the has_ga flag.
# NOTE(review): the second expression relies on the pandas query parser giving
# `&` the precedence of `and`, so that `not` applies to has_ga only — confirm.
with_GA = df.query("has_ga & (@start <= time <= @stop)")
without_GA = df.query("not has_ga & (@start <= time <= @stop)")

In [67]:
def flights_from_query(
    query: pd.DataFrame,
    n: int,
    file_name: str,
    delta_time: pd.Timedelta = pd.Timedelta(minutes=15),
    random_state: int = 42,
):
    """Fetch a sample of flights from OpenSky, cached on disk as a pickle.

    If ``file_name`` already exists, its unpickled content is returned as-is:
    ``query``, ``n``, ``delta_time`` and ``random_state`` are NOT consulted in
    that case. Delete the file to force a fresh download.

    Parameters
    ----------
    query : pd.DataFrame
        Rows to sample from; the "time" and "callsign" columns are read.
    n : int
        Number of rows to sample (passed to ``DataFrame.sample``).
    file_name : str
        Path of the pickle cache to read from / write to.
    delta_time : pd.Timedelta
        Half-width of the time window requested around each row's "time".
    random_state : int
        Seed forwarded to ``DataFrame.sample``; the default reproduces the
        previously hard-coded value.

    Returns
    -------
    list
        One ``opensky.history`` result per sampled row, in sampling order.
    """
    if os.path.isfile(file_name):
        # SECURITY: pickle.load can execute arbitrary code. Only point this at
        # cache files written by this function, never at untrusted files.
        with open(file_name, "rb") as file:
            return pickle.load(file)

    time_format = "%Y-%m-%d %H:%M:%S"
    sampled = query.sample(n=n, random_state=random_state)

    flights = []
    for _, row in tqdm(sampled.iterrows(), total=n):
        # request delta_time before and delta_time after the landing or go-around
        start_time = row["time"] - delta_time
        stop_time = row["time"] + delta_time

        # fetch the data from OpenSky Network
        # NOTE(review): opensky.history may return None when no data is found —
        # confirm; such results are stored unchanged, as before.
        flights.append(
            opensky.history(
                start=start_time.strftime(time_format),
                stop=stop_time.strftime(time_format),
                callsign=row["callsign"],
                return_flight=True,
            )
        )

    with open(file_name, "wb") as file:
        pickle.dump(flights, file)
    return flights


# Fetch (or load from the pickle cache) as many non-go-around flights as there
# are go-around rows. Each resulting list can be wrapped in a Traffic object
# with Traffic.from_flights.
count = len(with_GA)
with_GA_flights = flights_from_query(
    query=with_GA, n=count, file_name="with_GA_flights.pkl"
)
without_GA_flights = flights_from_query(
    query=without_GA, n=count, file_name="without_GA_flights.pkl"
)

In [56]:
#Traffic.from_flights(with_GA_flights).map_leaflet()

In [57]:
#Traffic.from_flights(without_GA_flights).map_leaflet()

In [58]:
# Index into with_GA_flights used by the next cell, which advances it on every
# run before displaying a flight.
i = 0

In [59]:
# Step to the next go-around flight each time this cell is run and show it on
# a map. The modulo wraps the index back into range instead of raising
# IndexError once `i` has passed the end of the list (for example after a
# later loop in this notebook has reused `i`).
i = (i + 1) % len(with_GA_flights)
with_GA_flights[i].map_leaflet()

Map(center=[55.91086593714756, -3.2108927757564656], controls=(ZoomControl(options=['position', 'zoom_in_text'…

In [60]:
# Number of go-around flights that were fetched or loaded from the cache.
len(with_GA_flights)

714

In [61]:
# NOTE(review): generate_alpha_tree is not referenced in the cells shown here.
from functions.data_processing import generate_alpha_tree
# Run flight_pers on the go-around flights first and append its result for the
# non-go-around flights, so the go-around entries come first in `trees`.
# NOTE(review): the entries are later queried with
# persistence_intervals_in_dimension(0), so they are presumably gudhi simplex
# trees with persistence already computed — confirm in flight_pers.
trees = flight_pers(with_GA_flights)
trees.extend(flight_pers(without_GA_flights))

100%|██████████| 714/714 [00:10<00:00, 65.17it/s]
100%|██████████| 714/714 [00:06<00:00, 112.69it/s]


In [62]:
# Ground-truth labels in the same order as `trees`: 1 for go-around flights,
# 0 for the others. The sizes come from the fetched flight lists rather than
# from the with_GA / without_GA query frames, because only `count` rows of
# without_GA were sampled; sizing from the frames would make `labels` longer
# than `trees`.
with_GA_labels = np.ones(len(with_GA_flights))
without_GA_labels = np.zeros(len(without_GA_flights))
labels = np.concatenate((with_GA_labels, without_GA_labels))
assert len(labels) == len(trees), "labels must line up one-to-one with trees"

In [63]:
# Extract every 0-dimensional persistence diagram once, instead of recomputing
# it (and re-wrapping it in np.array) for each of the n*(n-1)/2 pairs.
diagrams = [
    np.array(tree.persistence_intervals_in_dimension(0)) for tree in trees
]

# Pairwise bottleneck distances in condensed form: pairs (a, b) with a < b,
# enumerated row by row, which is the layout scipy's linkage expects.
# The third argument (0.0001) is passed positionally to
# gudhi.bottleneck_distance, as before.
# The loop indices are deliberately not named `i`, so the flight-browsing
# index used earlier in the notebook is left alone.
condensed_distance_matrix = []
for first_idx in tqdm(range(len(diagrams))):
    for second_idx in range(first_idx + 1, len(diagrams)):
        condensed_distance_matrix.append(
            gudhi.bottleneck_distance(
                diagrams[first_idx], diagrams[second_idx], 0.0001
            )
        )

  0%|          | 0/1428 [00:00<?, ?it/s]

In [64]:
# Hierarchical clustering with complete linkage on the condensed distances.
Z = linkage(condensed_distance_matrix, method="complete")

# Optional dendrogram, left disabled (plt is not imported in the cells shown):
#fig = plt.figure(figsize=(100, 10))
#dn = dendrogram(Z)

# Cut the hierarchy into flat clusters with the inconsistency criterion.
clustering = fcluster(Z, 2.7, criterion="inconsistent", depth=4)


In [65]:
# Print the size of every flat cluster. np.unique counts all labels in a
# single vectorised pass, works even if the cluster ids are not contiguous,
# and leaves the notebook-level index `i` untouched.
cluster_ids, cluster_sizes = np.unique(clustering, return_counts=True)
for cluster_id, cluster_size in zip(cluster_ids, cluster_sizes):
    print(f"{cluster_id}: {cluster_size}")

1: 1428
