In [1]:
import gudhi
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm_pandas
from traffic.core import Flight

from functions.data_loading import get_filtered_data_range, get_data_range, get_flight_persistence, get_removed_outliers
from functions.data_filtering import complete_flight_filter
from functions.data_filtering import filter_by_bools, filter_flights

from datetime import datetime

In [2]:
origin = "barcelona"
destination = "mallorca"

unfiltered_flights, file_name = get_data_range(origin, destination, datetime(year=2023, month=1, day=1), datetime(year=2024, month=1, day=1))
filtered_flights, file_name = get_filtered_data_range(unfiltered_flights, file_name, complete_flight_filter(origin, destination, 0.75)) 
#persistences, file_name = get_flight_persistence(filtered_flights, file_name)

In [3]:
def build_sublevelset_filtration(Y):
    """
    Y: array-like
        Array of function values
    """
    st = gudhi.SimplexTree()
    for i in range(len(Y)):
        # 0-simplices
        st.insert([i], filtration=Y.iloc[i])

        if i < len(Y) - 1:
            # 1-simplices
            st.insert([i, i + 1], filtration=max(Y.iloc[i], Y.iloc[i + 1]))

    return st

In [11]:
def get_sublevelset_filtration(flight: Flight):
    data = flight.data
    data = pd.DataFrame(data={"geoaltitude":list(data["geoaltitude"])},
                        index=pd.DatetimeIndex(data["timestamp"])).dropna()
    data["geoaltitude"] = data["geoaltitude"].astype("float32").interpolate("time")
    # Build sublevelset filtration and compute persistence
    st = build_sublevelset_filtration(data["geoaltitude"])
    st.persistence()
    return st

In [12]:
from tqdm import tqdm
trees = []

for f in tqdm(filtered_flights):
    trees.append(get_sublevelset_filtration(f))

100%|██████████| 7036/7036 [02:01<00:00, 57.74it/s]


In [13]:
condensed_distance_matrix = []
for i in tqdm(range(len(trees))):
    for j in range(i+1, len(trees)):
        pers_i = trees[i].persistence_intervals_in_dimension(0)
        pers_j = trees[j].persistence_intervals_in_dimension(0)
        dist = gudhi.bottleneck_distance(np.array(pers_i), np.array(pers_j), 0.0001)
        condensed_distance_matrix.append(dist)

100%|██████████| 7036/7036 [1:21:32<00:00,  1.44it/s]


In [42]:
import pickle
with open("condensed_distance_matrix", "wb") as f:
    pickle.dump(condensed_distance_matrix, f)

In [14]:
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
import sys
sys.setrecursionlimit(10000)

Z = linkage(condensed_distance_matrix, "complete")

#fig = plt.figure(figsize=(100, 10))
#dn = dendrogram(Z)

clustering = fcluster(Z, t=1.1, criterion='inconsistent', depth=2)
clustering = fcluster(Z, t=10, criterion='maxclust', depth=2)

In [15]:
for i in range(1, len(np.unique(clustering))+1):
    print(f"{i}: {sum(clustering == i)}")

1: 6
2: 4
3: 6
4: 4
5: 6
6: 39
7: 55
8: 32
9: 6751
10: 133


In [33]:
from traffic.core import Traffic

bool_array = clustering == 1

loopy_flight_cluster = filter_flights(filter_by_bools(bool_array), filtered_flights)
loopy_flight_cluster_pers = list(filter(filter_by_bools(bool_array), trees))
print(len(loopy_flight_cluster))
Traffic.from_flights(loopy_flight_cluster).map_leaflet()

6


Map(center=[40.285559465633405, 2.5369085073182958], controls=(ZoomControl(options=['position', 'zoom_in_text'…

In [41]:
plt.plot(loopy_flight_cluster[].data["geoaltitude"])

IndexError: list index out of range