In [None]:
import os
import sys
# Locate the project root so that the `src` package can be imported from this notebook.
# NOTE: '../..' assumes the notebook's working directory is two levels below the
# project root; adjust the relative path if the notebook is moved.
current_dir = os.getcwd()
parent_parent_dir = os.path.abspath(os.path.join(current_dir, '../..'))

# Guard the append so that re-running this cell does not keep adding duplicate
# entries to sys.path.
if parent_parent_dir not in sys.path:
    sys.path.append(parent_parent_dir)

In [None]:
import json
import pandas as pd
import numpy as np
import ast
from sklearn.cluster import OPTICS
import matplotlib.pyplot as plt
# %matplotlib qt5
%matplotlib inline

In [None]:
# Identifier of the intersection dataset to analyze. It is interpolated into the
# data-file paths built by the cells below, so it must match the file names under
# data/processed.
intersection_name = 'k729_2022'

In [None]:
from src.features.get_x_y_tuple_list import get_x_y_tuple_list
# Read the CUID table and its grouped companion file; the companion path is derived
# by replacing '.csv' with '_grouped.csv' in the CUID path.
data_path = f'{parent_parent_dir}/data/processed/{intersection_name}_cuid.csv'
df_cuid_grouped_path = data_path.replace('.csv', '_grouped.csv')
df_cuid = pd.read_csv(data_path)
df_cuid_grouped = pd.read_csv(df_cuid_grouped_path)

# The 'x' and 'y' columns come out of the CSV as strings holding Python literals;
# parse each cell back into the corresponding Python object.
for coord_column in ('x', 'y'):
    df_cuid_grouped[coord_column] = df_cuid_grouped[coord_column].apply(ast.literal_eval)

In [None]:
from src.data.load_dtw_matrices_from_json import load_dtw_matrices_from_json

# --- Intersection-level OPTICS model -------------------------------------------
# ID of the initialization-parameter set used for the intersection optimization run.
id_init_params_big_optics_k729 = 'WDB'
# Optimized metric whose parameters are used for the intersection model.
intersection_metric = 'silhouette'

# --- In-cluster OPTICS model ---------------------------------------------------
# Cluster (label of the intersection model) that is analyzed in more detail.
cluster_id = 0
# ID of the initialization-parameter set used for the in-cluster optimization run.
id_init_params_in_cluster_optics_k729 = 'SDB'
# Optimized metric whose parameters are used for the in-cluster model.
in_cluster_metric = 'silhouette'

# --- DTW distance matrices -----------------------------------------------------
# All DTW distance matrices for this intersection; later cells look up individual
# matrices in this dict by key.
intersection_dtw_distance_matrix_dict = load_dtw_matrices_from_json(
    f'{parent_parent_dir}/data/processed/'
    f'{intersection_name}_diff_itakura_slope_dtw_matrices.json'
)

In [None]:
from src.models.OUTLIER_DETECTION.OPTICS_MODELS.within_cluster_clustering.get_subcluster_indices_for_cluster import get_subcluster_indices_for_cluster

# Compute the sub-cluster indices with the project helper.
# The intersection name, parameter IDs, metrics and DTW matrices are taken from the
# configuration cells above instead of being hard-coded and re-loaded here, so this
# cell cannot drift out of sync with the rest of the notebook and the (potentially
# large) DTW JSON is only read once.
# NOTE(review): the two metric arguments are assumed to be ordered
# (intersection metric, in-cluster metric) — confirm against the helper's signature.
subcluster_indices = get_subcluster_indices_for_cluster(
    intersection_name,
    id_init_params_big_optics_k729,
    id_init_params_in_cluster_optics_k729,
    intersection_metric,
    in_cluster_metric,
    intersection_dtw_distance_matrix_dict,
)

print(subcluster_indices)
                                   

In [None]:
from src.models.optics.get_dtw_optics_params import get_dtw_optics_params
from src.models.optics.get_clusters_from_optics_labels import get_clusters_from_optics_labels
from src.visualization.render_vehicle_track_cluster_in_notebook import render_vehicle_track_cluster_in_notebook


# Read the optimized OPTICS parameters of the intersection-level model that belong
# to the configured initialization-parameter ID.
with open(
    f'{parent_parent_dir}/data/processed/'
    f'{intersection_name}_optics_vehicle_paths_optimized_params_{id_init_params_big_optics_k729}.json'
) as f:
    intersection_optics_optimized_params = json.load(f)

# Select the DTW-matrix key and the OPTICS keyword arguments for the chosen metric.
intersection_dtw_matrix_key, intersection_optics_optimized_params = get_dtw_optics_params(
    intersection_optics_optimized_params,
    intersection_metric,
)

# Look up the matching DTW distance matrix.
intersection_dtw_distance_matrix = intersection_dtw_distance_matrix_dict[intersection_dtw_matrix_key]

# Fit OPTICS directly on the precomputed distances.
optics_intersection = OPTICS(
    metric='precomputed', **intersection_optics_optimized_params
).fit(intersection_dtw_distance_matrix)

# Convert the flat label array into the cluster structure expected by the renderer.
clusters_dict = get_clusters_from_optics_labels(optics_intersection.labels_)

# Draw the clustered vehicle tracks on a single square figure.
fig, ax = plt.subplots(figsize=(10, 10))
render_vehicle_track_cluster_in_notebook(ax, df_cuid, df_cuid_grouped, clusters_dict)


In [None]:
from src.visualization.plot_vehicle_tracks_in_notebook import plot_vehicle_tracks_in_notebook



# Read the optimized OPTICS parameters of the in-cluster model. The file name encodes
# the intersection-level parameter ID and metric, the analyzed cluster and the
# in-cluster parameter ID.
with open(
    f'{parent_parent_dir}/data/processed/within_cluster_clustering_optimization/{intersection_name}'
    f'_optics_optimized_vehicle_paths_{id_init_params_big_optics_k729}_{intersection_metric}'
    f'_within_cluster_{cluster_id}_optimized_params_{id_init_params_in_cluster_optics_k729}.json'
) as f:
    in_cluster_optics_optimized_params = json.load(f)

# Select the DTW-matrix key and the OPTICS keyword arguments for the chosen metric.
in_cluster_dtw_matrix_key, in_cluster_optics_optimized_params = get_dtw_optics_params(
    in_cluster_optics_optimized_params,
    in_cluster_metric,
)

# Boolean mask over all vehicle tracks: True where the intersection-level model
# assigned the track to the analyzed cluster.
in_cluster_mask = optics_intersection.labels_ == cluster_id

# Rows of the grouped dataframe that belong to the analyzed cluster.
df_grouped_within_cluster_filtered = df_cuid_grouped.loc[in_cluster_mask]

# Restrict the DTW distance matrix to the cluster members: first keep only their
# rows, then only their columns.
in_cluster_dtw_distance_matrix = np.array(intersection_dtw_distance_matrix_dict[in_cluster_dtw_matrix_key])
filtered_rows = in_cluster_dtw_distance_matrix[in_cluster_mask]
in_cluster_dtw_distance_matrix = filtered_rows[:, in_cluster_mask]

# Fit the in-cluster OPTICS model on the reduced, precomputed distances.
optics_in_cluster = OPTICS(
    metric='precomputed', **in_cluster_optics_optimized_params
).fit(in_cluster_dtw_distance_matrix)

# Cluster structure of the in-cluster model, used for plotting.
clusters_dict = get_clusters_from_optics_labels(optics_in_cluster.labels_)

# Translate the positions inside the filtered dataframe into index labels of
# df_cuid_grouped, one list per sub-cluster.
in_cluster_indices = {
    sub_cluster_id: df_grouped_within_cluster_filtered.index[indices].tolist()
    for sub_cluster_id, indices in clusters_dict.items()
}

In [None]:
%matplotlib qt5
# Two panels side by side: the in-cluster clustering as rendered by the project
# helper (left) and the sub-cluster tracks drawn individually (right).
fig, ax = plt.subplots(1, 2, figsize=(2*10, 10))
fig.suptitle(f'{intersection_name} Cluster {cluster_id}')

# Left panel: clustered tracks of the analyzed cluster, with all tracks drawn in
# gray as background.
render_vehicle_track_cluster_in_notebook(ax[0], df_cuid, df_grouped_within_cluster_filtered, clusters_dict)
plot_vehicle_tracks_in_notebook(ax[0], df_cuid, df_cuid_grouped, f'In-cluster clustering\n Cluster-ID: {id_init_params_big_optics_k729}\n In-Cluster-ID: {id_init_params_in_cluster_optics_k729}', color='gray', alpha=0.3)

# Right panel: one color per sub-cluster, using the index lists in in_cluster_indices.
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
for i, (sub_cluster_id, indices) in enumerate(in_cluster_indices.items()):
    # Skip the -1 entry (presumably the OPTICS noise label — confirm against
    # get_clusters_from_optics_labels).
    if sub_cluster_id == -1:
        continue
    # in_cluster_indices holds index *labels* of df_cuid_grouped (it is built from
    # `.index[...]`), so select by label with .loc rather than by position.
    df_cuid_grouped_filtered = df_cuid_grouped.loc[indices]
    # Cycle through the palette so more than len(colors) sub-clusters do not raise
    # an IndexError.
    plot_vehicle_tracks_in_notebook(
        ax[1],
        df_cuid,
        df_cuid_grouped_filtered,
        color=colors[i % len(colors)],
        linestyle='--',
    )
plot_vehicle_tracks_in_notebook(ax[1], df_cuid, df_cuid_grouped, color='gray', alpha=0.3)

In [None]:
from src.models.optics.get_dtw_optics_params import get_dtw_optics_params
from src.models.optics.get_clusters_from_optics_labels import get_clusters_from_optics_labels
import json
import numpy as np
from sklearn.cluster import OPTICS

# Condensed version of the two-stage pipeline: fit the intersection-level OPTICS
# model, re-cluster the members of `cluster_id`, and collect the index labels of the
# tracks in each sub-cluster.
# The metrics come from the configuration cell (`intersection_metric`,
# `in_cluster_metric`) instead of being re-hardcoded here, so changing the
# configuration is honored by this cell as well.

# Load intersection optics parameters and distance matrix
with open(
    f'{parent_parent_dir}/data/processed/'
    f'{intersection_name}_optics_vehicle_paths_optimized_params_{id_init_params_big_optics_k729}.json'
) as f:
    intersection_optics_optimized_params = json.load(f)

intersection_dtw_matrix_key, intersection_optics_optimized_params = get_dtw_optics_params(
    intersection_optics_optimized_params, intersection_metric
)
intersection_dtw_distance_matrix = intersection_dtw_distance_matrix_dict[intersection_dtw_matrix_key]

# Fit optics model for the full dataset
optics_intersection = OPTICS(**intersection_optics_optimized_params, metric='precomputed').fit(intersection_dtw_distance_matrix)

# Load in-cluster optics parameters and distance matrix for the specific cluster
with open(f'{parent_parent_dir}/data/processed/within_cluster_clustering_optimization/'
          f'{intersection_name}_optics_optimized_vehicle_paths_{id_init_params_big_optics_k729}_'
          f'{intersection_metric}_within_cluster_{cluster_id}_optimized_params_{id_init_params_in_cluster_optics_k729}.json') as f:
    in_cluster_optics_optimized_params = json.load(f)

in_cluster_dtw_matrix_key, in_cluster_optics_optimized_params = get_dtw_optics_params(
    in_cluster_optics_optimized_params, in_cluster_metric
)
in_cluster_dtw_distance_matrix = intersection_dtw_distance_matrix_dict[in_cluster_dtw_matrix_key]

# Boolean mask of the tracks that the intersection-level model put into `cluster_id`
in_cluster_mask = optics_intersection.labels_ == cluster_id

# Filter the distance matrix to the cluster members (rows first, then columns)
filtered_rows = np.array(in_cluster_dtw_distance_matrix)[in_cluster_mask]
in_cluster_dtw_distance_matrix = filtered_rows[:, in_cluster_mask]

# Fit optics model for the in-cluster data
optics_in_cluster = OPTICS(**in_cluster_optics_optimized_params, metric='precomputed').fit(in_cluster_dtw_distance_matrix)

# Map the positions inside the cluster back to index labels of df_cuid_grouped,
# one list per sub-cluster
in_cluster_indices = {
    sub_cluster_id: df_cuid_grouped.index[in_cluster_mask][indices].tolist()
    for sub_cluster_id, indices in get_clusters_from_optics_labels(optics_in_cluster.labels_).items()
}

# Result: Dictionary with subcluster IDs and corresponding global indices
print(in_cluster_indices)


In [None]:
from src.models.optics.get_dtw_optics_params import get_dtw_optics_params
from src.models.optics.get_clusters_from_optics_labels import get_clusters_from_optics_labels
import json
import numpy as np
from sklearn.cluster import OPTICS

def get_subcluster_indices_for_intersection(intersection_name,
                                            id_init_params_intersection_optics_model,
                                            id_init_params_in_cluster_optics,
                                            intersection_eval_metric,
                                            in_cluster_eval_metric
                                            ):
    """Re-cluster one intersection-level cluster and return its sub-cluster members.

    The intersection-level OPTICS model is fitted first; the tracks it assigns to the
    notebook-level ``cluster_id`` are then clustered again with the in-cluster OPTICS
    model.

    Besides its parameters, the function reads these notebook-level names:
    ``parent_parent_dir``, ``intersection_dtw_distance_matrix_dict``,
    ``df_cuid_grouped`` and ``cluster_id``.

    Args:
        intersection_name: Intersection identifier used in the parameter file names.
        id_init_params_intersection_optics_model: ID of the initialization-parameter
            set of the intersection-level optimization (part of both file names).
        id_init_params_in_cluster_optics: ID of the initialization-parameter set of
            the in-cluster optimization (part of the in-cluster file name).
        intersection_eval_metric: Metric whose optimized parameters are used for the
            intersection-level model (also part of the in-cluster file name).
        in_cluster_eval_metric: Metric whose optimized parameters are used for the
            in-cluster model.

    Returns:
        dict: Maps each sub-cluster ID to the list of ``df_cuid_grouped`` index labels
        of the tracks in that sub-cluster. The dict is also printed.

    Raises:
        OSError: If one of the optimized-parameter JSON files cannot be opened.
    """
    processed_dir = f'{parent_parent_dir}/data/processed'

    # Load intersection optics parameters and distance matrix
    with open(f'{processed_dir}/{intersection_name}_optics_vehicle_paths_optimized_params_{id_init_params_intersection_optics_model}.json') as f:
        intersection_optics_optimized_params = json.load(f)

    intersection_dtw_matrix_key, intersection_optics_optimized_params = get_dtw_optics_params(
        intersection_optics_optimized_params, intersection_eval_metric
    )
    intersection_dtw_distance_matrix = intersection_dtw_distance_matrix_dict[intersection_dtw_matrix_key]

    # Fit optics model for the full dataset
    optics_intersection = OPTICS(**intersection_optics_optimized_params, metric='precomputed').fit(intersection_dtw_distance_matrix)

    # Load in-cluster optics parameters for the specific cluster. The file name is
    # built from the function's own arguments rather than from notebook globals, so
    # the call site fully determines which optimization result is used.
    with open(f'{processed_dir}/within_cluster_clustering_optimization/'
              f'{intersection_name}_optics_optimized_vehicle_paths_{id_init_params_intersection_optics_model}_'
              f'{intersection_eval_metric}_within_cluster_{cluster_id}_optimized_params_{id_init_params_in_cluster_optics}.json') as f:
        in_cluster_optics_optimized_params = json.load(f)

    in_cluster_dtw_matrix_key, in_cluster_optics_optimized_params = get_dtw_optics_params(
        in_cluster_optics_optimized_params, in_cluster_eval_metric
    )
    in_cluster_dtw_distance_matrix = intersection_dtw_distance_matrix_dict[in_cluster_dtw_matrix_key]

    # Boolean mask of the tracks that the intersection-level model put into `cluster_id`
    in_cluster_mask = optics_intersection.labels_ == cluster_id

    # Filter the distance matrix to the cluster members (rows first, then columns)
    filtered_rows = np.array(in_cluster_dtw_distance_matrix)[in_cluster_mask]
    in_cluster_dtw_distance_matrix = filtered_rows[:, in_cluster_mask]

    # Fit optics model for the in-cluster data
    optics_in_cluster = OPTICS(**in_cluster_optics_optimized_params, metric='precomputed').fit(in_cluster_dtw_distance_matrix)

    # Map the positions inside the cluster back to index labels of df_cuid_grouped,
    # one list per sub-cluster
    in_cluster_indices = {
        sub_cluster_id: df_cuid_grouped.index[in_cluster_mask][indices].tolist()
        for sub_cluster_id, indices in get_clusters_from_optics_labels(optics_in_cluster.labels_).items()
    }

    # Keep the original printed output, and additionally hand the result back to the
    # caller so it can actually be used.
    print(in_cluster_indices)
    return in_cluster_indices
