In [None]:
import os
import sys
import matplotlib.pyplot as plt
# Resolve the project root (two levels above the working directory) so that
# the `src` package becomes importable. Tweak '../..' if the notebook is moved.
current_dir = os.getcwd()
parent_parent_dir = os.path.abspath(os.path.join(current_dir, '../..'))

# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if parent_parent_dir not in sys.path:
    sys.path.append(parent_parent_dir)
print(f'Appending to sys.path: {parent_parent_dir}')

In [None]:
import json
import pandas as pd
import numpy as np
import ast
from sklearn.cluster import OPTICS
# %matplotlib qt5
%matplotlib inline

In [None]:
from src.features.get_x_y_tuple_list import get_x_y_tuple_list
from src.visualization.render_vehicle_track_cluster_in_notebook import render_vehicle_track_cluster_in_notebook
from src.models.optics.get_clusters_from_optics_labels import get_clusters_from_optics_labels
from src.visualization.plot_vehicle_tracks_in_notebook import plot_vehicle_tracks_in_notebook


def plot_within_cluster_optics(intersection_name, params_uid, cluster_label, eval_metric, within_cluster_params_uid=None):
    """Re-cluster one OPTICS cluster of an intersection and plot the result.

    The intersection-level OPTICS model is rebuilt from its saved optimized
    parameters. The precomputed DTW distance matrix is then restricted to the
    tracks labelled ``cluster_label`` and a second OPTICS model is fitted on
    that sub-matrix with the saved within-cluster parameters. When no
    within-cluster parameter file exists, the whole cluster is drawn instead.

    All files are read below ``{parent_parent_dir}/data/processed``;
    ``parent_parent_dir`` is a notebook-level global.

    Args:
        intersection_name: Prefix of the data files, e.g. ``'k729_2022'``.
        params_uid: ID suffix of the intersection-level optimized-params file,
            or ``None`` to read the un-suffixed file.
        cluster_label: Label, in the intersection-level model, of the cluster
            to re-cluster.
        eval_metric: Key selecting the parameter set inside both params files.
        within_cluster_params_uid: ID suffix of the within-cluster params file.

    Returns:
        None. The plot is drawn on a newly created matplotlib figure.
    """
    print(f'------------------------within CLUSTER #{cluster_label} for INTERSECTION {intersection_name}------------------------')

    processed_dir = f'{parent_parent_dir}/data/processed'

    def optics_kwargs_from(params):
        """Return (dtw_key, OPTICS kwargs), leaving out parameters stored as None."""
        candidates = {
            'max_eps': params['epsilon'],
            'min_samples': params['min_samples'],
            'cluster_method': params['cluster_method'],
            'xi': params['xi'],
        }
        return params['dtw_key'], {k: v for k, v in candidates.items() if v is not None}

    # ----------------- DATA -----------------
    data_path = f'{processed_dir}/{intersection_name}_cuid.csv'
    df_cuid = pd.read_csv(data_path)
    df_cuid_grouped = pd.read_csv(data_path.replace('.csv', '_grouped.csv'))
    # the x / y columns are stored as stringified Python literals
    df_cuid_grouped['x'] = df_cuid_grouped['x'].apply(ast.literal_eval)
    df_cuid_grouped['y'] = df_cuid_grouped['y'].apply(ast.literal_eval)
    # NOTE(review): the returned list is not used below; the call is kept in
    # case the helper touches df_cuid_grouped -- confirm and drop if it is pure.
    list_x_y_tuples = get_x_y_tuple_list(df_cuid_grouped, ['x','y'])

    # ----------------- UNDERLYING OPTICS -----------------
    if params_uid is not None:
        opt_params_path = f'{processed_dir}/{intersection_name}_optics_vehicle_paths_optimized_params_{params_uid}.json'
    else:
        opt_params_path = f'{processed_dir}/{intersection_name}_optics_vehicle_paths_optimized_params.json'
    with open(opt_params_path, 'r') as f:
        optics_params = json.load(f)
    dtw_matrix_key, underlying_kwargs = optics_kwargs_from(optics_params[eval_metric])

    # all precomputed DTW distance matrices of the intersection, keyed by dtw key
    dtw_distance_matrix_path = f'{processed_dir}/{intersection_name}_diff_itakura_slope_dtw_matrices.json'
    with open(dtw_distance_matrix_path) as f:
        dtw_distance_matrices_dict = json.load(f)

    optics_model = OPTICS(metric='precomputed', **underlying_kwargs).fit(dtw_distance_matrices_dict[dtw_matrix_key])
    labels = get_clusters_from_optics_labels(optics_model.labels_)

    # ----------------- WITHIN CLUSTER OPTICS -----------------
    path_optimized_in_cluster_optics_params = f'{processed_dir}/within_cluster_clustering_optimization/{intersection_name}_optics_optimized_vehicle_paths_{params_uid}_{eval_metric}_within_cluster_{cluster_label}_optimized_params_{within_cluster_params_uid}.json'

    try:
        with open(path_optimized_in_cluster_optics_params, 'r') as f:
            within_cluster_optics_params = json.load(f)
    except FileNotFoundError:
        # no optimization stored for this cluster: draw the undivided cluster
        print(f'File not found: {path_optimized_in_cluster_optics_params}')
        _, ax = plt.subplots(1, 1, figsize=(10,10))
        render_vehicle_track_cluster_in_notebook(ax, df_cuid, df_cuid_grouped, {'0': labels[cluster_label]})
        plot_vehicle_tracks_in_notebook(ax, df_cuid, df_cuid_grouped, color='gray', alpha=0.5, title=f'Intersection: {intersection_name} \n No within cluster optimization found for cluster #{cluster_label}')
        return

    # Same None-filtering as for the underlying model, so that a parameter
    # stored as null falls back to the OPTICS default instead of being passed
    # on as None. xi keeps its explicit 0.05 fallback.
    within_cluster_dtw_key, within_cluster_kwargs = optics_kwargs_from(within_cluster_optics_params[eval_metric])
    within_cluster_kwargs.setdefault('xi', 0.05)

    # boolean mask of the tracks that belong to the requested cluster
    in_cluster = optics_model.labels_ == cluster_label
    df_grouped_within_cluster_filtered = df_cuid_grouped.loc[in_cluster]

    # keep only rows and columns of the cluster members in the distance matrix
    full_matrix = np.array(dtw_distance_matrices_dict[within_cluster_dtw_key])
    within_cluster_dtw_distance_matrix = full_matrix[in_cluster][:, in_cluster]

    optics_within_cluster = OPTICS(metric='precomputed', **within_cluster_kwargs).fit(within_cluster_dtw_distance_matrix)
    labels_within_cluster = get_clusters_from_optics_labels(optics_within_cluster.labels_)

    # ----------------- PLOT -----------------
    fig, axs = plt.subplots(1, 1, figsize=(10,10))
    fig.suptitle(f'Intersection: {intersection_name}')
    title_str = '\n'.join([
        f'OPTICS within Cluster #{cluster_label}',
        f'Optimization init ID: {params_uid if params_uid is not None else "n/a"}',
        f'Optimization within cluster ID: {within_cluster_params_uid if within_cluster_params_uid is not None else "n/a"}',
    ])
    render_vehicle_track_cluster_in_notebook(axs, df_cuid, df_grouped_within_cluster_filtered, labels_within_cluster)
    # title passed by keyword, as in the missing-file branch above
    plot_vehicle_tracks_in_notebook(axs, df_cuid, df_cuid_grouped, title=title_str, color='gray', alpha=0.5)

In [None]:
def create_three_letter_uid(max_eps_range, min_samples_range, xi_range):
    """
    Create a 3-letter ID from the optimization search ranges.

    Each letter is ``'A'`` shifted by a value modulo 26:
      1. the largest ``max_eps`` value (truncated to an int),
      2. the smallest ``min_samples`` value (truncated to an int),
      3. the smallest ``xi`` value expressed in percent (rounded).

    The ID is a short label, not a true hash: ranges whose values differ by a
    multiple of 26 map to the same letters.

    Args:
        max_eps_range: Non-empty iterable of candidate ``max_eps`` values.
        min_samples_range: Non-empty iterable of candidate ``min_samples`` values.
        xi_range: Non-empty iterable of candidate ``xi`` values.

    Returns:
        str: Three upper-case letters.

    Raises:
        ValueError: If any of the ranges is empty (raised by ``max`` / ``min``).
    """
    def to_letter(value):
        # map an integer-like value onto 'A'..'Z'
        return chr(ord('A') + int(value) % 26)

    max_eps_id = to_letter(max(max_eps_range))
    min_samples_id = to_letter(min(min_samples_range))
    # round() instead of plain truncation: e.g. 0.29 * 100 evaluates to
    # 28.999999999999996, which int() alone would turn into 28.
    xi_id = to_letter(round(min(xi_range) * 100))

    return f"{max_eps_id}{min_samples_id}{xi_id}"

In [None]:
from src.data.load_dtw_matrices_from_json import load_dtw_matrices_from_json
from src.models.optics.optimize_optics_for_precomputed_dtw import optimize_optics_for_precomputed_dtw
from src.data.save_optimization_parameters_in_json_file import save_optimization_parameters_in_json_file


def launch_within_cluster_optimization(intersection_name, params_uid, cluster_label, eval_metric='silhouette', within_cluster_params_uid=None, optimization_params=None):
    """Optimize OPTICS parameters inside one cluster of an intersection.

    The intersection-level OPTICS model is rebuilt from its saved optimized
    parameters, every precomputed DTW distance matrix is restricted to the
    tracks labelled ``cluster_label``, and
    ``optimize_optics_for_precomputed_dtw`` is run on those sub-matrices. The
    result is printed and written as JSON below
    ``{parent_parent_dir}/data/processed/within_cluster_clustering_optimization``
    (``parent_parent_dir`` is a notebook-level global).

    Args:
        intersection_name: Prefix of the data files, e.g. ``'k729_2022'``.
        params_uid: ID suffix of the intersection-level optimized-params file,
            or ``None`` to read the un-suffixed file.
        cluster_label: Label, in the intersection-level model, of the cluster
            to optimize.
        eval_metric: Key selecting the parameter set inside the params file.
        within_cluster_params_uid: ID of a previously saved search-range file.
            When given, the ranges are loaded from that file and
            ``optimization_params`` is ignored.
        optimization_params: Dict of search ranges (``max_eps_range``,
            ``min_samples_range``, ``xis`` and optionally ``cluster_methods``).
            Required when ``within_cluster_params_uid`` is ``None``. The dict
            passed in is not modified.

    Returns:
        None. Also returns early (after printing a message) when the cluster
        has fewer than two samples or the optimizer raises ``ValueError``.

    Raises:
        ValueError: If neither ``within_cluster_params_uid`` nor
            ``optimization_params`` is given.
    """
    if within_cluster_params_uid is None and optimization_params is None:
        raise ValueError('Either within_cluster_params_uid or optimization_params must be provided.')

    processed_dir = f'{parent_parent_dir}/data/processed'
    within_cluster_dir = f'{processed_dir}/within_cluster_clustering_optimization'

    # ------------------- build underlying optics model to get all clusters -------------------
    def get_optics_params(optics_params, metric):
        """Return (dtw_key, OPTICS kwargs) for `metric`, dropping None values."""
        selected = optics_params[metric]
        kwargs = {
            'max_eps': selected['epsilon'],
            'min_samples': selected['min_samples'],
            'cluster_method': selected['cluster_method'],
            'xi': selected['xi'],
        }
        return selected['dtw_key'], {k: v for k, v in kwargs.items() if v is not None}

    # pick the optimized-params file according to the optimization init uid
    if params_uid is not None:
        opt_params_path = f'{processed_dir}/{intersection_name}_optics_vehicle_paths_optimized_params_{params_uid}.json'
    else:
        opt_params_path = f'{processed_dir}/{intersection_name}_optics_vehicle_paths_optimized_params.json'
    with open(opt_params_path, 'r') as f:
        intersection_optics_params_dict = json.load(f)
    dtw_matrix_key, kwargs_optimized_params = get_optics_params(intersection_optics_params_dict, eval_metric)

    # load all dtw distance matrices of the intersection
    dtw_distance_matrix_path = f'{processed_dir}/{intersection_name}_diff_itakura_slope_dtw_matrices.json'
    with open(dtw_distance_matrix_path) as f:
        dtw_distance_matrix_dict = json.load(f)

    optics_model = OPTICS(metric='precomputed', **kwargs_optimized_params).fit(dtw_distance_matrix_dict[dtw_matrix_key])

    # ------------------- build small model for specific cluster -------------------
    # boolean mask of the tracks that belong to the requested cluster
    in_cluster = optics_model.labels_ == cluster_label

    # restrict every dtw matrix to rows and columns of the cluster members
    dtw_matrices_dict_filtered = {
        key: np.array(dtw_matrix)[in_cluster][:, in_cluster]
        for key, dtw_matrix in dtw_distance_matrix_dict.items()
    }

    if within_cluster_params_uid is not None:
        # reuse the search ranges stored under the given uid
        path_optimized_in_cluster_optics_params = f'{within_cluster_dir}/within_cluster_optimization_parameters_{within_cluster_params_uid}.json'
        with open(path_optimized_in_cluster_optics_params, 'r') as f:
            optimized_in_cluster_optics_params = json.load(f)
        optimization_params = {k: v for k, v in optimized_in_cluster_optics_params.items() if k in ['max_eps_range', 'min_samples_range', 'cluster_methods', 'xis']}
        unique_id = within_cluster_params_uid
    else:
        # Work on a shallow copy: min_samples_range may be replaced below and
        # the caller's dict (often shared between several calls) must stay intact.
        optimization_params = dict(optimization_params)
        unique_id = create_three_letter_uid(optimization_params['max_eps_range'],
                                            optimization_params['min_samples_range'],
                                            optimization_params['xis'])
        # NOTE(review): this path has no '.json' suffix although the loader
        # above reads '..._{uid}.json' -- confirm that
        # save_optimization_parameters_in_json_file appends the extension.
        save_optimization_parameters_in_json_file(f'{within_cluster_dir}/within_cluster_optimization_parameters_{unique_id}', **optimization_params)

    # a cluster with fewer than two samples cannot be clustered any further
    num_samples_in_cluster = np.sum(in_cluster)
    if num_samples_in_cluster < 2:
        print(f'Cluster #{cluster_label} has less than 2 samples. Skipping optimization.')
        return

    # min_samples must not exceed the number of samples in the cluster
    if max(optimization_params['min_samples_range']) > num_samples_in_cluster:
        optimization_params['min_samples_range'] = range(2, num_samples_in_cluster+1)

    # optimize optics on the precomputed dtw matrices of this cluster
    try:
        optimization_results_optics_within_cluster = optimize_optics_for_precomputed_dtw(dtw_matrices_dict_filtered,
                                                                                        **optimization_params,
                                                                                        n_jobs=-1)
    except ValueError as e:
        print(f'Error: {e}')
        return
    print(optimization_results_optics_within_cluster)

    # annotate the results with their provenance and store them
    unique_filename = f'{intersection_name}_optics_optimized_vehicle_paths_{params_uid}_{eval_metric}_within_cluster_{cluster_label}_optimized_params_{unique_id}'
    optimization_results_optics_within_cluster['unique_id'] = unique_id
    optimization_results_optics_within_cluster['underlying_optics_model'] = {'cluster_label': cluster_label, 'params_uid': params_uid, 'eval_metric': eval_metric}
    save_optimization_parameters_in_json_file(f'{within_cluster_dir}/{unique_filename}.json', **optimization_results_optics_within_cluster)

## run 1

In [None]:
# Search ranges handed to the within-cluster OPTICS optimization for k729_2022.
optimization_params_k729_2022 = dict(
    max_eps_range=np.arange(1, 100, 5),
    min_samples_range=np.arange(3, 11, 1),
    xis=np.arange(0.01, 0.1, 0.01),
)

In [None]:
# Search ranges handed to the within-cluster OPTICS optimization for the k733 intersections.
optimization_params_k733 = dict(
    max_eps_range=np.arange(1, 100, 5),
    min_samples_range=np.arange(3, 11, 1),
    xis=np.arange(0.01, 0.1, 0.01),
)

In [None]:
# Settings for intersection k729_2022.
intersection_name_k729_2022 = 'k729_2022'
eval_metric_k729_2022 = 'silhouette'          # key selecting the parameter set in the params JSON
params_uid_k729_2022 = 'WDB'                  # ID suffix of the intersection-level params file
within_cluster_params_uid_k729_2022 = 'SDB'   # ID suffix of the within-cluster params file
cluster_label_k729_2022 = 0                   # cluster of the intersection-level model to look into

In [None]:
# Settings for intersection k733_2020.
intersection_name_k733_2020 = 'k733_2020'
eval_metric_k733_2020 = 'silhouette'          # key selecting the parameter set in the params JSON
params_uid_k733_2020 = None                   # None selects the un-suffixed intersection-level params file
within_cluster_params_uid_k733_2020 = 'SDB'   # ID suffix of the within-cluster params file
cluster_label_k733_2020 = 2                   # cluster of the intersection-level model to look into

In [None]:
# Settings for intersection k733_2018.
intersection_name_k733_2018 = 'k733_2018'
eval_metric_k733_2018 = 'silhouette'          # key selecting the parameter set in the params JSON
params_uid_k733_2018 = None                   # None selects the un-suffixed intersection-level params file
within_cluster_params_uid_k733_2018 = 'SDB'   # ID suffix of the within-cluster params file
cluster_label_k733_2018 = 2                   # cluster of the intersection-level model to look into

In [None]:
# launch_within_cluster_optimization(intersection_name_k733_2018, params_uid_k733_2018, cluster_label_k733_2018, eval_metric_k733_2018, optimization_params=optimization_params_k733)
# launch_within_cluster_optimization(intersection_name_k733_2020, params_uid_k733_2020, cluster_label_k733_2020, eval_metric_k733_2020, optimization_params=optimization_params_k733)
# launch_within_cluster_optimization(intersection_name_k729_2022, params_uid_k729_2022, cluster_label_k729_2022, eval_metric_k729_2022, optimization_params=optimization_params_k729_2022)

In [None]:
# plot_within_cluster_optics(intersection_name_k733_2018, params_uid_k733_2018, cluster_label_k733_2018, eval_metric_k733_2018, within_cluster_params_uid_k733_2018)
# plot_within_cluster_optics(intersection_name_k733_2020, params_uid_k733_2020, cluster_label_k733_2020, eval_metric_k733_2020, within_cluster_params_uid_k733_2020)
# plot_within_cluster_optics(intersection_name_k729_2022, params_uid_k729_2022, cluster_label_k729_2022, eval_metric_k729_2022, within_cluster_params_uid_k729_2022)

## Run 2: different cluster for k729, different optimization parameters for k733

In [None]:
# Search ranges handed to the within-cluster OPTICS optimization for k729_2022.
optimization_params_k729_2022 = dict(
    max_eps_range=np.arange(1, 100, 5),
    min_samples_range=np.arange(3, 11, 1),
    xis=np.arange(0.01, 0.1, 0.01),
)

In [None]:
# Search ranges for the k733 intersections: larger max_eps values and
# min_samples starting at 2.
optimization_params_k733 = dict(
    max_eps_range=np.arange(50, 200, 5),
    min_samples_range=np.arange(2, 11, 1),
    xis=np.arange(0.01, 0.1, 0.01),
)

In [None]:
# Settings for intersection k733_2018.
intersection_name_k733_2018 = 'k733_2018'
eval_metric_k733_2018 = 'silhouette'          # key selecting the parameter set in the params JSON
params_uid_k733_2018 = None                   # None selects the un-suffixed intersection-level params file
within_cluster_params_uid_k733_2018 = 'NCB'   # ID suffix of the within-cluster params file
cluster_label_k733_2018 = 2                   # cluster of the intersection-level model to look into

In [None]:
# Settings for intersection k733_2020.
intersection_name_k733_2020 = 'k733_2020'
eval_metric_k733_2020 = 'silhouette'          # key selecting the parameter set in the params JSON
params_uid_k733_2020 = None                   # None selects the un-suffixed intersection-level params file
within_cluster_params_uid_k733_2020 = 'NCB'   # ID suffix of the within-cluster params file
cluster_label_k733_2020 = 2                   # cluster of the intersection-level model to look into

In [None]:
# Settings for intersection k729_2022.
intersection_name_k729_2022 = 'k729_2022'
eval_metric_k729_2022 = 'silhouette'          # key selecting the parameter set in the params JSON
params_uid_k729_2022 = 'WDB'                  # ID suffix of the intersection-level params file
within_cluster_params_uid_k729_2022 = 'SDB'   # ID suffix of the within-cluster params file
cluster_label_k729_2022 = 2                   # cluster of the intersection-level model to look into

In [None]:
# launch_within_cluster_optimization(intersection_name_k733_2018, params_uid_k733_2018, cluster_label_k733_2018, eval_metric_k733_2018, optimization_params=optimization_params_k733)
# launch_within_cluster_optimization(intersection_name_k733_2020, params_uid_k733_2020, cluster_label_k733_2020, eval_metric_k733_2020, optimization_params=optimization_params_k733)
# launch_within_cluster_optimization(intersection_name_k729_2022, params_uid_k729_2022, cluster_label_k729_2022, eval_metric_k729_2022, optimization_params=optimization_params_k729_2022)

In [None]:
# plot_within_cluster_optics(intersection_name_k733_2018, params_uid_k733_2018, cluster_label_k733_2018, eval_metric_k733_2018, within_cluster_params_uid_k733_2018)
# plot_within_cluster_optics(intersection_name_k733_2020, params_uid_k733_2020, cluster_label_k733_2020, eval_metric_k733_2020, within_cluster_params_uid_k733_2020)
# plot_within_cluster_optics(intersection_name_k729_2022, params_uid_k729_2022, cluster_label_k729_2022, eval_metric_k729_2022, within_cluster_params_uid_k729_2022)

## Optimize and plot for each cluster

#### k729_2022

In [None]:
# Settings for the per-cluster pass over intersection k729_2022.
intersection_name_k729_2022 = 'k729_2022'
eval_metric_k729_2022 = 'silhouette'                 # key selecting the parameter set in the params JSON
intersection_optics_params_uid_k729_2022 = 'WDB'     # ID suffix of the intersection-level params file
within_cluster_optimization_init_params_id = 'SDB'   # ID of the within-cluster search-range / params files

In [None]:
# # k729_2022: 10 clusters
# for cluster_label in range(0,10):
#     launch_within_cluster_optimization(intersection_name_k729_2022, intersection_optics_params_uid_k729_2022, cluster_label, eval_metric_k729_2022, within_cluster_params_uid=within_cluster_optimization_init_params_id)    

In [None]:
# k729_2022: plot the within-cluster OPTICS result for cluster labels 0-9.
# The IDs come from this section's own settings cell, so the loop does not
# depend on variables left over from earlier cells.
for cluster_label in range(0, 10):
    plot_within_cluster_optics(
        intersection_name_k729_2022,
        intersection_optics_params_uid_k729_2022,
        cluster_label,
        eval_metric_k729_2022,
        within_cluster_optimization_init_params_id,
    )

#### k733_2018 + k733_2020

In [None]:
# Settings for the per-cluster pass over intersection k733_2018.
intersection_name_k733_2018 = 'k733_2018'
eval_metric_k733_2018 = 'silhouette'                           # key selecting the parameter set in the params JSON
intersection_optics_params_uid_k733_2018 = None                # None selects the un-suffixed intersection-level params file
within_cluster_optimization_init_params_id_k733_2018 = 'NCB'   # ID suffix of the within-cluster params file

In [None]:
# Settings for the per-cluster pass over intersection k733_2020.
intersection_name_k733_2020 = 'k733_2020'
eval_metric_k733_2020 = 'silhouette'                           # key selecting the parameter set in the params JSON
intersection_optics_params_uid_k733_2020 = None                # None selects the un-suffixed intersection-level params file
within_cluster_optimization_init_params_id_k733_2020 = 'NCB'   # ID suffix of the within-cluster params file

In [None]:
# k733_2020: plot the within-cluster OPTICS result for cluster labels 0-4.
for cluster_label in range(5):
    plot_within_cluster_optics(
        intersection_name=intersection_name_k733_2020,
        params_uid=intersection_optics_params_uid_k733_2020,
        cluster_label=cluster_label,
        eval_metric=eval_metric_k733_2020,
        within_cluster_params_uid=within_cluster_optimization_init_params_id_k733_2020,
    )

In [None]:
# k733_2018: plot the within-cluster OPTICS result for cluster labels 0-6.
for cluster_label in range(7):
    plot_within_cluster_optics(
        intersection_name=intersection_name_k733_2018,
        params_uid=intersection_optics_params_uid_k733_2018,
        cluster_label=cluster_label,
        eval_metric=eval_metric_k733_2018,
        within_cluster_params_uid=within_cluster_optimization_init_params_id_k733_2018,
    )