In [None]:
import ast
import re

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import plotly.graph_objects as go
from sklearn.neighbors import KNeighborsClassifier

# Keep only the rows whose pose label mentions at least one of these keywords.
input_keywords = ['handstand']

# Escape every keyword before joining them into one alternation pattern, so
# that regex metacharacters inside a pose name (parentheses, dots, plus signs)
# are matched literally by str.contains instead of being interpreted.
keyword_pattern = '|'.join(re.escape(keyword) for keyword in input_keywords)

results_sknn = pd.read_csv('results/results_selected_features_with_knn.csv')
results_wknn = pd.read_csv('results/results_weighted_knn.csv')

# na=False makes rows with a missing label count as "no match".
filtered_data1 = results_wknn[results_wknn['label'].str.contains(keyword_pattern, na=False)]
filtered_data2 = results_sknn[results_sknn['label'].str.contains(keyword_pattern, na=False)]

# From here on both result tables refer to the keyword-filtered rows only.
results_wknn = filtered_data1
results_sknn = filtered_data2

# Undirected graph: one node per pose label in the filtered table, plus one
# weighted edge for every neighbouring class listed in the `lda_uppr_*` columns.
G = nx.Graph()

for _, record in results_sknn.iterrows():
    source_pose = record['label']
    G.add_node(source_pose)

    # Both columns are stored as Python-literal strings; parse them back into
    # sequences that are walked in parallel by position.
    neighbour_classes = ast.literal_eval(record['lda_uppr_classes'])
    neighbour_distances = ast.literal_eval(record['lda_uppr_distances'])

    for idx, neighbour in enumerate(neighbour_classes):
        gap = neighbour_distances[idx]
        # Link only distinct poses that are separated by a non-zero distance.
        if neighbour != source_pose and gap != 0:
            G.add_edge(source_pose, neighbour, weight=gap)


# Compute 2-D coordinates for every node of the graph built above. The layout
# must be derived from this G: each node and edge endpoint is looked up in
# `pos` below, so a layout left over from another graph would be missing keys.
# NOTE(review): a spring layout is used here; swap in another networkx layout
# if a different arrangement was intended. The fixed seed keeps the picture
# identical across re-runs.
pos = nx.spring_layout(G, seed=42)

# Edge trace: all edges are drawn as one Scatter line. The None entries break
# the line between consecutive edges so they are not joined together.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

# Node trace: position, label and number of connections (degree) per node.
# The degree drives the marker colour, which is what the "Node Connections"
# colour bar reports.
node_x = []
node_y = []
text = []
node_degrees = []
for node in G.nodes():
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)
    text.append(node)
    node_degrees.append(G.degree(node))

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    hoverinfo='text',
    text=text,
    marker=dict(
        showscale=True,
        colorscale='Rainbow',
        reversescale=True,
        color=node_degrees,
        size=10,
        colorbar=dict(
            thickness=15,
            # `title.side` replaces the deprecated `titleside` property, which
            # recent Plotly releases no longer accept.
            title=dict(text='Node Connections', side='right'),
            xanchor='left'
        ),
        line_width=2))

fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                showlegend=False,
                hovermode='closest',
                height=1000,
                width=1000,
                # Leave room at the top for the figure title set below.
                margin=dict(b=0, l=0, r=0, t=40),
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                )

# NOTE(review): the title mentions SHAP distances while the edges above are
# built from the `lda_uppr_*` columns — confirm which wording is intended.
fig.update_layout(title_text="Web of Closest Pose Classes Based on SHAP Distances", title_font_size=16)
fig.show()

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from scipy.spatial.distance import pdist, squareform

# Load the saved LDA result tables (one CSV per feature-selection variant,
# going by the file names). Only `lda_logr` is used further down in this cell;
# its 'LDA1' column is treated as the single feature and 'label' as the class.
lda_logr = pd.read_csv('results/lda_logistic_regression_2024-03-11_23-09-40.csv')
lda_gini = pd.read_csv('results/lda_gini_impurity_2024-03-11_23-09-41.csv')
lda_perm = pd.read_csv('results/lda_permutation_2024-03-11_23-09-41.csv')
lda_shap = pd.read_csv('results/lda_shap_2024-03-11_23-09-42.csv')
lda_uppr = pd.read_csv('results/lda_upper_body_2024-03-11_23-29-18.csv')
lda_lowr = pd.read_csv('results/lda_lower_body_2024-03-11_23-29-18.csv')
lda_allf = pd.read_csv('results/lda_all_features_2024-03-11_23-31-57.csv')

# Distance-weighted 15-NN classifier on the LDA1 feature.
lda_logr_features = lda_logr[['LDA1']]
knn1 = KNeighborsClassifier(n_neighbors=15, metric='euclidean', weights='distance')
knn1.fit(lda_logr_features, lda_logr['label'])

# Query with the same DataFrame the model was fitted on. Passing `.values`
# here would drop the column name and make scikit-learn warn that the input
# has no valid feature names although the estimator was fitted with them.
distances1, indices1 = knn1.kneighbors(lda_logr_features)


# Initialize Nearest Neighbors for distance calculation.
# Plain arrays are used for both fit and query so scikit-learn does not warn
# about feature names being present at fit time but absent at query time.
lda1_values = lda_logr[['LDA1']].to_numpy()
sample_labels = lda_logr['label'].to_numpy()

# Never ask for more neighbours than there are fitted samples.
n_neighbors = min(15, len(lda1_values))
nn = NearestNeighbors(n_neighbors=n_neighbors, metric='euclidean')
nn.fit(lda1_values)

# Calculate the centroid (mean LDA1 value) for each class
class_labels = lda_logr['label'].unique()
centroids = np.array([lda_logr[lda_logr['label'] == label]['LDA1'].mean() for label in class_labels]).reshape(-1, 1)

# Find the nearest neighbors to each class centroid. Row i of both arrays
# belongs to class_labels[i]; `indices` points back into the fitted samples.
distances, indices = nn.kneighbors(centroids)

# Class label of every returned neighbour, same shape as `distances`.
neighbor_labels = sample_labels[indices]

# Aggregate the distances into a class-to-class matrix: entry (i, j) is the
# mean distance from class i's centroid to those of its nearest neighbours
# that belong to class j.
class_distance_matrix = np.zeros((len(class_labels), len(class_labels)))

for i, label_i in enumerate(class_labels):
    for j, label_j in enumerate(class_labels):
        if label_i == label_j:
            # Set distance to maximum value for better visualization since we don't compare the class to itself
            class_distance_matrix[i, j] = distances.max()
        else:
            # The mask is over the neighbours of centroid i, not over all
            # samples, so it has the same length as the distance row it filters.
            from_class_j = neighbor_labels[i] == label_j
            if from_class_j.any():
                class_distance_matrix[i, j] = distances[i][from_class_j].mean()
            else:
                # No sample of class j is among the nearest neighbours of
                # centroid i, so there is no distance to average.
                class_distance_matrix[i, j] = np.nan

# Draw the class-to-class distance matrix as an annotated heatmap, using the
# class labels as tick labels on both axes.
heatmap_ax = sns.heatmap(
    class_distance_matrix,
    annot=True,
    fmt=".2f",
    xticklabels=class_labels,
    yticklabels=class_labels,
)
heatmap_ax.set_xlabel('Class')
heatmap_ax.set_ylabel('Class')
heatmap_ax.set_title('Average Nearest Neighbor Distance Between Classes')
plt.show()