In [2]:
import os
import numpy as np
import pandas as pd
from itertools import combinations
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.metrics import (
    adjusted_rand_score, adjusted_mutual_info_score,
    fowlkes_mallows_score, completeness_score
)
from sklearn.cluster import SpectralClustering

# ======================= CARGA DE DATOS =======================
df = pd.read_csv(r"resultados_pipeline_20250425_070339\spike_trains.csv")

neurons = df.drop(columns=['filter']).values
filtros = df["filter"].values
true_labels = filtros 
num_neurons = len(neurons)

# ======================= PARÁMETROS =======================
N_values = [25] #Cambiar la cantidad de senos y cosenos
output_dir = "matrices_distancia_optima"
os.makedirs(output_dir, exist_ok=True)
LINKAGE_METHODS = ["single", "complete", "average", "ward"]

# ======================= FUNCIONES AUXILIARES =======================
def compute_phi_psi(spike_train, N):
    spike_train = spike_train[~np.isnan(spike_train)]/21.5
    phi = np.array([np.sum(np.sin(np.pi * spike_train * j)) for j in range(1, N+1)])
    psi = np.array([np.sum(np.cos(np.pi * spike_train * k)) for k in range(1, N+1)])
    

    return phi, psi



def heatmap_distancias(dist_matrix, title="Heatmap de Distancias", save_path=None):
    if not isinstance(dist_matrix, pd.DataFrame):
        dist_df = pd.DataFrame(dist_matrix)
    else:
        dist_df = dist_matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(dist_df, cmap="viridis", square=True)
    plt.title(title)
    plt.xlabel("Spike Train")
    plt.ylabel("Spike Train")
    plt.tight_layout()
    if save_path:
        plt.savefig(save_path)
        print(f"✅ Heatmap guardado en: {save_path}")
    plt.close()

def heatmap_filtro_vs_cluster(filtros, clusters, method, distancia_label, ax):
    df_temp = pd.DataFrame({
        "Filtro": filtros,
        "Cluster": clusters
    })
    crosstab = pd.crosstab(df_temp["Filtro"], df_temp["Cluster"])
    sns.heatmap(crosstab, annot=True, fmt="d", cmap="YlGnBu", ax=ax)
    ax.set_title(f"{method} ({distancia_label})")
    ax.set_xlabel("Cluster")
    ax.set_ylabel("Filtro")

def evaluar_clusterings(dist_matrix, distancia_label, filtros, true_labels, LINKAGE_METHODS, N_CLUSTERS=8):
    results = []
    fig, axs = plt.subplots(2, 3, figsize=(18, 10))
    axs = axs.flatten()
    condensed = pdist(dist_matrix.values, metric="sqeuclidean")

    for idx, method in enumerate(LINKAGE_METHODS):
        try:
            linked = linkage(condensed, method=method)
            clusters = fcluster(linked, t=N_CLUSTERS, criterion="maxclust")
            ari = adjusted_rand_score(true_labels, clusters)
            ami = adjusted_mutual_info_score(true_labels, clusters)
            fmi = fowlkes_mallows_score(true_labels, clusters)
            completeness = completeness_score(true_labels, clusters)
            results.append({
                "Distance": distancia_label,
                "Method": method,
                "ARI": ari,
                "AMI": ami,
                "FMI": fmi,
                "Completeness": completeness
            })
            heatmap_filtro_vs_cluster(filtros, clusters, method, distancia_label, axs[idx])
        except Exception as e:
            axs[idx].set_title(f"{method} - error")
            axs[idx].axis("off")

    plt.tight_layout()
    heatmap_file = os.path.join(output_dir, f"clusters_vs_filtro_{distancia_label}.png")
    plt.savefig(heatmap_file)
    plt.close()
    return pd.DataFrame(results)

def clustering_espectral(dist_matrix, filtros, true_labels, distancia_label):
    results = []
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sigma = np.median(dist_matrix.values)
        affinity = np.exp(-dist_matrix.values ** 2 / (2 * sigma ** 2))
        spectral = SpectralClustering(n_clusters=8, affinity='precomputed', assign_labels='kmeans', random_state=0)
        clusters = spectral.fit_predict(affinity)
        ari = adjusted_rand_score(true_labels, clusters)
        ami = adjusted_mutual_info_score(true_labels, clusters)
        fmi = fowlkes_mallows_score(true_labels, clusters)
        completeness = completeness_score(true_labels, clusters)
        results.append({
            "Distance": distancia_label,
            "Method": "Spectral",
            "ARI": ari,
            "AMI": ami,
            "FMI": fmi,
            "Completeness": completeness
        })
        heatmap_filtro_vs_cluster(filtros, clusters, "Spectral", distancia_label, ax)
        heatmap_file = os.path.join(output_dir, f"clusters_vs_filtro_Spectral_{distancia_label}.png")
        plt.tight_layout()
        plt.savefig(heatmap_file)
        plt.close()
    except Exception as e:
        print(f"Error en clustering espectral para {distancia_label}: {e}")
    return pd.DataFrame(results)

# ======================= MAIN =======================
all_results = []

for N in tqdm(N_values, desc="Calculando matrices y evaluando clustering"):
    distancia_label = f"N={N}"
    
    # Calcular Phi y Psi
    phi_psi = [compute_phi_psi(neurons[n], N) for n in range(num_neurons)]
    
    # Matriz de distancias
    dist_matrix = np.zeros((num_neurons, num_neurons))
    for i, j in combinations(range(num_neurons), 2):
        phi_i, psi_i = phi_psi[i]
        phi_j, psi_j = phi_psi[j]
        delta_phi = phi_i - phi_j
        delta_psi = psi_i - psi_j
        distance = np.sum(delta_phi ** 2) + np.sum(delta_psi ** 2)
        dist_matrix[i, j] = distance
        dist_matrix[j, i] = distance

    # Guardar CSV
    df_matrix = pd.DataFrame(dist_matrix)
    csv_path = os.path.join(output_dir, f"dist_{distancia_label}.csv")
    df_matrix.to_csv(csv_path, index=False)

    # Guardar heatmap
    heatmap_path = os.path.join(output_dir, f"heatmap_{distancia_label}.png")
    heatmap_distancias(df_matrix, title=f"Heatmap N={N}", save_path=heatmap_path)

    # Evaluar clustering jerárquico
    df_hier = evaluar_clusterings(df_matrix, distancia_label, filtros, true_labels, LINKAGE_METHODS)
    df_spec = clustering_espectral(df_matrix, filtros, true_labels, distancia_label)
    
    all_results.append(df_hier)
    all_results.append(df_spec)

# ======================= GUARDAR RESULTADOS =======================
df_total_resultados = pd.concat(all_results, ignore_index=True)


df_total_resultados

Calculando matrices y evaluando clustering:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Heatmap guardado en: matrices_distancia_optima\heatmap_N=25.png


Calculando matrices y evaluando clustering: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Unnamed: 0,Distance,Method,ARI,AMI,FMI,Completeness
0,N=25,single,0.379822,0.656737,0.571093,0.967601
1,N=25,complete,0.537315,0.722909,0.607299,0.761617
2,N=25,average,0.651615,0.814698,0.726964,0.91885
3,N=25,ward,0.678811,0.802616,0.722356,0.819479
4,N=25,Spectral,0.79847,0.894727,0.828759,0.915412
