In [27]:
from joblib import Parallel, delayed
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score, fowlkes_mallows_score, completeness_score
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [28]:
# === FUNCIONES AUXILIARES ===
def fourier_precomputed(precomputed_item, a, b):
    sin_matrix = precomputed_item['sin']  # shape: (T, M)
    cos_matrix = precomputed_item['cos']  # shape: (T, N)
    
    matrix1 = sin_matrix * a
    matrix2 = cos_matrix * b
    
    total_sum = np.sum(matrix1) + np.sum(matrix2)
    return total_sum


def fourier_dist_precomputed(pre1, pre2, a, b):
    s1 = fourier_precomputed(pre1, a, b)
    s2 = fourier_precomputed(pre2, a, b)
    return np.abs(s1 - s2)

def heatmap_distancias(dist_matrix, title="Heatmap de Distancias", save_path=None):
    """
    Genera un heatmap a partir de la matriz de distancias entre spike trains.
    
    Parámetros:
    - dist_matrix: array o DataFrame (matriz de distancias simétrica).
    - title: título opcional del gráfico.
    - save_path: si se indica, guarda la imagen en esa ruta.
    """
    # Si es array lo convertimos en DataFrame para tener índices bonitos
    if not isinstance(dist_matrix, pd.DataFrame):
        dist_df = pd.DataFrame(dist_matrix)
    else:
        dist_df = dist_matrix

    plt.figure(figsize=(10, 8))
    sns.heatmap(dist_df,  cmap="viridis", square=True)
    plt.title(title)
    plt.xlabel("Spike Train")
    plt.ylabel("Spike Train")
    plt.tight_layout()
    
    if save_path:
        plt.savefig(save_path)
        print(f"✅ Heatmap guardado en: {save_path}")
    plt.show()

def grid_search_fourier_parallel(spike_trains, true_labels, n_clusters, ranges_a, ranges_b, n_jobs=-1):
    """
    Hace una búsqueda en rejilla sobre los coeficientes a y b en paralelo.

    Parámetros:
    - spike_trains: lista de arrays de spike trains.
    - true_labels: etiquetas verdaderas (array).
    - n_clusters: número de clusters a formar.
    - ranges_a: lista de arrays para probar como 'a' (lista de np.array).
    - ranges_b: lista de arrays para probar como 'b' (lista de np.array).
    - n_jobs: núcleos paralelos (por defecto usa todos).

    Retorna:
    - DataFrame ordenado con las métricas y los coeficientes probados.
    """

    n = len(spike_trains)
    combinaciones = [(a, b) for a in ranges_a for b in ranges_b]

    def evaluar_par(a, b):
        # Crear matriz de distancia
        dist_matrix = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                d = fourier_dist_precomputed(a[i], b[j], a, b)
                dist_matrix[i, j] = d
                dist_matrix[j, i] = d

        # Clustering jerárquico
        condensed = dist_matrix[np.triu_indices(n, k=1)]
        linked = linkage(condensed, method="ward")
        clusters = fcluster(linked, t=n_clusters, criterion="maxclust")

        # Métricas
        ari = adjusted_rand_score(true_labels, clusters)
        ami = adjusted_mutual_info_score(true_labels, clusters)
        fmi = fowlkes_mallows_score(true_labels, clusters)
        completeness = completeness_score(true_labels, clusters)

        return {
            'a': a.tolist(),
            'b': b.tolist(),
            'ARI': ari,
            'AMI': ami,
            'FMI': fmi,
            'Completeness': completeness
        }

    resultados = Parallel(n_jobs=n_jobs)(
        delayed(evaluar_par)(a, b) for (a, b) in tqdm(combinaciones, desc="Grid Search Fourier")
    )

    resultados_df = pd.DataFrame(resultados)
    resultados_df = resultados_df.sort_values(by='ARI', ascending=False)
    return resultados_df

In [29]:

# === CONFIGURACIÓN ===
N_CLUSTERS = 8


# Cargar los datos
data=pd.read_csv(r'resultados_pipeline_20250425_010216\spike_trains.csv')
filters = data["filter"]
spike_trains = data.drop(columns=["filter"]).apply(lambda row: row.dropna().to_numpy(), axis=1)
label_encoder = LabelEncoder()
true_labels = label_encoder.fit_transform(filters)

In [30]:
# === PRECOMPUTAR SENOS Y COSENOS ===

M = 3  # Número de términos en 'a' (ajustar si cambia)
N = 3  # Número de términos en 'b' (ajustar si cambia)

precomputed_matrices = []

for idx, train in enumerate(tqdm(spike_trains, desc="Precomputando matrices")):
    multiples_a = np.arange(1, M + 1)
    multiples_b = np.arange(1, N + 1)
    
    sin_matrix = np.sin(np.pi * np.outer(train, multiples_a))  # (len(train), M)
    cos_matrix = np.cos(np.pi * np.outer(train, multiples_b))  # (len(train), N)
    
    precomputed_matrices.append({
        'sin': sin_matrix,
        'cos': cos_matrix
    })

Precomputando matrices: 100%|██████████| 80/80 [00:00<00:00, 25217.52it/s]


In [24]:
precomputed_matrices

[{'sin': array([[ 0.06756035,  0.13481198,  0.20144757],
         [ 0.3946355 ,  0.725212  ,  0.93806883],
         [ 0.94849813, -0.60093563, -0.56776609],
         [-0.95798951, -0.54950898,  0.64278761],
         [-0.9105797 , -0.75274624,  0.28830919],
         [-0.80864715, -0.95144447, -0.31081094],
         [-0.76705419, -0.98425681, -0.49590932],
         [-0.72195609, -0.99909897, -0.66067472],
         [-0.67355893, -0.99569997, -0.79835154],
         [-0.62208385, -0.97412188, -0.90329482],
         [-0.56776609, -0.93475866, -0.97120175],
         [-0.4516072 , -0.80586312, -0.98640214],
         [-0.32720304, -0.61838368, -0.8414853 ],
         [-0.06285329, -0.12545803, -0.18756665],
         [ 0.07226591,  0.14415393,  0.21528814],
         [ 0.13948451,  0.27624188,  0.40759832],
         [ 0.20606571,  0.40328632,  0.5831964 ],
         [ 0.27170527,  0.52296772,  0.7348826 ],
         [ 0.33610324,  0.63310098,  0.8564376 ],
         [ 0.39896535,  0.73167533,  0.9428

In [25]:
from joblib import dump

# Guardar las matrices precomputadas
dump(precomputed_matrices, 'precomputed_matrices.joblib')
print("✅ Matrices precomputadas guardadas en 'precomputed_matrices.joblib'")

✅ Matrices precomputadas guardadas en 'precomputed_matrices.joblib'


In [None]:
# Grid más fino para 3 términos
valores = np.linspace(-10, 10, 5)  # 5 valores entre -10 y 10
a_candidates = [np.array([a1, a2, a3]) for a1 in valores for a2 in valores for a3 in valores]
b_candidates = [np.array([b1, b2, b3]) for b1 in valores for b2 in valores for b3 in valores]

resultados_df = grid_search_fourier_parallel(spike_trains, true_labels, N_CLUSTERS, a_candidates, b_candidates, n_jobs=-1)

Grid Search Fourier: 100%|██████████| 15625/15625 [22:32:11<00:00,  5.19s/it]  


In [11]:
resultados_df

Unnamed: 0,a,b,ARI,AMI,FMI,Completeness
11506,"[5.0, 5.0, 0.0]","[-10.0, -5.0, -5.0]",0.162141,0.274235,0.279365,0.296371
4118,"[-5.0, -5.0, 0.0]","[10.0, 5.0, 5.0]",0.162141,0.274235,0.279365,0.296371
4112,"[-5.0, -5.0, 0.0]","[10.0, 0.0, 0.0]",0.144739,0.270189,0.255679,0.284634
11512,"[5.0, 5.0, 0.0]","[-10.0, 0.0, 0.0]",0.144739,0.270189,0.255679,0.284634
14756,"[10.0, 5.0, 5.0]","[-10.0, -5.0, -5.0]",0.142871,0.262100,0.256700,0.279178
...,...,...,...,...,...,...
5057,"[-5.0, 5.0, -10.0]","[0.0, -5.0, 0.0]",0.003110,0.011185,0.145429,0.028338
10567,"[5.0, -5.0, 10.0]","[0.0, 5.0, 0.0]",0.003110,0.011185,0.145429,0.028338
9933,"[5.0, -10.0, 10.0]","[0.0, -5.0, 5.0]",0.002536,0.007978,0.137985,0.023873
5691,"[-5.0, 10.0, -10.0]","[0.0, 5.0, -5.0]",0.002536,0.007978,0.137985,0.023873


In [12]:
resultados_df.to_csv("resultados_combinaciones",index=False)

In [5]:
"""    # Crear matriz de distancias
n = len(spike_trains)
dist_matrix = np.zeros((n, n))
a = [0.3,0.4,0.7,-0.3,-0.25]
b = [0.6,0.9,-0.6,0.2,0.7]
for i in range(n):
    for j in range(i+1, n):
        d = fourier_dist(spike_trains.iloc[i], spike_trains.iloc[j], a,b)
        dist_matrix[i, j] = d
        dist_matrix[j, i] = d"""

'    # Crear matriz de distancias\nn = len(spike_trains)\ndist_matrix = np.zeros((n, n))\na = [0.3,0.4,0.7,-0.3,-0.25]\nb = [0.6,0.9,-0.6,0.2,0.7]\nfor i in range(n):\n    for j in range(i+1, n):\n        d = fourier_dist(spike_trains.iloc[i], spike_trains.iloc[j], a,b)\n        dist_matrix[i, j] = d\n        dist_matrix[j, i] = d'

In [6]:
"heatmap_distancias(dist_matrix)"

'heatmap_distancias(dist_matrix)'

In [7]:
"""condensed = dist_matrix[np.triu_indices(n, k=1)]
linked = linkage(condensed, method="ward")
clusters = fcluster(linked, t=N_CLUSTERS, criterion="maxclust")

# Evaluación
ari = adjusted_rand_score(true_labels, clusters)
ami = adjusted_mutual_info_score(true_labels, clusters)
fmi = fowlkes_mallows_score(true_labels, clusters)
completeness = completeness_score(true_labels, clusters)"""


'condensed = dist_matrix[np.triu_indices(n, k=1)]\nlinked = linkage(condensed, method="ward")\nclusters = fcluster(linked, t=N_CLUSTERS, criterion="maxclust")\n\n# Evaluación\nari = adjusted_rand_score(true_labels, clusters)\nami = adjusted_mutual_info_score(true_labels, clusters)\nfmi = fowlkes_mallows_score(true_labels, clusters)\ncompleteness = completeness_score(true_labels, clusters)'

In [8]:
"""resultados=[ari,ami,fmi,completeness]
resultados=np.array(resultados)"""

'resultados=[ari,ami,fmi,completeness]\nresultados=np.array(resultados)'

In [9]:
"resultados"

'resultados'

In [10]:
"""import matplotlib.pyplot as plt

nombres = ["ARI", "AMI", "FMI", "Completeness"]

# Crear gráfico de barras
plt.figure(figsize=(6, 4))
plt.bar(nombres, resultados)
plt.ylim(0, 1)  # Si tus valores están entre 0 y 1
plt.title("Métricas de Clustering")
plt.ylabel("Valor")
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.tight_layout()
plt.show()"""

'import matplotlib.pyplot as plt\n\nnombres = ["ARI", "AMI", "FMI", "Completeness"]\n\n# Crear gráfico de barras\nplt.figure(figsize=(6, 4))\nplt.bar(nombres, resultados)\nplt.ylim(0, 1)  # Si tus valores están entre 0 y 1\nplt.title("Métricas de Clustering")\nplt.ylabel("Valor")\nplt.grid(axis="y", linestyle="--", alpha=0.7)\nplt.tight_layout()\nplt.show()'