In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import os

In [None]:
# Read the ROI names from a plain-text file (one name per line)
def load_roi_names(txt_file):
    """Return the lines of ``txt_file`` as a list of stripped strings.

    Every line of the file yields one entry, in file order. Leading and
    trailing whitespace (including the newline) is removed; blank lines are
    kept as empty strings.
    """
    names = []
    with open(txt_file, 'r') as handle:
        for raw_line in handle:
            names.append(raw_line.strip())
    return names

# Load the pickled Python object stored in a .dat file
def load_rat_data(dat_file):
    """Unpickle and return the object stored in ``dat_file``.

    The file is opened in binary mode and handed to ``pickle.load``.
    Unpickling can execute arbitrary code, so only use this on files from a
    trusted source.
    """
    with open(dat_file, 'rb') as handle:
        return pickle.load(handle)

# Print an overview of the ROI names and of the data dictionary
def explore_data(roi_names, rat_data):
    """Print a summary of ``roi_names`` and of every entry in ``rat_data``.

    The first part reports how many ROI names there are and lists the first
    ten with their position. The second part reports the type and keys of
    ``rat_data`` and, for each entry, its type plus:

    * shape and dtype for ``numpy.ndarray`` values,
    * length (and the type of the first element, if any) for lists,
    * the sub-keys for dict values.

    Nothing is returned; all output goes to stdout.
    """
    print("=== NOMBRES DE ROIs ===")
    print(f"Número total de ROIs: {len(roi_names)}")
    print("Primeros 10 ROIs:")
    first_names = roi_names[:10]
    for position in range(len(first_names)):
        print(f"{position}: {first_names[position]}")

    print("\n=== ESTRUCTURA DEL DICCIONARIO ===")
    print(f"Tipo de datos: {type(rat_data)}")
    print(f"Claves principales: {list(rat_data.keys())}")

    for key, entry in rat_data.items():
        print(f"\nClave '{key}':")
        print(f"  Tipo: {type(entry)}")

        # Each entry is described by the first matching category only.
        if isinstance(entry, np.ndarray):
            print(f"  Shape: {entry.shape}")
            print(f"  Dtype: {entry.dtype}")
            continue
        if isinstance(entry, list):
            print(f"  Longitud: {len(entry)}")
            if len(entry) != 0:
                print(f"  Tipo elementos: {type(entry[0])}")
            continue
        if isinstance(entry, dict):
            print(f"  Sub-claves: {list(entry.keys())}")
            

In [None]:
# Directory that holds the input files used by the cells below
path = './data/Toni_2025-08-06/'
# os.listdir() returns entries in arbitrary order; sort them so files[0] and
# files[1] refer to the same files on every run and platform.
# NOTE(review): later cells assume files[0] is the ROI-name .txt and files[1]
# the pickled .dat — confirm against the directory contents.
files = sorted(os.listdir(path))
path+files[0], path+files[1]

In [None]:
# Load the ROI names from the first listed file and display them with their count
roi_names = load_roi_names(path+files[0])
roi_names, len(roi_names)

In [None]:
# Unpickle the second listed file and print a summary of its structure
rat_data = load_rat_data(path+files[1])
explore_data(roi_names, rat_data)

In [None]:
# Collect the ROI pairs (the keys of the dictionary)
roi_pairs = list(rat_data.keys())

# Tabulate the pairs, one row per connection
pairs_df = pd.DataFrame(roi_pairs, columns=['roi_i', 'roi_j'])
print(f"Total conexiones: {len(pairs_df)}")
print(f"Rango ROIs: {pairs_df.min().min()} - {pairs_df.max().max()}")

# Inspect one entry (the second key) to see what a connection stores
sample_key = roi_pairs[1]
sample_data = rat_data[sample_key]
print(f"\nEjemplo conexión {sample_key}:")
print(f"Tipo: {type(sample_data)}")
print(f"Longitud: {len(sample_data)}")
print(f"Primeros valores: {sample_data[:5]}")

# Number of stored items per connection, looked up pair by pair
lengths = []

for i in range(len(pairs_df)):

    key = (pairs_df.loc[i, 'roi_i'], pairs_df.loc[i, 'roi_j'])

    lengths.append(len(rat_data[key]))

# Histogram of the per-connection counts, limited to the 0-1000 range.
# The tuple built on this line is discarded because it is not the last
# expression of the cell, so max(lengths) is not displayed.
plt.hist(lengths, bins=100, range = (0,1000)), max(lengths)
plt.title(files[1])

In [None]:
import pickle
import pandas as pd
import numpy as np

# Load the ROI names and the pickled connectivity data
def load_data(roi_names_path='./data/Toni_2025-08-06/atlas_cg_3d5_names.txt',
              data_path='./data/Toni_2025-08-06/th-0.0_R01_b20_r_Fit_Histogram_Tau_all_fibers.dat'):
    """Read the ROI-name table and the pickled data object.

    Parameters
    ----------
    roi_names_path : str or path-like, optional
        Text file with one ROI name per line. It is parsed with
        ``pandas.read_csv`` using ``;`` as separator and no header row.
        Defaults to the file this notebook was written for.
    data_path : str or path-like, optional
        Pickle file to load. Defaults to the file this notebook was written
        for. Unpickling can execute arbitrary code, so only point this at
        trusted files.

    Returns
    -------
    (pandas.DataFrame, object)
        A one-column frame (column ``roi_name``) and the unpickled object.
    """
    # ROI names
    roi_names = pd.read_csv(roi_names_path, sep=';', header=None,
                            names=['roi_name'], engine='python')

    # Pickled data
    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    return roi_names, data

# Summarise the ROI pairs and show one example entry
def explore_connectivity_structure(data, roi_names):
    """Print a short overview of ``data`` and return its keys as a table.

    Parameters
    ----------
    data : dict
        Mapping whose keys are 2-tuples ``(roi_i, roi_j)``.
    roi_names : any
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        One row per key with columns ``roi_i`` and ``roi_j``.

    Notes
    -----
    The example entry is the second key when there are at least two keys and
    the only key otherwise. With no keys at all, only the connection count
    is printed.
    """
    # Collect the ROI pairs
    roi_pairs = list(data.keys())

    # Tabulate the pairs
    pairs_df = pd.DataFrame(roi_pairs, columns=['roi_i', 'roi_j'])
    print(f"Total conexiones: {len(pairs_df)}")
    if not roi_pairs:
        # Nothing to summarise; avoids indexing into an empty list below.
        return pairs_df
    print(f"Rango ROIs: {pairs_df.min().min()} - {pairs_df.max().max()}")

    # Show one entry; fall back to the first key when there is only one
    sample_key = roi_pairs[1] if len(roi_pairs) > 1 else roi_pairs[0]
    sample_data = data[sample_key]
    print(f"\nEjemplo conexión {sample_key}:")
    print(f"Tipo: {type(sample_data)}")
    print(f"Longitud: {len(sample_data)}")
    print(f"Primeros valores: {sample_data[:5]}")

    return pairs_df

# Turn the pair dictionary into a square ROI x ROI matrix
def create_connectivity_matrix(data, roi_names, metric='mean'):
    """Build a labelled square matrix with one summary value per ROI pair.

    Parameters
    ----------
    data : dict
        Mapping ``(i, j) -> values`` where ``i`` and ``j`` are used as row
        and column positions.
    roi_names : pandas.DataFrame
        Frame with a ``roi_name`` column; its length sets the matrix size
        and its values label both axes.
    metric : {'mean', 'std', 'length'}, optional
        Summary stored per pair: ``np.mean(values)``, ``np.std(values)`` or
        ``len(values)``.

    Returns
    -------
    pandas.DataFrame
        Matrix of floats; pairs that are absent, out of range, or empty
        (for 'mean'/'std') stay NaN.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.

    Notes
    -----
    NOTE(review): ``np.mean``/``np.std`` reduce over every element of
    ``values``; if each measurement is itself a multi-value record, the
    columns are pooled together — confirm that is intended.
    """
    reducers = {'mean': np.mean, 'std': np.std, 'length': len}
    if metric not in reducers:
        # An unknown metric used to produce a silent all-NaN matrix.
        raise ValueError(
            f"metric must be one of {sorted(reducers)}, got {metric!r}")
    reduce_values = reducers[metric]

    n_rois = len(roi_names)

    # Start from an all-NaN matrix
    connectivity_matrix = np.full((n_rois, n_rois), np.nan)

    # Fill in one cell per pair
    for (i, j), values in data.items():
        # Reject negative positions too: they would wrap around and write
        # into the last rows/columns.
        if not (0 <= i < n_rois and 0 <= j < n_rois):
            continue
        if metric != 'length' and len(values) == 0:
            # Mean/std of nothing is undefined; keep NaN without a warning.
            continue
        connectivity_matrix[i, j] = reduce_values(values)

    # Label rows and columns with the ROI names
    matrix_df = pd.DataFrame(connectivity_matrix,
                             index=roi_names['roi_name'],
                             columns=roi_names['roi_name'])

    return matrix_df

# Load the inputs and print the overview of the ROI pairs
roi_names, data = load_data()
pairs_df = explore_connectivity_structure(data, roi_names)

# Build one matrix per supported metric
conn_mean = create_connectivity_matrix(data, roi_names, 'mean')
conn_std = create_connectivity_matrix(data, roi_names, 'std') 
conn_length = create_connectivity_matrix(data, roi_names, 'length')

# Column-wise descriptive statistics of the mean matrix
print(f"\nMatriz de conectividad (promedio):")
print(conn_mean.describe())

In [None]:
def create_connectivity_matrices(data, roi_names):
    """Build per-pair summary matrices over the ROI ids that occur in ``data``.

    Parameters
    ----------
    data : dict
        Mapping ``(i, j) -> measurements``; each measurement must be
        indexable at positions 0 and 1.
    roi_names : pandas.DataFrame
        Frame with a ``roi_name`` column. An id ``idx`` is labelled with
        ``roi_names.iloc[idx]['roi_name']`` when ``idx < len(roi_names)``
        and with ``"ROI_<idx>"`` otherwise.

    Returns
    -------
    (dict, list)
        ``dict`` maps ``'val0_mean'``, ``'val1_mean'``, ``'n_measurements'``,
        ``'val0_std'`` and ``'val1_std'`` to square DataFrames (NaN where a
        pair is absent or empty); ``list`` holds the sorted ids, in the same
        order as the matrix rows/columns.

    Notes
    -----
    All five matrices are always returned, even when every connection is
    empty, so callers can index the result unconditionally.
    """
    # Sorted unique ids seen on either side of any pair
    all_indices = sorted({idx for i, j in data.keys() for idx in (i, j)})
    n_rois = len(all_indices)
    index_map = {idx: pos for pos, idx in enumerate(all_indices)}

    # Row/column labels
    roi_labels = [roi_names.iloc[idx]['roi_name'] if idx < len(roi_names)
                  else f"ROI_{idx}" for idx in all_indices]

    # Allocate every matrix once, up front
    matrix_names = ['val0_mean', 'val1_mean', 'n_measurements', 'val0_std', 'val1_std']
    matrices = {name: np.full((n_rois, n_rois), np.nan) for name in matrix_names}

    for (i, j), measurements in data.items():
        # Empty connection: leave NaN. len() also works for array inputs.
        if measurements is None or len(measurements) == 0:
            continue

        pos_i, pos_j = index_map[i], index_map[j]

        # First and second value of every measurement
        vals_0 = [m[0] for m in measurements]
        vals_1 = [m[1] for m in measurements]

        matrices['val0_mean'][pos_i, pos_j] = np.mean(vals_0)
        matrices['val1_mean'][pos_i, pos_j] = np.mean(vals_1)
        matrices['n_measurements'][pos_i, pos_j] = len(measurements)
        # A single measurement is reported with zero spread
        matrices['val0_std'][pos_i, pos_j] = np.std(vals_0) if len(vals_0) > 1 else 0
        matrices['val1_std'][pos_i, pos_j] = np.std(vals_1) if len(vals_1) > 1 else 0

    # Wrap each array in a labelled DataFrame
    result = {}
    for name, matrix in matrices.items():
        result[name] = pd.DataFrame(matrix, index=roi_labels, columns=roi_labels)

    return result, all_indices

# Build the summary matrices and keep the ordered list of ROI ids
matrices, roi_indices = create_connectivity_matrices(data, roi_names)

# For each matrix, report how many cells are filled and the value range
for name, df in matrices.items():
    print(f"\n{name.upper()}:")
    print(f"Conexiones no-NaN: {(~df.isna()).sum().sum()}")
    print(f"Rango: {df.min().min():.2e} - {df.max().max():.2e}")

In [None]:
# Show the main matrices as images in a 2x2 grid
# NOTE(review): titles use files[1] from the file-listing cell, while `data`
# comes from the hard-coded path in load_data() — confirm both refer to the
# same file.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Mean of the first measurement value
im1 = axes[0,0].imshow(matrices['val0_mean'].values, cmap='viridis', aspect='auto')
axes[0,0].set_title('VAL0 Mean')
plt.colorbar(im1, ax=axes[0,0])

# Mean of the second measurement value
im2 = axes[0,1].imshow(matrices['val1_mean'].values, cmap='viridis', aspect='auto')
axes[0,1].set_title('VAL1 Mean')
plt.colorbar(im2, ax=axes[0,1])

# Number of measurements per pair
im3 = axes[1,0].imshow(matrices['n_measurements'].values, cmap='plasma', aspect='auto')
axes[1,0].set_title(f'N Measurements - {files[1]}')
plt.colorbar(im3, ax=axes[1,0])

# Scatter of VAL0 mean against VAL1 mean over all cells where both are present
val0_flat = matrices['val0_mean'].values.flatten()
val1_flat = matrices['val1_mean'].values.flatten()
mask = ~(np.isnan(val0_flat) | np.isnan(val1_flat))
axes[1,1].scatter(val0_flat[mask], val1_flat[mask], alpha=0.5, s=1)
axes[1,1].set_xlabel(f'VAL0 - {files[1]}')
axes[1,1].set_ylabel(f'VAL1 - {files[1]}')
axes[1,1].set_title(f'VAL0 vs VAL1 - {files[1]}')

plt.tight_layout()
plt.show()

# Per-ROI statistics: filled cells per row (outgoing) and per column
# (incoming) of val1_mean, plus the row/column means of val1_mean.
# The Series values are indexed by ROI label, so the resulting frame is too.
roi_stats = pd.DataFrame({
    'roi_index': roi_indices,
    'roi_name': [roi_names.iloc[i]['roi_name'] if i < len(roi_names) else f"ROI_{i}" 
                 for i in roi_indices],
    'out_connections': (~matrices['val1_mean'].isna()).sum(axis=1),
    'in_connections': (~matrices['val1_mean'].isna()).sum(axis=0),
    'mean_val1_out': matrices['val1_mean'].mean(axis=1),
    'mean_val1_in': matrices['val1_mean'].mean(axis=0)
})

# Ten ROIs with the most outgoing connections
print(roi_stats.nlargest(10, 'out_connections')[['roi_name', 'out_connections', 'in_connections']])

In [None]:
# Number of ROIs with at least one incoming connection, divided by n_rois**2 / 2.
# NOTE(review): if a connection density was intended, the numerator should
# probably be the total number of connections — confirm.
sum(roi_stats['in_connections'] > 0)/(len(roi_stats)*len(roi_stats)/2)

In [None]:
# Total number of filled cells in val1_mean (sum of the per-row counts)
roi_stats['out_connections'].sum()

In [None]:
# First elements of every key whose second element is 15
np.array(list(data.keys()))[np.array(list(data.keys()))[:,1] == 15,0]

In [None]:
# Second elements of every key whose first element is 35
np.array(list(data.keys()))[np.array(list(data.keys()))[:,0] == 35,1]

In [None]:
# Number of distinct ids appearing as first and as second key element
np.unique(np.array(list(data.keys()))[:,0]).shape , np.unique(np.array(list(data.keys()))[:,1]).shape

In [None]:
# Shape of the array built from the measurements stored under key (35, 36)
np.array(data[(35,  36)]).shape

In [None]:
# Distinct ids on the first (unique_i) and second (unique_j) key position
unique_i = np.unique(np.array(list(data.keys()))[:,0])
unique_j = np.unique(np.array(list(data.keys()))[:,1])
# Only the range of the first-position ids is reported
print(f"Rango IDs: {unique_i.min()}-{unique_i.max()}")

In [None]:
# Check for a possible hemispheric split of the ids.
# The cut at 80 is only an example value (see the original inline note).
# Note: the loop variable `id` shadows the builtin inside the comprehensions.
left_ids = [id for id in unique_i if id < 80]  # example threshold
right_ids = [id for id in unique_i if id >= 80]

# Alternatively, count ids falling in two fixed ranges
print(f"IDs 3-81: {sum(1 for id in unique_i if 3 <= id <= 81)}")
print(f"IDs 82-155: {sum(1 for id in unique_i if 82 <= id <= 155)}")

In [None]:
# Extraer todos los valores para análisis
import numpy as np

# Gather every 3-element measurement from every connection
all_vals = []
for measurements in data.values():
    for measurement in measurements:
        if len(measurement) == 3:
            all_vals.append(measurement)

# Stack into an (n, 3) array and drop rows containing any NaN
vals_array = np.array(all_vals)
vals_array = vals_array[~np.isnan(vals_array).any(axis=1)]

# Min / max / mean per column
for i in range(3):
    print(f"Val{i}: min={vals_array[:,i].min():.6f}, max={vals_array[:,i].max():.6f}, mean={vals_array[:,i].mean():.6f}")

# Pairwise correlations between columns.
# NOTE(review): the printed labels use "Val3/Val4/Val2" for columns 0/1/2,
# while the loop above labels them Val0..Val2; the trailing notes call them
# velocity, distance and tau — confirm the naming and the physical meaning.
print(f"Corr(Val3,Val4): {np.corrcoef(vals_array[:,0], vals_array[:,1])[0,1]:.3f}")  # columns 0 and 1 (noted as velocity vs distance)
print(f"Corr(Val2,Val4): {np.corrcoef(vals_array[:,1], vals_array[:,2])[0,1]:.3f}")  # columns 1 and 2 (noted as tau vs distance)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from collections import defaultdict

# Split each connection's measurements into three per-connection lists.
# defaultdict(list) creates the list on first access to a key.
connection_strenght = defaultdict(list)
unknown = defaultdict(list)
connection_taus = defaultdict(list)

for (i,j), measurements in data.items():
    if len(measurements) > 0:
        for measurement in measurements:
            # Keep 3-element measurements whose first value is not NaN
            if len(measurement) == 3 and not np.isnan(measurement[0]):
                connection_strenght[(i,j)].append(measurement[0])  # column 0 (tagged Val3)
                unknown[(i,j)].append(measurement[1])   # column 1 (tagged Val4)
                connection_taus[(i,j)].append(measurement[2])        # column 2 (tagged Val2)

# Build one summary row per connection
summary_columns = ['connection', 'unk', 'mean_unk', 'std_unk', 'cv_unk',
                   'n_measurements', 'mean_strenght', 'mean_tau']
plot_data = []
for conn, unk in unknown.items():
    # Keep only connections with enough data. The threshold must be applied
    # to this connection's list (`unk`), not to the number of connections.
    if len(unk) >= 10:
        plot_data.append({
            'connection': f"{conn[0]}-{conn[1]}",
            'unk': unk,
            'mean_unk': np.mean(unk),
            'std_unk': np.std(unk),
            # Coefficient of variation: spread relative to the mean
            'cv_unk': np.std(unk)/np.mean(unk),
            'n_measurements': len(unk),
            'mean_strenght': np.mean(connection_strenght[conn]),
            'mean_tau': np.mean(connection_taus[conn])
        })

# Passing the column list keeps the frame well-formed (and sortable) even
# when no connection passes the threshold.
df_connections = pd.DataFrame(plot_data, columns=summary_columns)
df_connections = df_connections.sort_values('n_measurements', ascending=False)

# Draw a 2x2 figure for the second measurement value ("unknown")
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# 1. Overall histogram of the pooled values, with the mean marked
all_velocities = [v for unk in unknown.values() for v in unk]
axes[0,0].hist(all_velocities, bins=50, alpha=0.7, edgecolor='black')
axes[0,0].set_xlabel('unknown')
axes[0,0].set_ylabel('Frecuencia')
axes[0,0].set_title('Distribución General de unknown')
axes[0,0].axvline(np.mean(all_velocities), color='red', linestyle='--', 
                  label=f'Media: {np.mean(all_velocities):.2f}')
axes[0,0].legend()

# 2. Coefficient of variation per connection; marker size scales with the
#    number of measurements
axes[0,1].scatter(df_connections['mean_unk'], df_connections['cv_unk'], 
                  s=df_connections['n_measurements']*3, alpha=0.6)
axes[0,1].set_xlabel('unknown')
axes[0,1].set_ylabel('Coeficiente Variación')
axes[0,1].set_title('Consistencia de unknown por Conexión\n(tamaño = n_mediciones)')

# 3. mean_tau (x) against mean_unk (y) for the 20 connections with most data.
# NOTE(review): the axis labels and title below say distance/velocity, but
# the plotted columns are mean_tau and mean_unk — confirm the labels.
colors = plt.cm.viridis(np.linspace(0, 1, len(df_connections.head(20))))
for color_idx, (idx, row) in enumerate(df_connections.head(20).iterrows()):
    axes[1,0].scatter(row['mean_tau'], row['mean_unk'], 
                      c=[colors[color_idx]], s=row['n_measurements']*2, 
                      alpha=0.7)
axes[1,0].set_xlabel('Distancia Media (mm)')
axes[1,0].set_ylabel('Velocidad Media (m/s)')
axes[1,0].set_title('Velocidad vs Distancia (Top 20 conexiones)')

# 4. Box plots for the 12 connections with most data; the "i-j" label is
#    parsed back into the (i, j) key to fetch the raw values
top_connections = df_connections.head(12)
vel_data = [unknown[(int(conn.split('-')[0]), int(conn.split('-')[1]))] 
            for conn in top_connections['connection']]
axes[1,1].boxplot(vel_data, labels=[f"{conn[:8]}..." for conn in top_connections['connection']])
axes[1,1].set_ylabel('Velocidad (m/s)')
axes[1,1].set_title('Distribución unknown por Conexión')
axes[1,1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Consistency report based on the coefficient of variation (cv_unk)
print("=== ANÁLISIS DE CONSISTENCIA DE unknown ===")
print(f"Conexiones con ≥10 mediciones: {len(df_connections)}")
print(f"CV promedio: {df_connections['cv_unk'].mean():.3f}")
print(f"Conexiones muy variables (CV>0.5): {sum(df_connections['cv_unk'] > 0.5)}")

# Ten connections with the smallest coefficient of variation
print("\n=== TOP 10 CONEXIONES MÁS CONSISTENTES ===")
consistent = df_connections.nsmallest(10, 'cv_unk')[['connection', 'mean_unk', 'cv_unk', 'n_measurements']]
print(consistent.to_string(index=False))

# Ten connections with the largest coefficient of variation
print("\n=== TOP 10 CONEXIONES MÁS VARIABLES ===")
variable = df_connections.nlargest(10, 'cv_unk')[['connection', 'mean_unk', 'cv_unk', 'n_measurements']]
print(variable.to_string(index=False))

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from collections import defaultdict

# Split each connection's measurements into three per-connection lists.
# NOTE(review): the names velocity / distance / tau for columns 0 / 1 / 2
# are this notebook's working interpretation — confirm against the data source.
connection_velocities = defaultdict(list)
connection_distances = defaultdict(list)
connection_taus = defaultdict(list)

for (i,j), measurements in data.items():
    if len(measurements) > 0:
        for measurement in measurements:
            # Keep 3-element measurements whose first value is not NaN
            if len(measurement) == 3 and not np.isnan(measurement[0]):
                connection_velocities[(i,j)].append(measurement[0])  # column 0 (tagged Val3)
                connection_distances[(i,j)].append(measurement[1])   # column 1 (tagged Val4)
                connection_taus[(i,j)].append(measurement[2])        # column 2 (tagged Val2)

# One summary row per connection with at least 10 measurements
plot_data = []
for conn, velocities in connection_velocities.items():
    if len(velocities) >= 10:  # only connections with enough data
        plot_data.append({
            'connection': f"{conn[0]}-{conn[1]}",
            'velocities': velocities,
            'mean_vel': np.mean(velocities),
            'std_vel': np.std(velocities),
            'cv_vel': np.std(velocities)/np.mean(velocities),
            'n_measurements': len(velocities),
            'mean_dist': np.mean(connection_distances[conn]),
            'mean_tau': np.mean(connection_taus[conn])
        })

# Connections with the most measurements first
df_connections = pd.DataFrame(plot_data)
df_connections = df_connections.sort_values('n_measurements', ascending=False)

# Draw a 2x2 figure for the first measurement value
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# 1. Overall histogram of the pooled values, with the mean marked
all_velocities = [v for vels in connection_velocities.values() for v in vels]
axes[0,0].hist(all_velocities, bins=50, alpha=0.7, edgecolor='black')
axes[0,0].set_xlabel('Velocidad (m/s)')
axes[0,0].set_ylabel('Frecuencia')
axes[0,0].set_title('Distribución General de Val_0')
axes[0,0].axvline(np.mean(all_velocities), color='red', linestyle='--', 
                  label=f'Media: {np.mean(all_velocities):.2f}')
axes[0,0].legend()

# 2. Coefficient of variation per connection; marker size scales with the
#    number of measurements
axes[0,1].scatter(df_connections['mean_vel'], df_connections['cv_vel'], 
                  s=df_connections['n_measurements']*3, alpha=0.6)
axes[0,1].set_xlabel('Velocidad Media (m/s)')
axes[0,1].set_ylabel('Coeficiente Variación')
axes[0,1].set_title('Consistencia de Velocidad por Conexión\n(tamaño = n_mediciones)')

# 3. mean_dist (x) against mean_vel (y) for the 20 connections with most data
colors = plt.cm.viridis(np.linspace(0, 1, len(df_connections.head(20))))
for color_idx, (idx, row) in enumerate(df_connections.head(20).iterrows()):
    axes[1,0].scatter(row['mean_dist'], row['mean_vel'], 
                      c=[colors[color_idx]], s=row['n_measurements']*2, 
                      alpha=0.7)
axes[1,0].set_xlabel('Distancia Media (mm)')
axes[1,0].set_ylabel('Velocidad Media (m/s)')
axes[1,0].set_title('Velocidad vs Distancia (Top 20 conexiones)')

# 4. Box plots for the 12 connections with most data; the "i-j" label is
#    parsed back into the (i, j) key to fetch the raw values
top_connections = df_connections.head(12)
vel_data = [connection_velocities[(int(conn.split('-')[0]), int(conn.split('-')[1]))] 
            for conn in top_connections['connection']]
axes[1,1].boxplot(vel_data, labels=[f"{conn[:8]}..." for conn in top_connections['connection']])
axes[1,1].set_ylabel('Velocidad (m/s)')
axes[1,1].set_title('Distribución Velocidades por Conexión')
axes[1,1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Consistency report based on the coefficient of variation (cv_vel)
print("=== ANÁLISIS DE CONSISTENCIA DE VELOCIDADES ===")
print(f"Conexiones con ≥10 mediciones: {len(df_connections)}")
print(f"CV promedio: {df_connections['cv_vel'].mean():.3f}")
print(f"Conexiones muy variables (CV>0.5): {sum(df_connections['cv_vel'] > 0.5)}")

# Ten connections with the smallest coefficient of variation
print("\n=== TOP 10 CONEXIONES MÁS CONSISTENTES ===")
consistent = df_connections.nsmallest(10, 'cv_vel')[['connection', 'mean_vel', 'cv_vel', 'n_measurements']]
print(consistent.to_string(index=False))

# Ten connections with the largest coefficient of variation
print("\n=== TOP 10 CONEXIONES MÁS VARIABLES ===")
variable = df_connections.nlargest(10, 'cv_vel')[['connection', 'mean_vel', 'cv_vel', 'n_measurements']]
print(variable.to_string(index=False))

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import pearsonr

# Pool the three measurement columns over all connections
velocities = []
distances = []
taus = []
# NOTE(review): `qualities` is never filled below (measurements have only
# three values), so it ends up as an empty array.
qualities = []

for measurements in data.values():
    for measurement in measurements:
        if len(measurement) == 3:
            v, d, t = measurement[0], measurement[1], measurement[2]
            # Skip measurements with a NaN in any of the three values
            if not any(np.isnan([v, d, t])):
                velocities.append(v)
                distances.append(d)
                taus.append(t)

# Convert to arrays for vectorised masking and regression
velocities = np.array(velocities)
distances = np.array(distances)
taus = np.array(taus)
qualities = np.array(qualities)

# Build a 2x2 figure with the pairwise relationships
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# 1. Val_0 against Val_1 with a least-squares line.
#    The title reports the p-value returned by linregress rather than a
#    fixed "p < 0.001".
axes[0,0].scatter(distances, velocities, alpha=0.3, s=1)
slope_vd, intercept_vd, r_vd, p_vd, _ = stats.linregress(distances, velocities)
line_vd = slope_vd * distances + intercept_vd
axes[0,0].plot(distances, line_vd, 'r-', linewidth=2)
axes[0,0].set_xlabel('Val_1 (mm)')
axes[0,0].set_ylabel('Val_0 (m/s)')
axes[0,0].set_title(f'Val_0 vs Val_1\nr = {r_vd:.3f}, p = {p_vd:.2e}')
axes[0,0].grid(True, alpha=0.3)

# 2. Val_0 against Val_2 with a least-squares line
axes[0,1].scatter(taus, velocities, alpha=0.3, s=1)
slope_vt, intercept_vt, r_vt, p_vt, _ = stats.linregress(taus, velocities)
line_vt = slope_vt * taus + intercept_vt
axes[0,1].plot(taus, line_vt, 'r-', linewidth=2)
axes[0,1].set_xlabel('Val_2 (ms)')
axes[0,1].set_ylabel('Val_0 (m/s)')
axes[0,1].set_title(f'Val_0 vs Val_2\nr = {r_vt:.3f}, p = {p_vt:.2e}')
axes[0,1].grid(True, alpha=0.3)

# 3. Val_0 grouped by quality range.
#    A boolean mask built from `qualities` can only index `velocities` when
#    both have the same length; with no quality values available the panel
#    shows a placeholder instead of raising IndexError.
quality_bins = [0, 0.3, 0.5, 0.7, 1.0]
quality_labels = ['<0.3', '0.3-0.5', '0.5-0.7', '0.7-1.0']
vel_by_quality = []

if len(qualities) > 0 and len(qualities) == len(velocities):
    for i in range(len(quality_bins)-1):
        mask = (qualities >= quality_bins[i]) & (qualities < quality_bins[i+1])
        vel_by_quality.append(velocities[mask])

    axes[1,0].boxplot(vel_by_quality, labels=quality_labels)
    axes[1,0].grid(True, alpha=0.3)
else:
    axes[1,0].text(0.5, 0.5, 'Sin datos de calidad', ha='center', va='center',
                   transform=axes[1,0].transAxes)
axes[1,0].set_xlabel('Calidad del Ajuste')
axes[1,0].set_ylabel('Val_0 (m/s)')
axes[1,0].set_title('Val_0 por Calidad del Modelo')

# 4. Val_2 against Val_1 (y = taus, x = distances) with a least-squares line.
#    The title follows the "Y vs X" convention of the other panels.
axes[1,1].scatter(distances, taus, alpha=0.3, s=1)
slope_td, intercept_td, r_td, p_td, _ = stats.linregress(distances, taus)
line_td = slope_td * distances + intercept_td
axes[1,1].plot(distances, line_td, 'r-', linewidth=2)
axes[1,1].set_xlabel('Val_1 (mm)')
axes[1,1].set_ylabel('Val_2 (ms)')
axes[1,1].set_title(f'Val_2 vs Val_1\nr = {r_td:.3f}, p = {p_td:.2e}')
axes[1,1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print the regression results computed for the figure above
print("=== ANÁLISIS DE CORRELACIONES ===")
print(f"Velocidad vs Distancia: r = {r_vd:.4f}, pendiente = {slope_vd:.4f} m/s/mm")
print(f"Velocidad vs Tau: r = {r_vt:.4f}, pendiente = {slope_vt:.4f} m/s/ms")
print(f"Tau vs Distancia: r = {r_td:.4f}, pendiente = {slope_td:.4f} ms/mm")

# Share of pooled first-column values in fixed ranges.
# Note: the "> 4" and "> 6" rows overlap, so the percentages need not sum to 100.
print(f"\n=== RANGOS DE VELOCIDADES ===")
print(f"Velocidades < 2 m/s: {sum(velocities < 2)/len(velocities)*100:.1f}%")
print(f"Velocidades 2-4 m/s: {sum((velocities >= 2) & (velocities <= 4))/len(velocities)*100:.1f}%")
print(f"Velocidades > 4 m/s: {sum(velocities > 4)/len(velocities)*100:.1f}%")
print(f"Velocidades > 6 m/s: {sum(velocities > 6)/len(velocities)*100:.1f}%")

# Mean and spread of the first column within half-open ranges of the second
print(f"\n=== VELOCIDAD MEDIA POR RANGO DE DISTANCIA ===")
dist_ranges = [(0, 0.05), (0.05, 0.1), (0.1, 0.2), (0.2, 0.5), (0.5, 1.0)]
for d_min, d_max in dist_ranges:
    mask = (distances >= d_min) & (distances < d_max)
    # Skip ranges that contain no measurement
    if sum(mask) > 0:
        mean_vel = velocities[mask].mean()
        print(f"{d_min:.2f}-{d_max:.2f} mm: {mean_vel:.2f} ± {velocities[mask].std():.2f} m/s (n={sum(mask)})")

# Disabled: correlations split by quality (no quality values are extracted above)
# print(f"\n=== CORRELACIONES POR CALIDAD ===")
# high_quality = qualities > 0.5
# print(f"Alta calidad (>0.5): Vel-Dist r = {pearsonr(distances[high_quality], velocities[high_quality])[0]:.3f}")
# print(f"Baja calidad (≤0.5): Vel-Dist r = {pearsonr(distances[~high_quality], velocities[~high_quality])[0]:.3f}")

In [None]:
# Check for a possible hemispheric split of the ids (same check as an
# earlier cell; relies on `unique_i` already being defined in the kernel).
# The cut at 80 is only an example value.
left_ids = [id for id in unique_i if id < 80]  # example threshold
right_ids = [id for id in unique_i if id >= 80]

# Alternatively, count ids falling in two fixed ranges
print(f"IDs 3-81: {sum(1 for id in unique_i if 3 <= id <= 81)}")
print(f"IDs 82-155: {sum(1 for id in unique_i if 82 <= id <= 155)}")

In [None]:
# Distinct ids found in the first position of the keys of `data`
unique_i = np.unique(np.array(list(data.keys()))[:,0])

# Split the ids at 80 as a candidate hemisphere boundary
left_ids = [id for id in unique_i if id < 80]
right_ids = [id for id in unique_i if id >= 80]

print(f"IDs < 80: {len(left_ids)}")
print(f"IDs >= 80: {len(right_ids)}")

# Count ids inside two fixed ranges
ids_3_81 = sum(1 for id in unique_i if 3 <= id <= 81)
ids_82_155 = sum(1 for id in unique_i if 82 <= id <= 155)

print(f"IDs 3-81: {ids_3_81}")
print(f"IDs 82-155: {ids_82_155}")

# For a few candidate offsets, count ids whose id+offset is also present
for offset in [79, 78, 77]:
    pairs = [(id, id+offset) for id in unique_i if id+offset in unique_i]
    print(f"Parejas con offset +{offset}: {len(pairs)}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def analyze_data_structure(data):
    """Print and plot how measurements are distributed across connections.

    Parameters
    ----------
    data : dict
        Mapping from a connection key to a sequence of measurements, each
        measurement being a fixed-length sequence of numbers.

    Output
    ------
    Prints the min/max/mean number of measurements per connection, the first
    five measurements of the first non-empty connection, and per-column
    min/max/mean over a sample of up to 100 non-empty connections. Shows a
    figure with a histogram of the counts and a log-scaled box plot per
    measurement column. Returns ``None``.

    Notes
    -----
    The number of columns is taken from the sampled array, so the printed
    text and the plots always agree with the data. When ``data`` has no
    non-empty connection only the counts are printed.
    """
    # Number of measurements per connection
    lengths = [len(v) for v in data.values()]
    non_empty = [v for v in data.values() if len(v) > 0]

    print("DISTRIBUCIÓN DE MEDICIONES POR CONEXIÓN:")
    if not non_empty:
        # min()/max() and non_empty[0] below would raise on empty input.
        print(f"Conexiones vacías: {len(lengths)}")
        print("Conexiones con datos: 0")
        return
    print(f"Min: {min(lengths)}, Max: {max(lengths)}, Media: {np.mean(lengths):.1f}")
    print(f"Conexiones vacías: {sum(1 for l in lengths if l == 0)}")
    print(f"Conexiones con datos: {sum(1 for l in lengths if l > 0)}")

    # Sample: all measurements of the first 100 non-empty connections
    all_measurements = []
    for measurements in non_empty[:100]:
        all_measurements.extend(measurements)
    measurements_array = np.array(all_measurements)
    n_values = measurements_array.shape[1]

    # Histogram of the per-connection counts
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.hist(lengths, bins=50, alpha=0.7)
    plt.xlabel('Número de mediciones')
    plt.ylabel('Frecuencia')
    plt.title('Distribución de mediciones por conexión')

    # First five measurements of the first non-empty connection
    sample_measurements = non_empty[0][:5]

    print(f"\nANÁLISIS DE LOS {n_values} VALORES:")
    for i, measurement in enumerate(sample_measurements):
        print(f"Medición {i}: {[f'{val:.6f}' for val in measurement]}")

    # One box per measurement column, on a log axis
    plt.subplot(1, 2, 2)
    plt.boxplot([measurements_array[:, i] for i in range(n_values)],
                labels=[f'Valor_{i}' for i in range(n_values)])
    plt.yscale('log')
    plt.title(f'Distribución de los {n_values} valores')
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

    print(f"\nESTADÍSTICAS DE LOS {n_values} VALORES (muestra):")
    for i in range(n_values):
        vals = measurements_array[:, i]
        print(f"Valor {i}: min={vals.min():.2e}, max={vals.max():.2e}, media={vals.mean():.2e}")

analyze_data_structure(data)

In [None]:
# Rank ROIs by how connected they are and how many measurements they carry
def find_rich_rois(matrices, roi_indices, roi_names):
    """Compute per-ROI connection statistics and sort them by "richness".

    Parameters
    ----------
    matrices : dict
        Must contain square DataFrames under ``'val1_mean'``, ``'val1_std'``
        and ``'n_measurements'``; row/column position ``i`` corresponds to
        ``roi_indices[i]``.
    roi_indices : sequence of int
        ROI ids in matrix order.
    roi_names : pandas.DataFrame
        Frame with a ``roi_name`` column; id ``idx`` is named
        ``roi_names.iloc[idx]['roi_name']`` when in range, else
        ``"ROI_<idx>"``.

    Returns
    -------
    pandas.DataFrame
        One row per ROI with columns ``roi_idx``, ``roi_name``,
        ``out_conn``, ``in_conn``, ``total_conn``, ``out_var``, ``in_var``,
        ``total_meas`` and ``richness``, sorted by ``richness`` descending.
        ``richness`` is ``total_meas * (out_conn + in_conn)``.

    Notes
    -----
    NOTE(review): ``total_meas`` sums row ``i`` of ``n_measurements`` only,
    i.e. outgoing measurements — confirm whether incoming ones should be
    added.
    """
    val1_mean = matrices['val1_mean']
    val1_std = matrices['val1_std']
    n_measurements = matrices['n_measurements']

    columns = ['roi_idx', 'roi_name', 'out_conn', 'in_conn', 'total_conn',
               'out_var', 'in_var', 'total_meas', 'richness']
    rows = []

    for i, roi_idx in enumerate(roi_indices):
        roi_name = roi_names.iloc[roi_idx]['roi_name'] if roi_idx < len(roi_names) else f"ROI_{roi_idx}"

        # Outgoing connections: filled cells in row i
        out_connections = (~val1_mean.iloc[i].isna()).sum()
        out_variability = val1_std.iloc[i].mean()

        # Incoming connections: filled cells in column i
        in_connections = (~val1_mean.iloc[:, i].isna()).sum()
        in_variability = val1_std.iloc[:, i].mean()

        # Measurements stored in row i
        total_measurements = n_measurements.iloc[i].sum()

        rows.append({
            'roi_idx': roi_idx,
            'roi_name': roi_name,
            'out_conn': out_connections,
            'in_conn': in_connections,
            'total_conn': out_connections + in_connections,
            'out_var': out_variability,
            'in_var': in_variability,
            'total_meas': total_measurements,
            'richness': total_measurements * (out_connections + in_connections)
        })

    # Explicit columns keep the frame sortable when there are no ROIs
    return pd.DataFrame(rows, columns=columns).sort_values('richness', ascending=False)

# Rank the ROIs and show the ten with the highest richness
rich_rois = find_rich_rois(matrices, roi_indices, roi_names)
print("TOP 10 ROIs MÁS RICOS:")
print(rich_rois.head(10)[['roi_name', 'total_conn', 'total_meas', 'richness']])

# Keep the two top-ranked ROIs for a closer look
top_rois = rich_rois.head(2)
print(f"\nSELECCIONADOS: {top_rois['roi_name'].tolist()}")

In [None]:
# Keep the ROIs whose name contains any of these keywords (case-insensitive)
cortical_keywords = ['cortex', 'area', 'Primary', 'Secondary', 'Cingulate', 'motor', 'visual', 'somatosensory']
# na=False treats missing names as non-matching instead of producing NaN in
# the boolean mask.
cortical_rois = rich_rois[rich_rois['roi_name'].str.contains('|'.join(cortical_keywords), case=False, na=False)]

print("TOP ROIs CORTICALES:")
print(cortical_rois.head(8)[['roi_name', 'total_conn', 'total_meas']])

# Pair of cortical ROIs to look at in detail
roi1_name = "Primary somatosensory area"  
roi2_name = "Primary motor area"  # or the second richest cortical ROI

# Resolve each name to its ROI id. Elsewhere in this notebook an id is
# turned into a name with roi_names.iloc[id], so the id of a name is its row
# in roi_names (not an entry of roi_indices at that row position).
roi_ids = {}
for wanted_name in (roi1_name, roi2_name):
    hits = roi_names.index[roi_names['roi_name'] == wanted_name]
    if len(hits) > 0:
        roi_ids[wanted_name] = int(hits[0])
    else:
        # Report a missing name instead of failing with IndexError
        roi_ids[wanted_name] = None
        print(f"ROI no encontrado en roi_names: {wanted_name}")
roi1_idx = roi_ids[roi1_name]
roi2_idx = roi_ids[roi2_name]

print(f"\nConexión {roi1_name} ↔ {roi2_name}")

In [None]:
# Hand-picked cortical ROIs (id -> name) used to look for well-sampled
# cortico-cortical connections.
# NOTE(review): this rebinds `cortical_rois`, which an earlier cell used for
# a DataFrame; the id/name pairs are hard-coded — confirm them against the atlas.
cortical_rois = {
    8: 'Primary somatosensory area',
    54: 'Primary motor area', 
    10: 'Secondary visual area',
    3: 'Secondary auditory area',
    40: 'Postrhinal cortex',
    28: 'Cingulate area 2',
    66: 'Prelimbic area',
    67: 'Infralimbic area'
}

# Find well-sampled connections between the given cortical ROIs
def find_cortical_connections(data, cortical_dict):
    """List connections whose two ROIs are in ``cortical_dict``.

    A connection ``(i, j)`` is kept when both ids are keys of
    ``cortical_dict`` and it has more than 50 measurements. Each result is a
    dict with the keys ``connection`` (``"<name i> → <name j>"``),
    ``roi_pair``, ``n_measurements``, ``val0_mean`` and ``val1_mean`` (means
    of the first and second value of every measurement). Results are sorted
    by ``n_measurements``, largest first.
    """
    selected = []

    for pair, measurements in data.items():
        source, target = pair
        if source not in cortical_dict or target not in cortical_dict:
            continue
        if len(measurements) <= 50:
            continue

        first_values = [m[0] for m in measurements]
        second_values = [m[1] for m in measurements]
        entry = {
            'connection': f"{cortical_dict[source]} → {cortical_dict[target]}",
            'roi_pair': (source, target),
            'n_measurements': len(measurements),
            'val0_mean': np.mean(first_values),
            'val1_mean': np.mean(second_values),
        }
        selected.append(entry)

    # list.sort is stable, like sorted(), so ties keep insertion order
    selected.sort(key=lambda item: item['n_measurements'], reverse=True)
    return selected

# Rank the cortico-cortical connections by number of measurements
cortical_connections = find_cortical_connections(data, cortical_rois)

print("TOP CONEXIONES CORTICO-CORTICALES:")
for conn in cortical_connections[:8]:
    print(f"{conn['connection']}: {conn['n_measurements']} mediciones")

# Pick the best-sampled connection for the detailed analysis; the list can
# be empty, in which case there is nothing to select.
target_connection = cortical_connections[0] if cortical_connections else None
if target_connection is None:
    print("\nNinguna conexión cortico-cortical supera el umbral de mediciones")
else:
    print(f"\nCONEXIÓN SELECCIONADA: {target_connection['connection']}")
    print(f"Mediciones: {target_connection['n_measurements']}")

In [None]:
# Detailed look at one connection, key (10, 28), labelled below as
# Secondary visual area → Cingulate area 2
roi_pair = (10, 28)  # Secondary visual area → Cingulate area 2
measurements = data[roi_pair]

print(f"Análisis: Secondary visual area → Cingulate area 2")
print(f"Total mediciones: {len(measurements)}")

# Stack the measurements into an array (the code below reads 3 columns)
values_array = np.array(measurements)
print(f"Shape: {values_array.shape}")

# Min / max / mean / std per column
for i in range(3):
    vals = values_array[:, i]
    print(f"Valor {i}: min={vals.min():.6f}, max={vals.max():.6f}, "
          f"mean={vals.mean():.6f}, std={vals.std():.6f}")

# One histogram per column
fig, axes = plt.subplots(1, 3, figsize=(12, 5))
axes = axes.flatten()

for i in range(3):
    axes[i].hist(values_array[:, i], bins=50, alpha=0.7)
    axes[i].set_title(f'Valor {i}')
    axes[i].set_xlabel('Valor')
    axes[i].set_ylabel('Frecuencia')

plt.tight_layout()
plt.show()

# Pairwise correlation between the three columns
import pandas as pd
df_values = pd.DataFrame(values_array, columns=[f'Val_{i}' for i in range(3)])
corr_matrix = df_values.corr()
print("\nMatriz de correlación:")
print(corr_matrix)

In [None]:
# Distribution of the third column (Val_2) of `values_array` from the
# previous cell. NOTE(review): this column is treated as a delay (tau) here
# — confirm against the data source.
tau_values = values_array[:, 2]

print(f"ANÁLISIS DE RETRASOS TEMPORALES (TAU):")
print(f"Media: {tau_values.mean():.3f}")
print(f"Mediana: {np.median(tau_values):.3f}")
print(f"Rango: {tau_values.min():.3f} - {tau_values.max():.3f}")

# Selected percentiles
percentiles = [10, 25, 50, 75, 90, 95, 99]
for p in percentiles:
    print(f"P{p}: {np.percentile(tau_values, p):.3f}")

# Left: first column against the third; right: histogram of the third
plt.figure(figsize=(10, 6))
plt.subplot(1, 2, 1)
plt.scatter(values_array[:, 0], tau_values, alpha=0.5, s=1)
plt.xlabel('Conectividad (Val_0)')
plt.ylabel('Tau (retrasos)')
plt.title('Conectividad vs Retrasos')

plt.subplot(1, 2, 2)
plt.hist(tau_values, bins=50, alpha=0.7)
plt.xlabel('Tau (retrasos)')
plt.ylabel('Frecuencia')
plt.title('Distribución de retrasos')
plt.tight_layout()
plt.show()

In [None]:
# Third-column (tau) distributions for a hand-picked set of connections
target_connections = [
    ((10, 28), "Secondary visual area → Cingulate area 2"),
    ((28, 67), "Cingulate area 2 → Infralimbic area"), 
    ((28, 54), "Cingulate area 2 → Primary motor area"),
    ((25, 30), "Hippocampus → Septal region"),
    ((48, 25), "Caudate putamen → Hippocampus"),
    ((8, 54), "Primary somatosensory → Primary motor"),
]

# One panel per connection
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

summary_columns = ['connection', 'n_meas', 'tau_mean', 'tau_median', 'tau_std', 'tau_p95']
stats_summary = []

for i, (roi_pair, name) in enumerate(target_connections):
    # Skip connections that are missing or have 50 measurements or fewer,
    # and hide their panel so no empty axes are drawn.
    if roi_pair not in data or len(data[roi_pair]) <= 50:
        axes[i].set_visible(False)
        continue

    measurements = data[roi_pair]
    tau_values = np.array([m[2] for m in measurements])

    # Summary statistics. The dict is deliberately not called `stats`, so
    # the scipy `stats` module imported by another cell is not overwritten.
    conn_stats = {
        'connection': name,
        'n_meas': len(measurements),
        'tau_mean': tau_values.mean(),
        'tau_median': np.median(tau_values),
        'tau_std': tau_values.std(),
        'tau_p95': np.percentile(tau_values, 95)
    }
    stats_summary.append(conn_stats)

    # Density histogram with the mean marked
    axes[i].hist(tau_values, bins=40, alpha=0.7, density=True)
    axes[i].set_title(f"{name}\n(n={len(measurements)})")
    axes[i].set_xlabel('Tau (ms)')
    axes[i].axvline(tau_values.mean(), color='red', linestyle='--', alpha=0.8)

plt.tight_layout()
plt.show()

# Summary table; explicit columns keep the selection below valid even when
# no connection passed the filter.
df_stats = pd.DataFrame(stats_summary, columns=summary_columns)
print("Estadísticas de retrasos por conexión:")
print(df_stats[['connection', 'n_meas', 'tau_mean', 'tau_median', 'tau_std']].round(3))

In [None]:
# Connections labelled as subcortical, kept for a later comparison
subcortical_pairs = [
    ((25, 30), "Hippocampus → Septal region"),
    ((48, 25), "Caudate putamen → Hippocampus"),
    ((5, 48), "Substantia nigra → Caudate putamen")
]

print("Comparación cortico-cortical vs subcortical:")
# TODO: the comparison itself (do subcortical pairs show different tau
# ranges?) is not implemented yet; `subcortical_pairs` is not used here.

In [None]:
# Hand-picked cortical ROIs (id -> name).
# NOTE(review): same entries as the earlier `cortical_rois` dict, in a
# different order; ids are hard-coded — confirm them against the atlas.
top_cortical_rois = {
    8: 'Primary somatosensory area',
    10: 'Secondary visual area', 
    28: 'Cingulate area 2',
    54: 'Primary motor area',
    3: 'Secondary auditory area',
    40: 'Postrhinal cortex',
    66: 'Prelimbic area',
    67: 'Infralimbic area'
}

# Find well-sampled connections between the given ROIs, with tau statistics
def find_rich_cortical_connections(data, roi_dict, min_measurements=100):
    """List connections between ROIs of ``roi_dict`` with enough measurements.

    A connection ``(i, j)`` is kept when both ids are keys of ``roi_dict``
    and it has at least ``min_measurements`` measurements. Each result is a
    dict with the keys ``pair``, ``name`` (``"<name i> → <name j>"``),
    ``n_meas``, ``tau_mean``, ``tau_std`` and ``tau_values`` (the third value
    of every measurement). Results are sorted by ``n_meas``, largest first.
    """
    def _describe(pair, measurements):
        # Build the summary record for one qualifying connection
        source, target = pair
        third_values = [m[2] for m in measurements]
        return {
            'pair': (source, target),
            'name': f"{roi_dict[source]} → {roi_dict[target]}",
            'n_meas': len(measurements),
            'tau_mean': np.mean(third_values),
            'tau_std': np.std(third_values),
            'tau_values': third_values,
        }

    found = [
        _describe((i, j), measurements)
        for (i, j), measurements in data.items()
        if i in roi_dict and j in roi_dict and len(measurements) >= min_measurements
    ]
    # list.sort is stable, like sorted(), so ties keep insertion order
    found.sort(key=lambda item: item['n_meas'], reverse=True)
    return found

rich_connections = find_rich_cortical_connections(data, top_cortical_rois)

# One normalised histogram per panel for the six best-sampled connections
fig, axes = plt.subplots(2, 3, figsize=(15, 8))
axes = axes.flatten()

for ax, conn in zip(axes, rich_connections[:6]):
    ax.hist(conn['tau_values'], bins=30, alpha=0.7, density=True)
    ax.set(title=f"{conn['name']}\nn={conn['n_meas']}", xlabel='Tau (ms)')
    # Dashed red line marks the mean delay of the connection
    ax.axvline(conn['tau_mean'], color='red', linestyle='--')

plt.tight_layout()
plt.show()

# Text summary of the eight best-sampled connections
print("Top conexiones cortico-corticales:")
for conn in rich_connections[:8]:
    print(f"{conn['name']}: {conn['n_meas']} meas, τ={conn['tau_mean']:.2f}±{conn['tau_std']:.2f} ms")

In [None]:
# Widen the analysis to more connections by lowering the measurement threshold
rich_connections = find_rich_cortical_connections(
    data,
    top_cortical_rois,
    min_measurements=50,
)

# 3x3 grid with the nine best-sampled connections, one colour per panel
fig, axes = plt.subplots(3, 3, figsize=(15, 12))
axes = axes.flatten()

for i, (ax, conn) in enumerate(zip(axes, rich_connections[:9])):
    ax.hist(conn['tau_values'], bins=25, alpha=0.7, density=True, color=f'C{i}')
    # Titles are cut to 25 characters so they fit above the panel
    ax.set(title=f"{conn['name'][:25]}...\nn={conn['n_meas']}", xlabel='Tau (ms)')
    ax.axvline(conn['tau_mean'], color='red', linestyle='--', alpha=0.8)

plt.tight_layout()
plt.show()

# Ranked text table of the first twelve connections
print("Todas las conexiones cortico-corticales ricas:")
for i, conn in enumerate(rich_connections[:12]):
    print(f"{i+1:2d}. {conn['name'][:50]:<50} {conn['n_meas']:4d} meas, τ={conn['tau_mean']:.2f}±{conn['tau_std']:.2f} ms")

In [None]:
# Well-sampled subcortical / limbic ROIs: ROI index -> region name
subcortical_rois = {
    48: 'Caudate putamen',
    25: 'Hippocampus',
    24: 'Globus pallidus external',
    30: 'Septal region',
    5: 'Substantia nigra',
    20: 'RT',
    26: 'Subiculum',
    27: 'Nucleus accumbens',
}

# Same selection helper as for the cortical set, applied to the subcortical ROIs
subcortical_connections = find_rich_cortical_connections(
    data,
    subcortical_rois,
    min_measurements=50,
)

fig, axes = plt.subplots(3, 3, figsize=(15, 12))
axes = axes.flatten()

# One normalised histogram per panel for the nine best-sampled connections
for i, (ax, conn) in enumerate(zip(axes, subcortical_connections[:9])):
    ax.hist(conn['tau_values'], bins=25, alpha=0.7, density=True, color=f'C{i}')
    ax.set(title=f"{conn['name'][:30]}\nn={conn['n_meas']}", xlabel='Tau (ms)')
    # Dashed red line marks the mean delay of the connection
    ax.axvline(conn['tau_mean'], color='red', linestyle='--')

plt.tight_layout()
plt.show()

# Text summary of the eight best-sampled connections
print("Conexiones subcorticales/límbicas:")
for conn in subcortical_connections[:8]:
    print(f"{conn['name']}: {conn['n_meas']} meas, τ={conn['tau_mean']:.2f}±{conn['tau_std']:.2f} ms")

In [None]:
# Full analysis of the delay distributions
def comprehensive_delay_analysis(data, roi_names, min_measurements=20):
    """Summarise the delay distribution of every sufficiently sampled connection.

    Parameters
    ----------
    data : mapping of (i, j) -> sequence of measurements
        Element 2 of every measurement is read as the delay value
        (presumably tau in ms — confirm against the data description).
    roi_names : list, pandas.Series or pandas.DataFrame
        ROI names looked up by position. A DataFrame must carry a
        ``'roi_name'`` column; a list or Series is indexed directly, so the
        plain list returned by ``load_roi_names`` works too.
        NOTE(review): ROI indices are used as 0-based positions — confirm
        the pair keys are not 1-based.
    min_measurements : int, default 20
        Minimum number of measurements (inclusive) for a connection to be
        included.

    Returns
    -------
    pandas.DataFrame
        One row per included connection with the columns ``pair``, ``type``,
        ``n_meas``, ``tau_mean``, ``tau_median``, ``tau_std``, ``tau_min``,
        ``tau_max``, ``tau_skew``, ``tau_p25`` and ``tau_p75``. The columns
        are present even when no connection qualifies, so downstream
        ``groupby('type')`` calls keep working on an empty result.
    """
    # Name fragments that mark a ROI as cortical (hoisted: loop-invariant).
    # NOTE(review): this is a plain substring heuristic; a name containing
    # 'area' is classified as cortical whatever the structure actually is.
    cortical_keywords = ('area', 'cortex', 'motor', 'visual', 'auditory', 'somatosensory')
    columns = ['pair', 'type', 'n_meas', 'tau_mean', 'tau_median', 'tau_std',
               'tau_min', 'tau_max', 'tau_skew', 'tau_p25', 'tau_p75']

    def _roi_label(idx):
        # Resolve a ROI index to its name; fall back to a synthetic label
        # when the index lies beyond the available names.
        if idx >= len(roi_names):
            return f"ROI_{idx}"
        if isinstance(roi_names, pd.DataFrame):
            return str(roi_names.iloc[idx]['roi_name'])
        if isinstance(roi_names, pd.Series):
            return str(roi_names.iloc[idx])
        return str(roi_names[idx])

    def _is_cortical(label):
        # Case-insensitive keyword match against the ROI name.
        lowered = label.lower()
        return any(kw in lowered for kw in cortical_keywords)

    all_connections = []

    for (i, j), measurements in data.items():
        if len(measurements) < min_measurements:
            continue

        tau_values = np.array([m[2] for m in measurements])

        # Classify the connection by the anatomical type of its endpoints
        is_cortical_i = _is_cortical(_roi_label(i))
        is_cortical_j = _is_cortical(_roi_label(j))

        if is_cortical_i and is_cortical_j:
            conn_type = 'Cortico-cortical'
        elif not is_cortical_i and not is_cortical_j:
            conn_type = 'Subcortical'
        else:
            conn_type = 'Mixed'

        all_connections.append({
            'pair': (i, j),
            'type': conn_type,
            'n_meas': len(measurements),
            'tau_mean': tau_values.mean(),
            'tau_median': np.median(tau_values),
            'tau_std': tau_values.std(),
            'tau_min': tau_values.min(),
            'tau_max': tau_values.max(),
            'tau_skew': pd.Series(tau_values).skew(),
            'tau_p25': np.percentile(tau_values, 25),
            'tau_p75': np.percentile(tau_values, 75)
        })

    return pd.DataFrame(all_connections, columns=columns)

# Run the analysis over every connection in the data set
df_analysis = comprehensive_delay_analysis(data, roi_names)

# Per-type aggregates: connection count and mean sample size, mean and spread
# of the per-connection mean delay, and mean within-connection spread
print("Retrasos por tipo de conexión:")
summary = (
    df_analysis
    .groupby('type')
    .agg({
        'n_meas': ['count', 'mean'],
        'tau_mean': ['mean', 'std'],
        'tau_std': 'mean',
    })
    .round(3)
)
print(summary)

# Side-by-side histograms of the per-connection mean delay, one per type
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, conn_type in zip(axes, ['Cortico-cortical', 'Subcortical', 'Mixed']):
    subset = df_analysis.loc[df_analysis['type'] == conn_type]
    ax.hist(subset['tau_mean'], bins=20, alpha=0.7)
    ax.set(title=f'{conn_type}\n(n={len(subset)})', xlabel='Tau medio (ms)')
    # Dashed red line marks the group average of the mean delays
    ax.axvline(subset['tau_mean'].mean(), color='red', linestyle='--')

plt.tight_layout()
plt.show()

# Ten connections with the most measurements
print("\nTop 10 conexiones por número de mediciones:")
print(df_analysis.nlargest(10, 'n_meas').loc[:, ['type', 'n_meas', 'tau_mean', 'tau_std']])

In [None]:
def clean_data(data, roi_names, *, min_measurements=50, tau_range=(2.0, 10.0),
               min_after_tau=20, quality_percentile=10, min_after_quality=10):
    """Filter every connection's measurements through a fixed cleaning pipeline.

    The stages run in order; a connection that fails a stage is dropped and
    counted under that stage's key in the returned statistics:

    1. empty measurement list                      -> ``'empty_connections'``
    2. no row with only finite values              -> ``'nan_removed'``
    3. fewer than ``min_measurements`` finite rows -> ``'min_measurements'``
    4. fewer than ``min_after_tau`` rows whose column 2 lies inside
       ``tau_range`` (inclusive)                   -> ``'tau_outliers'``
    5. fewer than ``min_after_quality`` rows whose column 5 is strictly above
       the connection's own ``quality_percentile`` -> ``'quality_filtered'``

    Parameters
    ----------
    data : mapping of (i, j) -> sequence of measurements
        Each measurement must be a numeric row with at least 6 columns
        (columns 2 and 5 are read). Column 2 is presumably tau in ms and
        column 5 a quality score — confirm against the data description.
    roi_names : any
        Unused; kept so existing callers keep working.
    min_measurements, min_after_tau, min_after_quality : int
        Row-count thresholds of stages 3, 4 and 5 (defaults 50, 20, 10).
    tau_range : (float, float)
        Inclusive lower and upper bound for column 2 (default 2.0 to 10.0).
    quality_percentile : float
        Percentile of column 5, computed per connection after the tau filter,
        that rows must exceed (default 10).

    Returns
    -------
    (dict, dict)
        ``cleaned_connections`` maps each surviving pair to its remaining
        rows as nested lists; ``cleaning_stats`` holds ``'original'``, one
        drop counter per stage and ``'final'``.
    """
    tau_low, tau_high = tau_range
    cleaned_connections = {}
    cleaning_stats = {
        'original': len(data),
        'nan_removed': 0,
        'min_measurements': 0,
        'tau_outliers': 0,
        'quality_filtered': 0,
        'empty_connections': 0,
        'final': 0
    }

    for (i, j), measurements in data.items():
        # len() rather than truthiness so array-like inputs are handled too
        if measurements is None or len(measurements) == 0:
            cleaning_stats['empty_connections'] += 1
            continue
        # Convert to a 2-D array (rows = measurements) for masking
        values_array = np.array(measurements)

        # 1. Keep only rows in which every value is finite (no NaN / inf)
        valid_mask = np.all(np.isfinite(values_array), axis=1)
        if not valid_mask.any():
            cleaning_stats['nan_removed'] += 1
            continue
        values_clean = values_array[valid_mask]

        # 2. Minimum number of finite measurements
        if len(values_clean) < min_measurements:
            cleaning_stats['min_measurements'] += 1
            continue

        # 3. Tau outlier removal: keep rows with column 2 inside tau_range
        tau_values = values_clean[:, 2]
        tau_mask = (tau_values >= tau_low) & (tau_values <= tau_high)
        if tau_mask.sum() < min_after_tau:  # need enough rows after the tau filter
            cleaning_stats['tau_outliers'] += 1
            continue
        values_filtered = values_clean[tau_mask]

        # 4. Quality control: the cut is relative, not absolute — rows at or
        #    below this connection's own percentile of column 5 are dropped.
        #    Because the comparison is strict, a connection whose quality
        #    values are all identical loses every row here.
        quality_values = values_filtered[:, 5]
        quality_threshold = np.percentile(quality_values, quality_percentile)
        quality_mask = quality_values > quality_threshold
        if quality_mask.sum() < min_after_quality:
            cleaning_stats['quality_filtered'] += 1
            continue
        values_final = values_filtered[quality_mask]

        cleaned_connections[(i, j)] = values_final.tolist()
        cleaning_stats['final'] += 1

    return cleaned_connections, cleaning_stats

# Apply the cleaning pipeline to the raw connection dictionary
cleaned_data, stats = clean_data(data, roi_names)

# Report how many connections were dropped at each stage
print("Cleaning results:")
for step, count in stats.items():
    print(f"{step}: {count}")

# Guard the ratio: an empty input dictionary gives stats['original'] == 0
if stats['original']:
    print(f"Retention rate: {stats['final']/stats['original']:.1%}")
else:
    print("Retention rate: n/a (no input connections)")

In [None]:
def select_strong_connections(cleaned_data):
    """Keep the connections whose mean strength exceeds a global mu + 1 sigma cut.

    Columns 0 and 1 of every measurement row are pooled over all connections
    to obtain a global mean and standard deviation per column. A connection
    is kept when its own mean of column 0 OR of column 1 is strictly above
    the corresponding ``mean + std`` threshold. The thresholds and the
    retained fraction are printed.

    Parameters
    ----------
    cleaned_data : mapping of (i, j) -> sequence of measurement rows
        Rows need at least two numeric columns.
        NOTE(review): columns 0 and 1 are presumably connectivity-strength
        metrics — confirm against the data description.

    Returns
    -------
    dict
        Subset of ``cleaned_data`` (same keys, same measurement objects).
        An empty input yields an empty dict instead of raising
        ``ZeroDivisionError`` while printing the retained fraction.
    """
    if not cleaned_data:
        print("\nStrong connections: 0/0 (no cleaned connections to evaluate)")
        return {}

    # Single pass: pool both columns globally and remember each connection's means
    val0_columns, val1_columns = [], []
    connection_means = {}
    for pair, measurements in cleaned_data.items():
        values_array = np.array(measurements)
        val0_columns.append(values_array[:, 0])
        val1_columns.append(values_array[:, 1])
        connection_means[pair] = (
            np.mean(values_array[:, 0]),
            np.mean(values_array[:, 1]),
        )

    all_val0 = np.concatenate(val0_columns)
    all_val1 = np.concatenate(val1_columns)

    # Global sigma thresholds
    val0_mean, val0_std = np.mean(all_val0), np.std(all_val0)
    val1_mean, val1_std = np.mean(all_val1), np.std(all_val1)

    val0_threshold = val0_mean + val0_std  # mu + 1 sigma
    val1_threshold = val1_mean + val1_std

    print(f"Val0: μ={val0_mean:.6f}, σ={val0_std:.6f}, threshold={val0_threshold:.6f}")
    print(f"Val1: μ={val1_mean:.6f}, σ={val1_std:.6f}, threshold={val1_threshold:.6f}")

    # A connection is strong when either per-connection mean beats its threshold
    strong_connections = {
        pair: cleaned_data[pair]
        for pair, (conn_val0_mean, conn_val1_mean) in connection_means.items()
        if conn_val0_mean > val0_threshold or conn_val1_mean > val1_threshold
    }

    print(f"\nStrong connections: {len(strong_connections)}/{len(cleaned_data)} ({len(strong_connections)/len(cleaned_data):.1%})")

    return strong_connections

# Apply strength filtering: keep only the cleaned connections that
# select_strong_connections returns as strong
strong_data = select_strong_connections(cleaned_data)

In [None]:
# Display `results` as the cell output.
# NOTE(review): `results` is not assigned anywhere in the visible part of the
# notebook — confirm it is defined by an earlier cell on a fresh kernel run.
results