In [1]:
import os
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple
import matplotlib.pyplot as plt

import neo
import quantities as pq
from elephant.statistics import instantaneous_rate
from elephant.kernels import GaussianKernel



In [2]:
# --- Run configuration ---
# DATE_STR is used to build the data sub-directory and the trigger file names.
DATE_STR = "20240112"
# Spike 'time' values are divided by this rate to convert them to seconds.
SAMPLE_RATE = 30000.0  # Hz

# --- Input locations ---
BASE_DIR = "/media/ubuntu/sda/Monkey/sorted_result_combined"
TRIGGER_DIR = "/media/ubuntu/sda/Monkey/trigger"

# --- Arrays whose cluster/spike tables are merged ---
ARRAYS = ['Hub1-instance1_V1', 'Hub2-instance1_V1']

# Echo the configuration so the captured cell output documents the run.
print(
    f"处理日期: {DATE_STR}",
    f"合并的arrays: {ARRAYS}",
    f"采样率: {SAMPLE_RATE} Hz",
    sep="\n",
)


处理日期: 20240112
合并的arrays: ['Hub1-instance1_V1', 'Hub2-instance1_V1']
采样率: 30000.0 Hz


In [3]:
def load_combined_data(date_str: str, arrays: List[str]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the per-array cluster and spike CSV files and concatenate them.

    For every name in ``arrays`` the files ``cluster_inf_<name>.csv`` and
    ``spike_inf_<name>.csv`` are looked up under ``BASE_DIR/<date_str>``.
    An array is skipped (with a printed warning) unless both files exist.

    Args:
        date_str: Name of the sub-directory of ``BASE_DIR`` to read from.
        arrays: Array names used to build the CSV file names.

    Returns:
        ``(combined_cluster, combined_spike)``: row-wise concatenations of
        all loaded tables, each with a fresh integer index.

    Raises:
        ValueError: If no array had both of its files available.
    """
    date_dir = os.path.join(BASE_DIR, date_str)
    loaded_clusters = []
    loaded_spikes = []

    for array_name in arrays:
        cluster_csv = os.path.join(date_dir, f"cluster_inf_{array_name}.csv")
        spike_csv = os.path.join(date_dir, f"spike_inf_{array_name}.csv")

        # Both tables are required; a partially available array is skipped.
        if not (os.path.exists(cluster_csv) and os.path.exists(spike_csv)):
            print(f"警告: {array_name} 数据文件不存在")
            continue

        print(f"加载 {array_name} 数据...")
        array_clusters = pd.read_csv(cluster_csv)
        array_spikes = pd.read_csv(spike_csv)
        loaded_clusters.append(array_clusters)
        loaded_spikes.append(array_spikes)

        print(f"  - cluster数据: {len(array_clusters)} 个神经元")
        print(f"  - spike数据: {len(array_spikes)} 个spikes")

    if not (loaded_clusters and loaded_spikes):
        raise ValueError("没有找到有效的数据文件")

    # Stack the per-array tables; the original row indices are discarded.
    combined_cluster = pd.concat(loaded_clusters, ignore_index=True)
    combined_spike = pd.concat(loaded_spikes, ignore_index=True)

    print("\n合并后数据:")
    print(f"- 总神经元数: {len(combined_cluster)}")
    print(f"- 总spikes数: {len(combined_spike)}")

    return combined_cluster, combined_spike

# Load and merge the cluster/spike tables of all configured arrays.
cluster_df, spike_df = load_combined_data(date_str=DATE_STR, arrays=ARRAYS)


加载 Hub1-instance1_V1 数据...
  - cluster数据: 120 个神经元
  - spike数据: 13625231 个spikes
加载 Hub2-instance1_V1 数据...
  - cluster数据: 124 个神经元
  - spike数据: 16888023 个spikes

合并后数据:
- 总神经元数: 244
- 总spikes数: 30513254


In [4]:
def load_triggers_exclude_block1(date_str: str) -> pd.DataFrame:
    """Load and concatenate the trigger tables of blocks 2-4 (block 1 is skipped).

    Files are looked up in ``TRIGGER_DIR`` as
    ``trigger_df_monkyF_<date_str>_B<block>_instance1.csv``. Missing files
    only produce a printed warning.

    Args:
        date_str: Date string embedded in the trigger file names.

    Returns:
        The concatenated trigger table. If a ``valid_image`` column exists,
        only rows where it equals 1 are kept. NaN values in ``train_image``,
        ``test_image``, ``image_rep_num`` and ``single_train_rep`` are
        replaced by 0.

    Raises:
        ValueError: If no trigger file exists, or if a required column is
            missing from the concatenated table.
    """
    # Block 1 is intentionally not part of this list.
    found_paths = []
    for block_num in (2, 3, 4):
        candidate = os.path.join(
            TRIGGER_DIR, f"trigger_df_monkyF_{date_str}_B{block_num}_instance1.csv"
        )
        if not os.path.exists(candidate):
            print(f"警告: trigger文件不存在: {candidate}")
            continue
        found_paths.append(candidate)
        print(f"找到trigger文件: Block{block_num}_instance1")

    if not found_paths:
        raise ValueError("没有找到有效的trigger文件")

    # Read every available block and stack them into one table.
    combined_triggers = pd.concat(
        [pd.read_csv(path) for path in found_paths], ignore_index=True
    )

    # Validate the schema; the first missing column (in this order) is reported.
    required_cols = ['start_time', 'stop_time', 'train_image', 'test_image', 'image_rep_num', 'single_train_rep']
    missing_cols = [col for col in required_cols if col not in combined_triggers.columns]
    if missing_cols:
        raise ValueError(f"Trigger文件缺少列: {missing_cols[0]}")

    # Keep only trials flagged as valid image presentations, when the flag exists.
    if 'valid_image' in combined_triggers.columns:
        is_valid = combined_triggers['valid_image'] == 1
        combined_triggers = combined_triggers[is_valid].copy()

    # Missing labels/counters become 0.
    for col in ('train_image', 'test_image', 'image_rep_num', 'single_train_rep'):
        combined_triggers[col] = combined_triggers[col].fillna(0)

    earliest_start = combined_triggers['start_time'].min()
    latest_stop = combined_triggers['stop_time'].max()
    print("\n加载的trigger数据:")
    print(f"- 总试次数: {len(combined_triggers)}")
    print(f"- 时间范围: {earliest_start:.2f} - {latest_stop:.2f} 秒")

    return combined_triggers

# Load the trigger table (block 1 excluded).
triggers_df = load_triggers_exclude_block1(date_str=DATE_STR)


找到trigger文件: Block2_instance1
找到trigger文件: Block3_instance1
找到trigger文件: Block4_instance1

加载的trigger数据:
- 总试次数: 3422
- 时间范围: 8.64 - 1245.65 秒


In [5]:
def build_neuron_index_combined(cluster_df: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[Tuple[str, int], int]]:
    """Assign a consecutive index to every unique (array, cluster_id) pair.

    If the table has a ``group`` column, only rows whose group is ``'good'``
    are used.

    Args:
        cluster_df: Cluster table with at least ``array`` and ``cluster_id``.

    Returns:
        ``(neurons, mapping)`` where ``neurons`` has the columns ``array``
        (str), ``cluster_id`` (int) and ``neuron_index`` (0..n-1, in order of
        first appearance), and ``mapping`` maps ``(array, cluster_id)`` to
        that index.

    Raises:
        ValueError: If ``array`` or ``cluster_id`` is missing.
    """
    selected = cluster_df
    if 'group' in selected.columns:
        # Restrict to units labelled 'good'.
        selected = selected[selected['group'] == 'good'].copy()

    if not {'array', 'cluster_id'}.issubset(selected.columns):
        raise ValueError('cluster数据缺少array或cluster_id列')

    # Normalise dtypes, drop repeated pairs and number what remains.
    neurons = (
        selected[['array', 'cluster_id']]
        .astype({'array': str, 'cluster_id': int})
        .drop_duplicates()
        .reset_index(drop=True)
    )
    neurons['neuron_index'] = np.arange(len(neurons), dtype=int)

    # Lookup table from (array, cluster_id) to the row index of the matrix.
    mapping: Dict[Tuple[str, int], int] = {
        (array_name, cluster_id): int(neuron_index)
        for array_name, cluster_id, neuron_index in zip(
            neurons['array'], neurons['cluster_id'], neurons['neuron_index']
        )
    }

    print("构建神经元索引:")
    print(f"- 总神经元数: {len(neurons)}")
    print("- 各array的神经元数:")
    # sort=False keeps arrays in order of first appearance.
    for array_name, count in neurons.groupby('array', sort=False).size().items():
        print(f"  - {array_name}: {count}")

    return neurons, mapping

# Build the neuron ordering and the (array, cluster_id) -> index lookup.
neuron_order, neuron_mapping = build_neuron_index_combined(cluster_df=cluster_df)


构建神经元索引:
- 总神经元数: 244
- 各array的神经元数:
  - Hub1_instance1: 120
  - Hub2_instance1: 124


In [6]:
def group_spike_times_combined(spike_df: pd.DataFrame, sample_rate: float) -> Dict[Tuple[str, int], np.ndarray]:
    """Collect spike times (in seconds) per (array, cluster_id) pair.

    Args:
        spike_df: Spike table with the columns ``array``, ``cluster_id`` and
            ``time``.
        sample_rate: Divisor applied to ``time`` to obtain seconds.

    Returns:
        Dict mapping ``(array, cluster_id)`` to a 1-D array of spike times in
        seconds, in the row order of ``spike_df``.

    Raises:
        ValueError: If one of the required columns is missing.
    """
    missing_cols = [col for col in ('array', 'cluster_id', 'time') if col not in spike_df.columns]
    if missing_cols:
        # Only the first missing column is reported.
        raise ValueError(f'spike数据缺少列: {missing_cols[0]}')

    # Working table with normalised key dtypes and times converted to seconds.
    spikes = pd.DataFrame({
        'array': spike_df['array'].astype(str),
        'cluster_id': spike_df['cluster_id'].astype(int),
        't_sec': spike_df['time'].astype(float) / float(sample_rate),
    })

    grouped: Dict[Tuple[str, int], np.ndarray] = {
        (array_name, int(cluster_id)): times.to_numpy()
        for (array_name, cluster_id), times in spikes.groupby(['array', 'cluster_id'])['t_sec']
    }

    total_spikes = sum(len(times) for times in grouped.values())
    print("分组spike时间:")
    print(f"- 总神经元数: {len(grouped)}")
    print(f"- 总spike数: {total_spikes}")

    return grouped

# Group spike times per neuron, converted to seconds.
spikes_by_neuron = group_spike_times_combined(spike_df=spike_df, sample_rate=SAMPLE_RATE)


分组spike时间:
- 总神经元数: 244
- 总spike数: 30513254


In [7]:
def make_trigger_key(row: pd.Series) -> str:
    """Build the dictionary key ``<phase>_<image_id>_<rep_num>_<single_rep>`` for a trial.

    A row whose ``train_image`` is non-zero is a ``'train'`` trial and uses
    that value as image id; otherwise it is a ``'test'`` trial and uses
    ``test_image``. NaN fields are treated as 0.

    Args:
        row: Trigger row providing ``train_image``, ``test_image``,
            ``image_rep_num`` and ``single_train_rep``.

    Returns:
        The key string.
    """
    def _as_int(value) -> int:
        # NaN counts as 0, everything else is truncated to int.
        return 0 if pd.isna(value) else int(value)

    train_id = _as_int(row['train_image'])
    test_id = _as_int(row['test_image'])

    # A non-zero train image decides the phase.
    phase, image_id = ('train', train_id) if train_id != 0 else ('test', test_id)

    rep_num = _as_int(row['image_rep_num'])
    single_rep = _as_int(row['single_train_rep'])

    return f"{phase}_{image_id}_{rep_num}_{single_rep}"


In [8]:
def compute_firing_rate_matrix_for_window_instant(
    start_sec: float,
    stop_sec: float,
    neuron_order: pd.DataFrame,
    spike_times_by_neuron: Dict[Tuple[str, int], np.ndarray],
    sampling_period_sec: float,
    kernel_sigma_sec: float
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute a Gaussian-smoothed firing-rate matrix for one time window.

    For every row of ``neuron_order`` the spikes falling in
    ``[start_sec, stop_sec)`` are shifted to start at 0 and passed to
    ``instantaneous_rate`` with a ``GaussianKernel`` of the given sigma.

    Args:
        start_sec: Window start in seconds.
        stop_sec: Window end in seconds (exclusive for spike selection).
        neuron_order: Table with ``neuron_index``, ``array`` and
            ``cluster_id``; ``neuron_index`` selects the output row.
        spike_times_by_neuron: Spike times in seconds keyed by
            ``(array, cluster_id)``.
        sampling_period_sec: Sampling period of the rate estimate, in seconds.
        kernel_sigma_sec: Sigma of the Gaussian kernel, in seconds.

    Returns:
        ``(fr, edges)``: ``fr`` has shape ``(len(neuron_order), n_bins)`` and
        is zero for neurons without spikes in the window (values presumably
        in Hz - confirm against the elephant documentation); ``edges`` holds
        the ``n_bins + 1`` bin edges in absolute seconds.

    Raises:
        RuntimeError: If any of the neo / elephant / quantities names is None.
    """
    # NOTE(review): only spikes inside the window are smoothed, so rates close
    # to the window borders are presumably underestimated - confirm intended.
    if any(dep is None for dep in (neo, instantaneous_rate, GaussianKernel, pq)):
        raise RuntimeError('需要安装neo和elephant库以使用instantaneous_rate计算发放率')

    duration = max(0.0, float(stop_sec) - float(start_sec))
    if duration > 0:
        n_bins = int(np.ceil(duration / sampling_period_sec))
        edges = start_sec + np.arange(n_bins + 1, dtype=float) * sampling_period_sec
        # Make sure the last edge reaches stop_sec (adds one more bin if not).
        if edges[-1] < stop_sec:
            edges = np.append(edges, stop_sec)
            n_bins = len(edges) - 1
    else:
        # Degenerate window: a single bin of one sampling period.
        edges = np.array([start_sec, start_sec + sampling_period_sec], dtype=float)
        n_bins = 1

    fr = np.zeros((len(neuron_order), n_bins), dtype=float)

    kernel = GaussianKernel(sigma=kernel_sigma_sec * pq.s)
    sampling_period = sampling_period_sec * pq.s
    # The spike train must span at least one sampling period.
    t_stop = max(duration, sampling_period_sec) * pq.s

    for _, neuron in neuron_order.iterrows():
        out_row = int(neuron['neuron_index'])
        neuron_key = (neuron['array'], int(neuron['cluster_id']))
        spike_times = spike_times_by_neuron.get(neuron_key)

        if spike_times is None or spike_times.size == 0:
            continue

        # Spike times relative to the window start, in seconds.
        in_window = (spike_times >= start_sec) & (spike_times < stop_sec)
        rel_times = spike_times[in_window] - start_sec
        if rel_times.size == 0:
            continue

        train = neo.SpikeTrain(rel_times * pq.s, t_start=0 * pq.s, t_stop=t_stop)
        rate_signal = instantaneous_rate(train, sampling_period=sampling_period, kernel=kernel)
        rate_values = np.asarray(rate_signal.magnitude).reshape(-1)

        # Truncate to n_bins; a shorter estimate leaves the remaining bins at 0.
        n_copy = min(rate_values.size, n_bins)
        fr[out_row, :n_copy] = rate_values[:n_copy]

    return fr, edges


In [9]:
def build_firing_rate_matrices_combined(
    triggers_df: pd.DataFrame,
    neuron_order: pd.DataFrame,
    spikes_by_neuron: Dict[Tuple[str, int], np.ndarray],
    sampling_period_sec: float,
    kernel_sigma_sec: float
) -> Dict[str, Dict[str, object]]:
    """Build one firing-rate matrix per trigger window.

    Each trial is keyed by ``make_trigger_key(row)``. Besides the matrix, the
    entry stores the trial metadata that the save and plotting cells of this
    notebook read.

    Args:
        triggers_df: Trigger table with ``start_time``, ``stop_time``,
            ``train_image``, ``test_image``, ``image_rep_num`` and
            ``single_train_rep``.
        neuron_order: Neuron table forwarded to the rate computation; the same
            object is referenced from every entry.
        spikes_by_neuron: Spike times in seconds keyed by (array, cluster_id).
        sampling_period_sec: Sampling period of the rate estimate, in seconds.
        kernel_sigma_sec: Sigma of the Gaussian kernel, in seconds.

    Returns:
        Dict mapping the trial key to a dict with the keys ``phase``,
        ``image_id``, ``image_rep_num``, ``single_train_rep``, ``start_time``,
        ``stop_time``, ``bin_width_sec``, ``neuron_order`` and ``firing_rate``.
        Trials whose rate computation raises are reported and skipped.
    """
    def _int_or_zero(value) -> int:
        # Same NaN convention as make_trigger_key: missing values count as 0.
        return 0 if pd.isna(value) else int(value)

    fr_dict: Dict[str, Dict[str, object]] = {}
    n_trials = len(triggers_df)

    print(f"开始构建发放率矩阵，共 {n_trials} 个试次...")

    for idx, (_, row) in enumerate(triggers_df.iterrows()):
        if idx % 100 == 0:
            print(f"处理进度: {idx}/{n_trials} ({idx/n_trials*100:.1f}%)")

        start_sec = float(row['start_time'])
        stop_sec = float(row['stop_time'])
        key = make_trigger_key(row)

        # Only the rate computation is best-effort; a failing trial is skipped.
        try:
            fr_mat, _ = compute_firing_rate_matrix_for_window_instant(
                start_sec=start_sec,
                stop_sec=stop_sec,
                neuron_order=neuron_order,
                spike_times_by_neuron=spikes_by_neuron,
                sampling_period_sec=sampling_period_sec,
                kernel_sigma_sec=kernel_sigma_sec,
            )
        except Exception as e:
            print(f"警告: 处理试次 {key} 时出错: {e}")
            continue

        # A non-zero train image marks a train trial, otherwise it is a test trial.
        train_val = _int_or_zero(row['train_image'])
        test_val = _int_or_zero(row['test_image'])
        if train_val != 0:
            phase, image_id = 'train', train_val
        else:
            phase, image_id = 'test', test_val

        # Keys are not guaranteed to be unique; make an overwrite visible.
        if key in fr_dict:
            print(f"警告: 试次键 {key} 重复，先前的矩阵将被覆盖")

        fr_dict[key] = {
            'phase': phase,
            'image_id': image_id,
            'image_rep_num': _int_or_zero(row['image_rep_num']),
            'single_train_rep': _int_or_zero(row['single_train_rep']),
            'start_time': start_sec,
            'stop_time': stop_sec,
            'bin_width_sec': sampling_period_sec,
            'neuron_order': neuron_order,
            # NOTE(review): the last column is dropped, presumably to discard
            # the trailing (possibly partial) bin - confirm this is intended.
            'firing_rate': fr_mat[:, :-1],
        }

    print(f"\n完成！成功构建了 {len(fr_dict)} 个发放率矩阵")
    return fr_dict

# Parameters of the rate estimate, both in seconds.
SAMPLING_PERIOD_SEC = 0.01
KERNEL_SIGMA_SEC = 0.02

# Compute one firing-rate matrix per trigger window.
firing_rate_dict = build_firing_rate_matrices_combined(
    triggers_df=triggers_df,
    neuron_order=neuron_order,
    spikes_by_neuron=spikes_by_neuron,
    sampling_period_sec=SAMPLING_PERIOD_SEC,
    kernel_sigma_sec=KERNEL_SIGMA_SEC,
)


开始构建发放率矩阵，共 3422 个试次...
处理进度: 0/3422 (0.0%)
处理进度: 100/3422 (2.9%)
处理进度: 200/3422 (5.8%)
处理进度: 300/3422 (8.8%)
处理进度: 400/3422 (11.7%)
处理进度: 500/3422 (14.6%)
处理进度: 600/3422 (17.5%)
处理进度: 700/3422 (20.5%)
处理进度: 800/3422 (23.4%)
处理进度: 900/3422 (26.3%)
处理进度: 1000/3422 (29.2%)
处理进度: 1100/3422 (32.1%)
处理进度: 1200/3422 (35.1%)
处理进度: 1300/3422 (38.0%)
处理进度: 1400/3422 (40.9%)
处理进度: 1500/3422 (43.8%)
处理进度: 1600/3422 (46.8%)
处理进度: 1700/3422 (49.7%)
处理进度: 1800/3422 (52.6%)
处理进度: 1900/3422 (55.5%)
处理进度: 2000/3422 (58.4%)
处理进度: 2100/3422 (61.4%)
处理进度: 2200/3422 (64.3%)
处理进度: 2300/3422 (67.2%)
处理进度: 2400/3422 (70.1%)
处理进度: 2500/3422 (73.1%)
处理进度: 2600/3422 (76.0%)
处理进度: 2700/3422 (78.9%)
处理进度: 2800/3422 (81.8%)
处理进度: 2900/3422 (84.7%)
处理进度: 3000/3422 (87.7%)
处理进度: 3100/3422 (90.6%)
处理进度: 3200/3422 (93.5%)
处理进度: 3300/3422 (96.4%)
处理进度: 3400/3422 (99.4%)

完成！成功构建了 3422 个发放率矩阵


In [12]:
import pickle

# Persist the whole firing-rate dictionary in the working directory.
# The month/day suffix is taken from DATE_STR ("20240112" -> "0112") so the
# file name cannot drift from the session that was actually processed.
pickle_path = f"firing_rate_summary_{DATE_STR[4:]}.pkl"
with open(pickle_path, 'wb') as f:
    pickle.dump(firing_rate_dict, f)

In [None]:
def save_firing_rate_dict(fr_dict: Dict[str, Dict[str, object]], output_dir: str, date_str: str):
    """Write the firing-rate matrices to an ``.npz`` file and a summary CSV.

    Only ``firing_rate`` is required in each entry. All other metadata
    (``phase``, ``image_id``, ``image_rep_num``, ``single_train_rep``,
    ``start_time``, ``stop_time``, ``neuron_order``) is optional: missing
    values are written as empty cells, and the neuron table is stored only if
    the first entry carries one.

    Args:
        fr_dict: Mapping from trial key to the per-trial dict.
        output_dir: Directory to write to; created if it does not exist.
        date_str: Date string embedded in both output file names.

    Returns:
        ``(npz_path, summary_path)``: paths of the two files written.
    """
    os.makedirs(output_dir, exist_ok=True)

    # --- matrices: one "<key>__firing_rate" array per trial ---
    npz_path = os.path.join(output_dir, f"firing_rate_matrices_{date_str}_combined.npz")
    pack = {f'{key}__firing_rate': data['firing_rate'] for key, data in fr_dict.items()}

    # The neuron table is taken from the first entry, when it is present.
    first_entry = next(iter(fr_dict.values()), None)
    neuron_order = first_entry.get('neuron_order') if first_entry is not None else None
    if neuron_order is not None:
        pack['neuron_order_csv'] = neuron_order.to_csv(index=False).encode('utf-8')

    np.savez_compressed(npz_path, **pack)
    print(f"发放率矩阵已保存到: {npz_path}")

    # --- summary: one row per trial ---
    summary_path = os.path.join(output_dir, f"firing_rate_summary_{date_str}_combined.csv")
    summary_columns = [
        'key', 'phase', 'image_id', 'image_rep_num', 'single_train_rep',
        'start_time', 'stop_time', 'duration', 'n_neurons', 'n_bins',
    ]
    summary_data = []

    for key, data in fr_dict.items():
        start_time = data.get('start_time')
        stop_time = data.get('stop_time')
        # The duration is only defined when both window limits are known.
        if start_time is None or stop_time is None:
            duration = None
        else:
            duration = stop_time - start_time

        summary_data.append({
            'key': key,
            'phase': data.get('phase'),
            'image_id': data.get('image_id'),
            'image_rep_num': data.get('image_rep_num'),
            'single_train_rep': data.get('single_train_rep'),
            'start_time': start_time,
            'stop_time': stop_time,
            'duration': duration,
            'n_neurons': data['firing_rate'].shape[0],
            'n_bins': data['firing_rate'].shape[1]
        })

    # Explicit columns keep the header even when there are no trials.
    summary_df = pd.DataFrame(summary_data, columns=summary_columns)
    summary_df.to_csv(summary_path, index=False)
    print(f"汇总信息已保存到: {summary_path}")

    return npz_path, summary_path

# Write the matrices and the per-trial summary to disk.
output_dir = "/media/ubuntu/sda/Monkey/sorted_result_combined/firing_rate_matrices"
npz_path, summary_path = save_firing_rate_dict(
    fr_dict=firing_rate_dict,
    output_dir=output_dir,
    date_str=DATE_STR,
)


发放率矩阵已保存到: /media/ubuntu/sda/Monkey/sorted_result_combined/firing_rate_matrices/firing_rate_matrices_20240112_combined.npz
汇总信息已保存到: /media/ubuntu/sda/Monkey/sorted_result_combined/firing_rate_matrices/firing_rate_summary_20240112_combined.csv


In [None]:
# Visualization example: four-panel overview of the first trial in the dict.
if firing_rate_dict:
    # Take the first stored trial as the example.
    sample_key = next(iter(firing_rate_dict))
    sample_data = firing_rate_dict[sample_key]
    fr_matrix = sample_data['firing_rate']

    # Entries may lack timing metadata; fall back to the notebook's sampling
    # period and to a window length derived from the number of bins.
    bin_width_sec = sample_data.get('bin_width_sec', SAMPLING_PERIOD_SEC)
    if 'start_time' in sample_data and 'stop_time' in sample_data:
        window_sec = sample_data['stop_time'] - sample_data['start_time']
    else:
        window_sec = fr_matrix.shape[1] * bin_width_sec

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # 1. Heat map of the whole firing-rate matrix (neurons x time bins)
    ax1 = axes[0, 0]
    im1 = ax1.imshow(fr_matrix, aspect='auto', cmap='viridis', interpolation='nearest')
    ax1.set_title(f'发放率矩阵热图\n{sample_key}')
    ax1.set_xlabel('时间bins')
    ax1.set_ylabel('神经元索引')
    plt.colorbar(im1, ax=ax1, label='发放率 (Hz)')

    # 2. Population-average firing rate over time
    ax2 = axes[0, 1]
    mean_fr = np.mean(fr_matrix, axis=0)
    time_bins = np.arange(len(mean_fr)) * bin_width_sec
    ax2.plot(time_bins, mean_fr, 'b-', linewidth=2)
    ax2.set_title('平均发放率随时间变化')
    ax2.set_xlabel('时间 (秒)')
    ax2.set_ylabel('平均发放率 (Hz)')
    ax2.grid(True, alpha=0.3)

    # 3. Distribution of the per-neuron mean firing rate
    ax3 = axes[1, 0]
    neuron_mean_fr = np.mean(fr_matrix, axis=1)
    ax3.hist(neuron_mean_fr, bins=50, alpha=0.7, color='skyblue', edgecolor='black')
    ax3.set_title('神经元平均发放率分布')
    ax3.set_xlabel('平均发放率 (Hz)')
    ax3.set_ylabel('神经元数量')
    ax3.grid(True, alpha=0.3)

    # 4. Text panel with summary statistics
    ax4 = axes[1, 1]
    ax4.axis('off')
    stats_text = f"""
    统计信息:
    神经元数: {fr_matrix.shape[0]}
    时间bins: {fr_matrix.shape[1]}
    时间窗口: {window_sec:.3f} 秒
    平均发放率: {np.mean(fr_matrix):.2f} Hz
    最大发放率: {np.max(fr_matrix):.2f} Hz
    最小发放率: {np.min(fr_matrix):.2f} Hz
    Phase: {sample_data['phase']}
    Image ID: {sample_data['image_id']}
    """
    ax4.text(0.1, 0.9, stats_text, transform=ax4.transAxes, fontsize=12,
             verticalalignment='top', fontfamily='monospace')

    plt.tight_layout()
    plt.show()

    print(f"\n可视化完成！示例数据来自试次: {sample_key}")
