In [1]:
import numpy as np
import scipy.io as sio
import spikeinterface.extractors as se
import spikeinterface as si
import spikeinterface.sorters as ss
from pathlib import Path
import matplotlib.pyplot as plt
import json


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run configuration: which animal/session to process and where inputs and outputs live.
monkey = 'monkeyF'
date = '20240115'  # change this to process a different recording date
base_dir = '/media/ubuntu/sda/Monkey/TVSD'
output_base_dir = '/media/ubuntu/sda/Monkey/sorted_result_combined'
probe_file = '/media/ubuntu/sda/Monkey/scripts/probe_256.json'
mapping_file = '/media/ubuntu/sda/Monkey/TVSD/monkeyF/_logs/1024chns_mapping_20220105.mat'

# Echo the configuration so the saved notebook output records what was run.
config_summary = (
    ("猴子", monkey),
    ("日期", date),
    ("基础目录", base_dir),
    ("输出目录", output_base_dir),
    ("Probe文件", probe_file),
    ("映射文件", mapping_file),
)
print("配置参数:")
for label, value in config_summary:
    print(f"  {label}: {value}")


配置参数:
  猴子: monkeyF
  日期: 20240115
  基础目录: /media/ubuntu/sda/Monkey/TVSD
  输出目录: /media/ubuntu/sda/Monkey/sorted_result_combined
  Probe文件: /media/ubuntu/sda/Monkey/scripts/probe_256.json
  映射文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/_logs/1024chns_mapping_20220105.mat


In [3]:
# Resolve the session's input/output directories and load the configuration files.
data_dir = Path(base_dir, monkey, date)
output_dir = Path(output_base_dir, date)
output_dir.mkdir(parents=True, exist_ok=True)

# Channel mapping from the .mat file, flattened and shifted by one to 0-based indices.
mapping_data = sio.loadmat(mapping_file)
mapping = mapping_data['mapping'].flatten() - 1

# Per-channel brain-region code: 1 = V1, 2 = V4, 3 = IT.
# Every channel starts as V1; the spans below (start, stop, code) overwrite the rest.
if monkey == 'monkeyN':
    roi_spans = [
        (512, 768, 2),   # V4 (channels 513-768)
        (768, 1024, 3),  # IT (channels 769-1024)
    ]
else:
    roi_spans = [
        (512, 832, 3),   # IT (channels 513-832)
        (832, 1024, 2),  # V4 (channels 833-1024)
    ]
rois = np.ones(1024)
for span_start, span_stop, roi_code in roi_spans:
    rois[span_start:span_stop] = roi_code

# Probe description (JSON) as a plain dict.
probe_config = json.loads(Path(probe_file).read_text())

print("\n".join([
    f"数据目录: {data_dir}",
    f"输出目录: {output_dir}",
    f"映射数组形状: {mapping.shape}",
    f"脑区数组形状: {rois.shape}",
    "Probe配置已加载",
]))


数据目录: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115
输出目录: /media/ubuntu/sda/Monkey/sorted_result_combined/20240115
映射数组形状: (1024,)
脑区数组形状: (1024,)
Probe配置已加载


In [4]:
# Discover every Block directory of the session.
def _block_sort_key(block_dir):
    """Order Block directories by their numeric suffix.

    A plain name sort is lexicographic and would put ``Block_10`` before
    ``Block_2``; since the blocks are later concatenated in this order, they
    must be sorted numerically. Names without a numeric suffix sort last.
    """
    suffix = block_dir.name.split('_')[1]
    if suffix.isdecimal():
        return (0, int(suffix), block_dir.name)
    return (1, 0, block_dir.name)


block_dirs = sorted(
    (d for d in data_dir.iterdir() if d.is_dir() and d.name.startswith('Block_')),
    key=_block_sort_key,
)
print(f"找到 {len(block_dirs)} 个Block: {[d.name for d in block_dirs]}")

# Hub/instance pairs; each pair corresponds to one .ns6 file per Block.
hub_instance_combinations = [
    ('Hub1', 'instance1'),
    ('Hub1', 'instance2'),
    ('Hub2', 'instance1'),
    ('Hub2', 'instance2'),
]

print(f"Hub-instance组合: {hub_instance_combinations}")

# Report which of the expected recording files are actually present on disk.
for hub_name, instance_name in hub_instance_combinations:
    print(f"\n检查 {hub_name}-{instance_name}:")
    for block_dir in block_dirs:
        block_num = block_dir.name.split('_')[1]
        # File names carry the block number zero-padded to three digits (e.g. B001).
        file_path = block_dir / f"{hub_name}-{instance_name}_B{block_num.zfill(3)}.ns6"
        exists = "✓" if file_path.exists() else "✗"
        print(f"  {exists} {file_path}")


找到 5 个Block: ['Block_1', 'Block_2', 'Block_3', 'Block_4', 'Block_5']
Hub-instance组合: [('Hub1', 'instance1'), ('Hub1', 'instance2'), ('Hub2', 'instance1'), ('Hub2', 'instance2')]

检查 Hub1-instance1:
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_1/Hub1-instance1_B001.ns6
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_2/Hub1-instance1_B002.ns6
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub1-instance1_B003.ns6
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_4/Hub1-instance1_B004.ns6
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_5/Hub1-instance1_B005.ns6

检查 Hub1-instance2:
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_1/Hub1-instance2_B001.ns6
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_2/Hub1-instance2_B002.ns6
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub1-instance2_B003.ns6
  ✓ /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_4/Hub1-instance2_B004.ns6
  ✓ /media/ubuntu/sda/Monkey/TVSD/m

In [5]:
# Helper that gathers and concatenates the recordings of one Hub/instance pair.

# First index into `rois` of the 256-channel slice belonging to each Hub/instance file.
_FILE_START_INDEX = {
    ('Hub1', 'instance1'): 0,
    ('Hub2', 'instance1'): 256,
    ('Hub1', 'instance2'): 512,
    ('Hub2', 'instance2'): 768,
}

# ROI codes used in `rois`; any other code is labelled 'IT'.
_ROI_NAMES = {1: 'V1', 2: 'V4'}


def process_hub_instance(hub_name, instance_name):
    """Concatenate one Hub/instance's recordings across all Blocks.

    Looks in every directory of the module-level ``block_dirs`` for
    ``{hub_name}-{instance_name}_B{NNN}.ns6``, reads each file that exists,
    and concatenates them in ``block_dirs`` order. The dominant brain region
    of the pair's 256-channel slice of ``rois`` names the output folder,
    which is created under the module-level ``output_dir``.

    Args:
        hub_name: Hub label, e.g. ``'Hub1'``.
        instance_name: Instance label, e.g. ``'instance1'``.

    Returns:
        ``None`` if no recording file was found in any Block; otherwise a
        tuple ``(combined_recording, region_name, hub_instance_output_dir)``.

    Raises:
        ValueError: if files exist but the Hub/instance pair has no known
            channel offset. Previously this surfaced as an UnboundLocalError
            after the files had already been opened.
    """
    print(f"\n{'='*60}")
    print(f"处理 {hub_name}-{instance_name}")
    print(f"{'='*60}")

    # Collect this Hub/instance's file from every Block that has one.
    recording_files = []
    for block_dir in block_dirs:
        block_num = block_dir.name.split('_')[1]
        file_path = block_dir / f"{hub_name}-{instance_name}_B{block_num.zfill(3)}.ns6"

        if file_path.exists():
            recording_files.append(file_path)
            print(f"  找到文件: {file_path}")
        else:
            print(f"  警告: 未找到文件 {file_path}")

    if not recording_files:
        print(f"  跳过 {hub_name}-{instance_name}: 未找到任何文件")
        return None

    # Validate the pair before opening any file so a typo fails fast and clearly.
    try:
        file_start_idx = _FILE_START_INDEX[(hub_name, instance_name)]
    except KeyError:
        known = ', '.join(f"{hub}-{inst}" for hub, inst in _FILE_START_INDEX)
        raise ValueError(
            f"Unknown Hub/instance combination {hub_name}-{instance_name}; expected one of: {known}"
        ) from None

    # Read each Block's file and flatten multi-segment files into a single segment.
    recordings = []
    for file_path in recording_files:
        print(f"  读取文件: {file_path}")
        recording = se.read_blackrock(file_path)

        num_segments = recording.get_num_segments()
        if num_segments > 1:
            recording = si.concatenate_recordings(
                [recording.select_segments(seg) for seg in range(num_segments)]
            )

        recordings.append(recording)

    # Join all Blocks into one continuous recording.
    print(f"  合并 {len(recordings)} 个recording...")
    combined_recording = si.concatenate_recordings(recordings)

    # The most frequent ROI code within this file's 256 channels decides the region label.
    roi_counts = np.bincount(rois[file_start_idx:file_start_idx + 256].astype(int))
    primary_roi = int(np.argmax(roi_counts))
    region_name = _ROI_NAMES.get(primary_roi, 'IT')

    print(f"  主要脑区: {region_name}")
    print(f"  脑区分布: {roi_counts}")

    # Output folder for this pair; parents=True so it also works if output_dir was removed.
    hub_instance_output_dir = output_dir / f"{hub_name}-{instance_name}_{region_name}"
    hub_instance_output_dir.mkdir(parents=True, exist_ok=True)

    return combined_recording, region_name, hub_instance_output_dir

print("处理函数已定义")


处理函数已定义


In [None]:
from probeinterface import write_probeinterface, read_probeinterface
import sys
import spikeinterface as si
import matplotlib.pyplot as plt
import spikeinterface.extractors as se
import spikeinterface.preprocessing as spre
import spikeinterface.sorters as ss
import spikeinterface.qualitymetrics as sqm

import spikeinterface.exporters as sexp

# Analyzer extensions to compute after sorting, with the non-default parameters they need.
# These do not depend on the Hub/instance being processed, so they are defined once.
extensions_to_compute = [
    "random_spikes",
    "waveforms",
    "noise_levels",
    "templates",
    "spike_amplitudes",
    "unit_locations",
    "spike_locations",
    "correlograms",
    "template_similarity",
]

extension_params = {
    "unit_locations": {"method": "center_of_mass"},
    "spike_locations": {"ms_before": 0.1},
    "correlograms": {"bin_ms": 0.1},
    "template_similarity": {"method": "cosine_similarity"},
}

# Probe layout shared by every Hub/instance file. Despite the variable name, the
# probe file is probe_256.json and 256 device channel indices are assigned.
probe_30channel = read_probeinterface(probe_file)
probe_30channel.set_global_device_channel_indices(list(range(256)))

# NOTE(review): index 0 (Hub1-instance1) is not processed here — presumably it was
# sorted in an earlier run; confirm before relying on this list.
for i in [1, 2, 3]:
    hub_name, instance_name = hub_instance_combinations[i]
    result = process_hub_instance(hub_name, instance_name)

    if result is None:
        # Nothing to sort for this pair. Skip it; falling through would reuse the
        # previous iteration's recording and output folder (or hit a NameError).
        print("处理失败")
        continue

    combined_recording, region_name, hub_instance_output_dir = result

    print(f"\nRecording信息:")
    print(f"  采样率: {combined_recording.get_sampling_frequency()} Hz")
    print(f"  通道数: {combined_recording.get_num_channels()}")
    print(f"  样本数: {combined_recording.get_num_samples()}")
    print(f"  持续时间: {combined_recording.get_num_samples() / combined_recording.get_sampling_frequency():.2f} 秒")
    print(f"  输出目录: {hub_instance_output_dir}")

    # Attach the probe geometry to the concatenated recording.
    recording_recorded = combined_recording.set_probegroup(probe_30channel)

    # Preprocessing chain: 300-3000 Hz band-pass, then global median common reference.
    recording_f = spre.bandpass_filter(recording_recorded, freq_min=300, freq_max=3000)
    print(recording_f)
    recording_cmr = spre.common_reference(recording_f, reference="global", operator="median")
    print(recording_cmr)

    # this computes and saves the recording after applying the preprocessing chain
    # NOTE(review): no `folder` is passed, so each ~90 GiB binary copy presumably goes
    # to SpikeInterface's temporary location — confirm disk space and cleanup.
    recording_preprocessed = recording_cmr.save(format="binary")
    print(recording_preprocessed)

    # Keep the output location as a Path and derive each sub-folder from it.
    sorting_root = Path(hub_instance_output_dir)

    sorting_kilosort4 = ss.run_sorter(
        sorter_name="kilosort4",
        recording=recording_preprocessed,
        folder=sorting_root / "kilosort4",
    )
    analyzer_kilosort4 = si.create_sorting_analyzer(
        sorting=sorting_kilosort4,
        recording=recording_preprocessed,
        format='binary_folder',
        folder=sorting_root / 'analyzer_kilosort4_binary',
    )

    analyzer_kilosort4.compute(extensions_to_compute, extension_params=extension_params)

    # Pass the metric parameters by keyword: the second positional argument of
    # `compute` is `save`, so a positional dict is never used as parameters.
    # NOTE(review): newer SpikeInterface releases name this keyword `metric_params`.
    qm_params = sqm.get_default_qm_params()
    analyzer_kilosort4.compute("quality_metrics", qm_params=qm_params)

    # Export for manual curation in phy.
    sexp.export_to_phy(analyzer_kilosort4, sorting_root / "phy_folder_for_kilosort", verbose=True)


处理 Hub1-instance2
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_1/Hub1-instance2_B001.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_2/Hub1-instance2_B002.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub1-instance2_B003.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_4/Hub1-instance2_B004.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_5/Hub1-instance2_B005.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_1/Hub1-instance2_B001.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_2/Hub1-instance2_B002.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub1-instance2_B003.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_4/Hub1-instance2_B004.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_5/Hub1-instance2_B005.ns6
  合并 5 个recording...
  主要脑区: IT
  脑区分布: [  0   0   0 256]

Recording信息:
  采样率: 30000.0 Hz
  通道数: 256
  样本数: 187

write_binary_recording (no parallelization): 100%|██████████| 6257/6257 [23:56<00:00,  4.36it/s] 


BinaryFolderRecording: 256 channels - 30.0kHz - 1 segments - 187,701,229 samples 
                       6,256.71s (1.74 hours) - int16 dtype - 89.50 GiB


100%|██████████| 3129/3129 [22:03<00:00,  2.36it/s]  
100%|██████████| 3129/3129 [22:24<00:00,  2.33it/s]  
100%|██████████| 16/16 [2:31:51<00:00, 569.44s/it]  
100%|██████████| 3129/3129 [13:00<00:00,  4.01it/s]
100%|██████████| 16/16 [44:23<00:00, 166.48s/it]
estimate_sparsity (no parallelization): 100%|██████████| 6257/6257 [00:05<00:00, 1127.12it/s]
compute_waveforms (no parallelization): 100%|██████████| 6257/6257 [05:40<00:00, 18.36it/s]
noise_level (no parallelization): 100%|██████████| 20/20 [00:01<00:00, 13.84it/s]
Compute : spike_amplitudes + spike_locations (no parallelization): 100%|██████████| 6257/6257 [04:17<00:00, 24.26it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
noise_level (no parallelization): 100%|██████████| 20/20 [00:00<00:00, 173.61it/s]
write_binary_recording (no parallelization): 100%|██████████| 6257/6257 [18:51<00:00,  5.53it/s]
Fitting PCA: 100%|██

Run:
phy template-gui  /media/ubuntu/sda/Monkey/sorted_result_combined/20240115/Hub1-instance2_IT/phy_folder_for_kilosort/params.py

处理 Hub2-instance1
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_1/Hub2-instance1_B001.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_2/Hub2-instance1_B002.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub2-instance1_B003.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_4/Hub2-instance1_B004.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_5/Hub2-instance1_B005.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_1/Hub2-instance1_B001.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_2/Hub2-instance1_B002.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub2-instance1_B003.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_4/Hub2-instance1_B004.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_5/Hub

write_binary_recording (no parallelization): 100%|██████████| 6262/6262 [29:03<00:00,  3.59it/s]


BinaryFolderRecording: 256 channels - 30.0kHz - 1 segments - 187,836,208 samples 
                       6,261.21s (1.74 hours) - int16 dtype - 89.57 GiB


100%|██████████| 3131/3131 [20:55<00:00,  2.49it/s]  
100%|██████████| 3131/3131 [19:56<00:00,  2.62it/s]  
100%|██████████| 16/16 [3:54:44<00:00, 880.25s/it]   
100%|██████████| 3131/3131 [15:48<00:00,  3.30it/s]
100%|██████████| 16/16 [1:33:44<00:00, 351.56s/it]
estimate_sparsity (no parallelization): 100%|██████████| 6262/6262 [00:07<00:00, 838.71it/s] 
compute_waveforms (no parallelization): 100%|██████████| 6262/6262 [05:38<00:00, 18.50it/s]
noise_level (no parallelization): 100%|██████████| 20/20 [00:01<00:00, 13.69it/s]
Compute : spike_amplitudes + spike_locations (no parallelization): 100%|██████████| 6262/6262 [03:41<00:00, 28.23it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
noise_level (no parallelization): 100%|██████████| 20/20 [00:00<00:00, 189.79it/s]
write_binary_recording (no parallelization): 100%|██████████| 6262/6262 [19:00<00:00,  5.49it/s]
Fitting PCA: 100%

Run:
phy template-gui  /media/ubuntu/sda/Monkey/sorted_result_combined/20240115/Hub2-instance1_V1/phy_folder_for_kilosort/params.py

处理 Hub2-instance2
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_1/Hub2-instance2_B001.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_2/Hub2-instance2_B002.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub2-instance2_B003.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_4/Hub2-instance2_B004.ns6
  找到文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_5/Hub2-instance2_B005.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_1/Hub2-instance2_B001.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_2/Hub2-instance2_B002.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub2-instance2_B003.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_4/Hub2-instance2_B004.ns6
  读取文件: /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_5/Hub

write_binary_recording (no parallelization): 100%|██████████| 6257/6257 [28:51<00:00,  3.61it/s]


BinaryFolderRecording: 256 channels - 30.0kHz - 1 segments - 187,701,626 samples 
                       6,256.72s (1.74 hours) - int16 dtype - 89.50 GiB


100%|██████████| 3129/3129 [19:47<00:00,  2.63it/s]  
100%|██████████| 3129/3129 [19:55<00:00,  2.62it/s]  
100%|██████████| 16/16 [4:26:28<00:00, 999.27s/it]   
100%|██████████| 3129/3129 [18:37<00:00,  2.80it/s]
 25%|██▌       | 4/16 [20:09<1:02:51, 314.28s/it]