In [1]:
import sys
import spikeinterface as si
import matplotlib.pyplot as plt
import spikeinterface.extractors as se
import spikeinterface.preprocessing as spre
import spikeinterface.sorters as ss
import spikeinterface.widgets as sw
import spikeinterface.qualitymetrics as sqm
import json
import probeinterface


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import numpy as np
import scipy.io as sio
import spikeinterface.extractors as se
import spikeinterface as si
from pathlib import Path
import matplotlib.pyplot as plt
import os
date = 20240115
block = 3
monkey = 'monkeyF'
datadir_gen = f'/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/'
mapping_file = f'/media/ubuntu/sda/Monkey/TVSD/monkeyF/_logs/1024chns_mapping_20220105.mat'

os.makedirs(f'/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/', exist_ok=True)
os.makedirs(f'/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/processed_data', exist_ok=True)

file_list = [
    f'Hub1-instance1_B00{block}.ns6',
    f'Hub1-instance2_B00{block}.ns6',
    f'Hub2-instance1_B00{block}.ns6',
    f'Hub2-instance2_B00{block}.ns6'
]

# 加载映射文件
mapping_data = sio.loadmat(mapping_file)
mapping = mapping_data['mapping'].flatten() - 1  # 转换为0-based索引

# 定义脑区映射
if monkey == 'monkeyN':
    rois = np.ones(1024)  # V1
    rois[512:768] = 2  # V4 (513-768)
    rois[768:1024] = 3  # IT (769-1024)
else:
    rois = np.ones(1024)  # V1
    rois[512:832] = 3  # IT (513-832)
    rois[832:1024] = 2  # V4 (833-1024)

output_dir = Path(datadir_gen) / 'processed_data'
output_dir.mkdir(exist_ok=True)

# 设置块大小（根据可用内存调整）
chunk_size = 500000  # 每次处理的样本数

# 处理每个文件
for file_idx, file_name in enumerate(file_list):
    file_path = f'/media/ubuntu/sda/Monkey/TVSD/monkeyF/{date}/Block_{block}/{file_name}'
    print(f'Processing {file_path}')
    
    # 读取文件
    recording = se.read_blackrock(file_path)
    
    # 处理多段数据
    if recording.get_num_segments() > 1:
        recording_list = []
        for i in range(recording.get_num_segments()):
            recording_list.append(recording.select_segments(i))
        recording = si.concatenate_recordings(recording_list)
    
    # 获取采样率和样本数
    sample_rate = recording.get_sampling_frequency()
    n_samples = recording.get_num_samples()
    
    # 获取通道ID列表（字符串类型）
    channel_ids = np.array([str(i) for i in range(1, 257)])
    
    # 确定当前文件在映射中的位置
    if 'Hub1-instance1' in file_name:
        file_start_idx = 0
    elif 'Hub2-instance1' in file_name:
        file_start_idx = 256
    elif 'Hub1-instance2' in file_name:
        file_start_idx = 512
    elif 'Hub2-instance2' in file_name:
        file_start_idx = 768
    else:
        raise ValueError(f'Unknown file type: {file_name}')
    
    # 创建文件输出目录
    file_output_dir = output_dir / file_name.replace('.ns6', '')
    file_output_dir.mkdir(exist_ok=True)
    
    # 为每个阵列创建内存映射文件（每个文件有4个阵列）
    array_files = []
    array_info = []
    
    # 每个文件有256个通道，分成4组，每组64个通道
    for array_idx in range(4):
        # 确定阵列的主要脑区
        start_chan = file_start_idx + array_idx * 64
        end_chan = start_chan + 64
        
        array_roi_counts = np.bincount(rois[start_chan:end_chan].astype(int))
        primary_roi = np.argmax(array_roi_counts)
        
        if primary_roi == 1:
            roi_name = 'V1'
        elif primary_roi == 2:
            roi_name = 'V4'
        else:
            roi_name = 'IT'
        
        output_file = file_output_dir / f'array_{file_start_idx//64 + array_idx + 1:02d}_{roi_name}.npy'
        
        # 创建内存映射文件
        mmap_array = np.lib.format.open_memmap(
            output_file, mode='w+', dtype=np.float32, shape=(64, n_samples)
        )
        array_files.append(mmap_array)
        array_info.append({'roi_name': roi_name, 'output_file': output_file})
    
    # 分块处理数据
    for start in range(0, n_samples, chunk_size):
        end = min(start + chunk_size, n_samples)
        chunk_size_actual = end - start
        
        print(f'Processing chunk {start}-{end} ({chunk_size_actual} samples)')
        
        # 获取当前块的数据
        chunk_data = recording.get_traces(start_frame=start, end_frame=end)
        
        # 处理当前文件的每个通道
        for i in range(256):
            # 使用正确的通道ID获取数据
            channel_id = str(i + 1)  # 转换为字符串，因为Recording使用字符串ID
            channel_idx_in_recording = np.where(channel_ids == channel_id)[0][0]
            
            # 确定通道属于哪个阵列（在当前文件的4个阵列中）
            array_idx = i // 64
            channel_in_array = i % 64
            
            # 将数据写入对应阵列的内存映射文件
            array_files[array_idx][channel_in_array, start:end] = chunk_data[:, channel_idx_in_recording]
    
    # 保存并关闭内存映射文件
    for array_idx, mmap_array in enumerate(array_files):
        mmap_array.flush()
        del mmap_array  # 释放内存映射
        print(f'Saved {array_info[array_idx]["output_file"]}')
    
    print(f'Completed processing {file_name}')

print('All files processed successfully!')

Processing /media/ubuntu/sda/Monkey/TVSD/monkeyF/20240115/Block_3/Hub1-instance1_B003.ns6
Processing chunk 0-500000 (500000 samples)
Processing chunk 500000-1000000 (500000 samples)
Processing chunk 1000000-1500000 (500000 samples)
Processing chunk 1500000-2000000 (500000 samples)
Processing chunk 2000000-2500000 (500000 samples)
Processing chunk 2500000-3000000 (500000 samples)
Processing chunk 3000000-3500000 (500000 samples)
Processing chunk 3500000-4000000 (500000 samples)
Processing chunk 4000000-4500000 (500000 samples)
Processing chunk 4500000-5000000 (500000 samples)
Processing chunk 5000000-5500000 (500000 samples)
Processing chunk 5500000-6000000 (500000 samples)
Processing chunk 6000000-6500000 (500000 samples)
Processing chunk 6500000-7000000 (500000 samples)
Processing chunk 7000000-7500000 (500000 samples)
Processing chunk 7500000-8000000 (500000 samples)
Processing chunk 8000000-8500000 (500000 samples)
Processing chunk 8500000-9000000 (500000 samples)
Processing chunk 9

In [3]:
import os
date = 20240115
block = 3
for file in os.listdir(f"/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/processed_data"):
    for array in os.listdir(f"/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/processed_data/{file}"):
        recording = np.load(f"/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/processed_data/{file}/{array}")
        recording = se.NumpyRecording(recording.T, sampling_frequency=30000)
        from probeinterface import write_probeinterface, read_probeinterface

        probe_30channel = read_probeinterface('/media/ubuntu/sda/Monkey/scripts/probe.json')
        probe_30channel.set_global_device_channel_indices([i for i in range(64)])
        recording_recorded = recording.set_probegroup(probe_30channel)

        recording_cmr = recording_recorded
        recording_f = spre.bandpass_filter(recording_recorded, freq_min=300, freq_max=3000)
        print(recording_f)
        recording_cmr = spre.common_reference(recording_f, reference="global", operator="median")
        print(recording_cmr)

        # this computes and saves the recording after applying the preprocessing chain
        recording_preprocessed = recording_cmr.save(format="binary")
        print(recording_preprocessed)
        os.makedirs(f"/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/sort", exist_ok=True)

        os.makedirs(f"/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/sort/{array}", exist_ok=True)
        output_folder = f"/media/ubuntu/sda/Monkey/sorted_result/{date}/Block_{block}/sort/{array}"
        sorting_kilosort4 = ss.run_sorter(sorter_name="kilosort4", recording=recording_preprocessed, output_folder=output_folder + "/kilosort4")
        analyzer_kilosort4 = si.create_sorting_analyzer(sorting=sorting_kilosort4, recording=recording_preprocessed, format='binary_folder', folder=output_folder + '/analyzer_kilosort4_binary')

        extensions_to_compute = [
                    "random_spikes",
                    "waveforms",
                    "noise_levels",
                    "templates",
                    "spike_amplitudes",
                    "unit_locations",
                    "spike_locations",
                    "correlograms",
                    "template_similarity"
                ]

        extension_params = {
            "unit_locations": {"method": "center_of_mass"},
            "spike_locations": {"ms_before": 0.1},
            "correlograms": {"bin_ms": 0.1},
            "template_similarity": {"method": "cosine_similarity"}
        }

        analyzer_kilosort4.compute(extensions_to_compute, extension_params=extension_params)

        qm_params = sqm.get_default_qm_params()
        analyzer_kilosort4.compute("quality_metrics", qm_params)

        import spikeinterface.exporters as sexp
        sexp.export_to_phy(analyzer_kilosort4, output_folder + "/phy_folder_for_kilosort", verbose=True)



BandpassFilterRecording: 64 channels - 30.0kHz - 1 segments - 36,578,160 samples 
                         1,219.27s (20.32 minutes) - float32 dtype - 8.72 GiB
CommonReferenceRecording: 64 channels - 30.0kHz - 1 segments - 36,578,160 samples 
                          1,219.27s (20.32 minutes) - float32 dtype - 8.72 GiB
Use cache_folder=/tmp/spikeinterface_cache/tmpwce3q708/014PDXYL
write_binary_recording 
engine=process - n_jobs=1 - samples_per_chunk=30,000 - chunk_memory=7.32 MiB - total_memory=7.32 MiB - chunk_duration=1.00s


write_binary_recording (no parallelization): 100%|██████████| 1220/1220 [01:05<00:00, 18.55it/s]
  sorting_kilosort4 = ss.run_sorter(sorter_name="kilosort4", recording=recording_preprocessed, output_folder=output_folder + "/kilosort4")


BinaryFolderRecording: 64 channels - 30.0kHz - 1 segments - 36,578,160 samples 
                       1,219.27s (20.32 minutes) - float32 dtype - 8.72 GiB


ValueError: Folder /media/ubuntu/sda/Monkey/sorted_result/20240115/Block_3/sort/array_09_IT.npy/kilosort4 already exists

In [None]:
recording = np.load("/media/ubuntu/sda/Monkey/TVSD/monkeyF/20240112/Block_1/processed_data/Hub1-instance1_B001/array_01_V1.npy")

In [None]:
recording = se.NumpyRecording(recording.T, sampling_frequency=30000)

In [None]:
from probeinterface import write_probeinterface, read_probeinterface

probe_30channel = read_probeinterface('/media/ubuntu/sda/Monkey/probe.json')
probe_30channel.set_global_device_channel_indices([i for i in range(64)])
recording_recorded = recording.set_probegroup(probe_30channel)



In [None]:
recording_cmr = recording_recorded
recording_f = spre.bandpass_filter(recording_recorded, freq_min=300, freq_max=3000)
print(recording_f)
recording_cmr = spre.common_reference(recording_f, reference="global", operator="median")
print(recording_cmr)

# this computes and saves the recording after applying the preprocessing chain
recording_preprocessed = recording_cmr.save(format="binary")
print(recording_preprocessed)
output_folder = '/media/ubuntu/sda/Monkey/test'
sorting_kilosort4 = ss.run_sorter(sorter_name="kilosort4", recording=recording_preprocessed, output_folder=output_folder + "/kilosort4")
analyzer_kilosort4 = si.create_sorting_analyzer(sorting=sorting_kilosort4, recording=recording_preprocessed, format='binary_folder', folder=output_folder + '/analyzer_kilosort4_binary')




BandpassFilterRecording: 64 channels - 30.0kHz - 1 segments - 7,031,518 samples 
                         234.38s (3.91 minutes) - float32 dtype - 1.68 GiB
CommonReferenceRecording: 64 channels - 30.0kHz - 1 segments - 7,031,518 samples 
                          234.38s (3.91 minutes) - float32 dtype - 1.68 GiB
Use cache_folder=/tmp/spikeinterface_cache/tmpq3oq4adi/UC8PBUQH
write_binary_recording 
engine=process - n_jobs=1 - samples_per_chunk=30,000 - chunk_memory=7.32 MiB - total_memory=7.32 MiB - chunk_duration=1.00s


write_binary_recording (no parallelization): 100%|██████████| 235/235 [00:11<00:00, 21.15it/s]
  sorting_kilosort4 = ss.run_sorter(sorter_name="kilosort4", recording=recording_preprocessed, output_folder=output_folder + "/kilosort4")


BinaryFolderRecording: 64 channels - 30.0kHz - 1 segments - 7,031,518 samples 
                       234.38s (3.91 minutes) - float32 dtype - 1.68 GiB


100%|██████████| 118/118 [00:09<00:00, 11.83it/s]
100%|██████████| 8/8 [00:21<00:00,  2.66s/it]
100%|██████████| 118/118 [00:03<00:00, 31.27it/s]
100%|██████████| 8/8 [00:06<00:00,  1.21it/s]
estimate_sparsity (no parallelization): 100%|██████████| 235/235 [00:00<00:00, 5650.40it/s]


In [None]:
extensions_to_compute = [
            "random_spikes",
            "waveforms",
            "noise_levels",
            "templates",
            "spike_amplitudes",
            "unit_locations",
            "spike_locations",
            "correlograms",
            "template_similarity"
        ]

extension_params = {
    "unit_locations": {"method": "center_of_mass"},
    "spike_locations": {"ms_before": 0.1},
    "correlograms": {"bin_ms": 0.1},
    "template_similarity": {"method": "cosine_similarity"}
}

analyzer_kilosort4.compute(extensions_to_compute, extension_params=extension_params)

qm_params = sqm.get_default_qm_params()
analyzer_kilosort4.compute("quality_metrics", qm_params)

import spikeinterface.exporters as sexp
sexp.export_to_phy(analyzer_kilosort4, output_folder + "/phy_folder_for_kilosort", verbose=True)


compute_waveforms (no parallelization): 100%|██████████| 235/235 [00:10<00:00, 22.62it/s]
noise_level (no parallelization): 100%|██████████| 20/20 [00:00<00:00, 50.80it/s]
Compute : spike_amplitudes + spike_locations (no parallelization): 100%|██████████| 235/235 [00:01<00:00, 231.21it/s]
noise_level (no parallelization): 100%|██████████| 20/20 [00:00<00:00, 884.63it/s]
write_binary_recording (no parallelization): 100%|██████████| 235/235 [00:24<00:00,  9.40it/s]
Fitting PCA: 100%|██████████| 64/64 [00:12<00:00,  5.21it/s]
Projecting waveforms: 100%|██████████| 64/64 [00:00<00:00, 2211.66it/s]
extract PCs (no parallelization): 100%|██████████| 235/235 [00:16<00:00, 13.94it/s]

Run:
phy template-gui  /media/ubuntu/sda/Monkey/test/phy_folder_for_kilosort/params.py



