In [1]:
from pathlib import Path
from kilosort.io import load_ops
import sys
import spikeinterface as si
import matplotlib.pyplot as plt

import spikeinterface.extractors as se
import spikeinterface.preprocessing as spre
import spikeinterface.sorters as ss
import spikeinterface.widgets as sw
import spikeinterface.qualitymetrics as sqm
import json
import numpy as np
import pandas as pd
import seaborn as sns
import warnings
from kilosort import io
import os
# Install a blanket "ignore" filter: from here on no warning is shown,
# including the ones pandas would emit in later cells.
warnings.filterwarnings('ignore')

# Job keyword arguments (n_jobs=4) registered as SpikeInterface's global defaults.
global_job_kwargs = {"n_jobs": 4}
si.set_global_job_kwargs(**global_job_kwargs)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Create the `seg` output folder (and any missing parents); a no-op if it already exists.
# Only this exact path is created -- sub-folders such as `seg/mouse6` are not.
os.makedirs("/media/ubuntu/sda/data/filter_neuron/neuron_loss_seg/seg", exist_ok=True)

In [7]:
# Session labels as six-character strings (leading zeros kept), listed in the
# order in which the notebook iterates over them.
date_order = [
    '021322', '022522', '031722', '042422',
    '052422', '062422', '072322', '082322',
    '092422', '102122', '112022', '122022',
    '012123',
    '022223', '032123', '042323',
]

# Integer form of the same labels; int() drops the leading zero ('021322' -> 21322).
date_order_num = list(map(int, date_order))

In [5]:
# Load the trigger table. Later cells read its `start`, `end`, `date`, `image`
# and `order` columns.
trigger_time = pd.read_csv("/media/ubuntu/sda/data/filter_neuron/mouse_6/natural_image/trigger_time.csv")

In [4]:
def get_spike_inf(file_path, date=None):
    """Load the per-spike Kilosort4 output of one session into a DataFrame.

    Reads ``spike_clusters.npy``, ``spike_positions.npy``, ``spike_templates.npy``,
    ``spike_times.npy`` and ``tF.npy`` from ``<file_path>/kilosort4/sorter_output``
    and places them side by side, one row per spike.

    Parameters
    ----------
    file_path : str or os.PathLike
        Session folder that contains ``kilosort4/sorter_output``.
    date : str, optional
        Value written to the ``date`` column. Defaults to the last path
        component of ``file_path``, so the function no longer depends on a
        global variable named ``date`` being set by the caller.

    Returns
    -------
    pd.DataFrame
        Columns ``cluster`` (string), ``position_1``, ``position_2``,
        ``templates``, ``time``, ``PC_1`` .. ``PC_6`` and ``date``.
        Only rows whose ``time`` is greater than 200 are kept.

    Raises
    ------
    ValueError
        If the loaded arrays do not add up to exactly 11 columns
        (raised by pandas when the column names are assigned).
    """
    import os  # local import keeps the function self-contained

    sorter_dir = os.path.join(file_path, "kilosort4", "sorter_output")
    if date is None:
        # normpath strips a trailing slash so basename returns the folder name.
        date = os.path.basename(os.path.normpath(file_path))

    def _load(name):
        return np.load(os.path.join(sorter_dir, name))

    parts = [
        pd.DataFrame(_load("spike_clusters.npy").astype(str)),
        pd.DataFrame(_load("spike_positions.npy").astype(float)),
        pd.DataFrame(_load("spike_templates.npy")),
        pd.DataFrame(_load("spike_times.npy").astype(int)),
        # Only index 0 along the second axis of tF is used.
        # NOTE(review): presumably one channel's PC features -- confirm against Kilosort docs.
        pd.DataFrame(_load("tF.npy")[:, 0, :]),
    ]

    spike_inf = pd.concat(parts, axis=1)
    spike_inf.columns = ['cluster', 'position_1', 'position_2', 'templates', 'time', 'PC_1', 'PC_2', 'PC_3', 'PC_4', 'PC_5', 'PC_6']

    # .copy() so the column added below is written to an independent frame,
    # not to a slice of the unfiltered one (chained assignment).
    spike_inf = spike_inf[spike_inf['time'] > 200].copy()
    spike_inf['date'] = date

    return spike_inf

# Build one spike table covering every session folder under the sort-output root.
# Frames are collected in a list and concatenated once: calling pd.concat inside
# the loop re-copies all previously loaded rows on every iteration.
# The listing is sorted because os.listdir returns entries in arbitrary order.
session_frames = []

for date in sorted(os.listdir("/media/ubuntu/sda/data/sort_output/mouse6/natural_image")):
    spike_inf = get_spike_inf(file_path=f"/media/ubuntu/sda/data/sort_output/mouse6/natural_image/{date}")
    session_frames.append(spike_inf)

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
all_spike_inf = pd.concat(session_frames, ignore_index=True) if session_frames else pd.DataFrame()

In [8]:
# Read cluster_inf_2.tsv .. cluster_inf_<len(date_order)>.tsv and stack them row-wise.
# The tables are read first and concatenated once instead of growing a frame
# inside the loop. The original row indices are kept (no ignore_index).
cluster_tables = [
    pd.read_csv(f"/media/ubuntu/sda/data/filter_neuron/neuron_loss/cluster_inf/mouse6_processed/cluster_inf_{i}.tsv", sep = '\t')
    for i in range(2, len(date_order) + 1)
]

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
all_cluster_inf = pd.concat(cluster_tables, axis=0) if cluster_tables else pd.DataFrame()

In [24]:
# Keep only the first row for each `cluster_date` value; later repeats are dropped.
all_cluster_inf = all_cluster_inf.drop_duplicates(subset=['cluster_date'], keep='first')

In [9]:
# Per-spike key "<date>_<cluster>", compared against all_cluster_inf['cluster_date'] in later cells.
all_spike_inf['cluster_date'] = all_spike_inf['date']  + "_" +  all_spike_inf['cluster']

In [27]:
# Keep only the spikes whose `cluster_date` key occurs in all_cluster_inf.
# .copy() makes the filtered table an independent frame, so columns assigned to it
# afterwards are not chained assignments on a slice of the unfiltered table.
all_spike_inf = all_spike_inf[all_spike_inf['cluster_date'].isin(all_cluster_inf['cluster_date'].values)].copy()

In [33]:
# Label every spike with the value stored for its cluster in all_cluster_inf.
# The lookup is positional: column 0 of all_cluster_inf is matched against
# `cluster_date`, column 28 supplies the label.
# NOTE(review): presumably these are the `cluster_date` and `Neuron` columns -- confirm.
#
# A single dictionary lookup replaces one full boolean scan of the spike table per
# cluster row. As with the row-by-row loop, a key that occurs several times ends up
# with the label of its last row, and spikes without a match keep None.
all_spike_inf['Neuron'] = None
neuron_by_key = dict(zip(all_cluster_inf.iloc[:, 0], all_cluster_inf.iloc[:, 28]))
has_label = all_spike_inf['cluster_date'].isin(list(neuron_by_key))
# .to_numpy() assigns by position within the selected rows, avoiding index alignment.
all_spike_inf.loc[has_label, 'Neuron'] = (
    all_spike_inf.loc[has_label, 'cluster_date'].map(neuron_by_key).to_numpy()
)

In [50]:
import numpy as np
import pandas as pd
import quantities as pq
from neo import SpikeTrain
def generate_binned_spiketrains(trigger_time_df, spike_inf_df, target_image, all_neuron_ids, n_bins=40):
    """Build one binned spike-count matrix per trial of a given image.

    Parameters
    ----------
    trigger_time_df : pd.DataFrame
        Trial table with columns ``start``, ``end``, ``image`` and ``order``.
        ``start`` and ``end`` are multiplied by 1e-4 before use (the original
        author's note gives the raw unit as 0.1 ms, i.e. the result is seconds).
    spike_inf_df : pd.DataFrame
        Spike table with columns ``time`` (same raw unit as the triggers) and
        ``Neuron``.
    target_image : str or int
        Value compared against the ``image`` column to select trials.
    all_neuron_ids : sequence
        Neuron labels; row ``k`` of every returned matrix belongs to
        ``all_neuron_ids[k]``. Neurons without spikes in a trial get a zero row.
    n_bins : int, default 40
        Number of equal-width bins each trial is divided into.

    Returns
    -------
    list of np.ndarray
        One integer array of shape ``(len(all_neuron_ids), n_bins)`` per
        selected trial, in trial order. Empty list when no trial matches.

    Notes
    -----
    Neither table is filtered by date here; the caller must pass rows of a
    single session if trial and spike times are only comparable within one.
    """
    # =====================================
    # Step 1: select the trials of the target image and order them
    # =====================================
    target_triggers = trigger_time_df[trigger_time_df['image'] == target_image].copy()

    # `order` may hold labels such as "27_10"; a plain string sort would put
    # "27_10" before "27_2". Sort on the number after the last underscore instead,
    # and fall back to the plain sort when any label has no numeric suffix.
    repeat_no = pd.to_numeric(
        target_triggers['order'].astype(str).str.rsplit('_', n=1).str[-1],
        errors='coerce',
    )
    if repeat_no.notna().all():
        target_triggers = target_triggers.iloc[np.argsort(repeat_no.to_numpy(), kind='stable')]
    else:
        target_triggers = target_triggers.sort_values('order')

    # Convert raw time units to seconds (factor 1e-4).
    target_triggers['start'] = target_triggers['start'] * 0.1e-3
    target_triggers['end'] = target_triggers['end'] * 0.1e-3

    # =====================================
    # Step 2: prepare the spike table
    # =====================================
    # Only the two columns used below are copied, not the whole table.
    target_spikes = spike_inf_df[['time', 'Neuron']].copy()
    target_spikes['time'] = target_spikes['time'] * 0.1e-3

    # =====================================
    # Step 3: bin every trial
    # =====================================
    binned_data = []
    for trial_start, trial_end in zip(target_triggers['start'].to_numpy(),
                                      target_triggers['end'].to_numpy()):
        trial_duration = trial_end - trial_start

        # Spikes inside the half-open window [start, end), re-referenced to trial start.
        spike_mask = (target_spikes['time'] >= trial_start) & (target_spikes['time'] < trial_end)
        trial_spikes = target_spikes[spike_mask].copy()
        trial_spikes['rel_time'] = trial_spikes['time'] - trial_start

        bin_matrix = np.zeros((len(all_neuron_ids), n_bins), dtype=int)

        neuron_groups = trial_spikes.groupby('Neuron')
        present = neuron_groups.groups
        for neuron_idx, neuron_id in enumerate(all_neuron_ids):
            if neuron_id in present:
                times = neuron_groups.get_group(neuron_id)['rel_time'].values
                counts, _ = np.histogram(times, bins=n_bins, range=(0, trial_duration))
                bin_matrix[neuron_idx] = counts

        binned_data.append(bin_matrix)

    return binned_data

In [54]:
# Write one .npy file per (date, image, trial): <date>_<image>_<trial number>.npy,
# with trial numbers starting at 1 in the order returned by generate_binned_spiketrains.
output_dir = "/media/ubuntu/sda/data/filter_neuron/neuron_loss_seg/seg/mouse6"
# np.save does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# The neuron list is the same for every date and image, so it is computed once.
all_neuron_ids = sorted(all_spike_inf['Neuron'].unique())

for date in date_order:
    # Per-date subsets do not depend on the image and are built once per date.
    # trigger_time stores dates as integers, all_spike_inf as strings.
    temp = trigger_time[trigger_time['date'] == int(date)]
    temp_spike = all_spike_inf[all_spike_inf['date'] == date]
    for image in range(1, 118):  # image ids 1..117
        spike_train = generate_binned_spiketrains(temp, temp_spike, image, all_neuron_ids)
        for num, trial_matrix in enumerate(spike_train, start=1):
            np.save(f'{output_dir}/{date}_{image}_{num}.npy', trial_matrix)

In [135]:
# Display the trigger table (pandas renders a truncated preview of the rows).
trigger_time

Unnamed: 0,start,end,date,image,order
0,122828,127828,22223,27,27_1
1,142768,147768,22223,42,42_1
2,162720,167720,22223,5,5_1
3,182675,187675,22223,101,101_1
4,202638,207638,22223,84,84_1
...,...,...,...,...,...
17919,21812622,21817622,102122,83,83_9
17920,21832633,21837633,102122,17,17_9
17921,21852631,21857631,102122,11,11_9
17922,21872602,21877602,102122,41,41_9


In [124]:
# Presence table: one row per `Neuron`, one column per `date`.
# Counts above 1 are capped at 1, so every cell is 0 (absent) or 1 (present).
a = pd.crosstab(all_cluster_inf['Neuron'], all_cluster_inf['date']).clip(upper=1)
a

date,12123,21322,22223,22522,31722,32123,42323,42422,52422,62422,72322,82322,92422,102122,112022,122022
Neuron,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Neuron_1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Neuron_10,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Neuron_11,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Neuron_12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Neuron_13,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Neuron_14,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0
Neuron_15,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Neuron_16,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Neuron_17,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Neuron_18,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [115]:
# Hand-written list of neuron labels; Neuron_14 is not part of it.
consistent_neuron = [
    "Neuron_1", "Neuron_10", "Neuron_11", "Neuron_12", "Neuron_13", "Neuron_15",
    "Neuron_16", "Neuron_17", "Neuron_18", "Neuron_19", "Neuron_2", "Neuron_3",
    "Neuron_4", "Neuron_5", "Neuron_6", "Neuron_7", "Neuron_8", "Neuron_9",
]
len(consistent_neuron)

18

In [125]:
# Load the tab-separated cluster_inf.tsv table.
# Note: this rebinds `a`, which an earlier cell used for the Neuron-by-date crosstab.
a = pd.read_csv("/media/ubuntu/sda/data/filter_neuron/mouse_6/natural_image/cluster_inf.tsv", sep = '\t')