### Generate a table of the number of units recorded and analyzed, per region and per subject
- Cleaned-up version of: /src/wcst_decode/notebooks/20250528_blanche_sam_unit_regions.ipynb
- only include units from sessions actually analyzed
- don't include drifting units

In [2]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
import utils.behavioral_utils as behavioral_utils
import utils.information_utils as information_utils
import utils.visualization_utils as visualization_utils
import utils.pseudo_classifier_utils as pseudo_classifier_utils
import utils.classifier_utils as classifier_utils

import utils.io_utils as io_utils

import utils.glm_utils as glm_utils
from matplotlib import pyplot as plt
import matplotlib
import utils.spike_utils as spike_utils
import utils.subspace_utils as subspace_utils
from trial_splitters.condition_trial_splitter import ConditionTrialSplitter 
from utils.session_data import SessionData
from constants.behavioral_constants import *
from constants.decoding_constants import *
import seaborn as sns
from scripts.pseudo_decoding.belief_partitions.belief_partition_configs import *
import scripts.pseudo_decoding.belief_partitions.belief_partitions_io as belief_partitions_io

import scipy
import argparse
import copy
import plotly.express as px
from scripts.anova_analysis.anova_configs import *



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### load units analyzed

In [3]:
def load_units_for_sub(subject, path=UNITS_PATH):
    """
    Load one subject's units, restricted to the ones that were analyzed.

    Units whose PseudoUnitID appears in the subject's drift file are dropped,
    and only units whose session appears in the `sessions` column of the
    subject's feature file are kept. The `structure_level2` column is renamed
    to `Region`, and a `subject` column holding `subject` is added.

    :param subject: value substituted for `{sub}` in the path templates
    :param path: template of the units pickle to read, defaults to UNITS_PATH
    :returns: DataFrame with the remaining units
    """
    unit_table = pd.read_pickle(path.format(sub=subject))

    drifting = pd.read_pickle(DRIFT_PATH.format(sub=subject))
    not_drifting = ~unit_table.PseudoUnitID.isin(drifting.PseudoUnitID)

    # explode() flattens the `sessions` column before collecting unique values
    # (presumably each row holds a list of session ids -- confirm against FEATS_PATH)
    feature_table = pd.read_pickle(FEATS_PATH.format(sub=subject))
    analyzed_sessions = feature_table.sessions.explode().unique()
    in_analyzed_session = unit_table.session.isin(analyzed_sessions)

    # One region label shows up with every character doubled; map it onto
    # the regular label so both spellings are counted as the same region.
    region_fixes = {": eexxttrraassttrriiaattee__vviissuuaall__aarreeaass__22--44 ((VV22--VV44))": "extrastriate_visual_areas_2-4 (V2-V4)"}

    selected = unit_table[not_drifting & in_analyzed_session]
    selected = selected.rename(columns={'structure_level2': 'Region'})
    return selected.assign(
        Region=selected["Region"].replace(region_fixes),
        subject=subject,
    )

### load units per subject and count units per region

In [4]:
# Subject "SA": load the analyzed units, then count distinct units in each region
sa_units = load_units_for_sub("SA")
sa_units_per_region = (
    sa_units.groupby("Region")["PseudoUnitID"]
    .nunique()
    .reset_index(name="Subject S Units")
)

In [5]:
# Subject "BL": load the analyzed units, then count distinct units in each region
bl_units = load_units_for_sub("BL")
bl_units_per_region = (
    bl_units.groupby("Region")["PseudoUnitID"]
    .nunique()
    .reset_index(name="Subject B Units")
)

In [5]:
# Join the per-region counts of both subjects. The outer merge keeps regions
# that appear for only one subject; fillna(0) gives the other subject a count of 0.
combined = pd.merge(sa_units_per_region, bl_units_per_region, on="Region", how="outer").fillna(0)

# NaNs introduced by the outer merge can turn the count columns into floats,
# so cast them back to integers before summing.
combined["Subject B Units"] = combined["Subject B Units"].round().astype(int)
combined["Subject S Units"] = combined["Subject S Units"].round().astype(int)
combined["Both Subjects Units"] = combined["Subject B Units"] + combined["Subject S Units"]

# Extra "All" row holding the column sums over every region
totals = pd.DataFrame([{
    "Region": "All", 
    "Subject B Units": combined["Subject B Units"].sum(), 
    "Subject S Units": combined["Subject S Units"].sum(),
    "Both Subjects Units": combined["Both Subjects Units"].sum()
}])
combined = pd.concat((combined, totals))
# Largest combined count first
combined = combined.sort_values(by="Both Subjects Units", ascending=False)


In [6]:
# Display the per-region unit table
combined

Unnamed: 0,Region,Subject S Units,Subject B Units,Both Subjects Units
0,All,1103,347,1450
8,inferior_temporal_cortex (ITC),185,56,241
10,lateral_prefrontal_cortex (lat_PFC),239,0,239
11,medial_pallium (MPal),48,83,131
2,basal_ganglia (BG),77,34,111
0,amygdala (Amy),73,24,97
1,anterior_cingulate_gyrus (ACgG),85,0,85
9,lateral_and_ventral_pallium (LVPal),77,7,84
19,superior_parietal_lobule (SPL),61,16,77
13,motor_cortex (motor),63,0,63


In [67]:
# Write the per-region table to disk; index=False leaves the row index out of the file
combined.to_csv("/data/patrick_res/figures/wcst_paper/units/region_units.csv", index=False)

### Generate a csv of all units and all structure_levels for Mike: 

In [6]:
# Stack the unit tables of both subjects into one DataFrame
all_units = pd.concat([sa_units, bl_units])

In [8]:
# List the distinct values of the cleaned level-2 structure labels
all_units["structure_level2_cleaned"].unique()

array(['lateral_and_ventral_pallium_LVPal',
       'lateral_prefrontal_cortex_lat_PFC', 'primary_visual_cortex_V1',
       'anterior_cingulate_gyrus_ACgG', 'posterior_medial_cortex_PMC',
       'orbital_frontal_cortex_OFC', 'unknown', 'basal_ganglia_BG',
       'inferior_temporal_cortex_ITC', 'motor_cortex_motor',
       'preoptic_complex_POC', 'amygdala_Amy',
       'extrastriate_visual_areas_2-4_V2-V4', 'medial_pallium_MPal',
       'thalamus_Thal', 'inferior_parietal_lobule_IPL',
       'superior_parietal_lobule_SPL',
       'floor_of_the_lateral_sulcus_floor_of_ls',
       'diagonal_subpallium_DSP', 'cerebellum_Cb',
       'medial_temporal_lobe_MTL', 'somatosensory_cortex_SI/SII',
       ':_eexxttrraassttrriiaattee__vviissuuaall__aarreeaass__22--44_VV22--VV44'],
      dtype=object)

In [14]:
# Write every unit of both subjects, with all columns, to disk (row index omitted)
all_units.to_csv("/data/patrick_res/figures/wcst_paper/units/all_units.csv", index=False)

In [15]:
# Show which columns the combined unit table contains
all_units.columns

Index(['Channel', 'Unit', 'SpikeTimesFile', 'UnitID', 'electrode_id', 'x', 'y',
       'z', 'distance', 'in_brain', 'tissue', 'structure_level1', 'Region',
       'structure_level3', 'structure_level4', 'structure_level5',
       'structure_level6', 'structure_potential', 'session', 'PseudoUnitID',
       'structure_level2_cleaned', 'manual_structure',
       'manual_structure_cleaned', 'drive', 'subject'],
      dtype='object')

### Generate a table including BL corrected regions

In [3]:
# Path template of the corrected units pickle; {sub} is filled in by
# load_units_for_sub with the subject passed to it.
BL_CORRECTED_PATH = "/data/patrick_res/firing_rates/{sub}/all_units_corrected.pickle"

# Subject "SA": distinct units per region
sa_units = load_units_for_sub("SA")
sa_units_per_region = (
    sa_units.groupby("Region")["PseudoUnitID"]
    .nunique()
    .reset_index(name="Subject S Units")
)

# Subject "BL", read from the default units path
bl_original_units = load_units_for_sub("BL")
bl_original_units_per_region = (
    bl_original_units.groupby("Region")["PseudoUnitID"]
    .nunique()
    .reset_index(name="Subject B Units Original")
)

# Subject "BL", read from the corrected units path
bl_correct_units = load_units_for_sub("BL", path=BL_CORRECTED_PATH)
bl_correct_units_per_region = (
    bl_correct_units.groupby("Region")["PseudoUnitID"]
    .nunique()
    .reset_index(name="Subject B Units Corrected")
)

In [6]:
# Outer-join the three per-region count tables so a region missing from any
# of them still gets a row, with 0 standing in for the missing count.
combined = sa_units_per_region.merge(bl_original_units_per_region, on="Region", how="outer").fillna(0)
combined = combined.merge(bl_correct_units_per_region, on="Region", how="outer").fillna(0)

# Cast the count columns back to integers (the outer merges can leave them as floats)
combined = combined.assign(**{
    col: combined[col].round().astype(int)
    for col in ("Subject B Units Original", "Subject B Units Corrected", "Subject S Units")
})

# Per-region change caused by the correction, and the corrected total over both subjects
combined["Subject B Diff"] = combined["Subject B Units Corrected"] - combined["Subject B Units Original"]
combined["Both Subjects Units Corrected"] = combined["Subject B Units Corrected"] + combined["Subject S Units"]

# "All" row: column sums over every region
totals = pd.DataFrame([{
    "Region": "All",
    **{
        col: combined[col].sum()
        for col in (
            "Subject B Units Original",
            "Subject B Units Corrected",
            "Subject B Diff",
            "Subject S Units",
            "Both Subjects Units Corrected",
        )
    },
}])

# Append the totals row and order by the corrected combined count, largest first
combined = pd.concat([combined, totals]).sort_values(
    by="Both Subjects Units Corrected", ascending=False
)


In [7]:
# Write the table with original and corrected subject B counts to disk (row index omitted)
combined.to_csv("/data/patrick_res/figures/wcst_paper/units/region_units_bl_corrected.csv", index=False)

In [9]:
# Stack subject S's units with subject B's corrected units and write them to disk
all_units_correct = pd.concat([sa_units, bl_correct_units])
all_units_correct.to_csv(
    "/data/patrick_res/figures/wcst_paper/units/all_units_correct.csv",
    index=False,
)