In [1]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from ripple_heterogeneity.utils import functions,loading,add_new_deep_sup
import seaborn as sns
from matplotlib.ticker import AutoMinorLocator

In [2]:
# Read the session table and pass it through add_animal_id in one step;
# the next cell reads `df_sessions.animal_id`.
df_sessions = functions.add_animal_id(
    pd.read_csv(r'Z:\home\ryanh\projects\ripple_heterogeneity\sessions.csv')
)

In [3]:
# Distinct animal identifiers present in the session table.
df_sessions["animal_id"].unique()


array(['AB1', 'AB3', 'AB4', 'AYA10', 'AYA4', 'AYA6', 'AYA7', 'AYA9',
       'OML22', 'OML23', 'EE', 'FF', 'GG', 'Rat08', 'Rat09', 'Rat10',
       'Rat11', 'Achilles', 'Buddy', 'Cicero', 'Gatsby', 'HMC1',
       '2006-4-10', '2006-4-18', '2006-6-12', '2006-6-13', '2006-6-7',
       'ec013', 'ec014', 'Kenji', 'ec016', 'f01_m', 'g01_m', 'i01_m',
       'j01_m', 'km01', 'nlx', 'OML10', 'OML18', 'OML19', 'OML3', 'OML5',
       'OML7', 'OML8', 'OR15', 'OR18', 'OR21', 'OR22', 'OR23', 'HMC2',
       'OML27'], dtype=object)

## load in all data

In [4]:
# Re-read the session table, then load cell metrics for every listed basepath.
df_sessions = pd.read_csv(
    r'Z:\home\ryanh\projects\ripple_heterogeneity\sessions.csv'
)
df = loading.load_all_cell_metrics(df_sessions["basepath"])

## add animal id

In [5]:
# Rebind `df` to the result of functions.add_animal_id. Later cells filter on
# an `animal_id` column of `df` (presumably added by this call — confirm).
df = functions.add_animal_id(df)

## add deep/superficial classification

In [25]:
# Rebind `df` to the result of deep_sup_from_distance. Later cells compare a
# `deepSuperficial` column against 'Deep' / 'Superficial' (presumably set or
# refreshed by this call — confirm in add_new_deep_sup).
df = add_new_deep_sup.deep_sup_from_distance(df)

## normalize brain-region labels

In [26]:
# Collapse detailed region names onto coarse labels. Rules run in the listed
# order, each one overwriting `brainRegion` wherever its regex matches.
for region_pattern, region_label in (
    ("CA1", "CA1"),
    ("EC1|EC2|EC3|EC4|EC5", "MEC"),
    ("DGCA3|CA3DG", "CA3DG"),
):
    region_hits = df["brainRegion"].str.contains(region_pattern)
    df.loc[region_hits, "brainRegion"] = region_label
df["brainRegion"].unique()

array(['CA1', 'MEC', 'PFC'], dtype=object)

In [27]:
# Keep only units whose region label matches one of the regions of interest.
regions = "CA1|MEC|PFC"
df = df[df["brainRegion"].str.contains(regions)]
df["brainRegion"].unique()


array(['CA1', 'MEC', 'PFC'], dtype=object)

In [28]:
# Environment names dropped (exact match) when `remove_epochs` is True.
_EXCLUDED_ENVIRONMENTS = ["wheel", "wheel_home", "test", "unknown", "water"]

# (regex, display label) pairs applied one after another in this order; a name
# relabelled by one rule is still tested against the rules that follow it.
_MAZE_NAME_RULES = (
    ("linear|linearOne|linearTwo", "Linear track"),
    ("cheeseboard", "Cheeseboard maze"),
    ("tmaze|Mwheel|Tmaze|T-maze", "T-maze"),
    ("bigSquarePlus|midSquare|box|bigSquare|openfield|openfieldBound|open", "Open field"),
    ("plus", "Plus maze"),
    ("wmaze", "M-maze"),
    ("circle", "Circular Linear track"),
    ("ZigZag", "Linear track"),
)


def return_all_behavior_type(df, animal_id, remove_epochs=True):
    """Summarise the behavioural environments recorded for one animal.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``animal_id`` and ``basepath`` columns.
    animal_id : str
        Animal whose sessions (unique ``basepath`` values) are inspected.
    remove_epochs : bool, default True
        When True, also drop the environments named exactly 'wheel',
        'wheel_home', 'test', 'unknown' or 'water'.

    Returns
    -------
    str
        Normalised environment names, de-duplicated in order of first
        appearance and joined with ", ". Empty string when the animal has no
        sessions or no environment survives the filters.

    Notes
    -----
    Environments whose name contains 'sleep' are always removed, and missing
    environment values are ignored.
    """
    basepaths = df.loc[df["animal_id"] == animal_id, "basepath"].unique()

    # Load every session first and concatenate once, instead of growing a
    # DataFrame inside the loop.
    epoch_frames = [loading.load_epoch(basepath) for basepath in basepaths]
    if not epoch_frames:
        return ""
    epoch_df = pd.concat(epoch_frames, ignore_index=True)

    # Work on a standalone Series so relabelling never writes into a slice of
    # another frame; NaN names are dropped because the string tests below
    # cannot classify them.
    environments = epoch_df["environment"].dropna().astype(str)
    environments = environments[~environments.str.contains("sleep")]

    if remove_epochs:
        environments = environments[~environments.isin(_EXCLUDED_ENVIRONMENTS)]

    for pattern, label in _MAZE_NAME_RULES:
        environments = environments.mask(environments.str.contains(pattern), label)

    return ", ".join(str(name) for name in environments.unique())
# return_all_behavior_type(df, "g01_m",remove_epochs=True)


In [9]:
# Load the epoch table of every session and tag each row with its basepath.
# Frames are collected in a list and concatenated once; `assign` returns a new
# frame, so whatever load_epoch returned is not modified.
epoch_frames = [
    loading.load_epoch(basepath).assign(basepath=basepath)
    for basepath in df.basepath.unique()
]
# pd.concat raises on an empty list, so fall back to an empty frame.
epoch_df = pd.concat(epoch_frames, ignore_index=True) if epoch_frames else pd.DataFrame()

In [10]:
# Every raw environment name found across the loaded epochs.
epoch_df["environment"].unique()

array(['sleep', 'linear', 'tmaze', 'cheeseboard', 'box', 'wmaze', 'water',
       'unknown', 'open', 'linearOne', 'linearTwo', 'Tmaze', 'wheel',
       'bigSquare', 'Mwheel', 'plus', 'midSquare', 'bigSquarePlus',
       'wheel_home', 'circle', 'ZigZag', 'test', 'openfield',
       'openfieldBound'], dtype=object)

In [11]:
# Session and unit id of every unit flagged with a bad waveform.
df.loc[df["tags_bad_waveform"] == True, ["basepath", "UID"]]


Unnamed: 0,basepath,UID
3585,Z:\Data\AYAold\AYA7\day19,85
4558,Z:\Data\AYAold\AYA9\day12,50
4920,Z:\Data\AYAold\AYA9\day14,208
5010,Z:\Data\AYAold\AYA9\day14,298
6534,Z:\Data\Can\OML22\day7,3
6535,Z:\Data\Can\OML22\day7,4
6556,Z:\Data\Can\OML22\day7,25
17167,Z:\Data\GrosmarkAD\Achilles\Achilles_10252013,100
17168,Z:\Data\GrosmarkAD\Achilles\Achilles_10252013,101
17169,Z:\Data\GrosmarkAD\Achilles\Achilles_10252013,102


In [12]:
# Sanity check: `bad_unit` values remaining after excluding both quality flags.
passes_quality = (df["bad_unit"] != True) & (df["tags_bad_waveform"] != True)
df.loc[passes_quality, "bad_unit"].unique()

array([False])

In [29]:
# Build a one-row-per-animal summary: regions recorded, unit counts per region
# (and per CA1 deep/superficial label), and behavioural environments.
#
# Rows are collected as dicts rather than in parallel lists, and the list of
# region labels is no longer stored in `regions`, so the regex string that an
# earlier cell keeps under that name is not overwritten.

# Units that pass both quality flags. `!= True` also keeps rows where a flag
# is missing, which `== False` would drop.
good_units = df.query("bad_unit != True & tags_bad_waveform != True")


def _n_units(units, region, layer=None):
    """Count rows whose brainRegion contains `region`, optionally restricted to one deepSuperficial label."""
    selected = units[units.brainRegion.str.contains(region)]
    if layer is not None:
        selected = selected[selected.deepSuperficial == layer]
    return selected.shape[0]


animal_rows = []
for animal_id in df.animal_id.unique():
    # Region labels come from all of the animal's units; counts only from
    # the quality-filtered ones.
    animal_units = df[df.animal_id == animal_id]
    animal_good = good_units[good_units.animal_id == animal_id]
    animal_rows.append(
        {
            "animal_id": animal_id,
            "regions": ", ".join(str(x) for x in animal_units.brainRegion.unique()),
            "n_ca1": _n_units(animal_good, "CA1"),
            "n_deep": _n_units(animal_good, "CA1", "Deep"),
            "n_sup": _n_units(animal_good, "CA1", "Superficial"),
            "n_mec": _n_units(animal_good, "MEC"),
            "n_pfc": _n_units(animal_good, "PFC"),
            "environments": return_all_behavior_type(df, animal_id),
        }
    )

animal_region_df = pd.DataFrame(
    animal_rows,
    columns=[
        "animal_id",
        "regions",
        "n_ca1",
        "n_deep",
        "n_sup",
        "n_mec",
        "n_pfc",
        "environments",
    ],
)

# Order rows by environment list, then region list (both descending), and
# number the animals in that order.
animal_region_df = animal_region_df.sort_values(by=["environments","regions"],ascending=False).reset_index(drop=True)
animal_region_df["Animal ID"] = [f"Rat {i+1}" for i in range(animal_region_df.shape[0])]

animal_region_df

Unnamed: 0,animal_id,regions,n_ca1,n_deep,n_sup,n_mec,n_pfc,environments,Animal ID
0,ec014,"CA1, MEC",628,308,49,272,0,"T-maze, Open field, Linear track",Rat 1
1,i01_m,"MEC, CA1",225,64,44,66,0,"T-maze, Open field",Rat 2
2,ec016,"MEC, CA1",498,103,226,209,0,"T-maze, Linear track, Open field",Rat 3
3,AB3,CA1,667,265,242,0,0,"T-maze, Linear track",Rat 4
4,OML22,CA1,423,147,87,0,0,"T-maze, Linear track",Rat 5
5,AYA10,"CA1, MEC",192,80,47,146,0,"T-maze, Cheeseboard maze",Rat 6
6,EE,"PFC, CA1",376,169,105,0,836,T-maze,Rat 7
7,FF,"PFC, CA1",28,7,17,0,138,T-maze,Rat 8
8,GG,"PFC, CA1",48,10,17,0,37,T-maze,Rat 9
9,f01_m,CA1,87,70,3,0,0,T-maze,Rat 10


In [30]:
# Animals with CA1 units recorded together with MEC and/or PFC units.
# `&` binds tighter than `|`, so the previous unparenthesised condition was
# evaluated as (CA1 & MEC) | PFC and would also admit an animal with PFC units
# but no CA1 units. NOTE(review): assumes "cross-region" means CA1 plus at
# least one of MEC / PFC — confirm.
has_ca1 = animal_region_df.n_ca1 > 0
has_mec = animal_region_df.n_mec > 0
has_pfc = animal_region_df.n_pfc > 0
cross_region_df = animal_region_df[has_ca1 & (has_mec | has_pfc)]
cross_region_df.n_deep.sum(), cross_region_df.n_sup.sum(), cross_region_df.n_mec.sum(), cross_region_df.n_pfc.sum()

(1705, 879, 2691, 2167)

In [12]:
# Shape of the summary rows whose environment list mentions a track.
animal_region_df[animal_region_df["environments"].str.contains('track')].shape

(30, 7)

In [13]:
# Shape of the summary rows whose environment list mentions the M-maze.
animal_region_df[animal_region_df["environments"].str.contains('M-maze')].shape

(7, 7)

In [14]:
# Shape of the summary rows whose environment list mentions the cheeseboard.
animal_region_df[animal_region_df["environments"].str.contains('Cheeseboard')].shape

(9, 7)

In [15]:
# Shape of the summary rows whose environment list mentions the T-maze.
animal_region_df[animal_region_df["environments"].str.contains('T-maze')].shape


(16, 7)

In [16]:
# Shape of the summary rows whose environment list mentions an open field.
animal_region_df[animal_region_df["environments"].str.contains('Open field')].shape


(10, 7)

In [17]:
# Summary rows whose environment list mentions the plus maze.
animal_region_df[animal_region_df["environments"].str.contains('Plus')]


Unnamed: 0,animal_id,regions,n_ca1,n_mec,n_pfc,environments,Animal ID
11,ec013,"MEC, CA1",432,507,0,"Open field, Linear track, T-maze, Plus maze",Rat 12


In [31]:
# animal_region_df.n_ca1.sum()	n_mec	n_pfc
# Total unit counts across animals, in the order (CA1, MEC, PFC).
tuple(animal_region_df[column].sum() for column in ("n_ca1", "n_mec", "n_pfc"))

(10295, 2691, 2167)

In [30]:
# Session basepaths recorded for animal 'HMC2'.
df.loc[df["animal_id"] == 'HMC2', "basepath"].unique()

array(['Z:\\Data\\HMC2\\day3', 'Z:\\Data\\HMC2\\day4',
       'Z:\\Data\\HMC2\\day11', 'Z:\\Data\\HMC2\\day9',
       'Z:\\Data\\HMC2\\day8', 'Z:\\Data\\HMC2\\day5',
       'Z:\\Data\\HMC2\\day6', 'Z:\\Data\\HMC2\\day7'], dtype=object)

In [62]:
# df[["animal_id","brainRegion"]].value_counts().reset_index()

# df[["animal_id","brainRegion"]].value_counts().reset_index()[["animal_id","brainRegion"]].pivot(columns='brainRegion')

In [8]:
# Rebind `df` to the result of deep_sup_from_deepSuperficialDistance. The cells
# below read `df.deepSuperficial` (presumably derived from the
# `deepSuperficialDistance` column by this call — confirm in add_new_deep_sup).
df = add_new_deep_sup.deep_sup_from_deepSuperficialDistance(df)

In [181]:
# Per-session counts of CA1 pyramidal units labelled Deep / Superficial,
# largest superficial count first. Boolean indicator columns are summed per
# basepath, so sessions with neither label still appear with zeros.
(
    df.query("brainRegion.str.contains('CA1') & putativeCellType.str.contains('Pyr')")
    .assign(
        n_deep=lambda units: units.deepSuperficial == "Deep",
        n_sup=lambda units: units.deepSuperficial == "Superficial",
    )
    .groupby("basepath")[["n_deep", "n_sup"]]
    .sum()
    .reset_index()
    .sort_values("n_sup", ascending=False)
)
# .to_csv(r"Z:\home\ryanh\projects\ripple_heterogeneity\n_deep_sup_by_basepath.csv")

Unnamed: 0,basepath,n_deep,n_sup
258,Z:\Data\ORproject\OR22\day3,0,100
259,Z:\Data\ORproject\OR22\day4,0,82
55,Z:\Data\FujisawaS\EE\EE0622fm,18,69
6,Z:\Data\AYAold\AB3\AB3_58_59,22,51
188,Z:\Data\Kenji\ec016.577_590,13,45
...,...,...,...
100,Z:\Data\GirardeauG\Rat11\Rat11-20150321,6,0
101,Z:\Data\GirardeauG\Rat11\Rat11-20150323,0,0
102,Z:\Data\GirardeauG\Rat11\Rat11-20150325,6,0
103,Z:\Data\GirardeauG\Rat11\Rat11-20150326,24,0


In [32]:
# Layer-label tally for putative pyramidal units with firing rate below 7 and
# more than 100 spikes.
pyr_like_units = df.query("firingRate<7 &  putativeCellType.str.contains('Pyr') & spikeCount>100")
pyr_like_units["deepSuperficial"].value_counts()

unknown        12650
Deep            4513
middle          3381
Superficial     2352
Name: deepSuperficial, dtype: int64

In [24]:
# Inspect the bad-waveform flag column.
df["tags_bad_waveform"]

0      False
1      False
2      False
3      False
4      False
       ...  
823    False
824    False
825    False
826    False
827    False
Name: tags_bad_waveform, Length: 828, dtype: object

In [25]:
# Layer-label tally for one session, restricted to good-quality putative
# pyramidal units. The filter is assembled from its individual conditions.
restriction = " & ".join(
    [
        "bad_unit==False",
        "tags_bad_waveform==False",
        "firingRate<7",
        "putativeCellType.str.contains('Pyr')",
        "spikeCount>100",
        "basepath.str.contains('Achilles_10252013')",
    ]
)
df.query(restriction)["deepSuperficial"].value_counts()


Deep           125
middle          76
Superficial     24
Name: deepSuperficial, dtype: int64

In [None]:
# basepath = r"Z:\\Data\\AYAold\\AB1\\day1"
# loading.load_cell_metrics(basepath)


In [2]:
# Re-read the session table and load cell metrics for each distinct basepath.
df_sessions = pd.read_csv(
    r'Z:\home\ryanh\projects\ripple_heterogeneity\sessions.csv'
)
df = loading.load_all_cell_metrics(df_sessions["basepath"].unique())


In [16]:
# Mean number of CA1 units per (session, shank) pair.
df[df["brainRegion"].str.contains('CA1')].groupby(["basepath", "shankID"]).size().mean()

10.39423076923077

In [24]:
# Largest number of CA1 units found in a single session.
df[df["brainRegion"].str.contains('CA1')].groupby(["basepath"]).size().max()


275

In [159]:
# Distinct animals in the loaded metrics, and how many there are.
df["animal"].unique(), df["animal"].nunique()

(array(['AB1', 'AB3', 'AB4', 'AYA10', 'AYA4', 'AYA6', 'AYA7', 'AYA9',
        'OML22', 'OML23', 'EE', 'FF', 'GG', 'Rat08', 'Rat10', 'Rat11',
        'Achilles', 'Buddy', 'Cicero', 'Gatsby', 'HMC1', '2006-4-10',
        '2006-4-18', '2006-6-12', '2006-6-13', '2006-6-7', 'ec013',
        'ec014', 'Kenji', 'ec016', 'f01_m', 'g01_m', 'i01_m', 'km01',
        'nlx', 'OML10', 'OML18', 'OML19', 'OML3', 'OML5', 'OML7', 'OML8',
        'OR15', 'OR18', 'OR21', 'OR22', 'OR23', 'HMC2', 'OML27'],
       dtype=object),
 49)

In [160]:
# Animals contributing at least one CA1 unit with bad_unit == False, and their
# count. The filter is evaluated once and reused, instead of running the same
# query twice with a duplicated condition string.
ca1_good_animals = df.query("bad_unit==False & brainRegion.str.contains('CA1')").animal
ca1_good_animals.unique(), ca1_good_animals.nunique()


(array(['AB1', 'AB3', 'AB4', 'AYA10', 'AYA4', 'AYA6', 'AYA7', 'AYA9',
        'OML22', 'OML23', 'EE', 'FF', 'GG', 'Rat08', 'Rat10', 'Rat11',
        'Achilles', 'Buddy', 'Cicero', 'Gatsby', 'HMC1', '2006-4-10',
        '2006-4-18', '2006-6-12', '2006-6-13', '2006-6-7', 'ec013',
        'ec014', 'Kenji', 'ec016', 'f01_m', 'g01_m', 'i01_m', 'km01',
        'nlx', 'OML10', 'OML18', 'OML19', 'OML3', 'OML5', 'OML7', 'OML8',
        'OR15', 'OR18', 'OR21', 'OR22', 'OR23', 'HMC2', 'OML27'],
       dtype=object),
 49)

In [164]:
# Rebind `df` to the result of deep_sup_from_deepSuperficialDistance. Later
# cells tally `df.deepSuperficial` (presumably derived from the
# `deepSuperficialDistance` column by this call — confirm in add_new_deep_sup).
df = add_new_deep_sup.deep_sup_from_deepSuperficialDistance(df)

In [6]:
# Column names containing the substring "tag". The match is a plain substring
# test, so names such as 'peakVoltage' ("Vol-tag-e") are listed as well.
df.columns[df.columns.str.contains("tag")]

Index(['peakVoltage', 'peakVoltage_expFit', 'tags_P', 'tags_N', 'tags_Bad',
       'tags_Pr', 'tags_Nr', 'tags_Pb', 'tags_Nb', 'tags_InverseSpike',
       'peakVoltage_expFitLengthConstant', 'tags_bad_waveform', 'tags_Noise',
       'tags_contaminated'],
      dtype='object')

In [14]:
# Number of distinct animals in the loaded metrics.
df["animal"].nunique()

49

In [162]:
# CA1 units that have no deepSuperficialDistance value.
missing_distance_in_ca1 = df["deepSuperficialDistance"].isna() & df["brainRegion"].str.contains('CA1')
df.loc[missing_distance_in_ca1, ["basepath", "UID"]]

Unnamed: 0,basepath,UID
17465,Z:\Data\GrosmarkAD\Buddy\Buddy_06272013,19
17473,Z:\Data\GrosmarkAD\Buddy\Buddy_06272013,27
17789,Z:\Data\GrosmarkAD\Gatsby\Gatsby_08022013,25
17863,Z:\Data\GrosmarkAD\Gatsby\Gatsby_08282013,19
17868,Z:\Data\GrosmarkAD\Gatsby\Gatsby_08282013,24
17870,Z:\Data\GrosmarkAD\Gatsby\Gatsby_08282013,26
28643,Z:\Data\OMLproject\OML8\day17,19
28656,Z:\Data\OMLproject\OML8\day17,32
31135,Z:\Data\OMLproject\OML27\day3,2
31138,Z:\Data\OMLproject\OML27\day3,5


In [163]:
# Layer-label tally for CA1 putative pyramidal units with bad_unit == False.
df.query("bad_unit==False & brainRegion.str.contains('CA1') & putativeCellType.str.contains('Pyr')")["deepSuperficial"].value_counts()


Deep           3818
middle         2808
Superficial    1964
Name: deepSuperficial, dtype: int64

In [165]:
# Number of those units that carry a (non-missing) layer label. value_counts
# skips missing values by default, so its total equals the non-null count.
df.query("bad_unit==False & brainRegion.str.contains('CA1') & putativeCellType.str.contains('Pyr')")["deepSuperficial"].count()

8590

## how many animals on each maze

In [66]:
# Load the epoch table of every session and tag each row with its basepath and
# the animal id that loading.get_animal_id returns for that basepath.
# Frames are collected in a list and concatenated once; `assign` returns a new
# frame, so whatever load_epoch returned is not modified.
epoch_frames = [
    loading.load_epoch(basepath).assign(
        basepath=basepath,
        animal=loading.get_animal_id(basepath),
    )
    for basepath in df.basepath.unique()
]
# pd.concat raises on an empty list, so fall back to an empty frame.
epoch_df = pd.concat(epoch_frames, ignore_index=True) if epoch_frames else pd.DataFrame()

In [67]:
# Every raw environment name found across the loaded epochs.
epoch_df["environment"].unique()

array(['sleep', 'linear', 'tmaze', 'cheeseboard', 'box', 'wmaze', 'water',
       'unknown', 'open', 'linearOne', 'linearTwo', 'Tmaze', 'wheel',
       'bigSquare', 'Mwheel', 'plus', 'midSquare', 'bigSquarePlus',
       'wheel_home', 'circle', 'ZigZag', 'test'], dtype=object)

In [69]:
# Number of distinct animals with an epoch in any of the listed arenas.
# NOTE(review): the earlier condition named 'midSquare' twice; listing it once
# selects the same rows. Confirm that no other arena name was intended.
epoch_df[epoch_df["environment"].isin(['box', 'open', 'bigSquare', 'midSquare'])]["animal"].nunique()


10

In [70]:
# Number of distinct animals with an epoch on any linear-track variant.
epoch_df[epoch_df["environment"].isin(['linear', 'linearOne', 'linearTwo'])]["animal"].nunique()


27

In [57]:
# Number of distinct animals with an epoch named 'tmaze', 'Tmaze' or 'Mwheel'.
epoch_df[epoch_df["environment"].isin(['tmaze', 'Tmaze', 'Mwheel'])]["animal"].nunique()

15

In [74]:
# Number of distinct animals with a 'cheeseboard' epoch.
epoch_df[epoch_df["environment"] == 'cheeseboard']["animal"].nunique()


9

In [59]:
# Number of distinct animals with a 'wmaze' epoch.
epoch_df[epoch_df["environment"] == 'wmaze']["animal"].nunique()


6

In [71]:
# Number of distinct animals with a 'bigSquarePlus' or 'plus' epoch.
epoch_df[epoch_df["environment"].isin(['bigSquarePlus', 'plus'])]["animal"].nunique()


1

In [72]:
# Number of distinct animals with a 'ZigZag' epoch.
epoch_df[epoch_df["environment"] == 'ZigZag']["animal"].nunique()


1