In [None]:
import spike.spike_analysis.spike_collection as sc
import spike.spike_analysis.spike_recording as sr
import spike.spike_analysis.firing_rate_calculations as fr
import spike.spike_analysis.normalization as norm
import spike.spike_analysis.single_cell as single_cell
import spike.spike_analysis.spike_collection as collection
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import behavior.boris_extraction as boris
import matplotlib.pyplot as plt
import pickle


In [None]:
'''
import os
from spike.spike_analysis.spike_recording import SpikeRecording
import numpy as np


class SpikeCollection:
    """
    This class initializes and reads in phy folders as EphysRecording
    instances.

    Attributes:
        path: str, relative path to the folder of merged.rec files
            for each reacording
        sampling_rate: int, default=20000 sampling rate of ephys device in Hz
    """

    def __init__(self, path, event_dict={}, subject_dict={}, sampling_rate=20000):
        self.sampling_rate = sampling_rate
        self.path = path
        self.event_dict = event_dict
        self.subject_dict = subject_dict

        self.make_collection()
        if not event_dict:
            print("Please assign event dictionaries to each recording")
            print("as recording.event_dict")
            print("event_dict = {event name(str): np.array[[start(ms), stop(ms)]...]")
        else:
            self.event_dict = event_dict
        if not subject_dict:
            print("Please assign subjects to each recording as recording.subject")
        else:
            self.subject_dict = subject_dict

    def make_collection(self):
        print("Scanning in path:", self.path)
        collection = []
        for root, dirs, files in os.walk(self.path):
            print("Found dirs:", dirs)
            for directory in dirs:
                if directory.endswith("merged.rec"):
                    phy_path = os.path.join(self.path, directory, "phy")
                    print("Checking for params.py at:", os.path.join(phy_path, "params.py"))
                    if os.path.exists(os.path.join(phy_path, "params.py")):
                        print("loading ", directory)
                        recording = SpikeRecording(phy_path, self.sampling_rate)
                        if "good" not in recording.labels_dict.values():
                            print(f"{directory} has no good units")
                            print("and will not be included in the collection")
                        else:
                            if self.subject_dict:
                                try:
                                    recording.subject = self.subject_dict[directory]
                                except KeyError:
                                    print(f"{directory} not found in subject dict")
                            if self.event_dict:
                                try:
                                    recording.event_dict = self.event_dict[directory]
                                except KeyError:
                                    print(f"{directory} not found in event dict")
                            collection.append(recording)
        self.recordings = collection

    def analyze(self, timebin, ignore_freq=0.1, smoothing_window=None, mode="same"):
        self.timebin = timebin
        self.ignore_freq = ignore_freq
        self.smoothing_window = smoothing_window
        self.mode = mode
        analyzed_neurons = 0
        good_neurons = 0
        for recording in self.recordings:
            recording.analyze(timebin, ignore_freq, smoothing_window, mode)
            analyzed_neurons += recording.analyzed_neurons
            good_neurons += recording.good_neurons
        self.good_neurons = good_neurons
        self.analyzed_neurons = analyzed_neurons
        self.__all_set__()

    def __all_set__(self):
        """
        double checks that all SpikeRecordings in the collection have attributes: subject & event_dict and that
        each event_dict has the same keys. Warns users which recordings are missing subjects or event_dicts.
        If all set, prints "All set to analyze" and calculates spiketrains and firing rates.
        """
        is_first = True
        is_good = True
        missing_events = []
        missing_subject = []
        event_dicts_same = True
        event_type = False
        for recording in self.recordings:
            if not hasattr(recording, "event_dict"):
                missing_events.append(recording.name)
            else:
                if is_first:
                    last_recording_events = recording.event_dict.keys()
                    is_first = False
                else:
                    if recording.event_dict.keys() != last_recording_events:
                        event_dicts_same = False
                for value in recording.event_dict.values():
                    if type(value) is np.ndarray:
                        if (value.ndim == 2) & (value.shape[1] == 2):
                            event_type = True
            if not hasattr(recording, "subject"):
                missing_subject.append(recording.name)
        if len(missing_events) > 0:
            print("These recordings are missing event dictionaries:")
            print(f"{missing_events}")
            is_good = False
        else:
            if not event_dicts_same:
                print("Your event dictionary keys are different across recordings.")
                print("Please double check them:")
                for recording in self.recordings:
                    print(recording.name, "keys:", recording.event_dict.keys())
                is_good = False
        if len(missing_subject) > 0:
            print(f"These recordings are missing subjects: {missing_subject}")
            is_good = False
        if not event_type:
            print("Event arrays are not 2 dimensional numpy arrays of shape (n x 2).")
            print("Please fix.")
        if is_good:
            for recording in self.recordings:
                recording.all_set = True
            print("All set to analyze")

    def __str__(self):
        """
        Returns a summary of the SpikeCollection object, including:
        - Number of recordings
        - Average number of good units
        - Average number of events per event type (if event_dicts are present)
        - Number of unique subjects
        """
        num_recordings = len(self.recordings)
        avg_good_units = (
            sum(recording.good_neurons for recording in self.recordings) / num_recordings if num_recordings > 0 else 0
        )
        total_good_units = sum(recording.good_neurons for recording in self.recordings)
        for recording in self.recordings:
            if hasattr(recording, "analyzed_neurons"):
                calculate_analyzed_neurons = True
            else:
                calculate_analyzed_neurons = False
        if calculate_analyzed_neurons:
            total_analyzed_units = sum(recording.analyzed_neurons for recording in self.recordings)
        # Calculate average number of events per event type
        event_counts = {}
        for recording in self.recordings:
            if hasattr(recording, "event_dict"):
                for event, events in recording.event_dict.items():
                    event_counts[event] = event_counts.get(event, 0) + len(events)

        avg_events_per_type = (
            {event: count / num_recordings for event, count in event_counts.items()} if event_counts else "N/A"
        )

        # Get the number of unique subjects
        missing_subjects = [
            recording.name for recording in self.recordings if getattr(recording, "subject", None) is None
        ]
        if missing_subjects:
            subject_info = f"Missing Subjects for Recordings: {missing_subjects}"
        else:
            unique_subjects = len(set(recording.subject for recording in self.recordings))
            subject_info = f"Number of Unique Subjects: {unique_subjects}"

        return (
            f"SpikeCollection Summary:\n"
            f"  Number of Recordings: {num_recordings}\n"
            f"  Total Good Units: {total_good_units}\n"
            f"  Total Analyzed Units: {total_analyzed_units}\n"
            f"  Average Number of Good Units: {avg_good_units:.2f}\n"
            f"  Average Number of Events per Event Type: {avg_events_per_type}\n"
            f"  {subject_info}\n"
            f"\n"
        )

    def recording_details(self):
        details = []
        for recording in self.recordings:
            subject = getattr(recording, "subject", "Unknown")
            good_units = getattr(recording, "good_neurons", 0)
            recording_length = recording.timestamps_var[-1] / recording.sampling_rate / 60  # in minutes

            # Get the number of events per event type
            event_counts = {}
            if hasattr(recording, "event_dict"):
                for event, events in recording.event_dict.items():
                    event_counts[event] = len(events)

            details.append(
                f"\n"
                f"Recording: {recording.name}\n"
                f"  Subject: {subject}\n"
                f"  Number of Good Units: {good_units}\n"
                f"  Recording Length: {recording_length:.2f} minutes\n"
                f"  Events per Event Type: {event_counts}\n"
            )
        print(f"Recording Details:\n" f"{''.join(details)}")
        return None
'''

In [23]:
# Meghan Code from spike_object_creation

def pickle_this(thing_to_pickle, file_name):
    """
    Pickles things
    Args (2):
        thing_to_pickle: anything you want to pickle
        file_name: str, filename that ends with .pkl
    Returns:
        none
    """
    with open(file_name,'wb') as file:
        pickle.dump(thing_to_pickle, file)

def unpickle_this(pickle_file):
    """
    Unpickles things
    Args (1):
        file_name: str, pickle filename that already exists and ends with .pkl
    Returns:
        pickled item
    """
    with open(pickle_file, 'rb') as file:
        return(pickle.load(file))

In [24]:
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)        # Auto-detect display width


## Loading Spike Objects

In [None]:
# Define paths to the pkl files
cagemate_path = r'/Users/thomasheeps/UFL Dropbox/Thomas Heeps/Padilla-Coreano Lab/2024/Cum_SocialMemEphys_pilot2/Habituation_Dishabituation (phase 1)/spike_data/sorted/cagemate'
novel_path = r'/Users/thomasheeps/UFL Dropbox/Thomas Heeps/Padilla-Coreano Lab/2024/Cum_SocialMemEphys_pilot2/Habituation_Dishabituation (phase 1)/spike_data/sorted/novel'
'''
# Load the cagemate spike collection
with open(cagemate_path, 'rb') as f:
    cagemate_spike_collection = pickle.load(f)

# Load the novel spike collection
with open(novel_path, 'rb') as f:
    novel_spike_collection = pickle.load(f)

print(f"Loaded {len(cagemate_spike_collection)} cagemate recordings.")
print(f"Loaded {len(novel_spike_collection)} novel recordings.")

if cagemate_spike_collection:
    print(f"Cagemate Units (First Recording): {cagemate_spike_collection[0].get_unit_ids()}")
if novel_spike_collection:
    print(f"Novel Units (First Recording): {novel_spike_collection[0].get_unit_ids()}")
'''


'\n# Load the cagemate spike collection\nwith open(cagemate_path, \'rb\') as f:\n    cagemate_spike_collection = pickle.load(f)\n\n# Load the novel spike collection\nwith open(novel_path, \'rb\') as f:\n    novel_spike_collection = pickle.load(f)\n\nprint(f"Loaded {len(cagemate_spike_collection)} cagemate recordings.")\nprint(f"Loaded {len(novel_spike_collection)} novel recordings.")\n\nif cagemate_spike_collection:\n    print(f"Cagemate Units (First Recording): {cagemate_spike_collection[0].get_unit_ids()}")\nif novel_spike_collection:\n    print(f"Novel Units (First Recording): {novel_spike_collection[0].get_unit_ids()}")\n'

In [None]:
cagemate_spike_collection = collection.SpikeCollection(cagemate_path)
novel_spike_collection = collection.SpikeCollection(novel_path)

Scanning in path: /Users/thomasheeps/code/researchRepos/diff_fam_social_memory_ephys/other_peoples_sutff/Thomas/cagemate_spike_collection.pkl
Please assign event dictionaries to each recording
as recording.event_dict
event_dict = {event name(str): np.array[[start(ms), stop(ms)]...]
Please assign subjects to each recording as recording.subject
Scanning in path: /Users/thomasheeps/code/researchRepos/diff_fam_social_memory_ephys/other_peoples_sutff/Thomas/novel_spike_collection.pkl
Please assign event dictionaries to each recording
as recording.event_dict
event_dict = {event name(str): np.array[[start(ms), stop(ms)]...]
Please assign subjects to each recording as recording.subject


In [27]:
print(f"Type of first cagemate recording: {type(cagemate_spike_collection[0])}")
print(f"Methods: {dir(cagemate_spike_collection[0])}")


TypeError: 'SpikeCollection' object is not subscriptable

In [None]:
cagemate_spike_collection.analyze(timebin = 100, ignore_freq=0.5)

AttributeError: 'list' object has no attribute 'analyze'

## Loading Boris Files and making Cagemate and Novel Dataframes

In [None]:
trial_df = pd.read_csv(r'/Users/thomasheeps/code/researchRepos/diff_fam_social_memory_ephys/other_peoples_sutff/Thomas/habit_dishabit_aggregated/22_nov_p1_aggregated.csv')
trial_df.head(5)

Unnamed: 0,Observation id,Observation date,Description,Media file,Total length,FPS,Subject,Behavior,Behavioral category,Modifiers,Behavior type,Start (s),Stop (s),Duration (s),Comment start,Comment stop
0,22_nov_p1,2024-10-07 16:14:23,,/Users/naylajimenez/Downloads/22_nov_p1_conver...,2342.2,15.0,social_agent,mice back in,,,POINT,603.533,603.533,,,
1,22_nov_p1,2024-10-07 16:14:23,,/Users/naylajimenez/Downloads/22_nov_p1_conver...,2342.2,15.0,social_agent,facial sniffing,,,STATE,605.0,605.267,0.267,,
2,22_nov_p1,2024-10-07 16:14:23,,/Users/naylajimenez/Downloads/22_nov_p1_conver...,2342.2,15.0,subject,facial sniffing,,,STATE,605.133,605.267,0.134,,
3,22_nov_p1,2024-10-07 16:14:23,,/Users/naylajimenez/Downloads/22_nov_p1_conver...,2342.2,15.0,subject,anogenital sniffing,,,STATE,606.4,610.4,4.0,,
4,22_nov_p1,2024-10-07 16:14:23,,/Users/naylajimenez/Downloads/22_nov_p1_conver...,2342.2,15.0,social_agent,anogenital sniffing,,,STATE,606.733,606.8,0.067,,


In [None]:
import glob

folder_path = r"/Users/thomasheeps/code/researchRepos/diff_fam_social_memory_ephys/other_peoples_sutff/Thomas/habit_dishabit_aggregated"
csv_files = glob.glob(os.path.join(folder_path, "*.csv"))
tsv_files = glob.glob(os.path.join(folder_path, "*.tsv"))

cage_dfs = []
nov_dfs = []

# Reading Behavior CSVs and sorting into cagemate and novel dfs
for file in csv_files:

    # Read CSV then assign subject_id based on filename
    df = pd.read_csv(file)
    df['subject_id'] = os.path.basename(file).split('_')[0]

    # Creating exposure columns labeling exposures in order of mouse_in order
    df['exposure'] = None
    exposure_mask = df['Behavior'] == 'mice back in'

    # skip if not 5 exposures
    if exposure_mask.sum() != 5:
        continue

    '''
    # if first exposure in exposure_mask is less than 60 seconds in, then remove it
    exposure_rows = df[exposure_mask]
    if exposure_rows.iloc[0]['Start (s)'] < 60: # checking first exposure
        exposure_mask.iloc[exposure_rows.index[0]] = False
    '''

    exp_label = [f'exp{i+1}' for i in range(exposure_mask.sum())] # exposures numbered for each mouse_in

    df.loc[exposure_mask, 'exposure'] = exp_label # assign exposure labels to mouse_in rows

    df['exposure'] = df['exposure'].ffill() # forward fill the exposure labels to all rows

    print("Last 10 rows of exposure for:", file)
    print(df[['Behavior', 'Start (s)', 'exposure']].tail(10))


    # Sorting into cagemate and novel dfs
    if '_cage_' in file:
        cage_dfs.append(df)
    elif '_nov_' in file:
        nov_dfs.append(df)

# Reading Behavior TSVs and sorting into cagemate and novel dfs
for file in tsv_files:

    # Read TSV then assign subject_id based on filename
    df = pd.read_csv(file, sep='\t') # tab-separated rather than comma-separated
    df['subject_id'] = os.path.basename(file).split('_')[0]

    # Creating exposure columns labeling exposures in order of mouse_in order
    df['exposure'] = None
    exposure_mask = df['Behavior'] == 'mice back in'

    # skip if not 5 exposures
    if exposure_mask.sum() != 5:
        continue

    '''
    # if first exposure in exposure_mask is less than 60 seconds in, then remove it
    exposure_rows = df[exposure_mask]
    if exposure_rows.iloc[0]['Start (s)'] < 60: # checking first exposure
        exposure_mask.iloc[exposure_rows.index[0]] = False
    '''

    exp_label = [f'exp{i+1}' for i in range(exposure_mask.sum())] # exposures numbered for each mouse_in

    df.loc[exposure_mask, 'exposure'] = exp_label # assign exposure labels to mouse_in rows

    df['exposure'] = df['exposure'].ffill() # forward fill the exposure labels to all rows

    print("Last 10 rows of exposure for:", file)
    print(df[['Behavior', 'Start (s)', 'exposure']].tail(10))

cagemate_df = pd.concat(cage_dfs, ignore_index=True)
novel_df = pd.concat(nov_dfs, ignore_index=True)

Last 10 rows of exposure for: /Users/thomasheeps/code/researchRepos/diff_fam_social_memory_ephys/other_peoples_sutff/Thomas/habit_dishabit_aggregated/23_cage_p1_aggregated.csv
                Behavior  Start (s) exposure
372  anogenital sniffing   2290.667     exp5
373  anogenital sniffing   2297.933     exp5
374  anogenital sniffing   2300.867     exp5
375      facial sniffing   2304.000     exp5
376      facial sniffing   2304.133     exp5
377      facial sniffing   2325.067     exp5
378      facial sniffing   2325.067     exp5
379  anogenital sniffing   2325.333     exp5
380      facial sniffing   2339.067     exp5
381      facial sniffing   2339.067     exp5
Last 10 rows of exposure for: /Users/thomasheeps/code/researchRepos/diff_fam_social_memory_ephys/other_peoples_sutff/Thomas/habit_dishabit_aggregated/12_cage_p1_aggregated.csv
                Behavior  Start (s) exposure
148  anogenital sniffing   2200.133     exp5
149      facial sniffing   2203.400     exp5
150      facial sn

In [None]:
print(cagemate_df['exposure'].unique())
print(novel_df['exposure'].unique())

['exp1' 'exp2' 'exp3' 'exp4' 'exp5' None]
['exp1' 'exp2' 'exp3' 'exp4' 'exp5']


In [None]:
novel_df.tail(5)

Unnamed: 0,Observation id,Observation date,Description,Observation type,Source,Time offset (s),Coding duration,Media duration (s),FPS (frame/s),Subject,Observation duration by subject by observation,Behavior,Behavioral category,Behavior type,Start (s),Stop (s),Duration (s),Media file name,Image index start,Image index stop,Image file path start,Image file path stop,Comment start,Comment stop,subject_id,exposure,Media file,Total length,FPS,Modifiers
1690,33_nov_p1,2024-10-08 21:07:01.382,,Media file,player #1:C:/Users/brayd/Desktop/Padilla-Corea...,0.0,1735.067,2341.4,15.0,subject,246.864,facial sniffing,Not defined,STATE,2303.067,2303.867,0.8,C:/Users/brayd/Desktop/Padilla-Coreano Lab/Soc...,34546.0,34558.0,,,,,33,exp5,,,,
1691,33_nov_p1,2024-10-08 21:07:01.382,,Media file,player #1:C:/Users/brayd/Desktop/Padilla-Corea...,0.0,1735.067,2341.4,15.0,social_agent,143.795,facial sniffing,Not defined,STATE,2318.0,2318.933,0.933,C:/Users/brayd/Desktop/Padilla-Coreano Lab/Soc...,34770.0,34784.0,,,,,33,exp5,,,,
1692,33_nov_p1,2024-10-08 21:07:01.382,,Media file,player #1:C:/Users/brayd/Desktop/Padilla-Corea...,0.0,1735.067,2341.4,15.0,social_agent,143.795,anogenital sniffing,Not defined,STATE,2319.267,2320.933,1.666,C:/Users/brayd/Desktop/Padilla-Coreano Lab/Soc...,34789.0,34814.0,,,,,33,exp5,,,,
1693,33_nov_p1,2024-10-08 21:07:01.382,,Media file,player #1:C:/Users/brayd/Desktop/Padilla-Corea...,0.0,1735.067,2341.4,15.0,social_agent,143.795,facial sniffing,Not defined,STATE,2322.8,2325.333,2.533,C:/Users/brayd/Desktop/Padilla-Coreano Lab/Soc...,34842.0,34880.0,,,,,33,exp5,,,,
1694,33_nov_p1,2024-10-08 21:07:01.382,,Media file,player #1:C:/Users/brayd/Desktop/Padilla-Corea...,0.0,1735.067,2341.4,15.0,social_agent,143.795,facial sniffing,Not defined,STATE,2336.333,2337.4,1.067,C:/Users/brayd/Desktop/Padilla-Coreano Lab/Soc...,35045.0,35061.0,,,,,33,exp5,,,,


In [None]:
# quick analysis of cagemate and novel dfs
print(f"Cagemate DataFrame Shape: {cagemate_df.shape}")
print(f"Novel DataFrame Shape: {novel_df.shape}")
print(f"Cagemate DataFrame Columns: {cagemate_df.columns}")
print(f"Novel DataFrame Columns: {novel_df.columns}")

Cagemate DataFrame Shape: (3075, 30)
Novel DataFrame Shape: (3397, 30)
Cagemate DataFrame Columns: Index(['Observation id', 'Observation date', 'Description', 'Media file',
       'Total length', 'FPS', 'Subject', 'Behavior', 'Behavioral category',
       'Modifiers', 'Behavior type', 'Start (s)', 'Stop (s)', 'Duration (s)',
       'Comment start', 'Comment stop', 'subject_id', 'exposure',
       'Observation type', 'Source', 'Time offset (s)', 'Coding duration',
       'Media duration (s)', 'FPS (frame/s)',
       'Observation duration by subject by observation', 'Media file name',
       'Image index start', 'Image index stop', 'Image file path start',
       'Image file path stop'],
      dtype='object')
Novel DataFrame Columns: Index(['Observation id', 'Observation date', 'Description', 'Observation type',
       'Source', 'Time offset (s)', 'Coding duration', 'Media duration (s)',
       'FPS (frame/s)', 'Subject',
       'Observation duration by subject by observation', 'Behavior

In [None]:
print(cagemate_df['Observation type'].unique())
print(cagemate_df['Behavior type'].unique())
print(cagemate_df['Description'].unique())
print(cagemate_df['Modifiers'].unique())
print(cagemate_df['Observation id'].unique())
print(cagemate_df['Behavior'].unique())

[nan 'Media file']
['POINT' 'STATE']
[nan '21_cage_p1 video observation due oct 8 2024'
 'Anastasia Achziger BORIS 11_cage_p1 ']
[nan]
['23_cage_p1' '12_cage_p1' '13_cage_p1' '22_cage_p1' '33_cage'
 '41_cage_p1' '44_Cage_ACObservations' '31_cage_p1' '21_cage_p1 Achziger'
 '11_cage_p1 Achziger' '24_cage_p1']
['mice back in' 'facial sniffing' 'anogenital sniffing' 'fighting'
 'chasing' 'mice taken out' 'allogrooming']


## Finding all sniffing bouts regardless of reciprocity

In [None]:
# Array of arrays of start/stop times of facial sniffing bouts for cagemates in ms
cage_fac_sniffs = boris.get_behavior_bouts(
    boris_df=cagemate_df,
    subject=['subject'],
    behavior=['facial sniffing'],
    min_iti=.5,    # Combine bouts separated by less than 1 second
    min_bout=2.0    # Only keep bouts at least 2 seconds long
)

# Array of arrays of start/stop times of anogenital sniffing bouts for cagemates in ms
cage_ano_sniffs = boris.get_behavior_bouts(
    boris_df=cagemate_df,
    subject=['subject'],
    behavior=['anogenital sniffing'],
    min_iti=0.5,    # Combine bouts separated by less than 1 second
    min_bout=2.0    # Only keep bouts at least 2 seconds long
)

# Array of arrays of start/stop times of facial sniffing bouts for novel in ms
nov_fac_sniffs = boris.get_behavior_bouts(
    boris_df=novel_df,
    subject=['subject'],
    behavior=['facial sniffing'],
    min_iti=0.5,    # Combine bouts separated by less than 1 second
    min_bout=2.0    # Only keep bouts at least 2 seconds long
)

# Array of arrays of start/stop times of anogenital sniffing bouts for novel in ms
nov_ano_sniffs = boris.get_behavior_bouts(
    boris_df=novel_df,
    subject=['subject'],
    behavior=['anogenital sniffing'],
    min_iti=0.5,    # Combine bouts separated by less than 1 second
    min_bout=2.0    # Only keep bouts at least 2 seconds long
)

In [None]:
# Finding reciprocity of sniffing bouts for cagemate and novel facial and anogenital sniffing by finding overlapping sniffing bouts

# cagemate facial reciprocal bouts


## Single Cell