In [34]:
import spike.spike_analysis.spike_collection as sc
import spike.spike_analysis.spike_recording as sr
import spike.spike_analysis.firing_rate_calculations as fr
import spike.spike_analysis.normalization as norm
import spike.spike_analysis.single_cell as single_cell
import spike.spike_analysis.spike_collection as collection
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import behavior.boris_extraction as boris
import matplotlib.pyplot as plt
import pickle
import re

In [35]:
pd.set_option('display.max_colwidth', 0)  # 0 means unlimited in newer pandas versions

# Show all rows
pd.set_option("display.max_rows", None)

# Show all columns
pd.set_option("display.max_columns", None)

# Don’t truncate column contents
pd.set_option("display.max_colwidth", None)

# Expand the display to the full width of the screen
pd.set_option("display.width", 0)


In [36]:
spike_collection_json_path = r'C:\Users\thoma\Code\ResearchCode\diff_fam_social_memory_ephys\spike_collection.json\spike_collection.json'

In [37]:
import importlib
import spike.spike_analysis.spike_collection as sc

importlib.reload(sc)

<module 'spike.spike_analysis.spike_collection' from 'c:\\Users\\thoma\\Code\\ResearchCode\\diff_fam_social_memory_ephys\\spike\\spike_analysis\\spike_collection.py'>

In [38]:
new_sp = sc.SpikeCollection.load_collection(spike_collection_json_path)

In [39]:
rec1 = new_sp.recordings[0].event_dict.keys()

In [40]:
def table_of_events_neurons_per_rec(spike_collection):
    """
    Returns a table with # of total neurons in recording and # of each event type or event_dict keys.
    """
    rows = []
    for rec in spike_collection.recordings:
        # Get the number of neurons in the recording
        num_neurons = rec.good_neurons

        # Get the number of events for each event type
        event_counts = {event: len(rec.event_dict[event]) for event in rec.event_dict.keys()}

        # Create a row for the table
        row = {
            'recording': rec.name,
            'num_neurons': num_neurons,
            **event_counts
        }

        # Collect rows in a list instead of using append()
        rows.append(row)

    # Create the DataFrame at the end
    table = pd.DataFrame(rows, columns=[
        'recording', 'num_neurons', 'alone_rewarded', 'alone_rewarded_baseline', 'high_comp',
        'high_comp_lose', 'high_comp_lose_baseline', 'high_comp_win', 'high_comp_win_baseline',
        'lose', 'low_comp', 'low_comp_lose', 'low_comp_lose_baseline', 'low_comp_win',
        'low_comp_win_baseline', 'overall_pretone', 'win'
    ])
    
    return table

# Usage
table = table_of_events_neurons_per_rec(new_sp)
table.head(len(table))


Unnamed: 0,recording,num_neurons,alone_rewarded,alone_rewarded_baseline,high_comp,high_comp_lose,high_comp_lose_baseline,high_comp_win,high_comp_win_baseline,lose,low_comp,low_comp_lose,low_comp_lose_baseline,low_comp_win,low_comp_win_baseline,overall_pretone,win
0,20230612_101430_standard_comp_to_training_D1_subj_1-3_t3b3L_box2_merged.rec,22,40,40,9,1,40,8,40,7,11,6,40,5,40,160.0,13
1,20230612_101430_standard_comp_to_training_D1_subj_1-4_t4b2L_box1_merged.rec,10,40,40,9,8,40,1,40,13,11,5,40,6,40,160.0,7
2,20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.rec,15,40,40,14,10,40,4,40,13,6,3,40,3,40,160.0,7
3,20230612_112630_standard_comp_to_training_D1_subj_1-2_t2b2L_box1_merged.rec,20,40,40,14,4,40,10,40,7,6,3,40,3,40,160.0,13
4,20230613_105657_standard_comp_to_training_D2_subj_1-1_t1b2L_box1_merged.rec,15,40,0,11,0,0,11,0,1,9,1,0,8,0,,19
5,20230613_105657_standard_comp_to_training_D2_subj_1-4_t4b3L_box2_merged.rec,37,40,0,11,11,0,0,0,19,9,8,0,1,0,,1
6,20230614_114041_standard_comp_to_training_D3_subj_1-1_t1b3L_box1_merged.rec,31,38,38,14,5,38,9,38,6,6,1,38,5,38,152.0,14
7,20230614_114041_standard_comp_to_training_D3_subj_1-2_t2b2L_box2_merged.rec,27,38,38,14,9,38,5,38,14,6,5,38,1,38,152.0,6
8,20230616_111904_standard_comp_to_training_D4_subj_1-2_t2b2L_box2_merged.rec,19,39,39,11,1,39,10,39,6,9,5,39,4,39,156.0,14
9,20230616_111904_standard_comp_to_training_D4_subj_1-4_t4b3L_box1_merged.rec,15,39,39,11,10,39,1,39,14,9,4,39,5,39,156.0,6


In [None]:
def summarize_event_criteria_by_recording(table):
    """
    For each recording, returns a dictionary with:
    - 'recording': name
    - 'qualifying_events': list of event columns that meet threshold
    - 'num_qualifying_events': count of such columns
    """
    skip_cols = {'recording', 'num_neurons', 'overall_pretone', 'lose', 'win'}
    summaries = []

    for _, row in table.iterrows():
        qualifying_events = []
        sum_of_trials = 0

        # skip row if win is less than 2
        if row['win'] <= 3:
            continue

        for col in table.columns:
            if col in skip_cols:
                continue
            elif row[col] >= 3:
                qualifying_events.append(col)
                sum_of_trials += row[col]
                num_neurons = row['num_neurons']
                

        summaries.append({
            'recording': row['recording'],
            'qualifying_events': qualifying_events,
            'num_qualifying_events_types': len(qualifying_events),
            'sum_of__qualifying_trials': sum_of_trials,
            'num_neurons': num_neurons,
            'num_wins': row['win']
        })

    return pd.DataFrame(summaries)

# Usage
event_summary = summarize_event_criteria_by_recording(table)
event_summary.head(len(event_summary))

Unnamed: 0,recording,qualifying_events,num_qualifying_events_types,sum_of__qualifying_trials,num_neurons,num_wins
0,20230612_101430_standard_comp_to_training_D1_subj_1-3_t3b3L_box2_merged.rec,"[alone_rewarded, alone_rewarded_baseline, high_comp, high_comp_lose_baseline, high_comp_win, high_comp_win_baseline, low_comp, low_comp_lose, low_comp_lose_baseline, low_comp_win, low_comp_win_baseline]",11,279,22,13
1,20230612_101430_standard_comp_to_training_D1_subj_1-4_t4b2L_box1_merged.rec,"[alone_rewarded, alone_rewarded_baseline, high_comp, high_comp_lose, high_comp_lose_baseline, high_comp_win_baseline, low_comp, low_comp_lose, low_comp_lose_baseline, low_comp_win, low_comp_win_baseline]",11,279,10,7
2,20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.rec,"[alone_rewarded, alone_rewarded_baseline, high_comp, high_comp_lose, high_comp_lose_baseline, high_comp_win, high_comp_win_baseline, low_comp, low_comp_lose, low_comp_lose_baseline, low_comp_win, low_comp_win_baseline]",12,280,15,7
3,20230612_112630_standard_comp_to_training_D1_subj_1-2_t2b2L_box1_merged.rec,"[alone_rewarded, alone_rewarded_baseline, high_comp, high_comp_lose, high_comp_lose_baseline, high_comp_win, high_comp_win_baseline, low_comp, low_comp_lose, low_comp_lose_baseline, low_comp_win, low_comp_win_baseline]",12,280,20,13
4,20230613_105657_standard_comp_to_training_D2_subj_1-1_t1b2L_box1_merged.rec,"[alone_rewarded, high_comp, high_comp_win, low_comp, low_comp_win]",5,79,15,19
5,20230614_114041_standard_comp_to_training_D3_subj_1-1_t1b3L_box1_merged.rec,"[alone_rewarded, alone_rewarded_baseline, high_comp, high_comp_lose, high_comp_lose_baseline, high_comp_win, high_comp_win_baseline, low_comp, low_comp_lose_baseline, low_comp_win, low_comp_win_baseline]",11,267,31,14
6,20230614_114041_standard_comp_to_training_D3_subj_1-2_t2b2L_box2_merged.rec,"[alone_rewarded, alone_rewarded_baseline, high_comp, high_comp_lose, high_comp_lose_baseline, high_comp_win, high_comp_win_baseline, low_comp, low_comp_lose, low_comp_lose_baseline, low_comp_win_baseline]",11,267,27,6
7,20230616_111904_standard_comp_to_training_D4_subj_1-2_t2b2L_box2_merged.rec,"[alone_rewarded, alone_rewarded_baseline, high_comp, high_comp_lose_baseline, high_comp_win, high_comp_win_baseline, low_comp, low_comp_lose, low_comp_lose_baseline, low_comp_win, low_comp_win_baseline]",11,273,19,14
8,20230616_111904_standard_comp_to_training_D4_subj_1-4_t4b3L_box1_merged.rec,"[alone_rewarded, alone_rewarded_baseline, high_comp, high_comp_lose, high_comp_lose_baseline, high_comp_win_baseline, low_comp, low_comp_lose, low_comp_lose_baseline, low_comp_win, low_comp_win_baseline]",11,273,15,6
9,20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged.rec,"[high_comp, high_comp_win, low_comp, low_comp_win]",4,40,18,19


In [42]:
# calculate the total sum of qualifying trials across all recordings
total_sum_of_qualifying_trials = event_summary['sum_of__qualifying_trials'].sum()
print(f"Total sum of qualifying trials across all recordings: {total_sum_of_qualifying_trials}")

# total number of qualifying event types
total_num_qualifying_event_types = event_summary['num_qualifying_events_types'].sum()
print(f"Total number of qualifying event types across all recordings: {total_num_qualifying_event_types}")

# sum of neurons across all recordings
total_num_neurons = event_summary['num_neurons'].sum()
print(f"Total number of neurons across all recordings: {total_num_neurons}")


# count number of times each event is in qualifying events
def count_event_occurrences(event_summary):
    """
    Counts how many times each event type appears in the qualifying events across all recordings.
    """
    event_counts = {}

    for _, row in event_summary.iterrows():
        for event in row['qualifying_events']:
            if event not in event_counts:
                event_counts[event] = 0
            event_counts[event] += 1

    return pd.DataFrame(event_counts.items(), columns=['event', 'count'])

# Usage
event_occurrences = count_event_occurrences(event_summary)
event_occurrences = event_occurrences.sort_values(by='count', ascending=False)
event_occurrences.head(len(event_occurrences))
event_occurrences.head(20)


Total sum of qualifying trials across all recordings: 3709
Total number of qualifying event types across all recordings: 201
Total number of neurons across all recordings: 499


Unnamed: 0,event,count
2,high_comp,31
4,high_comp_win,27
6,low_comp,27
9,low_comp_win,24
11,high_comp_lose,20
7,low_comp_lose,16
0,alone_rewarded,16
1,alone_rewarded_baseline,8
3,high_comp_lose_baseline,8
5,high_comp_win_baseline,8


In [43]:
trial_types = [
    'alone_rewarded', 'alone_rewarded_baseline', 'high_comp',
    'high_comp_lose', 'high_comp_lose_baseline', 'high_comp_win',
    'high_comp_win_baseline', 'low_comp', 'low_comp_lose',
    'low_comp_lose_baseline', 'low_comp_win', 'low_comp_win_baseline'
]

def total_neurons_per_trial_type(event_summary, trial_types):
    """
    Returns a dictionary with the total number of neurons for each trial type
    across all recordings, using 'qualifying_events' like a set.
    """
    total_neurons = {trial_type: 0 for trial_type in trial_types}

    for _, row in event_summary.iterrows():
        num_neurons = row['num_neurons']
        qualifying_events = row['qualifying_events']  # should be a list

        for trial_type in trial_types:
            if trial_type in qualifying_events:
                total_neurons[trial_type] += num_neurons

    return total_neurons


# Usage
total_neurons = total_neurons_per_trial_type(event_summary, trial_types)
# Display the total number of neurons for each trial type
for trial_type, count in total_neurons.items():
    print(f"Total neurons for {trial_type}: {count}")

Total neurons for alone_rewarded: 279
Total neurons for alone_rewarded_baseline: 159
Total neurons for high_comp: 499
Total neurons for high_comp_lose: 323
Total neurons for high_comp_lose_baseline: 159
Total neurons for high_comp_win: 446
Total neurons for high_comp_win_baseline: 159
Total neurons for low_comp: 462
Total neurons for low_comp_lose: 264
Total neurons for low_comp_lose_baseline: 159
Total neurons for low_comp_win: 396
Total neurons for low_comp_win_baseline: 159


In [44]:
# save table to csv and put in the same directory 
table.to_csv('spike_collection_event_counts.csv', index=False)


In [45]:
event_summary.to_csv('event_summary.csv', index=False)

In [46]:
len(new_sp.recordings)

39

In [60]:
def filter_recordings_by_event_counts(table):
    """
    Filters the table to include only recordings that have:
    - >= 3 trials for every event type (except specified columns),
    - > 1 trial for 'win',
    - Skips 'recording', 'num_neurons', 'overall_pretone'.
    Includes debug printouts for unexpected types or values.
    """
    skip_cols = {'recording', 'num_neurons', 'overall_pretone'}
    no_wins = 0
    no_losses = 0
    no_alone_rewarded = 0

    no_wins_recordings = []
    no_losses_recordings = []
    no_alone_rewarded_recordings = []
    
    # Print column names at the start
    print("\n=== Column Names ===")
    print(list(table.columns))
    print("====================\n")

    for index, row in table.iterrows():

        for col in table.columns:
            if col in skip_cols:
                continue

            value = row[col]

            if col == 'win':
                if pd.isna(value):
                    print(f"    [WARN] 'win' is NaN for recording: {row.get('recording', 'N/A')}")
                elif value == 0:
                    print(f"\n    [INFO] no wins for recording: {row.get('recording', 'N/A')}")
                    no_wins += 1
                    no_wins_recordings.append(row.get('recording', 'N/A'))

                if value <= 1:
                    print()
                    print(f"    [FAIL] Not enough wins: {value} (<= 1) for {row.get('recording', 'N/a')} \n")


            elif col == 'lose':
                if pd.isna(value):
                    print(f"    [WARN] 'lose' is NaN for recording: {row.get('recording', 'N/A')}")
                elif value == 0:
                    print(f"    [INFO] no losses for recording: {row.get('recording', 'N/A')}")
                    no_losses += 1
                    no_losses_recordings.append(row.get('recording', 'N/A'))


            elif col == 'alone_rewarded':
                if pd.isna(value):
                    print(f"    [WARN] 'alone_rewarded' is NaN for recording: {row.get('recording', 'N/A')}")
                elif value == 0:
                    print(f"    [INFO] no alone_rewarded for recording: {row.get('recording', 'N/A')}")
                    no_alone_rewarded += 1
                    no_alone_rewarded_recordings.append(row.get('recording', 'N/A'))

    print(f"Number of recordings with no wins: {no_wins}")
    print(f"Number of recordings with no losses: {no_losses}")
    print(f"Number of recordings with no alone_rewarded: {no_alone_rewarded}")
    print(f"Recordings with no wins: {no_wins_recordings}")
    print(f"Recordings with no losses: {no_losses_recordings}")
    print(f"Recordings with no alone_rewarded: {no_alone_rewarded_recordings}")


filter_recordings_by_event_counts(table)


=== Column Names ===
['recording', 'num_neurons', 'alone_rewarded', 'alone_rewarded_baseline', 'high_comp', 'high_comp_lose', 'high_comp_lose_baseline', 'high_comp_win', 'high_comp_win_baseline', 'lose', 'low_comp', 'low_comp_lose', 'low_comp_lose_baseline', 'low_comp_win', 'low_comp_win_baseline', 'overall_pretone', 'win']


    [FAIL] Not enough wins: 1 (<= 1) for 20230613_105657_standard_comp_to_training_D2_subj_1-4_t4b3L_box2_merged.rec 

    [INFO] no alone_rewarded for recording: 20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged.rec
    [INFO] no alone_rewarded for recording: 20230617_115521_standard_comp_to_omission_D1_subj_1-2_t2b2L_box2_merged.rec
    [INFO] no alone_rewarded for recording: 20230618_100636_standard_comp_to_omission_D2_subj_1-1_t1b2L_box2_merged.rec
    [INFO] no alone_rewarded for recording: 20230618_100636_standard_comp_to_omission_D2_subj_1-4_t4b3L_box1_merged.rec
    [INFO] no alone_rewarded for recording: 20230619_115321_standard_com

In [48]:
# loop through rows in table, create column with true/false for each row that has event types >= 3 trials and > 1 win
def filter_recordings_by_event_counts(table):
    """
    Filters the table to include only recordings that have:
    - >= 3 trials for every event type (except specified columns),
    - > 1 trial for 'win',
    - Skips 'recording', 'num_neurons', 'overall_pretone'.
    """
    skip_cols = {'recording', 'num_neurons', 'overall_pretone'}
    filtered_rows = []

    for _, row in table.iterrows():
        meets_criteria = True
        for col in table.columns:
            if col in skip_cols:
                continue
            elif col == 'win':
                if row[col] == 0:
                    print(f"\nno wins for recording: {row['recording']}\n")
                if row[col] <= 1:
                    meets_criteria = False
                    break
            elif col == 'lose':
                if row[col] == 0:
                    print(f"\nno losses for recording: {row['recording']}\n")
            elif col == 'alone_rewarded':
                if row[col] == 0:
                    print(f"no alone_rewarded for recording: {row['recording']}")
            elif row[col] < 3:
                meets_criteria = False
                break
        
        if meets_criteria:
            filtered_rows.append(row)

    return pd.DataFrame(filtered_rows)

# Usage
filtered_table = filter_recordings_by_event_counts(table)
filtered_table.head(20)

no alone_rewarded for recording: 20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged.rec
no alone_rewarded for recording: 20230617_115521_standard_comp_to_omission_D1_subj_1-2_t2b2L_box2_merged.rec
no alone_rewarded for recording: 20230618_100636_standard_comp_to_omission_D2_subj_1-1_t1b2L_box2_merged.rec
no alone_rewarded for recording: 20230618_100636_standard_comp_to_omission_D2_subj_1-4_t4b3L_box1_merged.rec
no alone_rewarded for recording: 20230619_115321_standard_comp_to_omission_D3_subj_1-4_t3b3L_box2_merged.rec
no alone_rewarded for recording: 20230620_114347_standard_comp_to_omission_D4_subj_1-1_t1b2L_box_2_merged.rec
no alone_rewarded for recording: 20230620_114347_standard_comp_to_omission_D4_subj_1-2_t3b3L_box_1_merged.rec
no alone_rewarded for recording: 20230621_111240_standard_comp_to_omission_D5_subj_1-4_t3b3L_box1_merged.rec
no alone_rewarded for recording: 20240317_151922_long_comp_subj_3-1_t6b6_merged.rec
no alone_rewarded for recording: 20240317_

Unnamed: 0,recording,num_neurons,alone_rewarded,alone_rewarded_baseline,high_comp,high_comp_lose,high_comp_lose_baseline,high_comp_win,high_comp_win_baseline,lose,low_comp,low_comp_lose,low_comp_lose_baseline,low_comp_win,low_comp_win_baseline,overall_pretone,win
2,20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.rec,15,40,40,14,10,40,4,40,13,6,3,40,3,40,160.0,7
3,20230612_112630_standard_comp_to_training_D1_subj_1-2_t2b2L_box1_merged.rec,20,40,40,14,4,40,10,40,7,6,3,40,3,40,160.0,13


In [49]:
def table_of_events_neurons_per_rec(spike_collection):
    """
    Returns a table with:
    - # of total neurons in recording
    - # of events of each type
    - # of neurons that have at least 3 trials for *every* event type
    """
    rows = []
    for rec in spike_collection.recordings:
        num_neurons = rec.good_neurons

        # Count how many events of each type (all neurons combined)
        event_counts = {event: len(rec.event_dict[event]) for event in rec.event_dict.keys()}

        print('\nEvent counts for recording {}: {}\n'.format(rec.name, event_counts))

        # Now filter neurons that have >= 3 events in EACH event type
        qualifying_neurons = 0
        for neuron_id in range(num_neurons):
            meets_criteria = True
            for event_type, event_trials in rec.event_dict.items():
                # Count how many trials for this neuron
                print(f"\nChecking neuron {neuron_id} for event type '{event_type}' with {event_trials}\n")

                neuron_trials = [trial for trial in event_trials if trial['neuron'] == neuron_id]
                
                if len(neuron_trials) < 3:
                    meets_criteria = False
                    break
            if meets_criteria:
                qualifying_neurons += 1

        # Build row
        row = {
            'recording': rec.name,
            'num_neurons': num_neurons,
            'qualified_neurons_all_events_≥3': qualifying_neurons,
            **event_counts
        }

        rows.append(row)

    table = pd.DataFrame(rows)
    return table

# Usage
table = table_of_events_neurons_per_rec(new_sp)
table.head(20)


Event counts for recording 20230612_101430_standard_comp_to_training_D1_subj_1-3_t3b3L_box2_merged.rec: {'alone_rewarded': 40, 'alone_rewarded_baseline': 40, 'high_comp': 9, 'high_comp_lose': 1, 'high_comp_lose_baseline': 40, 'high_comp_win': 8, 'high_comp_win_baseline': 40, 'lose': 7, 'low_comp': 11, 'low_comp_lose': 6, 'low_comp_lose_baseline': 40, 'low_comp_win': 5, 'low_comp_win_baseline': 40, 'overall_pretone': 160, 'win': 13}


Checking neuron 0 for event type 'alone_rewarded' with [[1849109.9  1859109.9 ]
 [1909109.85 1919109.95]
 [1964109.8  1974109.9 ]
 [2029109.75 2039109.75]
 [2084109.85 2094109.7 ]
 [2134109.8  2144109.7 ]
 [2184109.75 2194109.75]
 [2289109.55 2299109.65]
 [2409109.6  2419109.6 ]
 [2529109.4  2539109.4 ]
 [2639109.3  2649109.3 ]
 [2724109.35 2734109.35]
 [2844109.3  2854109.15]
 [2904109.1  2914109.1 ]
 [2969109.05 2979109.15]
 [3029109.15 3039109.  ]
 [3104108.95 3114109.1 ]
 [3204108.9  3214109.  ]
 [3299108.95 3309108.8 ]
 [3349108.9  3359108.9 ]
 [1849

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices