In [1]:
import os
import numpy as np
from scipy.io import loadmat
import h5py
from preproc import *

In [2]:
# Specify which day's dataset to use
# `prefix` is joined to `day_dir` by plain string concatenation when the data
# files are opened, so it must keep its trailing slash.
prefix = "/Volumes/Hippocampus/Data/picasso-misc/"
# Name of the day directory under `prefix` (presumably a YYYYMMDD date — confirm).
day_dir = "20181102"

In [3]:
# Get list of cells under the day directory.
# The helper script writes its result to cell_list.txt in the working directory;
# the file is read back and then deleted.
os.system(f"sh ~/Documents/neural_decoding/Hippocampus_Decoding/get_cells.sh {day_dir}")
with open("cell_list.txt", "r") as file:
    cell_list = [line.strip() for line in file]
os.remove("cell_list.txt")

# Load data from rplparallel.mat object, extract trial markers, time stamps and session start timestamp.
# The file is opened read-only and closed as soon as the datasets have been copied
# into memory (np.array(...) materialises each HDF5 dataset).
with h5py.File(prefix + day_dir + "/session01/rplparallel.mat", "r") as rp_file:
    rp = rp_file.get('rp').get('data')
    trial_markers = np.array(rp.get('markers'))
    cue_intervals = np.array(rp.get('timeStamps'))
    session_start_time = np.round(np.array(rp.get('session_start_sec'))[0,0], 3)

# Load data and extract spike times from all spiketrain.mat objects
spike_times = list()
cell_labels = list()
for cell_dir in cell_list:
    spk = loadmat(prefix + day_dir + "/session01/" + cell_dir + "/spiketrain.mat")
    spk = spk.get('timestamps').flatten() # spike timestamps is loaded in as a column vector
    spk = spk / 1000 # convert spike timestamps from msec to sec
    spike_times.append(spk)

    # Build a short label "a<array>/ch<channel>/c<cell>" from the three path components.
    # The fixed-width slices drop the leading characters of each directory name
    # (presumably names like arrayNN/channelNNN/cellNN — verify against get_cells.sh).
    cell_name = cell_dir.split('/')
    array, channel, cell = cell_name[0][6:], cell_name[1][7:], cell_name[2][5:]
    if channel[0] == '0':
        channel = channel[1:]  # only a single leading zero is removed
    cell_labels.append(f'a{array}/ch{channel}/c{cell}')

# Load data from vmpv.mat object, extract session end timestamp.
# NOTE(review): the file name on disk is "1vmpv.mat" — confirm the leading "1" is intended.
with h5py.File(prefix + day_dir + "/session01/1vmpv.mat", "r") as pv_file:
    pv = pv_file.get('pv').get('data')
    session_end_time = np.round(np.array(pv.get('rplmaxtime'))[0,0], 3)

In [4]:
# Reduce the first row of trial markers to poster numbers (last decimal digit, as int)
# and reshape the first two rows of time stamps into one [start, end] row per trial.
# NOTE: both names are overwritten in place, so this cell must not be re-run
# without first re-running the loading cell.
trial_markers = (trial_markers[0,:] % 10).astype(int)
cue_intervals = cue_intervals[:2,:].T

# Count the spikes of every cell inside each cue phase interval
spikecounts_per_trial = spike_counts_per_observation(cue_intervals, spike_times)

# Bin the cue phase spike counts one cell (column) at a time
num_cells_cue = spikecounts_per_trial.shape[1]
binned_spikes_per_trial = np.empty_like(spikecounts_per_trial)
for col in range(num_cells_cue):
    cell_counts = spikecounts_per_trial[:,col]
    binned_spikes_per_trial[:,col] = bin_firing_rates_3(cell_counts)


In [5]:
# # Bin entire session into 1-second time bins
# session_intervals = np.arange(session_start_time, session_end_time, 1)
# session_intervals = np.vstack((session_intervals[:-1], session_intervals[1:])).T

# Navigation phase of a trial runs from the end of its cue phase to the start of
# the next cue phase; the last one runs until the end of the session.
nav_intervals = np.empty_like(cue_intervals)
nav_intervals[-1,1] = session_end_time
nav_intervals[:-1,1] = cue_intervals[1:,0]
nav_intervals[:,0] = cue_intervals[:,1]

# Tile the session: each trial contributes its cue interval followed by
# consecutive 1-second bins that start at the end of the cue phase and lie
# within the navigation phase.
session_intervals = list()
for cue_intvl, (nav_start, nav_end) in zip(cue_intervals, nav_intervals):
    session_intervals.append(cue_intvl)
    session_intervals.extend(
        np.array([bin_start, bin_start + 1])
        for bin_start in np.arange(nav_start, nav_end - 1, 1)
    )
session_intervals = np.array(session_intervals)

# Count the spikes of every cell inside each session interval
spikecounts_across_session = spike_counts_per_observation(session_intervals, spike_times)

# Bin the spike counts one cell (column) at a time for the entire session
num_cells_session = spikecounts_across_session.shape[1]
binned_spikes_across_session = np.empty_like(spikecounts_across_session)
for col in range(num_cells_session):
    cell_counts = spikecounts_across_session[:,col]
    binned_spikes_across_session[:,col] = bin_firing_rates_3(cell_counts)

In [6]:
def group_by_goal(timeseries: np.ndarray, goals: np.ndarray, num_goals: int = 6) -> list:
    """Split the rows of ``timeseries`` into one array per goal.

    Parameters
    ----------
    timeseries : 2-D array with one row per observation.
    goals : 1-based goal label for each row; ``goals[i]`` belongs to
        ``timeseries[i]``. Only the first ``len(goals)`` rows are used.
    num_goals : number of distinct goals (labels ``1..num_goals``).

    Returns
    -------
    list of ``num_goals`` arrays; entry ``g`` stacks, in original order, the
    rows whose label is ``g + 1``. A goal with no rows yields an array of
    shape ``(0, timeseries.shape[1])``.

    Raises
    ------
    IndexError
        If a label falls outside ``1..num_goals``. Without this check a
        label of 0 would wrap around to index -1 and be filed under the
        last goal.
    """
    # Same label -> index conversion as before: shift to 0-based, truncate to int.
    goal_idx = np.array([int(goal - 1) for goal in goals], dtype=int)

    out_of_range = (goal_idx < 0) | (goal_idx >= num_goals)
    if out_of_range.any():
        bad_labels = sorted({int(idx) + 1 for idx in goal_idx[out_of_range]})
        raise IndexError(f"goal labels must lie in 1..{num_goals}, got {bad_labels}")

    rows = timeseries[:goal_idx.size]
    # Stacking onto an empty float array keeps the dtype promotion (and the
    # shape of empty groups) the same as growing each group row by row, but
    # needs only one concatenation per goal.
    empty = np.empty((0, timeseries.shape[1]))
    return [np.vstack((empty, rows[goal_idx == goal])) for goal in range(num_goals)]

In [7]:
# Split the binned cue phase responses into one array per goal
responses_per_goal = group_by_goal(binned_spikes_per_trial, trial_markers)

# Number of cue phase responses observed for each goal, and their total
num_responses_per_goal = np.array([len(goal_responses) for goal_responses in responses_per_goal])
total_goal_responses = num_responses_per_goal.sum()

In [8]:
# Distribution of population responses over all cue phases:
# the mapping itself, its keys, the number of distinct responses and the total count
response_distribution_cues = map_response_distribution_popl(binned_spikes_per_trial)
total_responses_cues = sum(response_distribution_cues.values())
response_distribution_cues_keys = [*response_distribution_cues.keys()]
total_unique_responses_cues = len(response_distribution_cues_keys)

In [9]:
# Distribution of population responses across the entire session:
# the mapping itself, its keys, the number of distinct responses and the total count
response_distribution_session = map_response_distribution_popl(binned_spikes_across_session)
total_responses_session = sum(response_distribution_session.values())
response_distribution_session_keys = [*response_distribution_session.keys()]
total_unique_responses_session = len(response_distribution_session_keys)

In [10]:
# # Get distribution of population responses across entire session, accounting for different binning windows
# response_distribution_extended = response_distribution_session.copy()
# for i in range(1, 10):
#     timeshift = 0.1 * i
#     # Generate time intervals
#     extended_intervals = np.arange(session_start_time + timeshift, session_end_time, 1)
#     extended_intervals = np.vstack((extended_intervals[:-1], extended_intervals[1:])).T
#     # Slot in spikes
#     spikecounts = spike_counts_per_observation(extended_intervals, spike_times)
#     # Bin spike counts
#     binned_spikes = np.empty_like(spikecounts)
#     for col in range(spikecounts.shape[1]):
#         binned_spikes[:,col] = bin_firing_rates_3(spikecounts[:,col])
#     # Add to response distribution map
#     response_distribution_extended = map_response_distribution_popl(binned_spikes, dist=response_distribution_extended)

# response_distriubtion_extended_keys = list(response_distribution_extended.keys())
# total_unique_responses_extended = len(response_distriubtion_extended_keys)
# total_responses_extended = sum(response_distribution_extended.values())

In [11]:
def information_per_stimulus(responses_per_stimulus: np.array, response_dist: dict) -> float:
    """Sum of P(r|s) * log2(P(r|s) / P(r)) over the responses seen for one stimulus.

    P(r|s) comes from the distribution that ``map_response_distribution_popl``
    builds from ``responses_per_stimulus``; P(r) comes from the counts in
    ``response_dist``. A response that is absent from ``response_dist`` is
    given a count of 1 there.
    """
    stimulus_dist = map_response_distribution_popl(responses_per_stimulus)
    n_total = sum(response_dist.values())
    n_stimulus = sum(stimulus_dist.values())

    def _term(response, count):
        # Contribution of a single response to the sum.
        p_cond = count / n_stimulus
        p_marginal = response_dist.get(response, 1) / n_total
        return p_cond * np.log2(p_cond / p_marginal)

    return sum(_term(response, count) for response, count in stimulus_dist.items())

In [12]:
# Unconditioned entropy of the population response over cue phases:
# H(R) = -sum_r P(r) * log2 P(r)
entropy_cue = 0
for count in response_distribution_cues.values():
    P_r = count / total_responses_cues
    entropy_cue -= P_r * np.log2(P_r)

print(entropy_cue)

8.561812481597263


In [13]:
# Unconditioned entropy of the population response over the entire session:
# H(R) = -sum_r P(r) * log2 P(r)
entropy_session = 0
for count in response_distribution_session.values():
    P_r = count / total_responses_session
    entropy_session -= P_r * np.log2(P_r)

print(entropy_session)

9.737103540988251


In [14]:
# Conditioned entropy of the responses for each goal:
# H(R|s) = -sum_r P(r|s) * log2 P(r|s), one value per goal
entropy_responses = list()
for goal_responses in responses_per_goal:
    goal_responses_dist = map_response_distribution_popl(goal_responses)
    goal_responses_total = sum(goal_responses_dist.values())
    entropy_goal = 0
    for count in goal_responses_dist.values():
        P_r_s = count / goal_responses_total
        entropy_goal -= P_r_s * np.log2(P_r_s)
    entropy_responses.append(entropy_goal)

print(entropy_responses)

[5.96875, 6.093409091335662, 6.234120167580205, 6.548548539271539, 6.044394119358462, 5.913787860007857]


In [15]:
# Information per stimulus for each goal, with P(r) taken from cue phase responses only
information_per_goal_cues = [
    information_per_stimulus(goal_responses, response_distribution_cues)
    for goal_responses in responses_per_goal
]

# Mutual information across cues: per-goal information weighted by P(s)
goal_mutual_information_cues = 0
for num_responses, info in zip(num_responses_per_goal, information_per_goal_cues):
    P_s = num_responses / total_goal_responses
    goal_mutual_information_cues += P_s * info

print(goal_mutual_information_cues)

2.3924284203679247


In [16]:
# Information per stimulus for each goal, with P(r) taken from responses across the entire session
# NOTE(review): P(r|s) is estimated from cue phases while P(r) is estimated from the
# whole session, and the printed result is larger than the goal entropy printed
# further down — confirm that this quantity is meant to be read as a mutual information.
information_per_goal_session = [
    information_per_stimulus(goal_responses, response_distribution_session)
    for goal_responses in responses_per_goal
]

# Mutual information across cues: per-goal information weighted by P(s)
goal_mutual_information_session = 0
for num_responses, info in zip(num_responses_per_goal, information_per_goal_session):
    P_s = num_responses / total_goal_responses
    goal_mutual_information_session += P_s * info

print(goal_mutual_information_session)

5.557695341161491


In [17]:
# # Calculate information per stimulus for each goal (using extended response distribution map)
# information_per_goal_extended = list()
# for goal in responses_per_goal:
#     information_per_goal_extended.append(information_per_stimulus(goal, response_distribution_extended))

# # Calculate mutual information across cues (using extended response distribution map)
# goal_mutual_information_extended = 0
# for goal, info in enumerate(information_per_goal_extended):
#     P_s = num_responses_per_goal[goal] / total_goal_responses
#     goal_mutual_information_extended += P_s * info

# print(goal_mutual_information_extended)

In [18]:
# Shannon entropy across goals: H(S) = -sum_s P(s) * log2 P(s).
# The loop runs over however many goals were counted instead of a fixed 6.
goal_entropy = 0
for num_responses in num_responses_per_goal:
    if num_responses == 0:
        # A goal that never occurred contributes nothing (0 * log2(0) is taken
        # as 0); evaluating it directly would produce NaN.
        continue
    P_s = num_responses / total_goal_responses
    goal_entropy -= P_s * np.log2(P_s)

print(goal_entropy)

2.565364163013027


In [19]:
# Entropy of a uniform distribution over 6 outcomes, i.e. the largest value an
# entropy over 6 goals can take (presumably shown for comparison with goal_entropy).
np.log2(6)

2.584962500721156