In [1]:
import os
import h5py
from scipy.io import loadmat
import numpy as np
import random
import matplotlib.pyplot as plt
import pickle



In [2]:
# Choose which days to include
#day_list = ['20181105', '20181102', '20181101']

# Or, read in the list of days from a txt file (one day per line).
# Blank lines are skipped: an empty entry (e.g. from a trailing newline) would
# otherwise end up in day_list and be used to build a '<save_dir>/_data.pkl' path.
with open('data/combined/days.txt', 'r') as file:
    day_list = [line.strip() for line in file if line.strip()]

# Number of recording days (sessions) listed in the file
num_sess = len(day_list)
num_goals = 6
# NOTE(review): presumably a time-bin width in seconds -- confirm units.
tbin_size = 0.1

In [3]:
# Turn every '/'-separated path listed in data/cell_list_hm.txt into a compact
# label of the form '<field 5>ch<channel number>c<cell number>'.
good_cell_labels = list()
with open('data/cell_list_hm.txt', 'r') as file:
    for raw_path in file:
        parts = raw_path.strip().split('/')
        day_part = parts[5]
        # Skip the 7-character prefix of field 8; int() drops any zero padding.
        channel_num = int(parts[8][7:])
        # Skip the 4-character prefix of field 9; int() drops any zero padding.
        cell_num = int(parts[9][4:])
        good_cell_labels.append(f'{day_part}ch{channel_num}c{cell_num}')
print(good_cell_labels)

['20181105ch29c1', '20181105ch30c1', '20181105ch43c1', '20181105ch43c2', '20181105ch43c3', '20181105ch45c1', '20181102ch9c1', '20181102ch19c1', '20181102ch19c2', '20181102ch26c1', '20181102ch26c2', '20181102ch29c1', '20181102ch30c1', '20181102ch30c2', '20181102ch31c1', '20181102ch43c1', '20181102ch43c2', '20181102ch45c1', '20181102ch45c2', '20181101ch19c1', '20181101ch19c2', '20181101ch21c1', '20181101ch23c1', '20181101ch29c1', '20181101ch29c2', '20181101ch29c3', '20181101ch29c4', '20181101ch30c1', '20181101ch30c2', '20181101ch35c1', '20181101ch43c1', '20181101ch45c1', '20181031ch19c1', '20181031ch19c2', '20181031ch19c3', '20181031ch26c1', '20181031ch26c2', '20181031ch29c1', '20181031ch30c1', '20181031ch30c2', '20181031ch35c1', '20181031ch35c2', '20181031ch35c3', '20181031ch43c1', '20181031ch43c2', '20181031ch45c1', '20181031ch45c2', '20181026ch19c1', '20181026ch19c2', '20181026ch19c3', '20181026ch29c1', '20181026ch30c1', '20181026ch35c1', '20181026ch35c2', '20181026ch35c3', '20181026c

In [4]:
# Directories for data files.
# NOTE(review): `prefix` is an absolute, machine-specific mount point and is not
# referenced by the cells visible in this notebook -- confirm it is still needed.
prefix = "/Volumes/Hippocampus/Data/picasso-misc/"
# Directory holding the per-day '<day>_data.pkl' files read by the cells below
save_dir = "data/placedist"
# Whether to overwrite preexisting files
overwrite = True
# Whether to save figures, and the directory they go to
to_save = True
figsave_dir = 'figures/population_prob_distribution'

In [5]:
# Read placedist files: gather responses, durations and labels of every cell
# from each day's pickle, one entry per cell in all three lists.
all_place_responses = list()
all_place_durations = list()
all_cell_labels = list()

for day in day_list:
    with open(f'{save_dir}/{day}_data.pkl', 'rb') as file:
        data = pickle.load(file)
    session_labels = data['cell_labels']
    all_place_responses.extend(data['place_responses_per_cell'])
    # The durations belong to the session, so every cell of that session
    # receives a reference to the same object.
    all_place_durations.extend(data['durations_per_place'] for _ in session_labels)
    all_cell_labels.extend(session_labels)


In [6]:
# Set amount of bins, see which cells are valid for testing
total_bins = 21
# NOTE(review): despite its name this threshold is the literal 20, not two
# thirds of total_bins -- confirm the intended value.
two_thirds = 20
valid_cells_distribution = list()

# Set for O(1) membership tests instead of scanning the label list per cell
good_cell_label_set = set(good_cell_labels)

for day in day_list:
    with open(f'{save_dir}/{day}_data.pkl', 'rb') as file:
        data = pickle.load(file)
        place_responses_per_cell = data['place_responses_per_cell']
        cell_labels = data['cell_labels']
    for cell, label in enumerate(cell_labels):
        if label not in good_cell_label_set:
            continue
        # A bin is valid when it holds more than two strictly positive responses.
        num_valid_bins = sum(
            1
            for responses in place_responses_per_cell[cell].values()
            if responses[responses > 0].size > 2
        )
        # Keep the cell only when more than `two_thirds` of its bins are valid.
        if num_valid_bins > two_thirds:
            valid_cells_distribution.append(label)

num_valid_cells = len(valid_cells_distribution)
print('Amount of valid cells:', num_valid_cells)
# 70/30 split. The test size is derived from the train size: rounding both
# fractions independently can give sizes that do not add up to the number of
# cells (e.g. 5 cells -> round(3.5) + round(1.5) = 4 + 2).
train_size = int(round(num_valid_cells * 0.7))
test_size = num_valid_cells - train_size
print(train_size)
print(test_size)
print(valid_cells_distribution)
# Map every valid cell label to a 1-based running number
list_nums = list(range(1, num_valid_cells + 1))
valid_cell_to_num = dict(zip(valid_cells_distribution, list_nums))

print(valid_cell_to_num)
# Day of every valid cell: the first 8 characters of its label
new_day_list = [cell_label[:8] for cell_label in valid_cells_distribution]


Amount of valid cells: 276
193
83
['20181105ch29c1', '20181105ch43c1', '20181105ch43c3', '20181105ch45c1', '20181102ch19c1', '20181102ch29c1', '20181102ch30c1', '20181102ch30c2', '20181102ch31c1', '20181102ch43c1', '20181102ch43c2', '20181101ch19c1', '20181101ch21c1', '20181101ch29c2', '20181101ch29c3', '20181101ch29c4', '20181101ch30c2', '20181101ch43c1', '20181031ch19c2', '20181031ch26c1', '20181031ch26c2', '20181031ch29c1', '20181031ch30c1', '20181031ch43c1', '20181031ch43c2', '20181026ch19c1', '20181026ch29c1', '20181026ch30c1', '20181026ch35c1', '20181026ch35c2', '20181026ch45c1', '20181022ch9c1', '20181022ch26c1', '20181022ch29c1', '20181022ch43c1', '20181022ch43c2', '20181022ch43c4', '20181022ch45c1', '20181022ch93c1', '20181017ch29c1', '20181017ch45c1', '20181016ch20c1', '20181016ch29c1', '20181016ch45c1', '20181015ch6c1', '20181015ch20c1', '20181015ch29c1', '20181015ch45c1', '20181011ch20c1', '20181011ch29c1', '20181011ch45c1', '20181011ch45c2', '20181011ch45c3', '20181010ch45

In [7]:
# Group the strictly positive responses of every valid cell by place bin:
# bins_data[bin] holds one array per valid cell, in the order the cells are read.
bins_data = {}
for day in day_list:
    with open(f'{save_dir}/{day}_data.pkl', 'rb') as file:
        data = pickle.load(file)
    place_responses_per_cell = data['place_responses_per_cell']
    cell_labels = data['cell_labels']
    for cell_idx, cell_label in enumerate(cell_labels):
        if cell_label in valid_cells_distribution:
            for place_bin, responses in place_responses_per_cell[cell_idx].items():
                positive_responses = responses[responses > 0]
                bins_data.setdefault(place_bin, []).append(positive_responses)


In [8]:
# Normalise, in place, every per-cell distribution by its own sum, replacing
# each one with a plain list of the scaled values.
for per_cell_dists in bins_data.values():
    for idx, raw_values in enumerate(per_cell_dists):
        norm = sum(raw_values)
        per_cell_dists[idx] = [float(value) / norm for value in raw_values]

In [9]:
# Reload the per-day pickles, this time keeping only the cells that passed the
# validity filter (valid_cells_distribution).
all_place_responses = list()
all_place_durations = list()
all_cell_labels = list()

for day in day_list:
    with open(f'{save_dir}/{day}_data.pkl', 'rb') as file:
        data = pickle.load(file)
    for idx, cell_label in enumerate(data['cell_labels']):
        if cell_label not in valid_cells_distribution:
            continue
        all_place_responses.append(data['place_responses_per_cell'][idx])
        # Session-wide durations: the same object is referenced by every kept cell
        all_place_durations.append(data['durations_per_place'])
        all_cell_labels.append(cell_label)

num_all_cells = len(all_cell_labels)

In [None]:
# Summarise every cell as a (num_bins + 1, 3) table with, per place bin:
#   column 0 - mean of the sum-normalised responses
#   column 1 - sample standard deviation (ddof=1) of the same values
#   column 2 - summed duration of the bin, rescaled so the column sums to one
# NOTE(review): num_bins is 25 here while total_bins is 21 in the validity
# cell -- confirm which one matches the data.
# NOTE(review): zero responses are kept here but dropped when bins_data is
# built, so these statistics describe a different sample set -- confirm intent.
# This cell rebinds all_place_responses to an array and deletes
# all_place_durations, so the loading cell must be re-run before running it again.
num_bins = 25
per_cell_params = []
for responses_by_bin, durations_by_bin in zip(all_place_responses, all_place_durations):
    params = np.zeros((num_bins + 1, 3))
    for place_bin, samples in responses_by_bin.items():
        sample_sum = sum(samples)
        scaled = [float(value) / sample_sum for value in samples]
        params[place_bin] = (
            np.mean(scaled),
            np.std(scaled, ddof=1),
            np.sum(durations_by_bin[place_bin]),
        )
    # Turn the summed durations into relative occupancy
    params[:, 2] /= np.sum(params[:, 2])
    per_cell_params.append(params)
all_place_responses = np.array(per_cell_params)

# Clean up large memory variables
del all_place_durations

In [None]:
from scipy.stats import lognorm
from scipy.stats import expon
from collections import Counter

class BayesDecoder:
    """Decode a place bin by letting every cell vote with a Gaussian model.

    ``dist`` is indexed as ``dist[cell, bin, k]`` where ``k = 0`` is the mean,
    ``k = 1`` the standard deviation and ``k = 2`` the prior weight of the bin.
    Bin index 0 is never considered as a candidate.
    """

    def __init__(self, dist):
        """Store the parameter array and read the cell/bin counts from its shape."""
        self.dist = dist
        self.num_cells = dist.shape[0]
        self.num_bins = dist.shape[1]

    @staticmethod
    def gaussian_pdf(x, mu, sig):
        """Return the normal density with mean ``mu`` and std ``sig`` at ``x``."""
        return (1 / np.sqrt(2 * np.pi * sig**2)) * np.exp(-(x - mu)**2 / (2 * sig**2))

    def __likelihood(self, x, cell, place_bin):
        """Density of observation ``x`` under the model of ``cell`` for ``place_bin``."""
        mu, sig = self.dist[cell, place_bin, 0], self.dist[cell, place_bin, 1]
        return BayesDecoder.gaussian_pdf(x, mu, sig)

    def __predict_cell(self, x, cell):
        """Return ``(pred, confidence)`` for one cell.

        ``pred`` is the zero-based offset of the winning bin within bins
        ``1..num_bins-1`` (the winning bin index is ``pred + 1``) and
        ``confidence`` is the unnormalised posterior of that winning bin.
        """
        posterior = np.zeros(self.num_bins)
        for place_bin in range(1, self.num_bins):
            prior = self.dist[cell, place_bin, 2]
            posterior[place_bin] = prior * self.__likelihood(x, cell, place_bin)
        # NaN posteriors (e.g. from a zero or undefined std) must never win.
        posterior = np.nan_to_num(posterior, nan=-np.inf, posinf=np.inf, neginf=-np.inf)
        pred = int(np.argmax(posterior[1:]))
        # argmax ran over posterior[1:], so the winner is at pred + 1;
        # posterior[pred] would be the value of the neighbouring bin.
        return pred, posterior[pred + 1]

    def predict(self, x):
        """Return the share of cell votes per predicted bin.

        ``x[cell]`` is the observation for each cell. The result maps the
        1-based predicted bin number to the percentage of cells voting for
        it, rounded to two decimals.
        """
        votes = [self.__predict_cell(x[cell], cell)[0] for cell in range(self.num_cells)]
        total_count = len(votes)
        return {
            offset + 1: round((count / total_count) * 100, 2)
            for offset, count in Counter(votes).items()
        }
    

In [None]:
# Build the decoder from the per-cell, per-bin parameter array.
place_decoder = BayesDecoder(all_place_responses)
# Print the number of cells that passed the validity filter.
print(len(valid_cells_distribution))

In [None]:
# Refresh averages: running this cell resets all accumulated decoding statistics.
from collections import defaultdict

# cumulative_sums[bin_num][b] -> sum of the percentages added for that pair
cumulative_sums = defaultdict(lambda: defaultdict(float))
# counts[bin_num][b] -> number of times a percentage was added for that pair
counts = defaultdict(lambda: defaultdict(int))

# Number of sampling runs accumulated so far
num_runs = 0

In [None]:
def update_running_average(new_run):
    """Fold the results of one run into the module-level accumulators.

    ``new_run`` maps a bin number to a dict of ``{b: percentage}``. For every
    (bin number, b) pair the percentage is added to ``cumulative_sums`` and
    the matching entry of ``counts`` is incremented by one.
    """
    flattened = (
        (true_bin, predicted_bin, share)
        for true_bin, shares in new_run.items()
        for predicted_bin, share in shares.items()
    )
    for true_bin, predicted_bin, share in flattened:
        cumulative_sums[true_bin][predicted_bin] += share
        counts[true_bin][predicted_bin] += 1

In [None]:
# Each iteration draws one normalised response per cell for every place bin,
# decodes it, and folds the resulting vote shares into the running statistics.
# Re-running this cell adds further runs on top of the accumulated ones.
num_iterations = 80
for _ in range(num_iterations):
    # One random draw per cell, for every true place bin
    random_values = {
        key: [random.choice(distribution) for distribution in distributions]
        for key, distributions in bins_data.items()
    }

    # Vote shares per true bin, ordered from most to least voted
    predictions = {}
    for key, bin_prediction_list in random_values.items():
        vote_shares = place_decoder.predict(bin_prediction_list)
        predictions[key] = dict(
            sorted(vote_shares.items(), key=lambda item: item[1], reverse=True)
        )

    update_running_average(predictions)
    num_runs += 1
    print('Number of Samples:', num_runs)


In [None]:
# Average, for every bin, the percentage received by each predicted bin.
# Each sum is divided by the total number of runs. A predicted bin that is
# absent from a run contributed 0% to it, so dividing by counts[bin_num][b]
# (the number of runs in which it appeared) would overstate rarely predicted
# bins, and the averages of one bin would no longer add up to about 100%.
averages = defaultdict(dict)
for bin_num, sum_dict in cumulative_sums.items():
    for b, total_sum in sum_dict.items():
        averages[bin_num][b] = total_sum / num_runs

print("Averages of percentages across all runs:")
for bin_num, avg_dict in sorted(averages.items()):
    print(f"Bin {bin_num}:")
    for b, avg in sorted(avg_dict.items()):
        print(f"  Bin {b}: {avg:.2f}%")