In [250]:
import os
import h5py
from scipy.io import loadmat
import numpy as np
import random
from preproc import *
from itertools import combinations
from scipy.special import factorial
import statsmodels.api as sm
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from matplotlib.gridspec import GridSpec
from matplotlib.ticker import StrMethodFormatter
from matplotlib.lines import Line2D
from matplotlib.patches import Rectangle
from matplotlib.colors import ListedColormap
from matplotlib.widgets import Slider
from IPython.display import HTML
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, mean_squared_error, classification_report
import textwrap
import pickle
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Masking
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.base import BaseEstimator, ClassifierMixin



In [251]:
# Choose which days to include
#day_list = ['20181105', '20181102', '20181101']

# Or, read in list of days from txt file
day_list = list()
with open(f'data/combined/days.txt', 'r') as file:
    for line in file:
        day_list.append(line.strip())

num_sess = len(day_list)
num_goals = 6
tbin_size = 0.1

In [252]:
good_cell_labels = list()
with open('data/cell_list_hm.txt', 'r') as file:
    for line in file:
        line = line.strip().split('/')
        good_cell_labels.append(f'{line[5]}ch{str(int(line[8][7:]))}c{str(int(line[9][4:]))}')
print(good_cell_labels)

['20181105ch29c1', '20181105ch30c1', '20181105ch43c1', '20181105ch43c2', '20181105ch43c3', '20181105ch45c1', '20181102ch9c1', '20181102ch19c1', '20181102ch19c2', '20181102ch26c1', '20181102ch26c2', '20181102ch29c1', '20181102ch30c1', '20181102ch30c2', '20181102ch31c1', '20181102ch43c1', '20181102ch43c2', '20181102ch45c1', '20181102ch45c2', '20181101ch19c1', '20181101ch19c2', '20181101ch21c1', '20181101ch23c1', '20181101ch29c1', '20181101ch29c2', '20181101ch29c3', '20181101ch29c4', '20181101ch30c1', '20181101ch30c2', '20181101ch35c1', '20181101ch43c1', '20181101ch45c1', '20181031ch19c1', '20181031ch19c2', '20181031ch19c3', '20181031ch26c1', '20181031ch26c2', '20181031ch29c1', '20181031ch30c1', '20181031ch30c2', '20181031ch35c1', '20181031ch35c2', '20181031ch35c3', '20181031ch43c1', '20181031ch43c2', '20181031ch45c1', '20181031ch45c2', '20181026ch19c1', '20181026ch19c2', '20181026ch19c3', '20181026ch29c1', '20181026ch30c1', '20181026ch35c1', '20181026ch35c2', '20181026ch35c3', '20181026c

In [253]:
# Save directory for data files
prefix = "/Volumes/Hippocampus/Data/picasso-misc/"
save_dir = "data/placedist"
# Whether to overwrite preexisting files
overwrite = True

In [254]:
#Read placeist files
all_place_responses = list()
all_place_durations = list()
all_cell_labels = list()

for day in day_list:
    with open(f'{save_dir}/{day}_data.pkl', 'rb') as file:
        data = pickle.load(file)
        num_sess_cells = len(data['cell_labels'])
        all_place_responses.extend(data['place_responses_per_cell'])
        for _ in range(num_sess_cells):
            all_place_durations.append(data['durations_per_place'])
        all_cell_labels.extend(data['cell_labels'])

In [255]:
# Set amount of bins, see which cells are valid for testing 
total_bins = 21
two_thirds = 20
valid_cells_distribution = list()

for day in day_list:
    with open(f'{save_dir}/{day}_data.pkl', 'rb') as file:
        data = pickle.load(file)
        place_responses_per_cell = data['place_responses_per_cell']
        cell_labels = data['cell_labels']
    for cell, label in enumerate(cell_labels):
        if label not in good_cell_labels:
            continue
        total_valid_bins = list()
        # if 2/3 of the bins contain over 30 samples, the cell is valid
        for bin, dist in place_responses_per_cell[cell].items():
            dist = dist[dist > 0]
            if dist.size > 2:
                total_valid_bins.append(bin)

        if len(total_valid_bins) > two_thirds:
            valid_cells_distribution.append(label)
            
print('Amount of valid cells:', len(valid_cells_distribution))
train_size = int(round((len(valid_cells_distribution) * 0.7),0))
test_size = int(round((len(valid_cells_distribution) * 0.3),0))
print(train_size)
print(test_size)
print(valid_cells_distribution)
list_nums = list(range(1,(len(valid_cells_distribution) + 1)))
valid_cell_to_num = dict(zip(valid_cells_distribution, list_nums))

print(valid_cell_to_num)
new_day_list = list()

for cell in valid_cells_distribution:
    new_day = cell[:8]
    new_day_list.append(new_day)


Amount of valid cells: 276
193
83
['20181105ch29c1', '20181105ch43c1', '20181105ch43c3', '20181105ch45c1', '20181102ch19c1', '20181102ch29c1', '20181102ch30c1', '20181102ch30c2', '20181102ch31c1', '20181102ch43c1', '20181102ch43c2', '20181101ch19c1', '20181101ch21c1', '20181101ch29c2', '20181101ch29c3', '20181101ch29c4', '20181101ch30c2', '20181101ch43c1', '20181031ch19c2', '20181031ch26c1', '20181031ch26c2', '20181031ch29c1', '20181031ch30c1', '20181031ch43c1', '20181031ch43c2', '20181026ch19c1', '20181026ch29c1', '20181026ch30c1', '20181026ch35c1', '20181026ch35c2', '20181026ch45c1', '20181022ch9c1', '20181022ch26c1', '20181022ch29c1', '20181022ch43c1', '20181022ch43c2', '20181022ch43c4', '20181022ch45c1', '20181022ch93c1', '20181017ch29c1', '20181017ch45c1', '20181016ch20c1', '20181016ch29c1', '20181016ch45c1', '20181015ch6c1', '20181015ch20c1', '20181015ch29c1', '20181015ch45c1', '20181011ch20c1', '20181011ch29c1', '20181011ch45c1', '20181011ch45c2', '20181011ch45c3', '20181010ch45

In [256]:
bins_data = {}
for day in day_list:
    with open(f'{save_dir}/{day}_data.pkl', 'rb') as file:
        data = pickle.load(file)
        place_responses_per_cell = data['place_responses_per_cell']
        cell_labels = data['cell_labels']
    for cell, label in enumerate(cell_labels):
        if label not in valid_cells_distribution:
            continue
        for bin, dist in place_responses_per_cell[cell].items():
            dist = dist[dist > 0]
            if bin not in bins_data:
                bins_data[bin] = []
            bins_data[bin].append(dist)


In [257]:
all_place_responses = list()
all_place_durations = list()
all_cell_labels = list()

for day in day_list:
    with open(f'{save_dir}/{day}_data.pkl', 'rb') as file:
        data = pickle.load(file)
        num_sess_cells = len(data['cell_labels'])
        for i in range(num_sess_cells):
            cell_label = data['cell_labels'][i]
            if cell_label in valid_cells_distribution:
                all_place_responses.append(data['place_responses_per_cell'][i])
                all_place_durations.append(data['durations_per_place'])
                all_cell_labels.append(cell_label)

num_all_cells = len(all_cell_labels)

In [274]:
for cell, place_responses_per_cell in enumerate(all_place_responses):

    num_bins = 25
    response_params_per_cell = np.zeros((num_bins+1, 3))
    place_durations_per_cell = all_place_durations[cell]
    for bin, dist in place_responses_per_cell.items():
        response_params_per_cell[bin,0] = np.mean(dist)
        response_params_per_cell[bin,1] = np.std(dist, ddof=1)
        response_params_per_cell[bin,2] = np.sum(place_durations_per_cell[bin])
    response_params_per_cell[:,2] = response_params_per_cell[:,2] / np.sum(response_params_per_cell[:,2])
    all_place_responses[cell] = response_params_per_cell
all_place_responses = np.array(all_place_responses)

# Clean up large memory variables
del all_place_durations

NameError: name 'all_place_durations' is not defined

In [281]:
class BayesDecoder:
    def __init__(self, dist):
        self.dist = dist
        self.num_cells = dist.shape[0]
        self.num_bins = dist.shape[1]
        
    def gaussian_pdf(x, mu, sig):
        return (1 / np.sqrt(2 * np.pi * sig**2)) * np.exp(-(x - mu)**2 / (2 * sig**2))
    
    def __likelihood(self, x, cell, bin):
        mu, sig = self.dist[cell,bin,0], self.dist[cell,bin,1]
        return BayesDecoder.gaussian_pdf(x, mu, sig)
    
    def __predict_cell(self, x, cell):
        posterior = np.zeros(self.num_bins)
        for bin in range(1, self.num_bins):
            prior = self.dist[cell,bin,2]
            posterior[bin] = prior * self.__likelihood(x, cell, bin)
        pred = np.argmax(posterior[1:])
        return pred, posterior[pred] 

    def predict(self, x):
        prediction, confidence = np.zeros(self.num_cells), np.zeros(self.num_cells)
        for cell in range(self.num_cells):
            pred, conf = self.__predict_cell(x[cell], cell)
            prediction[cell] = pred
            confidence[cell] = conf
        unique_preds, counts = np.unique(prediction, return_counts=True)
        most_common_pred = unique_preds[np.argmax(counts)]
        return most_common_pred
    

In [260]:
del data

In [261]:
print(all_place_responses)

[[[0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [2.24555862e+00 5.39497881e+00 1.58927991e-02]
  [1.00157394e+00 3.41496595e+00 4.71277368e-02]
  ...
  [1.00244706e+00 4.70171308e+00 4.55999788e-02]
  [1.43663235e+00 4.30521345e+00 3.63941067e-02]
  [2.31080100e+00 7.84911032e+00 8.63038720e-03]]

 [[0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [4.76919996e+00 6.01389317e+00 1.58927991e-02]
  [4.08967694e+00 6.47445616e+00 4.71277368e-02]
  ...
  [4.47700009e+00 6.64845516e+00 4.55999788e-02]
  [3.08450473e+00 5.29533613e+00 3.63941067e-02]
  [2.29361674e+00 4.94233823e+00 8.63038720e-03]]

 [[0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [5.50125550e+00 7.41865934e+00 1.58927991e-02]
  [6.24655441e+00 9.39579746e+00 4.71277368e-02]
  ...
  [7.02210046e+00 9.62004911e+00 4.55999788e-02]
  [6.90813213e+00 8.69705954e+00 3.63941067e-02]
  [8.42732926e+00 1.11972119e+01 8.63038720e-03]]

 ...

 [[0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [5.34433067e+00 6.71782048e+00 1.2492

In [262]:
place_decoder = BayesDecoder(all_place_responses)
print(len(valid_cells_distribution))

276


In [282]:
random_values = {}
for key, distributions in bins_data.items():
    key_random_values = []
    for distribution in distributions:
        random_value = random.choice(distribution)
        key_random_values.append(random_value)
    random_values[key] = key_random_values


print(random_values)

bin_prediction_list = random_values[6]
dummy_list = range(1,277)
print(dummy_list)
predicted_bin = place_decoder.predict(dummy_list)
print(predicted_bin)

{3: [6.134916554345374, 12.984827322291654, 14.592448115447114, 19.518038968343724, 11.047867246675002, 5.183819900941603, 17.96274359380435, 5.208176775457311, 37.97915071377022, 11.833021209349566, 9.995179447515154, 5.987146671882785, 5.100630866970125, 5.914743357737393, 5.228950116647088, 12.897551079250993, 8.69247539772313, 2.353093583453293, 4.96001152214542, 5.747697484424351, 6.2890194730788185, 20.69279328063466, 14.692560607429874, 10.579632712991494, 5.000282749718978, 10.201925070063652, 4.809299873292327, 1.2951267356736749, 5.846783350611743, 8.733290323006582, 5.154007055424115, 5.052008832920536, 37.971358814498224, 10.363978409314244, 5.883006297938657, 5.262762987719474, 7.093328653220883, 5.405673080050725, 5.3122702719256525, 4.784447670954674, 11.492766388773704, 5.0487219779925265, 15.889784463125826, 12.601649393096709, 2.9150875103007836, 8.923836985642641, 13.574580106774963, 21.391610009297352, 3.3328332313273994, 3.21519432012819, 24.198605531603576, 8.2859

  return (1 / np.sqrt(2 * np.pi * sig**2)) * np.exp(-(x - mu)**2 / (2 * sig**2))
  return (1 / np.sqrt(2 * np.pi * sig**2)) * np.exp(-(x - mu)**2 / (2 * sig**2))
