In [None]:
#redesigning code for parallel processing
import math
import multiprocessing as mp
import os
import random
import time
import warnings
from functools import partial

import matlab.engine
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import preprocessing

import place_cell_multifunctions as pcm

#read in events data
# Every column is read as text (dtype='unicode'); numeric columns are cast later.
events_path = r'C:\Users\fgobbo\Meera Thesis\H2203_translated_events_data\H2203_PAL_events_translate.csv'
read_options = dict(sep=',', index_col=False, dtype='unicode')
try:
    # pandas >= 1.3: 'warn' reports and skips malformed rows, which is what
    # error_bad_lines=False (with the default warn_bad_lines=True) used to do.
    events_data = pd.read_csv(events_path, on_bad_lines='warn', **read_options)
except TypeError:
    # pandas < 1.3 does not know on_bad_lines; fall back to the legacy keyword.
    events_data = pd.read_csv(events_path, error_bad_lines=False, **read_options)

#session and stage to have mutual information scores calculated
Session='P02'
Stage='PRE'

#constrains dataframe to session, stage, and moving points
selected_rows = (
    (events_data['Session'] == Session)
    & (events_data['stage'] == Stage)
    & (events_data['Movement status'] == 'moving')
)
ss_data = events_data[selected_rows]

#separates cell data: the cell columns are the ones whose name starts with ' C'
cell_events = ss_data[[col for col in ss_data if col.startswith(' C')]]

# Bin edges for the arena, 10 cm apart on both axes. Each axis is built from
# nine interior edges plus one extra edge below and several above, so that the
# grid also covers positions outside the central area (incl. the startbox).
x_inner_edges = np.linspace(4.8, 94.8, 9, endpoint=False)
x_bins = np.concatenate(([-5.2], x_inner_edges, [94.8, 104.8]))

y_inner_edges = np.linspace(3.6, 93.6, 9, endpoint=False)
y_bins = np.concatenate(([-6.4], y_inner_edges, [93.6, 103.6, 113.6, 123.6]))


# Position columns of the selected session/stage (still text at this point,
# because the CSV was read with dtype='unicode').
x, y = ss_data['x'], ss_data['y']

# Cast the cell traces from text to floating point.
cell_events = cell_events.astype(np.float64)


#normalising all cell events to between 0 and 1 (min-max normalisation)
# Result: one column per cell, labelled 0..n_cells-1, rows labelled 0..n_frames-1
# (the original ' C...' column names and the row index are not carried over).
normed_cells = []
for col in cell_events:
    trace = cell_events[col].tolist()
    lowest = min(trace)
    highest = max(trace)
    if highest == 0:
        # Trace whose maximum is 0 is left untouched.
        normed_cells.append(trace)
    elif highest == lowest:
        # Constant non-zero trace: the range is 0, so (v - min) / (max - min)
        # would divide by zero. Map it to all zeros (v - min with unit scale).
        normed_cells.append([0.0] * len(trace))
    else:
        span = highest - lowest
        normed_cells.append([(value - lowest) / span for value in trace])

# normed_cells holds one row per cell; transpose back to frames x cells.
cell_events = pd.DataFrame(normed_cells).transpose().astype(float)


# Binarise the normalised traces: values of 0.3 or more become 1, values
# below 0.3 become 0. Both masks are taken from the un-binarised frame.
onevals = cell_events >= 0.3
zerovals = cell_events < 0.3
cell_events = cell_events.mask(onevals, 1).mask(zerovals, 0)


#SORTING CRITERIA ONE - a cell needs at least 3 events in the stage; cells with fewer are dropped
MIN_EVENTS = 3

# Column sums of the binarised frame = number of event frames per cell.
events_per_cell = cell_events.sum()
dropped_cells = events_per_cell.index[events_per_cell < MIN_EVENTS].tolist()

# drop(columns=...) instead of the positional axis argument drop(i, 1),
# which pandas 2.0 no longer accepts.
cell_events = cell_events.drop(columns=dropped_cells)

new_cell_index=cell_events.columns.tolist()

# SORTING CRITERIA TWO - maximal bin must be entered 5 times by animal #

#formatting x, y data to float for calculations#
newx = [float(value) for value in x]
newy = [float(value) for value in y]

# Separate float copies of the track used by the bin-entry count further down.
# (The previous `for i in x_b: i = float(i)` loops only rebound the loop
# variable and left the lists holding the raw text values.)
x_b = list(newx)
y_b = list(newy)


    
#creation of a list of (i,j) values - where i denotes the x bin index and j denotes the y bin index
#this is a chronological list of the bin the animal is occupying in each frame#
def _bin_index(coords, edges):
    """Return, for every coordinate, the index k of the bin [edges[k], edges[k+1]).

    Coordinates below the first edge are assigned to the first bin; coordinates
    at or above the last edge are assigned to the last bin, so exactly one
    index is produced per coordinate.
    """
    raw_index = np.digitize(coords, edges) - 1
    return np.clip(raw_index, 0, len(edges) - 2)


x_positions = np.asarray(newx, dtype=float)
y_positions = np.asarray(newy, dtype=float)

# OM is later indexed by frame number (OM[i]), so it must contain exactly one
# entry per frame. The nested-loop version appended a frame twice when it sat
# on a shared bin edge and skipped frames outside the grid, shifting every
# later entry.
off_grid = (
    np.isnan(x_positions) | np.isnan(y_positions)
    | (x_positions < x_bins[0]) | (x_positions > x_bins[-1])
    | (y_positions < y_bins[0]) | (y_positions > y_bins[-1])
)
if off_grid.any():
    warnings.warn(
        "%d frame(s) lie outside the bin grid (or are NaN) and were assigned "
        "to the nearest edge bin" % int(off_grid.sum())
    )

occupancy_map_x = _bin_index(x_positions, x_bins).tolist()
occupancy_map_y = _bin_index(y_positions, y_bins).tolist()
OM=list(zip(occupancy_map_x, occupancy_map_y))


# One list per cell, one entry per frame: the (x bin, y bin) tuple from OM when
# the cell has an event in that frame, otherwise the placeholder 99999.
event_bins = [
    [
        99999 if fired == 0 else OM[frame]
        for frame, fired in enumerate(cell_events[cell].tolist())
    ]
    for cell in cell_events
]
    
    
#FINDING EACH CELLS MAXIMALLY FIRING BINS
# all_locs holds one sub-list per cell with the bin(s) that collected the most
# events; ties are all kept.
all_locs = []
for cell_bins in event_bins:
    # Events per bin, ignoring the 99999 "no event" placeholder. Keys keep the
    # order in which the bins were first seen.
    events_per_bin = {}
    for location in cell_bins:
        if location != 99999:
            events_per_bin[location] = events_per_bin.get(location, 0) + 1

    if not events_per_bin:
        # No events at all: store a dummy entry. It has four elements, so the
        # later "exactly one maximal bin" check never reads it as a real bin.
        all_locs.append([[7,7],[5,5],[3,5,6], [6,7,7]])
        continue

    top_count = max(events_per_bin.values())
    all_locs.append(
        [location for location, count in events_per_bin.items() if count == top_count]
    )

## FINDING NUMBER OF ENTRIES INTO MAXIMAL BIN ##
threshold=5  # a cell is kept only if its maximal bin is entered at least this many times

# x_b / y_b may still hold text values, so convert element by element.
x_track = np.array([float(value) for value in x_b], dtype=float)
y_track = np.array([float(value) for value in y_b], dtype=float)

todrop=[]
for cell_id, max_bins in zip(new_cell_index, all_locs):
    # Cells with several tied maximal bins (or the dummy entry) are dropped.
    if len(max_bins) != 1:
        todrop.append(cell_id)
        continue

    p, q = max_bins[0]
    x_lo, x_hi = x_bins[p], x_bins[p+1]
    y_lo, y_hi = y_bins[q], y_bins[q+1]

    # Frame is in the bin: both x edges inclusive, upper y edge exclusive
    # (same comparisons as the frame-by-frame loop this replaces).
    in_bin = (x_lo <= x_track) & (x_track <= x_hi) & (y_lo <= y_track) & (y_track < y_hi)
    # Frame is strictly outside the closed bin rectangle.
    outside = (x_track < x_lo) | (x_track > x_hi) | (y_track < y_lo) | (y_track > y_hi)

    # An entry = in the bin now, outside it in the previous frame. Frame 0 has
    # no previous frame; it is treated as coming from outside instead of being
    # compared with the LAST frame (index -1 wrap-around).
    came_from_outside = np.ones_like(outside)
    came_from_outside[1:] = outside[:-1]

    entries = int(np.count_nonzero(in_bin & came_from_outside))
    if entries < threshold:
        todrop.append(cell_id)

# Drop all failing cells in one go, after the scan, so positions in
# new_cell_index / all_locs stay aligned while scanning.
# drop(columns=...) replaces the positional axis form drop(i, 1), which
# pandas 2.0 no longer accepts.
cell_events = cell_events.drop(columns=todrop)
dropped_cells.extend(todrop)

# Labels of the cells that survived both sorting criteria, and their binarised
# events as an array with one row per cell (frames x cells, transposed).
new_cell_index = cell_events.columns.tolist()
event_arr = cell_events.to_numpy().T

# pcm.spatial_occupancy puts the x,y data in the format used for CDM estimation.
# Per the original notes: occupie is a (b x f) matrix, b = number of spatial
# bins, f = total frames in the session; each frame has a single 1 marking the
# occupied bin and 0 everywhere else.
occupie = pcm.spatial_occupancy(x, y)

# Spatial entropy is computed once, since it is the same for every cell.
spatial_en = pcm.spatial_entropy(occupie)

# Display the frame to check the cell events were read correctly.
cell_events

In [None]:
## CREATES `shuffles` SHUFFLES OF THE EVENTS DATA FOR EACH CELL ##

shuffles=1000
SHUFFLE_SEED = 0  # fixed seed so the shuffled (null) distribution is reproducible

# Dedicated generator: seeding it does not touch the global `random` state.
shuffle_rng = random.Random(SHUFFLE_SEED)

# shuff_inputs is flat: the shuffles of cell k occupy
# shuff_inputs[k*shuffles:(k+1)*shuffles].
shuff_inputs=[]
for cell_trace in event_arr:
    new_list = cell_trace.tolist()
    for _ in range(shuffles):
        # Each shuffle is a random permutation of the previous one.
        new_list = shuffle_rng.sample(new_list, len(new_list))
        shuff_inputs.append(new_list)


In [None]:
# Starts multipool processing #
# CALCULATES MI VALUE (in bits) FOR EACH CELL #

if __name__ == '__main__':
    start_time = time.time() # records time taken to carry out function
    p=mp.Pool(processes=5)   # opens a pool of 5 processes
    try:
        # one pcm.mutual_information call per row of event_arr (i.e. per cell),
        # with the occupancy matrix and spatial entropy passed to every call
        mutual_infos=p.map(partial(pcm.mutual_information, occupied=occupie, spatial_ent=spatial_en), event_arr)
        print(time.time()-start_time)
    finally:
        # close and join even if a worker raised, so no processes are left behind
        p.close()
        p.join()

# CELL MUTUAL INFORMATION OUTPUT (if you need to call all cells real MI Val) = mutual_infos #

In [None]:
# CALCULATES MUTUAL INFORMATION VALUES FOR SHUFFLED EVENTS DATA #
if __name__ == '__main__':
    start_time = time.time()
    p=mp.Pool(processes=5)
    try:
        # one pcm.mutual_information call per shuffled trace in shuff_inputs
        shuffled_dist=p.map(partial(pcm.mutual_information, occupied=occupie, spatial_ent=spatial_en), shuff_inputs)
        print(time.time()-start_time)
    finally:
        # close and join even if a worker raised, so no processes are left behind
        p.close()
        p.join()

# SHUFFLED DATA MUTUAL INFORMATION OUTPUT = shuffled_dist #
# for cell i - corresponding shuffled MI vals are shuffled_dist[i*shuffles:(i+1)*shuffles]
# (shuffles = 1000) - note this corresponds to the sorted cell data frame

In [None]:
#selects the MI values and shuffled distribution computed above
# Both are plain lists returned by Pool.map, so they are indexed by position
# (the earlier MI_vals['MI'] / SD_vals['dist_vals'] lookups raise TypeError on a list).
MI_vals=mutual_infos
SD_vals=shuffled_dist

# Number of shuffled values per cell, derived from the data instead of a
# hard-coded 1000.
if len(MI_vals) > 0:
    shuffles_per_cell = len(SD_vals) // len(MI_vals)
    if shuffles_per_cell * len(MI_vals) != len(SD_vals):
        raise ValueError(
            "shuffled distribution length %d is not a multiple of the number of cells %d"
            % (len(SD_vals), len(MI_vals))
        )
else:
    shuffles_per_cell = 0

#runs through each cell and finds its percentile against its own shuffled values/stores to percs
percs = [
    stats.percentileofscore(
        SD_vals[cell * shuffles_per_cell:(cell + 1) * shuffles_per_cell], cell_mi
    )
    for cell, cell_mi in enumerate(MI_vals)
]
    

In [None]:
## creates a data frame of cell_id, MI, percentiles and stores##
##creates a dataframe of the shuffled distribution and stores##
# Raw strings: the Windows paths contain backslash sequences such as \M and \H,
# which are invalid escapes in a normal string literal. The resulting path
# text is unchanged.
cell_MI = pd.DataFrame({'cell_id':new_cell_index, 'MI':mutual_infos, 'percentile':percs})
cell_MI.to_csv(r'E:\Meera\Mutual Information\H2203_P02_PRE_MI.csv')

dist_scores = pd.DataFrame({'dist_vals':shuffled_dist})
dist_scores.to_csv(r'E:\Meera\Mutual Information\H2203_P02_PRE_SD.csv')