In [1]:
#This program takes as input the combined Ca imaging and behavioural data for each session for a mouse
#It determines whether a cell is a place cell or not based on a spatial information comparison to shuffled data

In [2]:
#importing libraries
import pandas as pd
import numpy as np
import os
import math

In [3]:
#import the dataset
#path is the folder that holds the csv files of the combined Ca imaging and behavioural data
#each Ca_data* entry is the file name of one session inside that folder
path='/Users/XXXXX'
Ca_data='XXXX_interpolated_data.csv' #session 1
Ca_data2='XXXX_interpolated_data.csv' #session 2
Ca_data3='XXXX_interpolated_data.csv' #session 3
Ca_data4='XXXX_interpolated_data.csv' #session 4

#output folder (created inside path if it does not exist yet)
#NOTE(review): the folder name is a fixed string that looks like it records the speed, spike and
#occupancy thresholds; it is not built from the variables, so presumably it has to be edited by hand - confirm
newpath = path + r'/Speed0.01 Spike 5_occupancy0' 
if not os.path.exists(newpath):
    os.makedirs(newpath)
#prefix used at the start of every output file name
testName = 'XXXX'

In [4]:
#define the analysis parameters

#defines the number of shuffles used when determining significance
No_shuffles=2000

#define timestep size (spacing between consecutive rows of the Time column)
Delta_time=0.049962

#define number of bins/gridcells
Gridcells_x=16
Gridcells_y=16
N_bins=Gridcells_x*Gridcells_y

#defines whether the data is pruned to remove the times the mouse doesn't move (1=pruning on, 0=pruning off)
pruning =1

#defines the threshold for counting events
#cell values below this are set to zero by prune()
Spike_threshold=5

#speed_velocity=1 selects the prune() version that uses a fixed speed threshold
#Threshold_velocity is that fixed threshold (in m/s? - units to be confirmed)
#if the velocity is below this, it is pruned when pruning is on
speed_velocity = 1
Threshold_velocity=0.01
#average_speed=1 selects the prune() version whose threshold is a fraction of the mean speed
#speed_fraction is that fraction (threshold = speed_fraction * mean speed)
#if both flags are 1, the average-speed version is defined last and is the one used
#if both flags are 0, prune() is never defined
average_speed = 0
speed_fraction=0.1

#threshold time in grid square (set to zero when speed thresholding is used)
#if the time in a grid square is below this, the activity in the grid square is zeroed 
threshold_time=0

#spatial info threshold %
#the true spatial information is compared against the shuffle value found at position
#floor(No_shuffles*(100-Spat_inf_thresh)/100) of the sorted shuffles
Spat_inf_thresh=1


Function definitions begin here

In [5]:
#define spatial information function.
def spatial_info (df):
    """Spatial information of every cell in an activity map.

    For each cell column the value returned is

        sum over bins of  p_bin * (rate_bin / mean_rate) * log2(rate_bin / mean_rate)

    where p_bin is the bin's share of the total 'Duration', rate_bin is the number of
    events in the bin divided by the bin's 'Duration', and mean_rate is the cell's total
    number of events divided by the total 'Duration'.

    df: one row per bin, with a 'Duration' column and one event-count column per cell.
    Returns a Series indexed by cell column name.

    Bins where the ratio is zero or undefined (no events in the bin, a bin with zero
    duration, or a cell with no events at all) contribute 0 to the sum.
    """
    occupancy=df['Duration']
    cell_activity=df.drop(['Duration'], axis=1)

    total_time=occupancy.sum()
    bin_probability=occupancy/total_time
    average_event_rate=cell_activity.sum()/total_time

    #0/0 (empty bin without events) gives NaN here and is treated as a rate of 0
    event_rate_per_bin=cell_activity.div(occupancy, axis=0).fillna(0)
    rate_ratio=event_rate_per_bin/average_event_rate

    #replace every zero / NaN / infinite ratio by 1 before taking the logarithm:
    #1*log2(1) is exactly 0, so those bins drop out of the sum without log2 ever
    #being evaluated at 0 or on an undefined value
    defined=np.isfinite(rate_ratio) & (rate_ratio>0)
    safe_ratio=rate_ratio.where(defined, 1.0)

    information_per_bin=(safe_ratio*np.log2(safe_ratio)).mul(bin_probability, axis=0)
    spatial_info_result=information_per_bin.sum()

    return spatial_info_result


In [6]:
#defines the activity map function.
def activity_map(input_df):
    """Count, per grid bin and per cell, the rows in which the cell value is above zero.

    input_df: raw cell and behavioural data with the columns 'Time', 'X center',
              'Y center', 'Speed', 'Grid no.' and one column per cell.
    Returns a table indexed by 'Grid no.' with one count column per cell. Only bins
    that occur in input_df get a row.
    """
    behaviour_columns=['Time','X center','Y center','Speed']
    grid_and_cells=input_df.drop(columns=behaviour_columns)
    #values that are not above zero become NaN, and count() ignores NaN
    positive_only=grid_and_cells.where(grid_and_cells>0)
    return positive_only.groupby('Grid no.').count()

In [7]:
#define shuffling and recombining function
def shuffle (df, random_state=None):
    """Shuffle the cell data against the behavioural data.

    The first 5 columns (behavioural data) stay in place. The remaining columns (cell
    data) are reordered row-wise with one random permutation shared by all cells, so a
    row of cell values stays together but is paired with a different behavioural row.

    df: table whose first 5 columns are behavioural data and whose other columns are cells.
    random_state: optional seed / generator passed to DataFrame.sample so that a shuffle
                  can be reproduced; None (default) gives a different shuffle on each call.
    Returns a new table with the same columns and the same index as df.
    """
    behavioural_data_only=df.iloc[:,0:5] #get behavioural data only
    cell_data_only=df.iloc[:,5:] #get cell data only

    shuffled_cell_data=cell_data_only.sample(frac=1, random_state=random_state)

    #both halves are renumbered 0..n-1 before joining so that they are paired by row position;
    #joining on the original labels would only work when df already has a 0..n-1 index
    recombined_data=pd.concat(
        [behavioural_data_only.reset_index(drop=True), shuffled_cell_data.reset_index(drop=True)],
        axis = 1,
    )
    recombined_data.index=df.index

    return(recombined_data)

In [8]:
#define velocity pruning function (fixed speed threshold version)
#only defined when the speed_velocity flag is 1
if speed_velocity==1:
    def prune (df):
        """Remove slow rows and zero out small events.

        Rows whose Speed is below Threshold_velocity are removed and the remaining rows
        are renumbered from 0. In the rows that are kept, every cell value below
        Spike_threshold is set to 0.

        df: table whose first 5 columns are behavioural data (including 'Speed') and
            whose remaining columns are cells.
        Returns a new table with the same columns as df.
        """
        #prune non-moving times
        pruned_velocity=df.loc[~(df.Speed < Threshold_velocity)].reset_index(drop=True)
        #set spikes with size less than a threshold to zero
        #the cell columns are everything after the 5 behavioural columns; they are taken from
        #the table itself so the result does not depend on a cell count set in another cell
        cell_data_only=pruned_velocity.iloc[:,5:] #get cell data only
        behavioural_data_only=pruned_velocity.iloc[:,0:5] #get behavioural data only
        cell_data_only=cell_data_only.mask(cell_data_only<Spike_threshold, 0) #mask cell data
        pruned_data=pd.concat([behavioural_data_only, cell_data_only], axis = 1) #recombine

        return(pruned_data)

In [9]:
#define velocity pruning function by using average speed
#only defined when the average_speed flag is 1
if average_speed==1:
    def prune (df):
        """Remove slow rows and zero out small events, with a threshold relative to the mean speed.

        The speed threshold is speed_fraction times the mean of df['Speed']. Rows whose
        Speed is below it are removed and the remaining rows are renumbered from 0. In the
        rows that are kept, every cell value below Spike_threshold is set to 0.

        df: table whose first 5 columns are behavioural data (including 'Speed') and
            whose remaining columns are cells.
        Returns a new table with the same columns as df.
        """
        #local name differs from the average_speed flag on purpose, to keep the two apart
        mean_speed=df['Speed'].mean()
        thresh_speed=mean_speed*speed_fraction
        pruned_velocity=df.loc[~(df.Speed < thresh_speed)].reset_index(drop=True)
        #set spikes with size less than a threshold to zero
        #the cell columns are everything after the 5 behavioural columns; they are taken from
        #the table itself so the result does not depend on a cell count set in another cell
        cell_data_only=pruned_velocity.iloc[:,5:] #get cell data only
        behavioural_data_only=pruned_velocity.iloc[:,0:5] #get behavioural data only
        cell_data_only=cell_data_only.mask(cell_data_only<Spike_threshold, 0) #mask cell data
        pruned_data=pd.concat([behavioural_data_only, cell_data_only], axis = 1) #recombine

        return(pruned_data)

# Main program begins here: Session 1

In [10]:
#first the spatial information for the cells is calculated

In [11]:
#read in the combined Ca imaging and behavioural data for one session,
#delete intermediate columns and rename the speed column to 'Speed'
unused_columns=['Unnamed: 0','Interpolated speed', 'x-velocity','y-velocity']
interpolated_data_for_session=(
    pd.read_csv(os.path.join(path, Ca_data))
    .drop(columns=unused_columns)
    .rename(columns={'Calculated speed': 'Speed'})
)
interpolated_data_for_session

Unnamed: 0,Time,X center,Y center,Speed,Grid no.,C000,C001,C002,C004,C005,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
0,0.000000,-0.163569,0.143291,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0
1,0.049962,-0.163569,0.143291,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,15.60207,0.0,0.0,0.0,0.0,0.0,0.0
2,0.099924,-0.163569,0.143291,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0
3,0.149886,-0.163569,0.143291,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0
4,0.199848,-0.163569,0.143291,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24011,1199.637582,0.147519,0.066533,0.0,94,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0
24012,1199.687544,0.147519,0.066533,0.0,94,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0
24013,1199.737506,0.147519,0.066533,0.0,94,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0
24014,1199.787468,0.147519,0.066533,0.0,94,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
#prune data (only when the pruning flag is 1)
#the pruned table replaces the raw table under the same name, so the unpruned rows are
#no longer available after this cell has run
if pruning==1:
    interpolated_data_for_session=prune(interpolated_data_for_session)
interpolated_data_for_session    

Unnamed: 0,Time,X center,Y center,Speed,Grid no.,C000,C001,C002,C004,C005,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
0,0.399696,-0.163569,0.143291,0.027951,34,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0
1,0.449658,-0.161958,0.141010,0.129135,34,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0
2,0.499620,-0.156125,0.132751,0.101184,50,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0
3,0.599544,-0.156125,0.132751,0.023261,50,0.0,0.0,2.036496,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0
4,0.649506,-0.154779,0.130856,0.109384,50,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5767,1197.838950,0.128942,0.057399,0.116129,109,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0
5768,1197.988836,0.128942,0.057399,0.103098,109,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0
5769,1198.038798,0.138616,0.060941,0.103098,94,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,21.37924,0.0,0.0,0.0,0.0,0.0
5770,1198.538418,0.138616,0.060941,0.095692,94,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0


In [13]:
#determine the total number of cells detected
#the table has 5 non-cell columns (Time, X center, Y center, Speed, Grid no.); every other column is one cell
N_cells=len(interpolated_data_for_session.columns)-5
N_cells

250

In [14]:
#prune negative gridcell error 
#interpolated_data_for_session=interpolated_data_for_session.drop(interpolated_data_for_session[interpolated_data_for_session['Grid no.'] < 0].index).reset_index(drop=True)

In [15]:
#calculate the overall occupancy per bin
#number of rows spent in each bin; the length is tied to N_bins so that every bin gets a row even if never visited
#assumes valid bins are numbered 1..N_bins - TODO confirm
visits_per_bin=np.bincount(interpolated_data_for_session['Grid no.'], minlength = N_bins+1)
#each row lasts Delta_time, so visits * Delta_time is the time spent in the bin
occupancy_time=pd.DataFrame({'Duration': visits_per_bin*Delta_time})
#bincount always starts counting at 0; the row for bin number 0 is dropped
occupancy_time=occupancy_time.drop([0])
total_time = occupancy_time['Duration'].sum()
bin_probability = occupancy_time['Duration'].div(total_time)

occupancy_time

Unnamed: 0,Duration
1,0.000000
2,0.499620
3,0.299772
4,1.099164
5,2.198328
...,...
252,0.999240
253,0.449658
254,0.399696
255,0.199848


In [16]:
#calculate activity map
#one row per bin that occurs in the data, one column per cell;
#each entry is the number of rows in that bin where the cell value is above zero
cell_activations_per_bin=activity_map(interpolated_data_for_session)
#true_bin_data=pd.concat([occupancy_time, cell_activations_per_bin], axis=1).fillna(0)

cell_activations_per_bin

Unnamed: 0_level_0,C000,C001,C002,C004,C005,C006,C007,C008,C009,C010,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
Grid no.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,1,...,0,0,1,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
253,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
254,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
255,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
#zero rows with occupancy numbers less than threshold
#rows are matched by bin number: the activity map only has rows for bins that occur in the data,
#so its row positions do not line up with the row positions of occupancy_time
bin_duration=occupancy_time['Duration'].reindex(cell_activations_per_bin.index)
under_occupied=(bin_duration<threshold_time).to_numpy()
cell_activations_per_bin.loc[under_occupied, :]=0

#n is the number of activity-map rows that were zeroed
n=int(under_occupied.sum())

print(n)


0


In [18]:
#check the activity maps for each cell (as they stand after the low-occupancy zeroing step)
cell_activations_per_bin

Unnamed: 0_level_0,C000,C001,C002,C004,C005,C006,C007,C008,C009,C010,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
Grid no.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,1,...,0,0,1,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
253,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
254,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
255,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
#combines activity map with duration data for each bin
#the two tables are lined up on bin number; bins that have no row in the activity map
#come out as NaN and are filled with 0
true_bin_data=pd.concat([occupancy_time, cell_activations_per_bin], axis=1).fillna(0)
true_bin_data

Unnamed: 0,Duration,C000,C001,C002,C004,C005,C006,C007,C008,C009,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
1,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.499620,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.299772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.099164,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2.198328,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,0.999240,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
253,0.449658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
254,0.399696,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
255,0.199848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
#calculate the true spatial information for the cells (one value per cell column)
true_spatial_information=spatial_info(true_bin_data)
true_spatial_information

 C000    3.141094
 C001    2.705377
 C002    3.219122
 C004    2.342525
 C005    1.950584
           ...   
 C378    3.180418
 C379    2.797159
 C380    1.995072
 C381    3.078840
 C382    5.189193
Length: 250, dtype: float64

In [21]:
#Next, the spatial information for the shuffles is calculated

#this one is a test: a single shuffle is run through the same steps as the real data
#(shuffle -> optional prune -> activity map -> join with occupancy -> spatial information)
shuffled_raw_data=shuffle(interpolated_data_for_session)
if pruning==1:
    shuffled_raw_data=prune(shuffled_raw_data)
shuffled_activations_per_bin=activity_map(shuffled_raw_data)
shuffled_bin_data=pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
shuffled_spatial_information=spatial_info(shuffled_bin_data)

#the result is stored under the name that the shuffle loop in the next cell extends,
#so this test shuffle ends up as the first column of the full set of shuffles
shuffled_spatial_information_df=shuffled_spatial_information

shuffled_spatial_information_df

 C000    2.984412
 C001    2.530616
 C002    3.384162
 C004    2.434596
 C005    1.666171
           ...   
 C378    3.340636
 C379    2.841318
 C380    1.669535
 C381    2.826526
 C382    4.798643
Length: 250, dtype: float64

In [22]:
#calculate the spatial information for many shuffles of the data
#each result is appended to a list and the list is joined once after the loop;
#joining inside the loop copies the whole growing table on every iteration
shuffle_results=[shuffled_spatial_information_df] #start from what is already stored, so it stays the first column
for i in range (No_shuffles):
    shuffled_raw_data=shuffle(interpolated_data_for_session)
    if pruning==1:
        shuffled_raw_data=prune(shuffled_raw_data)
    shuffled_activations_per_bin=activity_map(shuffled_raw_data)
    shuffled_bin_data=pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
    shuffled_spatial_information=spatial_info(shuffled_bin_data)

    shuffle_results.append(shuffled_spatial_information)

#one row per cell, one column per shuffle
shuffled_spatial_information_df=pd.concat(shuffle_results, axis=1)

#check output of shuffles makes sense
shuffled_spatial_information_df

Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
C000,2.984412,2.958647,3.135299,3.125475,3.035135,3.075273,3.284398,2.980736,3.122771,3.132086,...,3.059081,2.977460,2.947739,2.904570,3.167251,2.562566,3.214867,3.064828,3.075553,2.982360
C001,2.530616,2.791505,3.243354,2.830469,3.040348,2.937736,2.845591,2.872533,3.108152,2.714539,...,2.748297,3.057448,2.716268,2.842352,2.928398,2.818691,2.949841,2.849715,2.835976,3.384469
C002,3.384162,3.635288,3.206372,3.336867,3.266323,3.167871,3.655073,3.164631,3.125467,3.018037,...,3.225301,3.126583,3.109519,3.433141,3.224262,3.123146,3.568150,2.753934,3.436192,2.862010
C004,2.434596,2.831303,2.565703,2.716301,2.742848,2.359442,2.981515,2.628314,2.932932,2.395746,...,2.734368,2.598516,2.515984,2.406348,2.616079,2.407773,2.478209,2.585922,2.501493,2.739598
C005,1.666171,1.678284,1.594377,1.805708,1.722578,1.740442,1.647409,2.009584,1.664020,1.721142,...,1.650246,1.584426,1.551397,1.583851,1.675036,1.589869,1.652165,1.753836,1.815060,1.782512
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C378,3.340636,3.286504,3.086995,3.144011,3.304370,3.373525,2.651398,3.141547,3.115970,3.116118,...,2.935313,2.758846,2.711342,3.124950,3.238280,2.859474,2.876195,3.176767,3.370429,3.087600
C379,2.841318,2.896910,3.026013,3.042422,3.167401,2.988080,2.994642,3.264148,3.071003,3.012132,...,3.260396,3.302248,2.647223,2.842094,2.794118,2.983283,2.693799,3.127434,2.985780,2.962994
C380,1.669535,1.860539,2.103326,1.425977,1.654286,1.741789,1.619193,1.906830,1.710872,2.015045,...,1.770875,1.880789,1.713987,1.489491,1.708831,1.879957,1.833317,1.655691,1.741708,1.688014
C381,2.826526,2.872199,2.939562,2.605445,2.935467,2.874806,2.867787,2.759591,2.771083,2.907440,...,2.943050,2.846693,3.053654,2.953428,3.005985,2.637132,2.959764,2.802938,3.014694,2.514121


In [23]:
#sort shuffles in order
#the values of each row (cell) are sorted in ascending order, left to right
#a new table is built from the sorted array: sorting .values in place only changes the table
#when .values happens to be a writable view of the table's data
shuffled_spatial_information_df=pd.DataFrame(
    np.sort(shuffled_spatial_information_df.to_numpy(), axis=1),
    index=shuffled_spatial_information_df.index,
    columns=shuffled_spatial_information_df.columns,
)

#check sorted shuffles
shuffled_spatial_information_df

Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
C000,2.441648,2.469333,2.475433,2.504134,2.507112,2.512268,2.515810,2.531775,2.531812,2.533622,...,3.620697,3.638752,3.656170,3.659548,3.666890,3.667087,3.675594,3.682271,3.703134,3.772603
C001,2.253422,2.335343,2.339483,2.342944,2.349544,2.351398,2.354377,2.367860,2.380510,2.389567,...,3.466042,3.470887,3.475900,3.481190,3.488053,3.496091,3.535212,3.583962,3.590199,3.676583
C002,2.512139,2.516352,2.548316,2.571725,2.615436,2.623810,2.629068,2.629295,2.631795,2.641177,...,3.832402,3.860029,3.865723,3.868748,3.882976,3.887839,3.898446,3.974497,4.069741,4.253008
C004,2.075743,2.110091,2.120383,2.149000,2.194815,2.196529,2.199163,2.207465,2.213957,2.226153,...,3.145091,3.148446,3.152712,3.155785,3.157604,3.176731,3.178115,3.181689,3.231605,3.327252
C005,1.423692,1.424319,1.436123,1.436183,1.439898,1.440921,1.447742,1.452904,1.453282,1.460158,...,2.010074,2.010200,2.050410,2.051491,2.051784,2.054654,2.072968,2.086356,2.096443,2.254835
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C378,2.401493,2.443117,2.444328,2.455966,2.480632,2.481632,2.487010,2.490234,2.496335,2.497608,...,3.537350,3.537481,3.542968,3.559097,3.563670,3.576472,3.593121,3.676800,3.704715,3.727679
C379,2.349274,2.369496,2.433661,2.454139,2.481228,2.493618,2.493660,2.500349,2.501619,2.514330,...,3.572886,3.573868,3.574084,3.588586,3.609067,3.637944,3.666984,3.668818,3.668868,3.709493
C380,1.403362,1.412627,1.417963,1.422052,1.424561,1.425977,1.427676,1.434215,1.435340,1.441145,...,2.015057,2.019295,2.021844,2.025289,2.034308,2.040865,2.053542,2.071873,2.103326,2.145231
C381,2.229299,2.270799,2.288573,2.296467,2.318994,2.337371,2.360299,2.373793,2.377852,2.383012,...,3.404305,3.411008,3.426138,3.429789,3.442115,3.465260,3.554596,3.563598,3.571633,3.624989


In [24]:
#what is the n% top value for each cell?
#position of that value within the sorted shuffles, rounded down to a whole column number
special_index=math.floor(No_shuffles*(100-Spat_inf_thresh)/100)
spatial_info_value_to_beat=shuffled_spatial_information_df.iloc[:,special_index]

#check values
spatial_info_value_to_beat

 C000    3.563450
 C001    3.351981
 C002    3.755832
 C004    3.081648
 C005    1.992353
           ...   
 C378    3.459394
 C379    3.508193
 C380    1.984076
 C381    3.303529
 C382    5.616912
Name: 0, Length: 250, dtype: float64

In [25]:
#calculate difference between spatial info and the top n% value
#a positive Difference means the true value is above the shuffle value
spatial_analysis=pd.concat(
    [
        true_spatial_information.rename('Spatial info'),
        spatial_info_value_to_beat.rename('Shuffled spatial info'),
    ],
    axis=1,
).assign(Difference=lambda table: table['Spatial info']-table['Shuffled spatial info'])
spatial_analysis

Unnamed: 0,Spatial info,Shuffled spatial info,Difference
C000,3.141094,3.563450,-0.422356
C001,2.705377,3.351981,-0.646605
C002,3.219122,3.755832,-0.536710
C004,2.342525,3.081648,-0.739123
C005,1.950584,1.992353,-0.041769
...,...,...,...
C378,3.180418,3.459394,-0.278976
C379,2.797159,3.508193,-0.711034
C380,1.995072,1.984076,0.010995
C381,3.078840,3.303529,-0.224689


In [26]:
#NOTE(review): this is set to total_time (the summed 'Duration' over all bins); the division by 60 is commented out
#it is not used by any cell visible in this part of the notebook - confirm whether it is still needed
Eventcount_threthold = total_time#/60#EventCount_Threthold
Eventcount_threthold

288.380664

In [27]:
#Defines cells as a place cell if the spatial information is higher than most of the shuffles

def place_cell (row):
    """Label one row: 'Place cell' when row['Difference'] is above zero, otherwise 'Not place cell'."""
    return 'Place cell' if row['Difference']>0 else 'Not place cell'

In [28]:
#create place cell dataframe
#label every cell, then drop the helper column that the label was derived from
place_cell_labels=spatial_analysis.apply(place_cell, axis=1)
spatial_analysis=(
    spatial_analysis
    .assign(**{'Place cell?': place_cell_labels})
    .drop(columns='Difference')
)
spatial_analysis

Unnamed: 0,Spatial info,Shuffled spatial info,Place cell?
C000,3.141094,3.563450,Not place cell
C001,2.705377,3.351981,Not place cell
C002,3.219122,3.755832,Not place cell
C004,2.342525,3.081648,Not place cell
C005,1.950584,1.992353,Not place cell
...,...,...,...
C378,3.180418,3.459394,Not place cell
C379,2.797159,3.508193,Not place cell
C380,1.995072,1.984076,Place cell
C381,3.078840,3.303529,Not place cell


In [29]:
#write to file
#the file goes into the output folder, its name starts with testName;
#index=True writes the cell names as the first column
spatial_analysis.to_csv(f'{newpath}/{testName}D1R1_place_cell_identity.csv', index=True)

# Main program continues here: Session 2

In [30]:
#read in the combined Ca imaging and behavioural data for one session,
#delete intermediate columns and rename the speed column to 'Speed'
unused_columns=['Unnamed: 0','Interpolated speed', 'x-velocity','y-velocity']
interpolated_data_for_session=(
    pd.read_csv(os.path.join(path, Ca_data2))
    .drop(columns=unused_columns)
    .rename(columns={'Calculated speed': 'Speed'})
)
interpolated_data_for_session

Unnamed: 0,Time,X center,Y center,Speed,Grid no.,C000,C001,C002,C004,C005,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
0,0.000000,-0.154397,0.148893,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00000,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
1,0.049962,-0.154397,0.148893,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,10.74353,0.0,0.0,0.0,17.55119,0.0,0.0,0.0,0.0
2,0.099924,-0.154397,0.148893,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00000,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
3,0.149886,-0.154397,0.148893,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00000,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
4,0.199848,-0.154397,0.148893,0.0,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00000,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24012,1199.687544,-0.160124,0.181185,0.0,18,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00000,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
24013,1199.737506,-0.160124,0.181185,0.0,18,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00000,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
24014,1199.787468,-0.160124,0.181185,0.0,18,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00000,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
24015,1199.837430,-0.160124,0.181185,0.0,18,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00000,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0


In [31]:
#prune data (only when the pruning flag is 1)
#the pruned table replaces the raw table under the same name, so the unpruned rows are
#no longer available after this cell has run
if pruning==1:
    interpolated_data_for_session=prune(interpolated_data_for_session)
interpolated_data_for_session    

Unnamed: 0,Time,X center,Y center,Speed,Grid no.,C000,C001,C002,C004,C005,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
0,34.973400,-0.154397,0.148893,0.100912,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
1,35.023362,-0.151532,0.158561,0.100912,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
2,42.018042,-0.151532,0.158561,0.104814,34,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
3,42.068004,-0.141402,0.155901,0.104814,35,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
4,42.817434,-0.141402,0.155901,0.110394,35,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410,1166.912472,-0.160823,0.176926,0.101990,18,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
411,1166.962434,-0.152666,0.174388,0.085488,18,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0
412,1167.661902,-0.152666,0.174388,0.012280,18,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,23.95199,0.0,0.0,0.0,0.0
413,1167.711864,-0.153573,0.175215,0.100983,18,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0


In [32]:
#determine the total number of cells detected in this session
#the table has 5 non-cell columns (Time, X center, Y center, Speed, Grid no.); every other column is one cell
N_cells=len(interpolated_data_for_session.columns)-5
N_cells

250

In [33]:
#calculate the overall occupancy per bin
#number of rows spent in each bin; the length is tied to N_bins so that every bin gets a row even if never visited
#assumes valid bins are numbered 1..N_bins - TODO confirm
visits_per_bin=np.bincount(interpolated_data_for_session['Grid no.'], minlength = N_bins+1)
#each row lasts Delta_time, so visits * Delta_time is the time spent in the bin
occupancy_time=pd.DataFrame({'Duration': visits_per_bin*Delta_time})
#bincount always starts counting at 0; the row for bin number 0 is dropped
occupancy_time=occupancy_time.drop([0])
total_time = occupancy_time['Duration'].sum()
bin_probability = occupancy_time['Duration'].div(total_time)

occupancy_time

Unnamed: 0,Duration
1,0.000000
2,0.799392
3,0.849354
4,0.449658
5,0.249810
...,...
252,0.000000
253,0.000000
254,0.000000
255,0.000000


In [34]:
#calculate activity map (it is combined with the duration data in a later cell)
#one row per bin that occurs in the data, one column per cell;
#each entry is the number of rows in that bin where the cell value is above zero
cell_activations_per_bin=activity_map(interpolated_data_for_session)
cell_activations_per_bin

Unnamed: 0_level_0,C000,C001,C002,C004,C005,C006,C007,C008,C009,C010,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
Grid no.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,1,...,0,0,0,0,1,2,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
6,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
235,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
236,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
237,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
#zero rows with occupancy numbers less than threshold
#rows are matched by bin number: the activity map only has rows for bins that occur in the data,
#so its row positions do not line up with the row positions of occupancy_time
bin_duration=occupancy_time['Duration'].reindex(cell_activations_per_bin.index)
under_occupied=(bin_duration<threshold_time).to_numpy()
cell_activations_per_bin.loc[under_occupied, :]=0

#n is the number of activity-map rows that were zeroed
n=int(under_occupied.sum())

print(n)


0


In [36]:
#combines activity map with duration data for each bin
#the two tables are lined up on bin number; bins that have no row in the activity map
#come out as NaN and are filled with 0
true_bin_data=pd.concat([occupancy_time, cell_activations_per_bin], axis=1).fillna(0)
true_bin_data

Unnamed: 0,Duration,C000,C001,C002,C004,C005,C006,C007,C008,C009,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
1,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.799392,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.849354,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0
4,0.449658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.249810,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
253,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
254,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
255,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
#calculate the true spatial information for the cells (one value per cell column)
true_spatial_information=spatial_info(true_bin_data)
true_spatial_information

 C000    4.097282
 C001    4.614721
 C002    0.000000
 C004    3.145995
 C005    4.824062
           ...   
 C378    2.389929
 C379    4.582558
 C380    2.935405
 C381    2.671248
 C382    5.696968
Length: 250, dtype: float64

In [38]:
#Next, the spatial information for the shuffles is calculated

#a single shuffle is run first, through the same steps as the real data
#(shuffle -> optional prune -> activity map -> join with occupancy -> spatial information)
shuffled_raw_data=shuffle(interpolated_data_for_session)
if pruning==1:
    shuffled_raw_data=prune(shuffled_raw_data)
shuffled_activations_per_bin=activity_map(shuffled_raw_data)
shuffled_bin_data=pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
shuffled_spatial_information=spatial_info(shuffled_bin_data)

#the result is stored under the name that the shuffle loop in the next cell extends,
#so this first shuffle ends up as the first column of the full set of shuffles
shuffled_spatial_information_df=shuffled_spatial_information

shuffled_spatial_information_df

 C000    3.965764
 C001    4.554530
 C002    0.000000
 C004    3.017465
 C005    2.962091
           ...   
 C378    3.288140
 C379    5.319524
 C380    2.629436
 C381    4.720245
 C382    6.696968
Length: 250, dtype: float64

In [39]:
for i in range (No_shuffles):
    shuffled_raw_data=shuffle(interpolated_data_for_session)
    if pruning==1:
        shuffled_raw_data=prune(shuffled_raw_data)
    shuffled_activations_per_bin=activity_map(shuffled_raw_data)
    shuffled_bin_data=pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
    shuffled_spatial_information=spatial_info(shuffled_bin_data)
    
    shuffled_spatial_information_df=pd.concat([shuffled_spatial_information_df, shuffled_spatial_information], axis=1)

shuffled_spatial_information_df

Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
C000,3.965764,4.023596,2.788619,4.177764,3.882621,3.978126,3.405113,2.651913,4.074004,3.479376,...,3.964653,5.050727,3.773596,5.470245,3.546873,3.429595,3.802139,3.627355,3.847282,2.582702
C001,4.554530,5.722030,4.026209,4.917018,4.887863,4.809708,5.035732,3.849371,4.753066,4.445338,...,5.583684,3.849371,4.554530,3.380009,3.554530,2.562103,4.809708,5.143041,3.046676,3.811590
C002,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
C004,3.017465,3.637447,3.458287,3.084102,3.208486,4.363363,3.281387,2.648296,2.940200,3.567154,...,4.042891,2.074114,3.535284,2.923213,3.404633,2.784818,2.471672,2.961798,3.203725,3.187683
C005,2.962091,3.474871,3.818849,2.930690,2.909985,3.562091,3.893161,3.759676,3.867135,3.081915,...,3.624062,3.256641,3.778298,3.741556,2.957617,3.758676,3.575110,4.710654,3.417459,3.741054
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C378,3.288140,2.998047,2.647790,2.649073,3.386030,3.451740,3.134582,2.654526,2.613890,2.490662,...,2.376848,2.835485,2.048836,2.309545,2.854439,2.215212,2.527299,2.653812,2.451098,1.834558
C379,5.319524,6.036003,4.685784,3.568274,4.819524,3.650589,3.584967,5.054266,5.612005,6.404486,...,5.243522,5.743522,4.935187,5.743522,4.243522,4.031509,3.992272,6.036003,3.481741,3.109505
C380,2.629436,3.606226,3.188679,3.275210,3.832361,4.448546,4.399996,3.284660,3.796798,2.677986,...,3.421543,3.404078,2.480367,2.887859,2.590008,2.593017,3.366501,3.848841,3.339949,3.744553
C381,4.720245,3.052139,3.545135,3.552139,5.074004,3.844620,3.622166,4.404486,3.127029,4.427764,...,4.372166,4.050727,4.027538,3.479376,4.229376,2.972861,4.306407,3.898894,3.881297,3.598486


In [40]:
#sort each cell's (row's) shuffled values into ascending order, so column k holds its k-th smallest value
#np.sort returns a sorted copy; the previous in-place sort of .values only changed the frame when
#.values happened to be a writable view, and fails when pandas hands out a read-only array
shuffled_spatial_information_df=pd.DataFrame(
    np.sort(shuffled_spatial_information_df.to_numpy(), axis=1),
    index=shuffled_spatial_information_df.index,
    columns=shuffled_spatial_information_df.columns,
)
shuffled_spatial_information_df

Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
C000,2.219409,2.262675,2.284541,2.326620,2.347236,2.369101,2.380047,2.384032,2.403651,2.405898,...,5.574004,5.598888,5.616486,5.654486,5.800727,5.800727,5.824004,5.946968,6.050727,6.196968
C001,2.444224,2.444224,2.518355,2.518355,2.518355,2.518355,2.518355,2.545842,2.545842,2.545842,...,6.250351,6.250351,6.250351,6.250351,6.250351,6.250351,6.338029,6.338029,6.445338,7.112005
C002,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
C004,1.815239,1.880625,1.888140,1.981905,1.982788,2.032983,2.074114,2.079399,2.093143,2.145915,...,4.677450,4.696696,4.696696,4.712214,4.722030,4.740535,4.765869,4.819524,4.835042,5.014511
C005,1.940386,2.094195,2.113876,2.167372,2.176500,2.195407,2.227606,2.252905,2.261269,2.270397,...,5.141054,5.193661,5.193661,5.193661,5.234951,5.258047,5.341054,5.341054,5.458047,5.575039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C378,1.609223,1.628103,1.834558,1.841606,1.851860,1.883610,1.917093,1.921739,1.926018,1.932799,...,3.896957,3.901758,3.904006,3.955131,3.960961,4.007884,4.027948,4.038293,4.109277,4.120178
C379,2.891455,2.891455,2.891455,2.891455,2.891455,2.891455,2.891455,2.891455,2.891455,2.891455,...,7.196968,7.196968,7.196968,7.196968,7.196968,7.196968,7.196968,7.196968,7.196968,7.696968
C380,1.852893,1.876691,1.895160,1.920956,1.920956,1.964973,1.977001,1.981972,1.990506,2.014330,...,4.379803,4.399996,4.424271,4.448546,4.450068,4.504829,4.509359,4.522355,4.525220,4.924629
C381,2.196132,2.219409,2.219409,2.262675,2.328434,2.369101,2.380047,2.392379,2.392379,2.392379,...,5.470245,5.550727,5.550727,5.550727,5.654486,5.654486,5.696968,5.800727,5.800727,5.946968


In [41]:
#column position, within the ascending-sorted shuffles, of the top Spat_inf_thresh % cut-off
special_index=int(np.floor(No_shuffles*(100-Spat_inf_thresh)/100))
#per-cell shuffled value found at that position
spatial_info_value_to_beat=shuffled_spatial_information_df.iloc[:,special_index]
spatial_info_value_to_beat

 C000    5.404486
 C001    6.004696
 C002    0.000000
 C004    4.583684
 C005    4.957547
           ...   
 C378    3.805726
 C379    6.904486
 C380    4.245359
 C381    5.404486
 C382    8.696968
Name: 0, Length: 250, dtype: float64

In [42]:
#put the true spatial info next to the shuffled cut-off value and subtract the latter from the former
spatial_analysis=pd.concat(
    [
        true_spatial_information.rename('Spatial info'),
        spatial_info_value_to_beat.rename('Shuffled spatial info'),
    ],
    axis=1,
)
spatial_analysis['Difference']=spatial_analysis['Spatial info'].sub(spatial_analysis['Shuffled spatial info'])
spatial_analysis

Unnamed: 0,Spatial info,Shuffled spatial info,Difference
C000,4.097282,5.404486,-1.307205
C001,4.614721,6.004696,-1.389975
C002,0.000000,0.000000,0.000000
C004,3.145995,4.583684,-1.437689
C005,4.824062,4.957547,-0.133485
...,...,...,...
C378,2.389929,3.805726,-1.415797
C379,4.582558,6.904486,-2.321928
C380,2.935405,4.245359,-1.309954
C381,2.671248,5.404486,-2.733238


In [43]:
#keep a copy of total_time under the name Eventcount_threthold and display it
#(total_time is presumably the summed per-bin occupancy duration, as in the later sessions — confirm)
#NOTE(review): the "/60" scaling is commented out and the value is not used in the cells visible here
Eventcount_threthold = total_time#/60#EventCount_Threthold
Eventcount_threthold

20.734230000000004

In [44]:
def place_cell (row):
    """Return 'Place cell' when row['Difference'] is strictly positive, otherwise 'Not place cell'."""
    #a difference of exactly 0 (or NaN) does not count as beating the shuffled value
    return 'Place cell' if row['Difference']>0 else 'Not place cell'

In [45]:
#label every row with place_cell, then drop the 'Difference' column it was derived from
spatial_analysis=(
    spatial_analysis
    .assign(**{'Place cell?': spatial_analysis.apply(place_cell, axis=1)})
    .drop(columns=['Difference'])
)
spatial_analysis

Unnamed: 0,Spatial info,Shuffled spatial info,Place cell?
C000,4.097282,5.404486,Not place cell
C001,4.614721,6.004696,Not place cell
C002,0.000000,0.000000,Not place cell
C004,3.145995,4.583684,Not place cell
C005,4.824062,4.957547,Not place cell
...,...,...,...
C378,2.389929,3.805726,Not place cell
C379,4.582558,6.904486,Not place cell
C380,2.935405,4.245359,Not place cell
C381,2.671248,5.404486,Not place cell


In [46]:
#write the spatial_analysis table to a CSV inside newpath; index=True keeps the row labels as the first column
spatial_analysis.to_csv(f'{newpath}/{testName}D1R2_place_cell_identity.csv', index=True)

# From here session 3

In [47]:
#read in the combined Ca imaging and behavioural data for session 3,
#drop the intermediate columns and rename the speed column to 'Speed'
interpolated_data_for_session=(
    pd.read_csv(os.path.join(path, Ca_data3))
    .drop(columns=['Unnamed: 0','Interpolated speed', 'x-velocity','y-velocity'])
    .rename(columns={'Calculated speed': 'Speed'})
)
interpolated_data_for_session

Unnamed: 0,Time,X center,Y center,Speed,Grid no.,C000,C001,C002,C004,C005,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
0,0.000000,0.142024,-0.160442,0.0,238,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
1,0.049999,0.142024,-0.160442,0.0,238,0.0,0.125798,0.0,0.0,0.0,...,0.0,0.833647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.944802
2,0.099998,0.142024,-0.160442,0.0,238,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
3,0.149997,0.142024,-0.160442,0.0,238,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
4,0.199996,0.142024,-0.160442,0.0,238,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23993,1199.626007,-0.161294,-0.165601,0.0,226,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
23994,1199.676006,-0.161294,-0.165601,0.0,226,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
23995,1199.726005,-0.161294,-0.165601,0.0,226,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
23996,1199.776004,-0.161294,-0.165601,0.0,226,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


In [48]:
#pass the session data through prune() only when pruning is switched on (pruning==1)
interpolated_data_for_session=prune(interpolated_data_for_session) if pruning==1 else interpolated_data_for_session
interpolated_data_for_session

Unnamed: 0,Time,X center,Y center,Speed,Grid no.,C000,C001,C002,C004,C005,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
0,1.049979,0.142024,-0.160442,0.049044,238,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1,1.099978,0.137371,-0.158891,0.109120,238,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.674972,0.0,0.0,0.0,0.0
2,1.149977,0.131672,-0.156992,0.060076,238,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
3,1.549969,0.131672,-0.156992,0.095448,238,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
4,1.599968,0.122455,-0.154512,0.100556,237,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,1136.127277,-0.161505,-0.164789,0.046929,226,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1010,1142.227155,-0.160953,-0.164844,0.102918,226,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1011,1142.277154,-0.151264,-0.165808,0.097373,226,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1012,1150.626987,-0.151264,-0.165808,0.097780,226,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [49]:
#number of cell columns: all columns except the 5 leading non-cell ones
#(Time, X center, Y center, Speed, Grid no.)
N_cells=interpolated_data_for_session.shape[1]-5
N_cells

250

In [50]:
#calculate the overall occupancy per bin
#count the samples in each grid number and convert the counts to seconds with Delta_time;
#minlength is N_bins+1 (rather than a hard-coded 257) so it follows the grid size set in the config
occupancy_time = pd.DataFrame(
    {'Duration': np.bincount(interpolated_data_for_session['Grid no.'], minlength = N_bins+1)*Delta_time}
)
#bincount also produces a slot for grid number 0; that row is removed so the table starts at 1
occupancy_time=occupancy_time.drop([0])
total_time = occupancy_time['Duration'].sum()
#fraction of the total time spent in each bin
bin_probability = occupancy_time['Duration'].div(total_time)

occupancy_time

Unnamed: 0,Duration
1,0.000000
2,0.000000
3,0.499620
4,0.349734
5,0.149886
...,...
252,0.199848
253,0.599544
254,0.399696
255,0.000000


In [51]:
#calculate the activity map for the session data (activity_map is defined outside this section);
#the duration column is combined with it in a later cell
cell_activations_per_bin=activity_map(interpolated_data_for_session)


In [52]:
#zero rows with occupancy numbers less than threshold
#one masked assignment replaces the former element-by-element loop over every row and cell
#NOTE(review): rows are matched by position, so this assumes row i of cell_activations_per_bin
#describes the same bin as row i of occupancy_time — confirm against activity_map
limit=len(cell_activations_per_bin.index)
under_occupied=(occupancy_time.iloc[:limit,0]<threshold_time).to_numpy()
cell_activations_per_bin.iloc[under_occupied,:N_cells]=0

#number of rows that were zeroed
n=int(under_occupied.sum())
print(n)


0


In [53]:
#place the per-bin duration next to the per-bin cell activations; concat aligns the rows on the index
#and any entries left missing by that alignment are filled with 0
true_bin_data=pd.concat([occupancy_time, cell_activations_per_bin], axis=1).fillna(0)
true_bin_data

Unnamed: 0,Duration,C000,C001,C002,C004,C005,C006,C007,C008,C009,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
1,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.499620,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.349734,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.149886,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,0.199848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
253,0.599544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
254,0.399696,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
255,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
#calculate the true (unshuffled) spatial information for the cells from the duration + activity table
true_spatial_information=spatial_info(true_bin_data)
true_spatial_information

 C000    4.869032
 C001    4.115958
 C002    3.318235
 C004    4.210067
 C005    3.886359
           ...   
 C378    1.705259
 C379    4.285625
 C380    4.591184
 C381    4.173482
 C382    3.558813
Length: 250, dtype: float64

In [55]:
#Next, the spatial information for the shuffles is calculated
#This cell runs one shuffle through the same steps as the true data; the loop in the next cell
#appends No_shuffles more, so the final table holds No_shuffles+1 shuffles
#NOTE(review): no random seed is set in the visible cells — confirm whether shuffle() is reproducible

shuffled_raw_data=shuffle(interpolated_data_for_session)
if pruning==1:
    shuffled_raw_data=prune(shuffled_raw_data)
shuffled_activations_per_bin=activity_map(shuffled_raw_data)
#the occupancy from the unshuffled data is reused for the shuffled activity
shuffled_bin_data=pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
shuffled_spatial_information=spatial_info(shuffled_bin_data)

#start the collection of shuffled results with this first shuffle
shuffled_spatial_information_df=shuffled_spatial_information

shuffled_spatial_information_df

 C000    5.526410
 C001    4.305274
 C002    3.812972
 C004    3.872617
 C005    4.275324
           ...   
 C378    1.771667
 C379    3.749790
 C380    4.350386
 C381    4.553637
 C382    4.177102
Length: 250, dtype: float64

In [56]:
#run No_shuffles further shuffles and collect the per-cell spatial information of each one
#the results are gathered in a list and concatenated once after the loop; calling pd.concat inside
#the loop re-copied the whole growing table on every iteration
#the resulting columns are labelled 0..No_shuffles; later cells pick columns by position
shuffle_columns=[shuffled_spatial_information_df]
for i in range (No_shuffles):
    shuffled_raw_data=shuffle(interpolated_data_for_session)
    if pruning==1:
        shuffled_raw_data=prune(shuffled_raw_data)
    shuffled_activations_per_bin=activity_map(shuffled_raw_data)
    #the occupancy from the unshuffled data is reused for every shuffle
    shuffled_bin_data=pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
    shuffled_spatial_information=spatial_info(shuffled_bin_data)

    shuffle_columns.append(shuffled_spatial_information)

shuffled_spatial_information_df=pd.concat(shuffle_columns, axis=1)

shuffled_spatial_information_df

Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
C000,5.526410,6.239915,5.608398,5.789683,5.458201,5.693361,5.272710,4.474658,6.178487,5.463645,...,6.824878,5.985842,4.843141,4.900879,5.608398,3.947434,5.532397,5.400879,5.678487,5.239915
C001,4.305274,4.250730,3.308463,3.031338,3.929313,4.175286,3.822694,4.297455,4.268458,3.567842,...,4.257401,4.783205,4.588472,3.908463,4.538043,4.213388,4.324808,3.846977,5.429929,4.114384
C002,3.812972,3.317036,3.388111,3.698885,3.123337,3.043195,3.574746,3.619680,3.592674,3.101999,...,3.792279,3.529683,3.606740,3.543141,4.290054,2.721007,3.327239,3.099632,3.163214,3.400930
C004,3.872617,4.452292,4.036627,3.527538,4.107188,3.593196,4.230600,4.139880,4.568893,3.885090,...,4.260291,4.388077,3.847713,4.434869,5.047344,3.940035,2.902326,3.300889,3.515807,3.822691
C005,4.275324,3.655459,3.579030,3.392785,4.345691,3.794324,3.622525,3.447638,4.466972,3.365119,...,3.824878,3.707280,3.375774,4.074450,3.635672,3.882437,3.656282,3.690609,3.956888,3.674008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C378,1.771667,2.044596,1.656349,2.136708,1.720033,1.530164,1.927841,1.932157,1.733172,1.806768,...,1.822762,1.836577,2.132190,1.773510,1.901319,1.641538,1.971291,1.849810,1.647025,1.827920
C379,3.749790,3.858464,3.849570,4.270107,3.956624,4.583864,4.867601,3.716023,3.739345,3.299189,...,4.201779,4.060542,3.539872,4.007176,3.963594,3.504621,4.393539,3.455239,3.615850,4.541743
C380,4.350386,4.034312,3.274439,3.955379,4.168357,4.547344,4.897267,3.821264,3.649250,3.821422,...,3.533487,4.673843,3.478741,4.256076,4.841196,3.779456,3.541941,3.860038,3.387898,4.265851
C381,4.553637,3.117959,3.870725,3.066488,3.527136,3.587405,3.813214,3.783200,4.058258,3.625806,...,3.640573,3.222579,3.968409,3.560647,3.718000,3.334537,3.469478,3.601840,3.877752,3.889285


In [57]:
#sort each cell's (row's) shuffled values into ascending order, so column k holds its k-th smallest value
#np.sort returns a sorted copy; the previous in-place sort of .values only changed the frame when
#.values happened to be a writable view, and fails when pandas hands out a read-only array
shuffled_spatial_information_df=pd.DataFrame(
    np.sort(shuffled_spatial_information_df.to_numpy(), axis=1),
    index=shuffled_spatial_information_df.index,
    columns=shuffled_spatial_information_df.columns,
)
shuffled_spatial_information_df

Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
C000,3.611746,3.611746,3.611746,3.611746,3.611746,3.611746,3.687747,3.739915,3.739915,3.895266,...,7.693361,7.693361,7.693361,7.985842,7.985842,7.985842,7.985842,8.193361,8.400879,8.485842
C001,2.711820,2.895328,2.913007,2.945805,2.967842,2.978731,3.003509,3.029999,3.031338,3.039672,...,5.765543,5.765543,5.765543,5.795944,5.818150,5.829929,5.829929,6.014328,6.112936,6.248551
C002,2.384048,2.438306,2.460392,2.466693,2.484409,2.491012,2.492476,2.519729,2.522130,2.530352,...,4.294761,4.321065,4.340329,4.377394,4.383214,4.422272,4.428782,4.439918,4.453737,4.688460
C004,2.615389,2.803563,2.819551,2.820819,2.884464,2.902326,2.903886,2.945323,2.968240,2.970897,...,5.219415,5.234271,5.246744,5.253518,5.255819,5.281839,5.307172,5.372559,5.388077,5.510163
C005,2.561632,2.616041,2.623448,2.624623,2.629920,2.654056,2.712439,2.723007,2.731687,2.736475,...,4.673482,4.682281,4.701690,4.710080,4.751758,4.766012,4.811450,4.823999,4.893765,5.000321
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C378,1.418620,1.427984,1.431619,1.445186,1.450862,1.451125,1.466525,1.468861,1.469175,1.470933,...,2.328644,2.340135,2.341608,2.347437,2.385482,2.385535,2.391905,2.393059,2.431115,2.435725
C379,2.667560,2.693736,2.834227,2.852927,2.890694,2.918313,2.933732,2.940719,2.941067,2.946609,...,5.295987,5.318904,5.318904,5.334422,5.339280,5.339280,5.441732,5.483575,5.539225,5.629320
C380,2.684991,2.754889,2.781313,2.878807,2.891908,2.908529,2.910914,2.935274,2.959495,2.970524,...,5.275065,5.281839,5.362743,5.388077,5.403595,5.485571,5.539225,5.539225,5.583064,5.778052
C381,2.550413,2.670466,2.711839,2.728933,2.734731,2.734874,2.745799,2.764170,2.779001,2.780813,...,4.726240,4.729080,4.731320,4.755401,4.774401,4.799960,4.821201,4.871602,4.875321,5.070249


In [58]:
#column position, within the ascending-sorted shuffles, of the top Spat_inf_thresh % cut-off
special_index=int(np.floor(No_shuffles*(100-Spat_inf_thresh)/100))
#per-cell shuffled value found at that position
spatial_info_value_to_beat=shuffled_spatial_information_df.iloc[:,special_index]
spatial_info_value_to_beat

 C000    7.485842
 C001    5.443256
 C002    4.230376
 C004    5.152237
 C005    4.615119
           ...   
 C378    2.249538
 C379    5.179387
 C380    5.148509
 C381    4.583583
 C382    5.109514
Name: 0, Length: 250, dtype: float64

In [59]:
#put the true spatial info next to the shuffled cut-off value and subtract the latter from the former
spatial_analysis=pd.concat(
    [
        true_spatial_information.rename('Spatial info'),
        spatial_info_value_to_beat.rename('Shuffled spatial info'),
    ],
    axis=1,
)
spatial_analysis['Difference']=spatial_analysis['Spatial info'].sub(spatial_analysis['Shuffled spatial info'])
spatial_analysis

Unnamed: 0,Spatial info,Shuffled spatial info,Difference
C000,4.869032,7.485842,-2.616810
C001,4.115958,5.443256,-1.327299
C002,3.318235,4.230376,-0.912141
C004,4.210067,5.152237,-0.942170
C005,3.886359,4.615119,-0.728760
...,...,...,...
C378,1.705259,2.249538,-0.544278
C379,4.285625,5.179387,-0.893762
C380,4.591184,5.148509,-0.557326
C381,4.173482,4.583583,-0.410101


In [60]:
#keep a copy of total_time (the summed per-bin occupancy duration of this session) as Eventcount_threthold
#NOTE(review): the "/60" scaling is commented out and the value is not used in the cells visible here
Eventcount_threthold = total_time#/60#EventCount_Threthold
Eventcount_threthold

50.661468

In [61]:
def place_cell (row):
    """Return 'Place cell' when row['Difference'] is strictly positive, otherwise 'Not place cell'."""
    #a difference of exactly 0 (or NaN) does not count as beating the shuffled value
    return 'Place cell' if row['Difference']>0 else 'Not place cell'

In [62]:
#label every row with place_cell, then drop the 'Difference' column it was derived from
spatial_analysis=(
    spatial_analysis
    .assign(**{'Place cell?': spatial_analysis.apply(place_cell, axis=1)})
    .drop(columns=['Difference'])
)
spatial_analysis

Unnamed: 0,Spatial info,Shuffled spatial info,Place cell?
C000,4.869032,7.485842,Not place cell
C001,4.115958,5.443256,Not place cell
C002,3.318235,4.230376,Not place cell
C004,4.210067,5.152237,Not place cell
C005,3.886359,4.615119,Not place cell
...,...,...,...
C378,1.705259,2.249538,Not place cell
C379,4.285625,5.179387,Not place cell
C380,4.591184,5.148509,Not place cell
C381,4.173482,4.583583,Not place cell


In [63]:
#write the spatial_analysis table to a CSV inside newpath; index=True keeps the row labels as the first column
spatial_analysis.to_csv(f'{newpath}/{testName}D2R1_place_cell_identity.csv', index=True)

# From here session 4

In [64]:
#read in the combined Ca imaging and behavioural data for session 4,
#drop the intermediate columns and rename the speed column to 'Speed'
interpolated_data_for_session=(
    pd.read_csv(os.path.join(path, Ca_data4))
    .drop(columns=['Unnamed: 0','Interpolated speed', 'x-velocity','y-velocity'])
    .rename(columns={'Calculated speed': 'Speed'})
)
interpolated_data_for_session

Unnamed: 0,Time,X center,Y center,Speed,Grid no.,C000,C001,C002,C004,C005,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
0,0.000000,0.161569,-0.164951,0.000000,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
1,0.049999,0.161569,-0.164951,0.000000,239,0.0,0.0,0.0,0.0,0.0,...,0.0,1.749561,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00019
2,0.099998,0.161569,-0.164951,0.000000,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
3,0.149997,0.161569,-0.164951,0.000000,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
4,0.199996,0.161569,-0.164951,0.000000,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.293297,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23989,1199.426011,0.082447,0.112806,0.069849,60,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
23990,1199.476010,0.082447,0.112806,0.100462,60,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
23991,1199.526009,0.072588,0.114736,0.100462,59,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
23992,1199.576008,0.072588,0.114736,0.085711,59,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00000


In [65]:
#pass the session data through prune() only when pruning is switched on (pruning==1)
interpolated_data_for_session=prune(interpolated_data_for_session) if pruning==1 else interpolated_data_for_session
interpolated_data_for_session

Unnamed: 0,Time,X center,Y center,Speed,Grid no.,C000,C001,C002,C004,C005,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
0,40.049199,0.162119,-0.164923,0.100158,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,40.099198,0.171572,-0.164448,0.094652,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,137.997240,0.171572,-0.164448,0.100130,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,138.047239,0.166309,-0.155930,0.100130,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,138.897222,0.166309,-0.155930,0.066005,239,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
912,1199.426011,0.082447,0.112806,0.069849,60,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
913,1199.476010,0.082447,0.112806,0.100462,60,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
914,1199.526009,0.072588,0.114736,0.100462,59,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
915,1199.576008,0.072588,0.114736,0.085711,59,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [66]:
#number of cell columns: all columns except the 5 leading non-cell ones
#(Time, X center, Y center, Speed, Grid no.)
N_cells=interpolated_data_for_session.shape[1]-5
N_cells

250

In [67]:
#calculate the overall occupancy per bin
#count the samples in each grid number and convert the counts to seconds with Delta_time;
#minlength is N_bins+1 (rather than a hard-coded 257) so it follows the grid size set in the config
occupancy_time = pd.DataFrame(
    {'Duration': np.bincount(interpolated_data_for_session['Grid no.'], minlength = N_bins+1)*Delta_time}
)
#bincount also produces a slot for grid number 0; that row is removed so the table starts at 1
occupancy_time=occupancy_time.drop([0])
total_time = occupancy_time['Duration'].sum()
#fraction of the total time spent in each bin
bin_probability = occupancy_time['Duration'].div(total_time)

occupancy_time

Unnamed: 0,Duration
1,0.000000
2,0.000000
3,0.000000
4,0.000000
5,0.299772
...,...
252,0.000000
253,0.000000
254,0.199848
255,0.000000


In [68]:
#calculate the activity map for the session data (activity_map is defined outside this section);
#the duration column is combined with it in a later cell
cell_activations_per_bin=activity_map(interpolated_data_for_session)

In [69]:
#zero rows with occupancy numbers less than threshold
#one masked assignment replaces the former element-by-element loop over every row and cell
#NOTE(review): rows are matched by position, so this assumes row i of cell_activations_per_bin
#describes the same bin as row i of occupancy_time — confirm against activity_map
limit=len(cell_activations_per_bin.index)
under_occupied=(occupancy_time.iloc[:limit,0]<threshold_time).to_numpy()
cell_activations_per_bin.iloc[under_occupied,:N_cells]=0

#number of rows that were zeroed
n=int(under_occupied.sum())
print(n)


0


In [70]:
#place the per-bin duration next to the per-bin cell activations; concat aligns the rows on the index
#and any entries left missing by that alignment are filled with 0
true_bin_data=pd.concat([occupancy_time, cell_activations_per_bin], axis=1).fillna(0)
true_bin_data

Unnamed: 0,Duration,C000,C001,C002,C004,C005,C006,C007,C008,C009,...,C371,C374,C375,C376,C377,C378,C379,C380,C381,C382
1,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.299772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
253,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
254,0.199848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
255,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
#calculate the true (unshuffled) spatial information for the cells from the duration + activity table
true_spatial_information=spatial_info(true_bin_data)
true_spatial_information

 C000    3.789253
 C001    4.587536
 C002    2.981068
 C004    3.524623
 C005    6.679814
           ...   
 C378    2.025213
 C379    2.183724
 C380    6.644619
 C381    3.846209
 C382    0.000000
Length: 250, dtype: float64

In [72]:
#Next, the spatial information for the shuffles is calculated
#This cell runs one shuffle through the same steps as the true data; the loop in the next cell
#appends No_shuffles more, so the final table holds No_shuffles+1 shuffles
#NOTE(review): no random seed is set in the visible cells — confirm whether shuffle() is reproducible

shuffled_raw_data=shuffle(interpolated_data_for_session)
if pruning==1:
    shuffled_raw_data=prune(shuffled_raw_data)
shuffled_activations_per_bin=activity_map(shuffled_raw_data)
#the occupancy from the unshuffled data is reused for the shuffled activity
shuffled_bin_data=pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
shuffled_spatial_information=spatial_info(shuffled_bin_data)

#start the collection of shuffled results with this first shuffle
shuffled_spatial_information_df=shuffled_spatial_information

shuffled_spatial_information_df

 C000    3.246772
 C001    4.181168
 C002    3.702072
 C004    2.702123
 C005    4.424333
           ...   
 C378    2.132332
 C379    2.964185
 C380    5.548297
 C381    3.688525
 C382    0.000000
Length: 250, dtype: float64

In [73]:
#run No_shuffles further shuffles and collect the per-cell spatial information of each one
#the results are gathered in a list and concatenated once after the loop; calling pd.concat inside
#the loop re-copied the whole growing table on every iteration
#the resulting columns are labelled 0..No_shuffles; later cells pick columns by position
shuffle_columns=[shuffled_spatial_information_df]
for i in range (No_shuffles):
    shuffled_raw_data=shuffle(interpolated_data_for_session)
    if pruning==1:
        shuffled_raw_data=prune(shuffled_raw_data)
    shuffled_activations_per_bin=activity_map(shuffled_raw_data)
    #the occupancy from the unshuffled data is reused for every shuffle
    shuffled_bin_data=pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
    shuffled_spatial_information=spatial_info(shuffled_bin_data)

    shuffle_columns.append(shuffled_spatial_information)

shuffled_spatial_information_df=pd.concat(shuffle_columns, axis=1)

shuffled_spatial_information_df

Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
C000,3.246772,4.276431,4.242699,4.279093,4.529093,5.217815,4.109575,4.675333,4.203091,3.935494,...,5.108463,4.530208,4.087709,3.808457,3.692722,4.445949,3.852957,5.175333,4.236315,5.052073
C001,4.181168,4.044164,3.879592,3.486843,3.686964,3.631052,3.942250,4.070767,3.267062,3.836848,...,3.717348,3.529468,3.795312,3.329929,3.148996,3.597124,4.038311,2.682848,4.421699,3.607179
C002,3.702072,3.228219,3.026688,3.465704,4.118786,2.971905,3.357898,4.016732,3.821223,3.133997,...,3.176726,2.933700,3.523235,3.074794,2.936448,3.502883,3.522608,2.958386,3.951346,3.609346
C004,2.702123,3.420057,3.773476,3.381393,3.994393,4.283889,3.100446,3.839041,3.809965,3.607179,...,3.725761,3.857877,2.809327,4.000332,4.488597,3.772388,3.449983,3.737542,3.129888,3.524129
C005,4.424333,6.255815,5.679814,6.179814,4.136082,5.055850,5.055850,4.424333,4.802370,4.173083,...,6.340778,6.255815,6.144619,4.463334,6.490558,6.548297,5.179814,6.755815,6.144619,5.018850
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C378,2.132332,1.750618,2.058448,1.761110,1.828907,1.981491,1.915998,1.825447,1.745006,2.229283,...,2.286917,2.066971,2.029258,1.616775,1.914062,2.021988,2.207673,2.153020,2.239042,2.084839
C379,2.964185,2.464339,2.266325,2.390388,2.167305,2.628493,2.680094,2.845738,2.825622,2.281185,...,2.499895,2.491175,2.362546,3.179145,2.382265,2.341275,2.588195,2.732957,2.374568,3.108050
C380,5.548297,5.179814,6.179814,5.483655,4.691174,5.255815,5.555850,6.340778,4.840778,6.255815,...,6.255815,5.424333,6.340778,5.226369,4.340778,4.594851,6.518850,5.755815,6.297047,5.048297
C381,3.688525,4.120185,3.271806,4.812186,3.232061,4.592935,4.371543,3.700115,4.020917,3.146262,...,4.113470,4.251618,3.829716,3.386863,3.786383,4.274049,3.782090,3.713951,4.112786,4.130942


In [74]:
#sort each cell's (row's) shuffled values into ascending order, so column k holds its k-th smallest value
#np.sort returns a sorted copy; the previous in-place sort of .values only changed the frame when
#.values happened to be a writable view, and fails when pandas hands out a read-only array
shuffled_spatial_information_df=pd.DataFrame(
    np.sort(shuffled_spatial_information_df.to_numpy(), axis=1),
    index=shuffled_spatial_information_df.index,
    columns=shuffled_spatial_information_df.columns,
)
shuffled_spatial_information_df

Unnamed: 0,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18
C000,2.655833,2.655833,2.850235,2.905833,2.947610,2.949726,2.953091,2.994869,2.994869,2.994869,...,5.929814,5.929814,5.953091,6.048297,6.068912,6.071574,6.071574,6.175333,6.217815,6.444537
C001,2.432150,2.486674,2.504057,2.545965,2.588710,2.625715,2.631689,2.642832,2.657485,2.663833,...,4.796026,4.807806,4.809327,4.820301,4.862107,4.976460,4.984873,5.000735,5.035751,5.135178
C002,2.220807,2.246787,2.252968,2.286358,2.292485,2.344473,2.367060,2.397469,2.417597,2.424259,...,4.310104,4.313906,4.320449,4.328985,4.339330,4.345123,4.357655,4.359822,4.479667,4.505091
C004,2.353116,2.418134,2.474894,2.557924,2.572970,2.581281,2.596843,2.617036,2.634700,2.653219,...,4.930470,4.938883,4.938883,4.938883,4.946332,4.963158,4.989761,5.044164,5.097602,5.099673
C005,3.555850,3.555850,3.555850,3.555850,3.555850,3.555850,3.555850,3.594851,3.594851,3.594851,...,7.548297,7.679814,7.840778,7.840778,7.840778,7.840778,7.840778,7.840778,8.048297,8.048297
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C378,1.493547,1.506449,1.508821,1.512412,1.521268,1.533663,1.539933,1.560076,1.570603,1.571139,...,2.581803,2.582493,2.596983,2.602739,2.625151,2.628368,2.656861,2.662979,2.682801,2.709489
C379,1.700680,1.750142,1.756384,1.779521,1.833429,1.833476,1.844386,1.848650,1.855720,1.857477,...,3.136503,3.167648,3.179145,3.205401,3.209587,3.217717,3.259481,3.311697,3.351886,3.382173
C380,3.555850,3.555850,3.555850,3.555850,3.555850,3.555850,3.555850,3.555850,3.555850,3.555850,...,7.548297,7.679814,7.679814,7.840778,7.840778,7.840778,7.840778,7.840778,7.840778,8.255815
C381,2.584952,2.648037,2.651222,2.661781,2.682702,2.743954,2.762318,2.767137,2.788395,2.793156,...,5.202161,5.248715,5.258531,5.286852,5.312186,5.324988,5.394161,5.399864,5.438000,5.507173


In [75]:
#column position, within the ascending-sorted shuffles, of the top Spat_inf_thresh % cut-off
special_index=int(np.floor(No_shuffles*(100-Spat_inf_thresh)/100))
#per-cell shuffled value found at that position
spatial_info_value_to_beat=shuffled_spatial_information_df.iloc[:,special_index]
spatial_info_value_to_beat

 C000    5.765976
 C001    4.690745
 C002    4.200227
 C004    4.751761
 C005    7.340778
           ...   
 C378    2.533554
 C379    3.080746
 C380    7.340778
 C381    5.060828
 C382    0.000000
Name: 0, Length: 250, dtype: float64

In [76]:
#put the true spatial info next to the shuffled cut-off value and subtract the latter from the former
spatial_analysis=pd.concat(
    [
        true_spatial_information.rename('Spatial info'),
        spatial_info_value_to_beat.rename('Shuffled spatial info'),
    ],
    axis=1,
)
spatial_analysis['Difference']=spatial_analysis['Spatial info'].sub(spatial_analysis['Shuffled spatial info'])
spatial_analysis

Unnamed: 0,Spatial info,Shuffled spatial info,Difference
C000,3.789253,5.765976,-1.976723
C001,4.587536,4.690745,-0.103209
C002,2.981068,4.200227,-1.219159
C004,3.524623,4.751761,-1.227138
C005,6.679814,7.340778,-0.660964
...,...,...,...
C378,2.025213,2.533554,-0.508342
C379,2.183724,3.080746,-0.897022
C380,6.644619,7.340778,-0.696159
C381,3.846209,5.060828,-1.214619


In [77]:
#keep a copy of total_time (the summed per-bin occupancy duration of this session) as Eventcount_threthold
#NOTE(review): the "/60" scaling is commented out and the value is not used in the cells visible here
Eventcount_threthold = total_time#/60#EventCount_Threthold
Eventcount_threthold

45.815154

In [78]:
def place_cell (row):
    """Return 'Place cell' when row['Difference'] is strictly positive, otherwise 'Not place cell'."""
    #a difference of exactly 0 (or NaN) does not count as beating the shuffled value
    return 'Place cell' if row['Difference']>0 else 'Not place cell'

In [79]:
#label every row with place_cell, then drop the 'Difference' column it was derived from
spatial_analysis=(
    spatial_analysis
    .assign(**{'Place cell?': spatial_analysis.apply(place_cell, axis=1)})
    .drop(columns=['Difference'])
)
spatial_analysis

Unnamed: 0,Spatial info,Shuffled spatial info,Place cell?
C000,3.789253,5.765976,Not place cell
C001,4.587536,4.690745,Not place cell
C002,2.981068,4.200227,Not place cell
C004,3.524623,4.751761,Not place cell
C005,6.679814,7.340778,Not place cell
...,...,...,...
C378,2.025213,2.533554,Not place cell
C379,2.183724,3.080746,Not place cell
C380,6.644619,7.340778,Not place cell
C381,3.846209,5.060828,Not place cell


In [80]:
#write the spatial_analysis table to a CSV inside newpath; index=True keeps the row labels as the first column
spatial_analysis.to_csv(f'{newpath}/{testName}D2R2_place_cell_identity.csv', index=True)

# From here Session 5

In [81]:
#read in the combined Ca imaging and behavioural data for session 5,
#drop the intermediate columns and rename the speed column to 'Speed'
#NOTE(review): Ca_data5 is not defined in the file-name configuration cell (only Ca_data..Ca_data4 are),
#so this cell raises NameError until a session 5 file name is added there
interpolated_data_for_session=(
    pd.read_csv(os.path.join(path, Ca_data5))
    .drop(columns=['Unnamed: 0','Interpolated speed', 'x-velocity','y-velocity'])
    .rename(columns={'Calculated speed': 'Speed'})
)
interpolated_data_for_session

NameError: name 'Ca_data5' is not defined

In [None]:
#pass the session data through prune() only when pruning is switched on (pruning==1)
interpolated_data_for_session=prune(interpolated_data_for_session) if pruning==1 else interpolated_data_for_session
interpolated_data_for_session

In [None]:
#number of cell columns: all columns except 5 leading non-cell ones
#(assumes the same Time, X center, Y center, Speed, Grid no. layout as the other sessions — confirm)
N_cells=interpolated_data_for_session.shape[1]-5
N_cells

In [None]:
#calculate the overall occupancy per bin
#count the samples in each grid number and convert the counts to seconds with Delta_time;
#minlength is N_bins+1 (rather than a hard-coded 257) so it follows the grid size set in the config
occupancy_time = pd.DataFrame(
    {'Duration': np.bincount(interpolated_data_for_session['Grid no.'], minlength = N_bins+1)*Delta_time}
)
#bincount also produces a slot for grid number 0; that row is removed so the table starts at 1
occupancy_time=occupancy_time.drop([0])
total_time = occupancy_time['Duration'].sum()
#fraction of the total time spent in each bin
bin_probability = occupancy_time['Duration'].div(total_time)

occupancy_time

In [None]:
#calculate the activity map for the session data (activity_map is defined outside this section);
#the duration column is combined with it in a later cell
cell_activations_per_bin=activity_map(interpolated_data_for_session)

In [None]:
#zero rows with occupancy numbers less than threshold
#one masked assignment replaces the former element-by-element loop over every row and cell
#NOTE(review): rows are matched by position, so this assumes row i of cell_activations_per_bin
#describes the same bin as row i of occupancy_time — confirm against activity_map
limit=len(cell_activations_per_bin.index)
under_occupied=(occupancy_time.iloc[:limit,0]<threshold_time).to_numpy()
cell_activations_per_bin.iloc[under_occupied,:N_cells]=0

#number of rows that were zeroed
n=int(under_occupied.sum())
print(n)


In [None]:
#place the per-bin duration next to the per-bin cell activations; concat aligns the rows on the index
#and any entries left missing by that alignment are filled with 0
true_bin_data=pd.concat([occupancy_time, cell_activations_per_bin], axis=1).fillna(0)
true_bin_data

In [None]:
#calculate the true (unshuffled) spatial information for the cells from the
#combined duration + activity table
true_spatial_information=spatial_info(true_bin_data)
true_spatial_information

In [None]:
#Next, the spatial information for the shuffles is calculated.
#This cell runs a single shuffle; its result seeds the collection that the
#following loop extends with No_shuffles more shuffles.
shuffled_raw_data = shuffle(interpolated_data_for_session)
shuffled_raw_data = prune(shuffled_raw_data) if pruning == 1 else shuffled_raw_data
shuffled_activations_per_bin = activity_map(shuffled_raw_data)
#the occupancy of the real session is reused for the shuffled activity
shuffled_bin_data = pd.concat((occupancy_time, shuffled_activations_per_bin), axis='columns').fillna(value=0)
shuffled_spatial_information = spatial_info(shuffled_bin_data)

shuffled_spatial_information_df = shuffled_spatial_information

shuffled_spatial_information_df

In [None]:
#Run No_shuffles further shuffles. Each result is collected in a list and joined on to the
#seed column with one concat at the end; concatenating inside the loop re-copied the whole,
#ever-growing frame on every iteration.
shuffle_results = []
for i in range(No_shuffles):
    shuffled_raw_data = shuffle(interpolated_data_for_session)
    if pruning == 1:
        shuffled_raw_data = prune(shuffled_raw_data)
    shuffled_activations_per_bin = activity_map(shuffled_raw_data)
    shuffled_bin_data = pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
    shuffled_spatial_information = spatial_info(shuffled_bin_data)
    shuffle_results.append(shuffled_spatial_information)

#one column per shuffle, seed shuffle first; the following cells use the columns by position only
shuffled_spatial_information_df = pd.concat([shuffled_spatial_information_df] + shuffle_results, axis=1)

shuffled_spatial_information_df

In [None]:
#sort the shuffled values of each row into ascending order
#np.sort returns a sorted copy that is written back into a new frame. Sorting .values in
#place only changes the frame when pandas hands back a writable view of its data, which is
#not guaranteed (e.g. a frame stored in several blocks, or Copy-on-Write being enabled).
shuffled_spatial_information_df = pd.DataFrame(
    np.sort(shuffled_spatial_information_df.to_numpy(), axis=1),
    index=shuffled_spatial_information_df.index,
    columns=shuffled_spatial_information_df.columns,
)
shuffled_spatial_information_df

In [None]:
#what is the n% top value for each cell?
#column position, within the sorted shuffles, of the value the true spatial info is compared with
#(the frame holds the seed shuffle plus No_shuffles more columns)
special_index = int(math.floor(No_shuffles * (100 - Spat_inf_thresh) / 100))
spatial_info_value_to_beat = shuffled_spatial_information_df.iloc[:, special_index]
spatial_info_value_to_beat

In [None]:
#true spatial info next to the shuffled top-n% value, plus the amount by which the true value exceeds it
spatial_analysis = pd.concat(
    (
        true_spatial_information.rename('Spatial info'),
        spatial_info_value_to_beat.rename('Shuffled spatial info'),
    ),
    axis='columns',
)
spatial_analysis['Difference'] = spatial_analysis['Spatial info'].sub(spatial_analysis['Shuffled spatial info'])
spatial_analysis

In [None]:
#event-count threshold, currently set to the total occupancy time of the session
#(the /60 scaling is commented out)
#NOTE(review): not referenced by the remaining cells of this session visible here - confirm it is still needed
Eventcount_threthold = total_time#/60#EventCount_Threthold
Eventcount_threthold

In [None]:
def place_cell(row):
    """Label one row of the spatial analysis table.

    Returns 'Place cell' when the row's 'Difference' entry is greater than zero,
    and 'Not place cell' in every other case.
    """
    return 'Place cell' if row['Difference'] > 0 else 'Not place cell'

In [None]:
#label every row with place_cell(), then remove the helper 'Difference' column from the final table
spatial_analysis = spatial_analysis.assign(
    **{'Place cell?': spatial_analysis.apply(place_cell, axis=1)}
).drop(columns=['Difference'])
spatial_analysis

In [None]:
#write the place-cell classification table to <newpath>/<testName>D3R1_place_cell_identity.csv
#(index=True writes the DataFrame index as the first column of the file)
spatial_analysis.to_csv(f'{newpath}/{testName}D3R1_place_cell_identity.csv', index=True)

# From here Session 6

In [None]:
#Load the combined Ca imaging and behavioural data for this session, discard the
#intermediate columns and rename the calculated speed column to 'Speed'.
#NOTE(review): Ca_data6 is not among the Ca_data..Ca_data4 names set in the file-location
#cell at the top of the notebook - confirm it is defined before this cell is run.
interpolated_data_for_session = (
    pd.read_csv(os.path.join(path, Ca_data6))
    .drop(columns=['Unnamed: 0', 'Interpolated speed', 'x-velocity', 'y-velocity'])
    .rename(columns={'Calculated speed': 'Speed'})
)
interpolated_data_for_session

In [None]:
#apply prune() to the session data only when pruning is switched on (pruning==1)
interpolated_data_for_session = (
    prune(interpolated_data_for_session) if pruning == 1 else interpolated_data_for_session
)
interpolated_data_for_session

In [None]:
#number of cells = total number of columns minus 5
#NOTE(review): the 5 presumably counts the non-cell (time/position/speed/grid) columns - confirm against the CSV layout
N_cells = interpolated_data_for_session.shape[1] - 5
N_cells

In [None]:
#calculate the overall occupancy per bin
#Count the samples recorded in each grid bin. Row 0 is dropped below, so the count array
#needs N_bins+1 slots; deriving the size from N_bins (rather than a literal 257) keeps the
#table in step with Gridcells_x/Gridcells_y if the grid is ever resized.
occupancy_time = pd.DataFrame(
    np.bincount(interpolated_data_for_session['Grid no.'], minlength=N_bins + 1),
    columns=['Freq visit'],
)
#time spent in each bin = number of samples in the bin * timestep
occupancy_time['Duration'] = occupancy_time['Freq visit'] * Delta_time
occupancy_time = occupancy_time.drop([0])
occupancy_time = occupancy_time.drop(['Freq visit'], axis=1)
total_time = occupancy_time['Duration'].sum()
#fraction of the total time spent in each bin
bin_probability = occupancy_time['Duration'].div(total_time)

occupancy_time

In [None]:
#calculate the activity map for the session
#(it is only joined with the per-bin duration data in a later cell, after thresholding)
cell_activations_per_bin=activity_map(interpolated_data_for_session)

In [None]:
#Zero the activity of every bin whose occupancy time is below threshold_time.
#n counts how many bins were zeroed; rows are matched by position, not by label.
n = 0
limit = cell_activations_per_bin.shape[0]

for i in range(limit):
    if not occupancy_time.iloc[i, 0] < threshold_time:
        continue
    n += 1
    #only the first N_cells columns of the row are cleared
    for j in range(N_cells):
        cell_activations_per_bin.iloc[i, j] = 0

print(n)


In [None]:
#put the per-bin durations next to the per-bin cell activity; gaps left by the join become 0
true_bin_data = pd.concat((occupancy_time, cell_activations_per_bin), axis='columns').fillna(value=0)
true_bin_data

In [None]:
#calculate the true (unshuffled) spatial information for the cells from the
#combined duration + activity table
true_spatial_information=spatial_info(true_bin_data)
true_spatial_information

In [None]:
#Next, the spatial information for the shuffles is calculated.
#This cell runs a single shuffle; its result seeds the collection that the
#following loop extends with No_shuffles more shuffles.
shuffled_raw_data = shuffle(interpolated_data_for_session)
shuffled_raw_data = prune(shuffled_raw_data) if pruning == 1 else shuffled_raw_data
shuffled_activations_per_bin = activity_map(shuffled_raw_data)
#the occupancy of the real session is reused for the shuffled activity
shuffled_bin_data = pd.concat((occupancy_time, shuffled_activations_per_bin), axis='columns').fillna(value=0)
shuffled_spatial_information = spatial_info(shuffled_bin_data)

shuffled_spatial_information_df = shuffled_spatial_information

shuffled_spatial_information_df

In [None]:
#Run No_shuffles further shuffles. Each result is collected in a list and joined on to the
#seed column with one concat at the end; concatenating inside the loop re-copied the whole,
#ever-growing frame on every iteration.
shuffle_results = []
for i in range(No_shuffles):
    shuffled_raw_data = shuffle(interpolated_data_for_session)
    if pruning == 1:
        shuffled_raw_data = prune(shuffled_raw_data)
    shuffled_activations_per_bin = activity_map(shuffled_raw_data)
    shuffled_bin_data = pd.concat([occupancy_time, shuffled_activations_per_bin], axis=1).fillna(0)
    shuffled_spatial_information = spatial_info(shuffled_bin_data)
    shuffle_results.append(shuffled_spatial_information)

#one column per shuffle, seed shuffle first; the following cells use the columns by position only
shuffled_spatial_information_df = pd.concat([shuffled_spatial_information_df] + shuffle_results, axis=1)

shuffled_spatial_information_df

In [None]:
#sort the shuffled values of each row into ascending order
#np.sort returns a sorted copy that is written back into a new frame. Sorting .values in
#place only changes the frame when pandas hands back a writable view of its data, which is
#not guaranteed (e.g. a frame stored in several blocks, or Copy-on-Write being enabled).
shuffled_spatial_information_df = pd.DataFrame(
    np.sort(shuffled_spatial_information_df.to_numpy(), axis=1),
    index=shuffled_spatial_information_df.index,
    columns=shuffled_spatial_information_df.columns,
)
shuffled_spatial_information_df

In [None]:
#what is the n% top value for each cell?
#column position, within the sorted shuffles, of the value the true spatial info is compared with
#(the frame holds the seed shuffle plus No_shuffles more columns)
special_index = int(math.floor(No_shuffles * (100 - Spat_inf_thresh) / 100))
spatial_info_value_to_beat = shuffled_spatial_information_df.iloc[:, special_index]
spatial_info_value_to_beat

In [None]:
#true spatial info next to the shuffled top-n% value, plus the amount by which the true value exceeds it
spatial_analysis = pd.concat(
    (
        true_spatial_information.rename('Spatial info'),
        spatial_info_value_to_beat.rename('Shuffled spatial info'),
    ),
    axis='columns',
)
spatial_analysis['Difference'] = spatial_analysis['Spatial info'].sub(spatial_analysis['Shuffled spatial info'])
spatial_analysis

In [None]:
#event-count threshold, currently set to the total occupancy time of the session
#(the /60 scaling is commented out)
#NOTE(review): not referenced by the remaining cells of this session visible here - confirm it is still needed
Eventcount_threthold = total_time#/60#EventCount_Threthold
Eventcount_threthold

In [None]:
def place_cell(row):
    """Label one row of the spatial analysis table.

    Returns 'Place cell' when the row's 'Difference' entry is greater than zero,
    and 'Not place cell' in every other case.
    """
    return 'Place cell' if row['Difference'] > 0 else 'Not place cell'

In [None]:
#label every row with place_cell(), then remove the helper 'Difference' column from the final table
spatial_analysis = spatial_analysis.assign(
    **{'Place cell?': spatial_analysis.apply(place_cell, axis=1)}
).drop(columns=['Difference'])
spatial_analysis

In [None]:
#write the place-cell classification table to <newpath>/<testName>D3R2_place_cell_identity.csv
#(index=True writes the DataFrame index as the first column of the file)
spatial_analysis.to_csv(f'{newpath}/{testName}D3R2_place_cell_identity.csv', index=True)