In [None]:
#The Ca imaging data and behavioural data are captured at differnt rates
#Consequently, the data need to be aligned, and missing values interpolated
#This program takes the raw Ca and behavioural data for a single session 
#It interpolates behavioural data values for the session at the Ca imaging times

In [1]:
#importing libraries
import pandas as pd
import numpy as np
import os
from scipy.interpolate import interp1d 
import math

In [2]:
#define variables

#Start and end points of the Ca imaging data (Excel cell index numbers)
Ca_start=2
Ca_end=24008

In [3]:
#define filenames
#these are the folder paths
path='/Users/XXX'
folder='XX'
#output_folder='ProcessedData'

#raw beahvioural data filename
raw_behavioural_one= 'Behavioralfileshere.xlsx'

#raw calcium data filename
raw_Ca='CaFileshere.csv'

#out file filename
output_file='XXXXX_interpolated_data.csv'


In [4]:
#read in raw behavioural data for  session 

raw_behavioural_data_raw=pd.read_excel(os.path.join(path, folder, raw_behavioural_one),
                                               skiprows=[1])

29979

In [5]:
#clean up  data
raw_behavioural_data=raw_behavioural_data_raw.replace(['-'],'0') #replaces - with 0 in dataframe to correct missing speeds

#check length is as expected
len(raw_behavioural_data.index)

29979

In [6]:
#read in Ca data for session
#correct start and end point to account for different indexing
start_session=Ca_start-1
end_session=Ca_end-1

#read in Ca data and reindex
raw_calcium_data=pd.read_csv(os.path.join(path, folder, raw_Ca), 
                                         nrows=end_session)
raw_calcium_data=raw_calcium_data.drop(raw_calcium_data.index[0:start_session-1])
raw_calcium_data=raw_calcium_data.reset_index(drop=True)

#check length of Ca data is as expected
len(raw_calcium_data.index)

In [7]:
#align start times (by subtracting the initial time of the Ca imaging data from its Trial times)
t_start=raw_calcium_data.iloc[0]['Time (s)'] #get start time of session
raw_calcium_data['Time (s)']-=t_start        #subtract start time from all times so the session starts from t=0

#check start time to make sure it matches raw data file
t_start

0.0

In [8]:
#check raw Ca data looks as expected
raw_calcium_data

Unnamed: 0,Time (s),C000,C001,C002,C003,C004,C005,C006,C007,C008,...,C130,C131,C132,C133,C134,C135,C136,C137,C138,C139
0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.049962,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.099924,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.149886,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.199848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24002,1199.187924,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24003,1199.237886,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24004,1199.287848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24005,1199.337810,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
#check raw behavioural data looks as expected
raw_behavioural_data

Unnamed: 0,Trial time,Recording time,X center,Y center,Area,Areachange,Elongation,Distance moved,Velocity,Mobility,...,In zone(S 248 / Center-point),In zone(S 249 / Center-point),In zone(S 250 / Center-point),In zone(S 251 / Center-point),In zone(S 252 / Center-point),In zone(S 253 / Center-point),In zone(S 254 / Center-point),In zone(S 255 / Center-point),In zone(S 256 / Center-point),MC6
0,0.000,0.000,0.186657,0.167842,0.001785,0,0.707211,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0.040,0.040,0.186688,0.167665,0.001797,0,0,0.00018,0.004488,0,...,0,0,0,0,0,0,0,0,0,1
2,0.080,0.080,0.186719,0.167488,0.001809,0.000086,0.703982,0.00018,0.004488,2.39589,...,0,0,0,0,0,0,0,0,0,1
3,0.120,0.120,0.187034,0.167253,0.001813,0.000093,0.707035,0.000393,0.009816,2.48246,...,0,0,0,0,0,0,0,0,0,1
4,0.160,0.160,0.186988,0.166903,0.001819,0.0001,0.704697,0.000353,0.008815,2.57582,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29974,1199.847,1199.847,-0.141008,0.064072,0.002020,0.000328,0.754188,0.002514,0.062806,9.20086,...,0,0,0,0,0,0,0,0,0,1
29975,1199.887,1199.887,-0.141589,0.066950,0.002052,0.000339,0.770759,0.002937,0.073366,9.68877,...,0,0,0,0,0,0,0,0,0,1
29976,1199.927,1199.927,-0.141830,0.069697,0.002040,0.000331,0.762886,0.002757,0.068868,9.14136,...,0,0,0,0,0,0,0,0,0,1
29977,1199.967,1199.967,-0.141975,0.072597,0.002029,0.000407,0.748793,0.002904,0.072554,8.84052,...,0,0,0,0,0,0,0,0,0,1


In [10]:
#adjustments to make sure the Ca imaging data ends before the behavioural data for interolation
behavioural_end_time=raw_behavioural_data['Trial time'].max()
raw_calcium_data=raw_calcium_data.drop(raw_calcium_data[raw_calcium_data['Time (s)']>behavioural_end_time].index)
length_Ca_data=len(raw_calcium_data)

#check length of updated Ca data
length_Ca_data

24007

In [11]:
#determined timestep in behavioural data
behavioural_length=len(raw_behavioural_data.index)
Delta_t_behavioural=(raw_behavioural_data['Trial time'][behavioural_length-1])/behavioural_length
Delta_t_behavioural

0.04002825311051069

In [12]:
#get the grid cell data from the raw beahvioural data
grid_data_raw=raw_behavioural_data.drop(['Trial time','Recording time', 'X center','Y center', 'Area', 'Areachange', 'Elongation', 'Distance moved', 'Velocity', 'Mobility'], axis=1)
grid_data_raw=grid_data_raw.drop(['Mobility state(Mobile)','Mobility state(Immobile)', 'Movement(Moving / Center-point)', 'Movement(Not Moving / Center-point)'], axis=1)
grid_data_raw = grid_data_raw.iloc[: , :-1]
#grid_data_raw=grid_data_raw.drop(['MC4'], axis=1)
grid_data_raw=grid_data_raw.reset_index(drop=True).T.reset_index(drop=True).T

#check raw grid data is as expected
grid_data_raw

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29974,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29975,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29976,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29977,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
#get the grid cell at each time from the raw ones and zeros of the behavioural data
grid_data_behavioural=grid_data_raw.idxmax(axis=1)
grid_data_behavioural=grid_data_behavioural.to_frame()
grid_data_behavioural=grid_data_behavioural.rename(columns={0:'Grid no.'})
grid_data_behavioural=grid_data_behavioural+1
grid_data_behavioural

Unnamed: 0,Grid no.
0,32
1,32
2,32
3,32
4,32
...,...
29974,98
29975,82
29976,82
29977,82


In [14]:
#interpolate behavioural data for session (positions, speeds and grid locations)

#set up interpolation functions
behavioural_times=raw_behavioural_data['Trial time'] #extract the behavioural data time points
behavioural_x_positions=raw_behavioural_data['X center'] #extract the behavioural data x-positions points
behavioural_y_positions=raw_behavioural_data['Y center'] #extract the behavioural data y-positions points
behavioural_speeds=raw_behavioural_data['Velocity'] #extract the behavioural data y-positions points

x_center_interp = interp1d(behavioural_times, behavioural_x_positions) #define x interploation function
y_center_interp = interp1d(behavioural_times, behavioural_y_positions) #define y interpolation function 
speed_interp= interp1d(behavioural_times, behavioural_speeds) #define speed interpolation function

#create empty lists for x and y positions, speed and grid no.

time=[]
x_positions=[]
y_positions=[]
speed=[]
grid=[]

#interpolate the x and y positions and speeds at the Ca imaging times and determine gridcells at those times
for i in range(length_Ca_data):
    interpolation_time=raw_calcium_data['Time (s)'][i] #get the time for this iteration
    
    #interpolate values
    interpolated_x_value=x_center_interp(interpolation_time) #interpolate the x value
    interpolated_y_value=y_center_interp(interpolation_time) #interpolate the y value
    interpolated_speed_value=speed_interp(interpolation_time)#interpolate the speed value
    
    #calculate gridcell
    grid_index=abs(raw_behavioural_data['Trial time'] - interpolation_time).idxmin()
    grid_square=grid_data_behavioural['Grid no.'][grid_index]
        
    #update lists
    time.append(interpolation_time)          #add time to timelist
    x_positions.append(interpolated_x_value) #add interpolated x values to x list
    y_positions.append(interpolated_y_value) #add interpolated y values to y list
    speed.append(interpolated_speed_value)   #add interpolated y values to y list
    grid.append(grid_square)                 #add grid no. to grid list

#check length of data is as expected    
len(grid)

24007

In [15]:
#calculate the speed from the interpolated x and y positions for session
#More accurate than interpolateed speed above

#define time between samples (i.e. the time between each Ca imaging sample)
delta_time=time[1]

#create list with speed for t=0
initial_x_velocity=(x_positions[1]-x_positions[0])/(delta_time)
initial_y_velocity=(y_positions[1]-y_positions[0])/(delta_time)
initial_speed=math.sqrt(initial_x_velocity*initial_x_velocity+initial_y_velocity*initial_y_velocity)

calculated_speed=[initial_speed]
x_velocity=[initial_x_velocity]
y_velocity=[initial_y_velocity]

#calculate speeds up to penultimate time step
for i in range(1, length_Ca_data-1):
    
    #calculate x and y velocities
    velocity_x=(x_positions[i+1]-x_positions[i-1])/(2*delta_time)
    velocity_y=(y_positions[i+1]-y_positions[i-1])/(2*delta_time)
    
    #calculate speed
    mod_velocity=math.sqrt(velocity_x*velocity_x+velocity_y*velocity_y)
    
    #update lists
    x_velocity.append(velocity_x)
    y_velocity.append(velocity_y)
    calculated_speed.append(mod_velocity)

#calculate final speed
i=length_Ca_data-1
velocity_x=(x_positions[i]-x_positions[i-1])/(delta_time)
velocity_y=(y_positions[i]-y_positions[i-1])/(delta_time)
mod_velocity=math.sqrt(velocity_x*velocity_x+velocity_y*velocity_y)
x_velocity.append(velocity_x)
y_velocity.append(velocity_y)
calculated_speed.append(mod_velocity)

#check length of speed list
len(calculated_speed)   

24007

In [16]:
#combines the interpolated data and the raw Ca Imaging data for session and writes to file

#create a dataframe of the interpolated poistions from the lists

interpolated_positions=pd.DataFrame(list(zip(time, x_positions, y_positions, speed, x_velocity, y_velocity, calculated_speed, grid)),
                 columns = ['Time','X center','Y center', 'Interpolated speed', 'x-velocity', 'y-velocity', 'Calculated speed', 'Grid no.'])

#combine the interpolated positions, interpolated speed and grid no.s with the Ca image data
interpolated_data=pd.concat([interpolated_positions, raw_calcium_data.drop("Time (s)", axis=1)], axis = 1)

#write to file
interpolated_data.to_csv(os.path.join(path, folder, output_file))


In [17]:
#check interpolated dataframe is as expected
interpolated_data

Unnamed: 0,Time,X center,Y center,Interpolated speed,x-velocity,y-velocity,Calculated speed,Grid no.,C000,C001,...,C130,C131,C132,C133,C134,C135,C136,C137,C138,C139
0,0.000000,0.186657,0.167842,0.0,0.000775,-0.004425,0.004492,32,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.049962,0.18669572055,0.16762091815,0.004488262604499999,0.002191,-0.004714,0.005198,32,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.099924,0.1868759015,0.1673709465,0.007142026914,0.003041,-0.006299,0.006995,32,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.149886,0.18699963109999998,0.1669914975,0.009068424580499999,-0.000433,-0.009628,0.009638,32,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.199848,0.1868325928,0.1664088848,0.012965170681999998,-0.011610,-0.022244,0.025092,32,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24002,1199.187924,-0.13184878959999777,0.04628499650000315,0.08603648287997634,0.050996,0.062064,0.080328,99,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24003,1199.237886,-0.12976101570000761,0.04931007145998878,0.07263441715002703,0.042090,0.051776,0.066726,99,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24004,1199.287848,-0.12764301640000378,0.05145869567999987,0.058425591520044,0.038970,0.022924,0.045212,99,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24005,1199.337810,-0.12586698849999994,0.051600704300000015,0.034847524349999014,0.020525,0.006480,0.021524,99,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
