In [None]:
#This program calculates the mean population vector correlations between sessions across all mice
#It also compares the mean population vector correlations to shuffled population vector correlations
#The output is a comparison of the population vector correlations to a threshold value of the shuffled correlations (the 99% value)

In [None]:
#importing libraries
import pandas as pd
import glob
import os
import numpy as np
from scipy import signal
import math

# Display options: show every row and column when a DataFrame is rendered.
# The full option name is used rather than relying on pandas' pattern matching.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Location of the csv files for the output; results go into a '99%' subfolder.
out_path=r'/Users/your_output_path_here'
newpath = out_path + r'/99%'
# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that could race with another process.
os.makedirs(newpath, exist_ok=True)

In [None]:
# Spatial resolution of the ratemaps: a square grid, Grid_dimension bins per side.
Grid_dimension = 16
N_bins = Grid_dimension ** 2

# How many shuffles are drawn for the shuffled comparison distribution.
No_shuffles = 1000

In [None]:
path_1 = r'/Users/location_of_session_1_ratemaps' # use your path for session 1 ratemaps

# Collect every ratemap csv of session 1 (cells of interest from all mice).
# glob returns files in an arbitrary, filesystem-dependent order, so the list is
# sorted to give a reproducible column order across machines and runs.
all_files_1 = sorted(glob.glob(os.path.join(path_1, "*.csv")))
if not all_files_1:
    # pd.concat would fail with an opaque "No objects to concatenate" message
    raise ValueError(f"No ratemap csv files found in {path_1!r}")

# Read the files and join them side by side
# (presumably one column per cell -- TODO confirm against the csv layout).
df_1 = pd.concat((pd.read_csv(f) for f in all_files_1), axis=1, ignore_index=False)
# Every csv contributes an 'Unnamed: 0' column (the saved index); drop all of them.
session_1 = df_1.drop(['Unnamed: 0'], axis=1)

# Preview only the first rows instead of dumping the whole table
session_1.head()

In [None]:
path_2 = r'/Users/location_of_session_2_ratemaps' # use your path for session 2 ratemaps

# Collect every ratemap csv of session 2 (cells of interest from all mice).
# glob returns files in an arbitrary, filesystem-dependent order, so the list is
# sorted to give a reproducible column order across machines and runs.
all_files_2 = sorted(glob.glob(os.path.join(path_2, "*.csv")))
if not all_files_2:
    # pd.concat would fail with an opaque "No objects to concatenate" message
    raise ValueError(f"No ratemap csv files found in {path_2!r}")

# Read the files and join them side by side
# (presumably one column per cell -- TODO confirm against the csv layout).
session_2 = pd.concat((pd.read_csv(f) for f in all_files_2), axis=1, ignore_index=False)
# Every csv contributes an 'Unnamed: 0' column (the saved index); drop all of them.
session_2 = session_2.drop(['Unnamed: 0'], axis=1)

In [None]:
path_3 = r'/Users/location_of_session_3_ratemaps' # use your path for session 3 ratemaps

# Collect every ratemap csv of session 3 (cells of interest from all mice).
# glob returns files in an arbitrary, filesystem-dependent order, so the list is
# sorted to give a reproducible column order across machines and runs.
all_files_3 = sorted(glob.glob(os.path.join(path_3, "*.csv")))
if not all_files_3:
    # pd.concat would fail with an opaque "No objects to concatenate" message
    raise ValueError(f"No ratemap csv files found in {path_3!r}")

# Read the files and join them side by side
# (presumably one column per cell -- TODO confirm against the csv layout).
session_3 = pd.concat((pd.read_csv(f) for f in all_files_3), axis=1, ignore_index=False)
# Every csv contributes an 'Unnamed: 0' column (the saved index); drop all of them.
session_3 = session_3.drop(['Unnamed: 0'], axis=1)

In [None]:
path_4 = r'/Users//location_of_session_4_ratemaps' # use your path for session 4 ratemaps

# Collect every ratemap csv of session 4 (cells of interest from all mice).
# glob returns files in an arbitrary, filesystem-dependent order, so the list is
# sorted to give a reproducible column order across machines and runs.
all_files_4 = sorted(glob.glob(os.path.join(path_4, "*.csv")))
if not all_files_4:
    # pd.concat would fail with an opaque "No objects to concatenate" message
    raise ValueError(f"No ratemap csv files found in {path_4!r}")

# Read the files and join them side by side
# (presumably one column per cell -- TODO confirm against the csv layout).
session_4 = pd.concat((pd.read_csv(f) for f in all_files_4), axis=1, ignore_index=False)
# Every csv contributes an 'Unnamed: 0' column (the saved index); drop all of them.
session_4 = session_4.drop(['Unnamed: 0'], axis=1)

In [None]:
# Population vector correlation, session 1 vs session 3:
# correlate the two sessions row by row (columns are matched by label),
# then average the per-row correlations into a single value.
pop_vector_correlation_1_3 = session_1.corrwith(other=session_3, axis='columns')
pop_vector_correlation_1_3_mean = pop_vector_correlation_1_3.mean()

pop_vector_correlation_1_3_mean

In [None]:
# Population vector correlation, session 2 vs session 4:
# correlate the two sessions row by row (columns are matched by label),
# then average the per-row correlations into a single value.
pop_vector_correlation_2_4 = session_2.corrwith(other=session_4, axis='columns')
pop_vector_correlation_2_4_mean = pop_vector_correlation_2_4.mean()

pop_vector_correlation_2_4_mean

In [None]:
# Gather the two real (unshuffled) mean correlations in a list: [1-3, 2-4]
pop_vector_correlation_means = [
    pop_vector_correlation_1_3_mean,
    pop_vector_correlation_2_4_mean,
]

pop_vector_correlation_means

In [None]:
#this part shuffles the data to determine values to beat for significance

# Fixed seed so the shuffled distribution (and the threshold derived from it)
# is identical on every Restart & Run All. Change the seed to draw a new set.
SHUFFLE_SEED = 0
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)


def _shuffle_columns(ratemaps, random_state):
    """Return a copy of ``ratemaps`` with its columns in random order.

    The columns are permuted, the row index is reset to 0..n_rows-1 and the
    columns are relabelled 0..n_columns-1, so that a later label-aligned
    ``corrwith`` pairs columns by their shuffled position instead of by their
    original label.

    Parameters
    ----------
    ratemaps : pandas.DataFrame
        Frame whose columns are shuffled; it is not modified.
    random_state : numpy.random.RandomState
        Random source; its state advances on every call, so successive calls
        produce different permutations.

    Returns
    -------
    pandas.DataFrame
        The column-shuffled, relabelled copy.
    """
    shuffled = ratemaps.sample(frac=1, axis=1, random_state=random_state).reset_index(drop=True)
    shuffled.columns = range(shuffled.columns.size)
    return shuffled


#set up arrays for shuffled correlation values (one mean per shuffle)
pop_vector_correlation_1_3_mean_shuffled=np.zeros((No_shuffles))
pop_vector_correlation_2_4_mean_shuffled=np.zeros((No_shuffles))

for i in range(No_shuffles):

    #shuffle columns of every session independently
    shuffled_data_session_one = _shuffle_columns(session_1, shuffle_rng)
    shuffled_data_session_two = _shuffle_columns(session_2, shuffle_rng)
    shuffled_data_session_three = _shuffle_columns(session_3, shuffle_rng)
    shuffled_data_session_four = _shuffle_columns(session_4, shuffle_rng)

    #calculate row-wise correlations between shuffled population vectors
    pop_vector_correlation_1_3_shuffled=shuffled_data_session_one.corrwith(shuffled_data_session_three, axis = 1)
    pop_vector_correlation_2_4_shuffled=shuffled_data_session_two.corrwith(shuffled_data_session_four, axis = 1)

    #average the correlations once per session pair ...
    shuffled_pop_vector_correlation_1_3_mean=pop_vector_correlation_1_3_shuffled.mean()
    shuffled_pop_vector_correlation_2_4_mean=pop_vector_correlation_2_4_shuffled.mean()

    #... and store that average for this shuffle
    pop_vector_correlation_1_3_mean_shuffled[i]=shuffled_pop_vector_correlation_1_3_mean
    pop_vector_correlation_2_4_mean_shuffled[i]=shuffled_pop_vector_correlation_2_4_mean


In [None]:
# Wrap each array of shuffled mean correlations in a single-column DataFrame
# (one row per shuffle) so it can be summarised and written to csv.
pv_1_3_mean_shuffled = pd.DataFrame(data=pop_vector_correlation_1_3_mean_shuffled)
pv_2_4_mean_shuffled = pd.DataFrame(data=pop_vector_correlation_2_4_mean_shuffled)

In [None]:
# Save the per-shuffle mean correlations (with their row index) for plotting
shuffles_1_3_csv = f'{newpath}/PV_shuffles1_3.csv'
shuffles_2_4_csv = f'{newpath}/PV_shuffles2_4.csv'

pv_1_3_mean_shuffled.to_csv(shuffles_1_3_csv, index=True)
pv_2_4_mean_shuffled.to_csv(shuffles_2_4_csv, index=True)

In [None]:
# Summary statistics (the output of .describe) for each shuffled distribution
des_pv_1_3_mean_shuffled, des_pv_2_4_mean_shuffled = (
    shuffled_means.describe()
    for shuffled_means in (pv_1_3_mean_shuffled, pv_2_4_mean_shuffled)
)

In [None]:
# Label the single statistics column (default name 0) with its session pair
des_pv_1_3_mean_shuffled = des_pv_1_3_mean_shuffled.rename({0: '1-3'}, axis='columns')
des_pv_2_4_mean_shuffled = des_pv_2_4_mean_shuffled.rename({0: '2-4'}, axis='columns')

In [None]:
#combine statistics of shuffled values (one column per session pair) and write to file
shuffle_description = pd.concat([des_pv_1_3_mean_shuffled, des_pv_2_4_mean_shuffled], axis=1)
shuffle_description.to_csv(f'{newpath}/PVshuffle_description_99%.csv', index=True)

# Keep the table as the last expression of the cell: a bare expression in the
# middle of a cell is not displayed by the notebook.
shuffle_description

In [None]:
# Put both shuffled distributions in ascending order
pop_vector_correlation_1_3_mean_shuffled, pop_vector_correlation_2_4_mean_shuffled = (
    np.sort(shuffled_means)
    for shuffled_means in (
        pop_vector_correlation_1_3_mean_shuffled,
        pop_vector_correlation_2_4_mean_shuffled,
    )
)

# Threshold ("value to beat"): the sorted shuffled value located 99% of the way
# through the distribution, i.e. where the top 1% of shuffles begins.
beat_index = No_shuffles * 99 // 100
pop_vector_correlation_means_to_beat = [
    sorted_means[beat_index]
    for sorted_means in (
        pop_vector_correlation_1_3_mean_shuffled,
        pop_vector_correlation_2_4_mean_shuffled,
    )
]

pop_vector_correlation_means_to_beat

In [None]:
# Output table: first row holds the real mean correlations, second row the
# shuffled values to beat; one column per session pair.
pv_rows = [pop_vector_correlation_means, pop_vector_correlation_means_to_beat]
pv_row_labels = ["Correlation mean", "Shuffled correlation mean"]
pv_column_labels = ["1-3", "2-4"]

PV = pd.DataFrame(data=pv_rows, index=pv_row_labels, columns=pv_column_labels)
PV

In [None]:
# Save the real-vs-shuffled comparison table, row labels included
pv_csv = f'{newpath}/PV.csv'
PV.to_csv(pv_csv, index=True)