### Perform autocorrelation analysis of neurons across worms and datasets
---
@anshul Please provide a descriptive header title and then a short paragraph description of every notebook you write.

---
This notebook studies the auto-correlation and cross-correlation structure of different neurons. At what lag do these correlations fall to zero?

Temporal Structure Analysis and Request for Guidance: While we acknowledge the potential insights that could be derived from studying the auto-correlation and cross-correlation structure of different neurons, we face certain challenges with our dataset that make the execution of this analysis complex.

Variability Across Subjects: The measured neurons vary from worm to worm, which means standardizing a correlation analysis across multiple datasets is not straightforward. If in a particular worm there are $k$ measured/labelled neurons (out of the possible $300$), we would be conducting $k$ autocorrelations and $k(k-1)$ cross-correlations for that worm, but the comparison across different worms and datasets becomes less clear.

Aggregation of Analysis: Aggregating this data meaningfully poses a significant challenge. We are considering focusing on common neurons measured across worms and applying statistical methods to manage missing data.

The cross-correlation and auto-correlation need only be computed for a relatively small number of lags, since activity at long lags becomes increasingly independent (behavior is not typically periodic). While I agree that this requires some careful bookkeeping, it is straightforward to script. Estimates of the correlation functions would be computed within each worm recording for all neuron pairs (including each neuron with itself). Then, when averaging across the worm recordings in a dataset, not every pair will be present in every recording; simply average over the recordings in which the pair is available. The number of recordings contributing to the average will therefore vary from pair to pair, but that is fine.

---

*Last updated: 1 July 2024*

In [None]:
import os
import shutil
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from utils import NEURON_LABELS
from data._utils import pick_worm, load_dataset

In [None]:
def correlate_with_specific_lag(f, g, lag):
    """
    Return the un-normalized correlation of two 1D signals at a single lag.

    Computes sum_t f[t] * g[t + lag] over every t for which both samples
    exist. Samples outside either signal contribute zero, so the inputs may
    have different lengths.

    Parameters:
    - f: 1D array-like, the reference signal.
    - g: 1D array-like, the signal that is shifted `lag` samples to the left.
    - lag: Non-negative integer shift applied to `g`.

    Returns:
    - The scalar sum of products (0 when the shifted signals do not overlap).

    Raises:
    - ValueError: if `lag` is negative.
    """
    if lag < 0:
        raise ValueError("lag must be non-negative")

    f = np.asarray(f)
    g = np.asarray(g)

    # Shift g exactly once. Padding with zeros up to a common length is
    # equivalent to truncating both signals to their overlapping region.
    shifted = g[lag:]
    overlap = min(len(f), len(shifted))
    return np.sum(f[:overlap] * shifted[:overlap])

In [None]:
def cross_correlate(X1, X2, lag_limit):
    """
    Return the peak-normalized correlation of two signals for lags 0 .. lag_limit - 1.

    Parameters:
    - X1: 1D array, the reference signal.
    - X2: 1D array, the signal that is shifted by each lag.
    - lag_limit: Number of lags to evaluate, starting at lag 0.

    Returns:
    - A 1D numpy array of length `lag_limit` holding the raw lagged sums of
      products divided by their largest absolute value. If every raw value is
      zero the division is 0/0 and the result is all NaN.
    """
    raw_scores = np.array(
        [correlate_with_specific_lag(X1, X2, lag) for lag in range(lag_limit)]
    )

    # Scale so that the largest-magnitude entry becomes +1 or -1.
    peak = np.max(np.abs(raw_scores))
    return raw_scores / peak

In [None]:
def save_crosscorrelation(X, worm_idx=0, lag_limit=100, dataset="", mask=None):
    """
    Compute the lagged cross-correlation of every pair of measured neurons in
    one worm and save the result to disk.

    Parameters:
    - X: A 2D numpy array of shape (max_timesteps, num_neurons) containing the neural trajectory data.
    - worm_idx: Identifier of the worm; used in the output file name.
    - lag_limit: Number of lags (0 .. lag_limit - 1) computed for each pair.
    - dataset: Identifier of the dataset; used in the output file name.
    - mask: Sequence with one truthy/falsy entry per column of X marking which
      neurons were measured. If None, every column of X is treated as measured.

    Returns:
    - None: An array of shape (len(mask), len(mask), lag_limit) is written to
      ../analysis/figures/corr_data/worm_<dataset>_<worm_idx>.npy. Entries for
      pairs in which either neuron is not measured are NaN.
    """
    if mask is None:
        mask = np.ones(X.shape[1], dtype=bool)

    num_neurons = len(mask)

    # Start from NaN everywhere so that only measured pairs need to be visited.
    worm_corr_data = np.full((num_neurons, num_neurons, lag_limit), np.nan)
    measured = [idx for idx in range(num_neurons) if bool(mask[idx])]

    for i in tqdm(measured):
        for j in measured:
            # 0 -> lag_limit
            worm_corr_data[i, j] = cross_correlate(X[:, i], X[:, j], lag_limit)

    out_dir = "../analysis/figures/corr_data"
    os.makedirs(out_dir, exist_ok=True)

    np.save(f"{out_dir}/worm_{dataset}_{worm_idx}.npy", worm_corr_data)

In [None]:
# Names of the datasets to process. The position of a name in this list is
# the dataset index that ends up in the saved file names.
dataset_names = [
    "Kato2015",
    "Nichols2017",
    "Skora2018",
    "Kaplan2020",
    "Yemini2021",
    "Uzel2022",
    "Lin2023",
    "Leifer2023",
    "Flavell2023",
]
datasets = [load_dataset(name) for name in dataset_names]

# For dataset index 6 (Lin2023) only worms with index > 443 are processed.
# NOTE(review): this looks like a leftover from resuming an interrupted run -
# confirm whether worms 0..443 of that dataset should still be skipped.
partial_dataset_idx = 6
last_skipped_worm_idx = 443

for i, dataset in enumerate(datasets):
    worms = list(dataset.keys())
    for idx, worm in enumerate(tqdm(worms)):
        if i == partial_dataset_idx and idx <= last_skipped_worm_idx:
            continue

        single_worm_dataset = pick_worm(dataset, worm)

        data = single_worm_dataset["calcium_data"]
        mask = single_worm_dataset["named_neurons_mask"]

        # Pass every column, not just the masked ones: save_crosscorrelation
        # indexes the columns of X with positions taken from the mask.
        X = data.numpy()

        # compute the lagged cross-correlations of this worm and save them,
        # tagged with the dataset index `i` and the worm index `idx`
        save_crosscorrelation(X, idx, 100, i, mask)

# Produce figures from data

In [None]:
# collect all data
print("Collecting data...")

corr_data_dir = "../analysis/figures/corr_data"
corr_figs_dir = "../analysis/figures/corr_figs"

# Only the per-worm arrays named "worm_<dataset index>_<worm index>.npy" are
# relevant; any other file in the directory is ignored.
worm_files = sorted(
    name
    for name in os.listdir(corr_data_dir)
    if name.startswith("worm_") and name.endswith(".npy")
)
print(worm_files)

datasets = ["Kato2015", "Nichols2017", "Skora2018", "Kaplan2020", "Yemini2021", "Uzel2022", "Lin2023", "Leifer2023", "Flavell2023"]

# Dataset indices for which no figures are produced.
# NOTE(review): 0 (Kato2015) and 6 (Lin2023) were hard-coded as skipped -
# confirm that this is still intended.
skipped_sets = {0, 6}

os.makedirs(corr_figs_dir, exist_ok=True)

# Group the files by the dataset index encoded in their name, so that the
# index stays correct even when a dataset has no files at all.
files_by_set = {}
for file in worm_files:
    file_set = int(file.split("_")[1])
    files_by_set.setdefault(file_set, []).append(file)

for set_idx, files in sorted(files_by_set.items()):
    if set_idx in skipped_sets:
        continue

    # One array per worm, stacked along a new leading axis:
    # (num_worms, num_neurons, num_neurons, num_lags)
    all_data = np.stack(
        [np.load(f"{corr_data_dir}/{file}") for file in tqdm(files)]
    )
    lags = np.arange(all_data.shape[-1])

    # filter data
    print("Filtering data...")

    # valid[w, i, j] is True when worm w has a NaN-free curve for pair (i, j)
    valid = ~np.isnan(all_data).any(axis=-1)

    # generate plots
    print("Generating plots...")

    for i, neuron1 in enumerate(tqdm(NEURON_LABELS)):
        neuron_dir = f"{corr_figs_dir}/{set_idx}/{neuron1}"

        for j, neuron2 in enumerate(NEURON_LABELS):
            # Curves of every worm in which this pair is available
            curves = all_data[valid[:, i, j], i, j]
            if len(curves) == 0:
                continue

            mean = np.mean(curves, axis=0)
            std = np.std(curves, axis=0)

            fig, ax = plt.subplots()
            ax.set_ylim(-1, 1)

            ax.set_title(f"Dataset {datasets[set_idx]}: {neuron1}_{neuron2}")
            ax.set_xlabel("Lag")
            ax.set_ylabel("Correlation Score")

            # Thick line: mean across worms; thin lines: individual worms
            ax.plot(lags, mean, linewidth=5.0, alpha=1.0, color="cornflowerblue")
            for curve in curves:
                ax.plot(lags, curve, alpha=0.3, color="cornflowerblue")

            # Shaded band: mean +/- z standard deviations
            z = 1
            ax.fill_between(lags, mean - (std * z), mean + (std * z), alpha=0.4)

            os.makedirs(neuron_dir, exist_ok=True)
            fig.savefig(f"{neuron_dir}/{neuron1}_{neuron2}.png")

            # Close the figure; otherwise every saved figure stays open and
            # memory grows with the number of neuron pairs.
            plt.close(fig)
                        

# Extract the autocorrelation figures

In [None]:
# For every <dataset>/<neuron> folder under corr_figs, build the path of the
# figure that pairs the neuron with itself (its autocorrelation).
file_paths = [
    f"../analysis/figures/corr_figs/{dataset_dir}/{neuron_dir}/{neuron_dir}_{neuron_dir}.png"
    for dataset_dir in os.listdir("../analysis/figures/corr_figs")
    for neuron_dir in os.listdir("../analysis/figures/corr_figs/" + dataset_dir)
]
file_paths

In [None]:
# Copy every autocorrelation figure into autocorr_figs/<dataset>/, mirroring
# the per-dataset layout of corr_figs.
autocorr_dir = "../analysis/figures/autocorr_figs"
os.makedirs(autocorr_dir, exist_ok=True)

for file in file_paths:
    # file is ".../corr_figs/<dataset>/<neuron>/<neuron>_<neuron>.png"
    neuron_dir, filename = os.path.split(file)
    dataset = os.path.basename(os.path.dirname(neuron_dir))

    if not os.path.isfile(file):
        # A neuron folder can exist without its self-pair figure.
        print(f"Skipping missing figure: {file}")
        continue

    # Keep one folder per dataset so that figures of the same neuron from
    # different datasets do not overwrite each other.
    os.makedirs(f"{autocorr_dir}/{dataset}", exist_ok=True)
    shutil.copyfile(file, f"{autocorr_dir}/{dataset}/{filename}")