In [13]:
from vit_pipeline.utils import make_container_dict
from dotenv import load_dotenv
import numpy as np
import pickle
from pathlib import Path
from allensdk.core.brain_observatory_cache import BrainObservatoryCache
import os

# Load environment variables
load_dotenv()

# Set up paths
allen_cache_path = os.environ.get('HGMS_ALLEN_CACHE_PATH')
boc = BrainObservatoryCache(manifest_file=str(Path(allen_cache_path) / Path('brain_observatory_manifest.json')))

# Load experiment containers and session data
experiment_containers = make_container_dict(boc)
session_A = experiment_containers[643061996]['three_session_A']
data_set_events = boc.get_ophys_experiment_events(session_A)

dat = boc.get_ophys_experiment_data(session_A)
stim_table = dat.get_stimulus_table('natural_movie_one')

# Load the transformer embeddings
transformer_embedding_path = Path("/home/maria/Documents/HuggingMouseData/TransformerEmbeddings/google_vit-base-patch16-224_embeddings.pkl")
with open(transformer_embedding_path, 'rb') as file:
    transfr = pickle.load(file)

embedding = transfr['natural_movie_one']  # Shape: (total_time_points, embedding_dim)
embedding_dim = embedding.shape[1]

# Get trial-specific stimulus times
trial_starts = stim_table['start'].values  # Start times for each trial
num_trials = len(trial_starts)  # Total number of trials

# Extract neural events (all cells, all trials)
all_cells = data_set_events[:, trial_starts]  # Shape: (num_cells, num_trials)

def get_neuron_trial_embeddings(neuron_index, all_cells, embedding, num_trials, trial_length=900):
    """
    Compute the trial-wise embeddings for a given neuron.
    
    Args:
        neuron_index (int): Index of the neuron to analyze.
        all_cells (np.ndarray): Neural event data (num_cells x total_time_points).
        embedding (np.ndarray): Transformer embeddings (total_time_points x embedding_dim).
        num_trials (int): Total number of trials.
        trial_length (int): Number of time points per trial (default: 900).
        
    Returns:
        np.ndarray: A (num_trials x embedding_dim) array where each row corresponds to
                    the filtered embedding for a trial for the specified neuron.
    """
    embedding_dim = embedding.shape[1]  # Dimensionality of the embeddings
    
    # Initialize an array to store the filtered embeddings for each trial
    trial_embeddings = np.zeros((num_trials, embedding_dim))
    
    for trial_idx in range(num_trials):
        # Get the start and end indices for this trial
        trial_start = trial_idx * trial_length
        trial_end = trial_start + trial_length

        

        # Extract the neural activity for this neuron during the trial
        neuron_activity = all_cells[neuron_index, trial_start:trial_end]  # Shape: (900,)

        # Extract the corresponding embeddings for the trial
        trial_embedding = embedding[trial_start:trial_end, :]  # Shape: (900, embedding_dim)

        # Compute the filtered embedding for this trial (weighted sum of embeddings)
        if np.any(neuron_activity > 0):  # Only compute if neuron is active during the trial
            trial_embeddings[trial_idx, :] = neuron_activity @ trial_embedding
        else:
            trial_embeddings[trial_idx, :] = np.zeros(embedding_dim)  # Set to zero if inactive

    return trial_embeddings

# Example usage:
neuron_index = 0  # Specify the neuron index
num_trials = 10  # Assume 10 trials in the data
neuron_trial_embeddings = get_neuron_trial_embeddings(
    neuron_index=neuron_index,
    all_cells=data_set_events,
    embedding=embedding,
    num_trials=num_trials
)

print(f"Neuron {neuron_index} Trial Embeddings Shape: {neuron_trial_embeddings.shape}")


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 0 is different from 900)