# View GCaMP mismatch closed- and open-loop sessions 1 and 2

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from scipy.stats import pearsonr, spearmanr
from analysis_functions import *
from model_functions import *
import matplotlib.patches as patches
import seaborn as sns

In [None]:
# Per-mouse thresholds keyed by mouse ID (presumably run-speed thresholds,
# going by the name; units are not stated in this notebook -- TODO confirm).
RunTresholdDict = dict(B3M1=145, B3M2=295, B3M3=325, B2M4=110, B2M5=180)

In [None]:
# Load both recording sessions. dtype_dict is not defined in this notebook; it
# is expected to come from one of the star-imported project modules.
GCaMP_MM_1, GCaMP_MM_2 = (
    pd.read_csv(csv_path, dtype=dtype_dict)
    for csv_path in (
        'GCaMP_MMclosed_open_session1.csv',
        'GCaMP_MMclosed_open_session2.csv',
    )
)

In [None]:
# Rename the 'event' column to 'halt' in both sessions. The boolean inversion
# below is commented out, so the column values are kept exactly as loaded.
#GCaMP_MM_1.loc[:, 'event'] = GCaMP_MM_1['event'].replace({False: True, True: False})
for _session_df in (GCaMP_MM_1, GCaMP_MM_2):
    _session_df.rename(columns={'event': 'halt'}, inplace=True)


In [None]:
# Use the 'Seconds' column as the index so rows are addressed by the time in
# seconds since the start.
for _session_df in (GCaMP_MM_1, GCaMP_MM_2):
    _session_df.set_index('Seconds', inplace=True)


In [None]:
# Check that the same mice are present in both datasets.
for _session_df in (GCaMP_MM_1, GCaMP_MM_2):
    print(_session_df.mouseID.unique())


In [None]:
## View session data
- Loop through the mouse names present in one of the dataframes (ideally they should be the same)
- save a variable where each session is saved for the current mouse
- use the view_session_mouse() function from analysis_functions.py to plot the Delta F/F 470 fluorescence and movement in X direction with halts in grey, and session blocks marked in colors.
- Edit the function to plot different fluorescence traces, movements, and eyes.
- This is mostly to get an impression of the overall data trends.

In [None]:
from pathlib import Path

# savefig does not create missing directories, so make sure the output folder
# exists before the first figure is written.
figure_dir = Path('Figures')
figure_dir.mkdir(parents=True, exist_ok=True)

for mouse in GCaMP_MM_1.mouseID.unique():
    # One entry per session holding only the rows of the current mouse.
    mousedata = {
        'session 1': GCaMP_MM_1.loc[GCaMP_MM_1.mouseID == mouse],
        'session 2': GCaMP_MM_2.loc[GCaMP_MM_2.mouseID == mouse],
    }
    # view_session_mouse is a project helper (star-imported); it returns the
    # figure and axes it drew.
    fig, ax = view_session_mouse(mousedata, mouse)
    fig.savefig(figure_dir / f'GCaMP_{mouse}_view_alignment.png', format='png', dpi=300)

In [None]:
# Split every session into per-mouse closed-loop and open-loop subsets.
# Resulting layout: mouse_data[session][block][mouse] -> filtered data.
_session_frames = {'session 1': GCaMP_MM_1, 'session 2': GCaMP_MM_2}
_block_filters = {'closedloop': 'closed_block', 'openloop': 'open_block'}

mouse_data = {
    session_name: {block_name: {} for block_name in _block_filters}
    for session_name in _session_frames
}

for _session_name, _frame in _session_frames.items():
    for mouse in _frame.mouseID.unique():
        for _block_name, _block_tag in _block_filters.items():
            # filter_data is a project helper (star-imported).
            mouse_data[_session_name][_block_name][mouse] = filter_data(
                _frame, filters = [mouse, _block_tag]
            )

def align_to_event_start(df, trace, event_col, range_around_event, sampling_rate=0.001):
    """
    Cut fixed-length windows of a trace around the start of every event.

    An event start is a row where ``event_col`` is True while the previous row
    is not. For each start, the samples of ``trace`` between
    ``start - range_around_event[0]`` and ``start + range_around_event[1]``
    (in index units) are placed on a common relative time axis. Samples that
    are missing at either edge of the window are filled with NaN, so every
    window has the same length and stays aligned to the event start even when
    the event lies close to the beginning or end of the data.

    Parameters:
    - df (DataFrame): Data whose (sorted) index is the time axis. Must contain
      ``trace``, a boolean ``event_col``, 'movementX' and 'movementY'.
    - trace (str): Name of the column to cut into windows.
    - event_col (str): Name of the boolean column marking events.
    - range_around_event (sequence): ``[before, after]`` extent of the window
      around each event start, in index units.
    - sampling_rate (float): Spacing between consecutive samples in index
      units (default 0.001). Despite the name this is an interval, not a
      frequency.

    Returns:
    - trace_chunks (DataFrame): One column per event (labelled with the event
      start time), indexed by time relative to the event start.
    - bsl_trace_chunks (DataFrame): Same layout, holding the output of the
      project helper ``baseline`` applied to each window.
    - movement_speeds (DataFrame): Rows 'Mean_moveX' and 'Mean_moveY' with the
      mean of 'movementX' / 'movementY' from window start to event start.

    If no event start is found a message is printed and ``0, 0, 0`` is
    returned instead (kept for backward compatibility with existing callers).
    """
    before_0 = range_around_event[0]
    after_0 = range_around_event[1]

    # Event starts: the flag is True here and was not True on the previous row.
    is_event = df[event_col]
    event_times = df.loc[is_event & ~is_event.shift(1, fill_value=False)].index

    if len(event_times) == 0:
        print('could not align to events because there were none, will return nothing')
        return 0, 0, 0

    # round() instead of int() truncation: float division such as 0.3 / 0.1
    # can land just below the intended integer.
    target_length = int(round((before_0 + after_0) / sampling_rate)) + 1  # Include both ends
    common_index = pd.Index(np.linspace(-before_0, after_0, target_length))

    trace_chunk_list = []
    bsl_trace_chunk_list = []
    run_speed_list = []
    turn_speed_list = []
    event_index_list = []

    for event_time in event_times:
        start = event_time - before_0
        end = event_time + after_0

        samples = df[trace].loc[start:end]
        runspeed = df['movementX'].loc[start:event_time].mean()  # mean run speed up until the event
        turningspeed = df['movementY'].loc[start:event_time].mean()

        # Place the samples on the common grid. If the window starts before the
        # first available sample, shift the data right by the number of missing
        # samples so that relative time 0 still coincides with the event start.
        values = samples.to_numpy(dtype=float)
        n_leading = 0
        if len(samples) > 0:
            n_leading = max(int(round((samples.index[0] - start) / sampling_rate)), 0)
        n_leading = min(n_leading, target_length)
        n_copy = min(len(values), target_length - n_leading)  # drop any surplus sample

        aligned = np.full(target_length, np.nan)
        aligned[n_leading:n_leading + n_copy] = values[:n_copy]
        chunk = pd.Series(aligned, index=common_index, name=samples.name)

        # baseline() is a project helper (not defined in this notebook).
        baselined_chunk = baseline(chunk)

        trace_chunk_list.append(chunk.values)
        bsl_trace_chunk_list.append(np.asarray(baselined_chunk))
        run_speed_list.append(runspeed)
        turn_speed_list.append(turningspeed)
        event_index_list.append(event_time)  # used as column label

    trace_chunks = pd.DataFrame(np.column_stack(trace_chunk_list), columns=event_index_list)
    bsl_trace_chunks = pd.DataFrame(np.column_stack(bsl_trace_chunk_list), columns=event_index_list)
    trace_chunks.index = common_index
    bsl_trace_chunks.index = common_index

    movement_speeds = pd.DataFrame(
        [run_speed_list, turn_speed_list],
        index=['Mean_moveX', 'Mean_moveY'],
        columns=event_index_list,
    )

    return trace_chunks, bsl_trace_chunks, movement_speeds


In [None]:
# Align the open- and closed-loop block data of every mouse to halt onsets.
# align_to_event_start assumes a sampling interval of 0.001; adjust its
# sampling_rate setting if the data were sampled differently.
mouse_aligned = {
    session_name: {block_name: {} for block_name in blocks}
    for session_name, blocks in mouse_data.items()
}
move_speeds = {
    session_name: {block_name: {} for block_name in blocks}
    for session_name, blocks in mouse_data.items()
}

for session, session_dict in mouse_data.items():
    for block, mice in session_dict.items():
        for mouse, df in mice.items():
            aligned_raw, aligned_baselined, movement_means = align_to_event_start(
                df, '470_dfF', 'halt', [1, 2]
            )
            # Keep the baselined windows and the mean movement values.
            mouse_aligned[session][block][mouse] = aligned_baselined
            move_speeds[session][block][mouse] = movement_means
    

In [None]:
# Inspect the per-event mean movement values stored for one example mouse
# (session 1, closed-loop block).
move_speeds['session 1']['closedloop']['B3M1']

In [None]:

# Inspect the baselined, halt-aligned windows stored for the same example mouse.
mouse_aligned['session 1']['closedloop']['B3M1']

In [None]:
# Number of mice with an entry in the session 1 closed-loop block.
len(mouse_aligned['session 1']['closedloop'].keys())

In [None]:
# Same alignment as for halts, but around the onsets flagged in the 'No_halt'
# column; these windows are used as the control condition in later plots.
mouse_aligned_nohalt = {
    session_name: {block_name: {} for block_name in blocks}
    for session_name, blocks in mouse_data.items()
}
move_speeds_nohalt = {
    session_name: {block_name: {} for block_name in blocks}
    for session_name, blocks in mouse_data.items()
}

for session, session_dict in mouse_data.items():
    for block, mice in session_dict.items():
        for mouse, df in mice.items():
            aligned_raw, aligned_baselined, movement_means = align_to_event_start(
                df, '470_dfF', 'No_halt', [1, 2]
            )
            # Keep the baselined windows and the mean movement values.
            mouse_aligned_nohalt[session][block][mouse] = aligned_baselined
            move_speeds_nohalt[session][block][mouse] = movement_means

In [None]:
# Print a bold heading (ANSI escape codes), then compare the blocks of session 1
# for halt-aligned data. plot_compare_blocks is a project helper that is not
# defined in this notebook; its return value is kept for later use.
print('\033[1m Session 1 \033[0m')
mean_mouse_dict_s1 =plot_compare_blocks(mouse_aligned['session 1'], 'halt')


In [None]:
# Print a bold heading (ANSI escape codes), then compare the blocks of session 2
# for halt-aligned data using the project helper plot_compare_blocks.
print('\033[1m Session 2 \033[0m')
mean_mouse_dict_s2 =plot_compare_blocks(mouse_aligned['session 2'], 'halt')

In [None]:
# Same block comparison for the session 1 windows aligned to 'No_halt' onsets.
mean_mouse_dict_s1_nohalt =plot_compare_blocks(mouse_aligned_nohalt['session 1'], 'No halt')

In [None]:
# Inspect the halt-aligned windows of mouse B2M5 in the session 1 open-loop block.
mouse_aligned['session 1']['openloop']['B2M5']

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def _is_plottable(aligned):
    """Return True for a non-empty DataFrame (alignment may store 0 when a mouse has no events)."""
    return isinstance(aligned, pd.DataFrame) and not aligned.empty


def plot_mouse_sessions(
    mouse_aligned,
    mouse_aligned_nohalt,
    block_names,
    title="Mouse Sessions",
    stimulus_duration=1,
):
    """
    Plot session-specific traces for each mouse with control data.

    One figure is drawn per mouse (in sorted order) with one panel per block.
    Each panel shows the mean +/- std across events for every session, plus a
    control trace computed from all control events pooled across sessions.
    Entries that are not non-empty DataFrames are skipped, and sessions that
    lack a requested block are ignored.

    Parameters:
    - mouse_aligned (dict): ``{session: {block: {mouse: DataFrame}}}`` where
      each DataFrame has one column per event and the time axis as index.
    - mouse_aligned_nohalt (dict): Control data with the same structure.
    - block_names (list): List of block names (e.g., ['openloop', 'closedloop']) to plot.
    - title (str): Title prefix for each mouse figure.
    - stimulus_duration (float): Width of the shaded span starting at time 0,
      in the units of the time axis (default: 1).

    Returns:
    - None. Figures are shown with ``plt.show()`` and then closed.
    """
    # Collect every mouse that appears in a requested block of any session.
    mice = set()
    for session in mouse_aligned.values():
        for block in block_names:
            mice.update(session.get(block, {}).keys())

    # Sort so that figures appear in a reproducible order.
    for mouse in sorted(mice, key=str):
        fig, axes = plt.subplots(1, len(block_names), figsize=(5 * len(block_names), 4))
        axes = np.atleast_1d(axes)  # Ensure axes is iterable
        fig.suptitle(f"{title}: {mouse}")

        for ax, block in zip(axes, block_names):
            ax.spines[['right', 'top']].set_visible(False)

            # Control trace: pool the control events of all sessions.
            control_frames = [
                session[block][mouse]
                for session in mouse_aligned_nohalt.values()
                if block in session
                and mouse in session[block]
                and _is_plottable(session[block][mouse])
            ]
            if control_frames:
                pooled = pd.concat(control_frames, axis=1)
                # pandas statistics (NaN-skipping, sample std) so the control is
                # computed the same way as the session traces below.
                control_mean = pooled.mean(axis=1)
                control_std = pooled.std(axis=1)
                ax.plot(
                    pooled.index,
                    control_mean,
                    label="Control",
                    color="black",
                )
                ax.fill_between(
                    pooled.index,
                    control_mean - control_std,
                    control_mean + control_std,
                    color="black",
                    alpha=0.1,
                )

            # Main traces: one line per session.
            for session_name, session in mouse_aligned.items():
                if block not in session or mouse not in session[block]:
                    continue
                main_data = session[block][mouse]
                if not _is_plottable(main_data):
                    continue
                main_mean = main_data.mean(axis=1)
                main_std = main_data.std(axis=1)
                ax.plot(
                    main_data.index,
                    main_mean,
                    label=f"{session_name}",
                    alpha=0.8,
                )
                ax.fill_between(
                    main_data.index,
                    main_mean - main_std,
                    main_mean + main_std,
                    alpha=0.3,
                )

            # Mark time 0 and shade the stimulus period.
            ax.axvline(0, color="grey", linestyle="--")
            ax.axvspan(
                0,
                stimulus_duration,
                color="grey",
                alpha=0.1,
                label="Stimulus",
            )

            ax.set_title(f"{block} Block")
            ax.legend()

        plt.tight_layout()
        plt.show()
        plt.close(fig)  # Free the figure after displaying it


In [None]:
# Plot the halt-aligned traces of every mouse for both blocks, using the
# 'No_halt'-aligned windows as the control trace.
plot_mouse_sessions(
    mouse_aligned,
    mouse_aligned_nohalt,
    block_names=['openloop', 'closedloop'],
    title="Mouse Data Comparison",
    stimulus_duration=1,
)


In [None]:
def _mean_and_std_across_mice(frames):
    """Return (mean, std) across per-mouse mean traces, or None if no usable DataFrame is given."""
    per_mouse_means = [
        frame.mean(axis=1)
        for frame in frames
        if isinstance(frame, pd.DataFrame) and not frame.empty
    ]
    if not per_mouse_means:
        return None
    stacked = pd.concat(per_mouse_means, axis=1)
    return stacked.mean(axis=1), stacked.std(axis=1)


def plot_mean_across_blocks(session_blocks, control_blocks, title="Mean Across Blocks", stimulus_duration = 1):
    """
    Plot a single figure with the mean across mouse means for each block.

    Each mouse is first reduced to its mean trace across events; the plotted
    line is the mean of those per-mouse traces and the band is their standard
    deviation. Entries that are not non-empty DataFrames are ignored, and a
    block without any usable entry is reported and skipped.

    Parameters:
    - session_blocks (dict): Dictionary of session data, where keys are block names
                             and values are dictionaries of mouse data.
    - control_blocks (dict): Dictionary of control data with the same structure.
                             All blocks are pooled into one control trace.
    - title (str): Title of the plot.
    - stimulus_duration (float): Width of the shaded span starting at time 0,
                                 in the units of the time axis (default: 1).

    Returns:
    - None. The figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.spines[['right', 'top']].set_visible(False)

    for block_name, mouse_data in session_blocks.items():
        summary = _mean_and_std_across_mice(mouse_data.values()) if mouse_data else None
        if summary is None:
            print(f"No data for block: {block_name}")
            continue
        mean_across_mice, std_across_mice = summary

        # Plot block trace
        ax.plot(mean_across_mice, label=f'{block_name} Mean')
        ax.fill_between(
            mean_across_mice.index,
            mean_across_mice - std_across_mice,
            mean_across_mice + std_across_mice,
            alpha=0.2,
        )

    # Plot control trace (all control blocks pooled)
    if control_blocks:
        control_summary = _mean_and_std_across_mice(
            data
            for block_data in control_blocks.values()
            for data in block_data.values()
        )
        if control_summary is not None:
            control_mean, control_std = control_summary
            ax.plot(control_mean, label="Control Mean", color='black', linestyle='--')
            ax.fill_between(
                control_mean.index,
                control_mean - control_std,
                control_mean + control_std,
                color='grey',
                alpha=0.3,
            )

    ax.axvline(0, color='grey', linestyle='--')
    ax.axvspan(0, stimulus_duration, color='grey', alpha=0.1)
    ax.set_title(title)
    ax.legend()
    plt.tight_layout()
    plt.show()


In [None]:
# plot_mean_across_blocks takes one session's {block: {mouse: DataFrame}}
# mapping plus the matching control mapping, so plot each session in turn.
# (session_blocks / control_blocks were previously undefined at this point.)
for session_name, session_blocks in mouse_aligned.items():
    control_blocks = mouse_aligned_nohalt.get(session_name, {})
    plot_mean_across_blocks(
        session_blocks,
        control_blocks,
        title=f"Mean Across All Blocks: {session_name}",
    )


In [None]:
# Session names present in mouse_aligned (a live view of the dict keys).
sessions = mouse_aligned.keys()


In [None]:
def extract_aligned_vars(aligned_data_dict, aligned_movement_dict):
    """
    Summarise every event-aligned trace into one row of response statistics.

    Parameters:
    - aligned_data_dict (dict): ``{session: {block: {mouse: DataFrame}}}`` with
      one column per event and a numeric (or numeric-convertible) time index.
      Entries that are not DataFrames are reported and skipped.
    - aligned_movement_dict (dict): Same nesting; each DataFrame must have the
      rows 'Mean_moveX' and 'Mean_moveY' and the same column labels.

    Returns:
    - DataFrame with one row per event: session, block, mouse, event label,
      the two movement values, and max / min / mean of the trace within index
      range 0..1 and within 1..2 (both ends inclusive).
    """
    def window_stats(window):
        # (peak, minimum, mean) of one slice of a trace
        return window.max(), window.min(), window.mean()

    rows = []

    for session_number, session_blocks in aligned_data_dict.items():
        for session_block, mice_data in session_blocks.items():
            for mouse_id, item in mice_data.items():
                movement = aligned_movement_dict[session_number][session_block][mouse_id]

                # Anything that is not a DataFrame cannot be summarised.
                if not isinstance(item, pd.DataFrame):
                    print(f"Warning: The data for Mouse ID '{mouse_id}' in session '{session_number}' and block '{session_block}' is not a DataFrame. Skipping.")
                    continue

                # Work on a copy with a numeric index so label slicing works.
                traces = item.copy()
                traces.index = pd.to_numeric(traces.index)

                during_event = traces.loc[0:1]  # 0 to +1 on the time axis
                after_event = traces.loc[1:2]   # +1 to +2 on the time axis

                for event_label in traces.columns:
                    peak, trough, mean = window_stats(during_event[event_label])
                    peak_after, trough_after, mean_after = window_stats(after_event[event_label])

                    rows.append({
                        "SessionNumber": session_number,
                        "SessionBlock": session_block,
                        "MouseID": mouse_id,
                        "EventTime": event_label,
                        # mean movement values stored for this event
                        "moveX": movement.loc['Mean_moveX', event_label],
                        "moveY": movement.loc['Mean_moveY', event_label],
                        "PeakResponse": peak,
                        "MinResponse": trough,
                        "MeanResponse": mean,
                        "MeanResponse_after": mean_after,
                        "MinResponse_after": trough_after,
                        "PeakResponse_after": peak_after,
                    })

    return pd.DataFrame(rows)



In [None]:
# Summarise the halt-aligned windows into one row of statistics per event.
extracted_variables = extract_aligned_vars(mouse_aligned, move_speeds)


In [None]:
# Same summary for the windows aligned to 'No_halt' onsets.
extracted_variables_nohalt = extract_aligned_vars(mouse_aligned_nohalt, move_speeds_nohalt)


In [None]:
# Tag each summary table with its event type, stack the two tables and save
# the result to disk (without the row index).
for _event_label, _summary in (
    ('no halt', extracted_variables_nohalt),
    ('halt', extracted_variables),
):
    _summary['event'] = _event_label

combined_vars = pd.concat([extracted_variables_nohalt, extracted_variables])

# NOTE(review): the file name is spelled 'GCaAMP'; left unchanged in case
# downstream code reads this exact name.
combined_vars.to_csv('GCaAMP_MM_extracted_vars.csv', index=False)

In [None]:
# Display the combined halt / no-halt summary table.
combined_vars

In [None]:
## Consider adding a continuous time variable (the EventTime column) to the fitted model
# EventTime should currently be in seconds from session start

# Look for correlation between running and fluorescence changes

In [None]:


# Correlate movement in X with the 470 dF/F signal over all rows of session 1.
_movement_x = GCaMP_MM_1['movementX']
_dff_470 = GCaMP_MM_1['470_dfF']

# Pearson measures linear association; Spearman is rank-based and therefore
# also captures monotonic but non-linear relationships.
pearson_corr, pearson_pval = pearsonr(_movement_x, _dff_470)
spearman_corr, spearman_pval = spearmanr(_movement_x, _dff_470)

print(f"Pearson correlation: {pearson_corr}, p-value: {pearson_pval}")
print(f"Spearman correlation: {spearman_corr}, p-value: {spearman_pval}")


In [None]:
# Per-mouse correlation between movement in X and the 470 dF/F signal, once on
# all rows and once on a fixed-seed random subsample.
for mouse in GCaMP_MM_1.mouseID.unique():
    # filter_data is a project helper (star-imported).
    subset = filter_data(GCaMP_MM_1, [mouse, 'day1'])

    # Pearson: linear association. Spearman: rank-based, so it also captures
    # monotonic non-linear relationships.
    pearson_corr, pearson_pval = pearsonr(subset['movementX'], subset['470_dfF'])
    spearman_corr, spearman_pval = spearmanr(subset['movementX'], subset['470_dfF'])
    print(f'\n \033[1m {mouse} \033[0m \n')
    print(f"Pearson correlation: {pearson_corr}, p-value: {pearson_pval}")
    print(f"Spearman correlation: {spearman_corr}, p-value: {spearman_pval}")

    # DataFrame.sample raises when asked for more rows than exist (without
    # replacement), so cap the sample size at the subset length.
    n_samples = min(10000, len(subset))
    resampled = subset.sample(n_samples, random_state=9)
    pearson_corr, pearson_pval = pearsonr(resampled['movementX'], resampled['470_dfF'])
    spearman_corr, spearman_pval = spearmanr(resampled['movementX'], resampled['470_dfF'])
    # Report the number of rows actually sampled (the message used to say 1000).
    print(f'\n Random {n_samples} samples: \n')
    print(f"Subset Pearson correlation: {pearson_corr}, p-value: {pearson_pval}")
    print(f"Subset Spearman correlation: {spearman_corr}, p-value: {spearman_pval}")

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import linregress
import matplotlib.pyplot as plt

def correlation_remove(data, fluorescence="470_dfF", movement=("movementX", "movementY"), chunk_size=100000):
    """
    Remove the linear effect of movement from the fluorescence signal, chunk by chunk.

    For every block of ``chunk_size`` consecutive rows a straight line
    ``fluorescence ~ movement`` is fitted with ``scipy.stats.linregress`` and
    the fitted values are subtracted from the fluorescence. When several
    movement columns are given, their row-wise sum is used as the single
    regressor. Rows with a missing value are left out of the fit (their
    detrended value is NaN). The input DataFrame is not modified. A comparison
    figure is shown before returning.

    Parameters:
    - data (DataFrame): The input data containing fluorescence and movement data.
    - fluorescence (str): Column name of the fluorescence signal.
    - movement (str or sequence of str): Column name(s) representing movement data.
    - chunk_size (int): Number of rows fitted together.

    Returns:
    - detrended_df (DataFrame): Single column ``f"{fluorescence}_detrended"``
      holding the residuals for every fitted chunk; skipped chunks are absent.

    Raises:
    - ValueError: If no movement column is given or no chunk could be fitted.
    """
    movement_cols = [movement] if isinstance(movement, str) else list(movement)
    if not movement_cols:
        raise ValueError("At least one movement column is required.")

    # Build the regressor locally instead of adding a column to the caller's
    # DataFrame (which may itself be a slice of a larger frame).
    if len(movement_cols) > 1:
        regressor = data[movement_cols].sum(axis=1)
        regressor_name = 'movement_combined'
    else:
        regressor = data[movement_cols[0]]
        regressor_name = movement_cols[0]

    signal = data[fluorescence]
    detrended_col = f"{fluorescence}_detrended"
    detrended_data = []

    # Process data in chunks
    for start in range(0, len(data), chunk_size):
        end = start + chunk_size
        x = regressor.iloc[start:end]
        y = signal.iloc[start:end]

        # Only rows where both values are present can take part in the fit;
        # a single NaN would otherwise turn the whole fit into NaN.
        usable = x.notna().to_numpy() & y.notna().to_numpy()

        # A line cannot be fitted without at least two distinct x values.
        if x[usable].nunique() <= 1:
            print(f"Skipping chunk {start}-{end} due to insufficient variability in {regressor_name}")
            continue

        fit = linregress(x[usable].to_numpy(), y[usable].to_numpy())

        # Remove the effect of movement
        residual = y - (fit.slope * x + fit.intercept)
        detrended_data.append(residual.to_frame(name=detrended_col))

    # Combine all processed chunks
    if not detrended_data:
        raise ValueError("No valid data found for detrending.")
    detrended_df = pd.concat(detrended_data, axis=0)

    # Plot comparison: fluorescence on top, then one panel per movement column.
    fig, axes = plt.subplots(1 + len(movement_cols), figsize=(15, 6))
    axes = np.atleast_1d(axes)
    axes[0].plot(data.index, signal, label="Original Fluorescence", alpha=0.5)
    axes[0].plot(detrended_df.index, detrended_df[detrended_col], label="Detrended Fluorescence", alpha=0.8)
    for ax, column in zip(axes[1:], movement_cols):
        ax.plot(data.index, data[column])
        ax.set_ylabel(column)
    axes[0].set_title("Fluorescence Signal Before and After Movement Correction")
    axes[0].set_xlabel("Time (s)")
    axes[0].set_ylabel("Fluorescence Signal")
    axes[0].legend()
    plt.show()

    return detrended_df


In [None]:
# Example: detrend the data of one mouse. filter_data is a project helper; the
# ['B3M3', 'day1'] filters presumably select that mouse's 'day1' rows -- TODO confirm.
subset = filter_data(GCaMP_MM_1, ['B3M3', 'day1'])

# Fit and subtract the movement effect in chunks of 300000 rows.
detrended_data = correlation_remove(subset, fluorescence="470_dfF", movement=["movementX", "movementY"], chunk_size=300000)


In [None]:
# (Stray expression `p` removed: no such name is defined in this notebook, so
# unless a star import happened to provide it the cell raised NameError.)

In [None]:
# Display the most recently filtered subset.
subset