# SLEAP Distance Calculation

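Computes the distance between each subject's SLEAP-tracked thorax and the reward port around tone onsets, then compares the resulting distance traces across strains and across winning and losing trials.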

In [1]:
import os
import glob

In [2]:
# Imports of all used packages and libraries
import ast

import h5py
import numpy as np
import pandas as pd

# NOTE(review): the plotting cells below call `plt` (presumably matplotlib.pyplot),
# which is never imported in this notebook - add that import here once confirmed.

## Inputs & Data

Explanation of each input and where it comes from.

In [3]:
# Inputs and Required data loading
# input variable names are in all caps snake case
# Whenever an input changes or is used for processing
# the variables are all lower in snake case
INPUT_VARIABLE = 1
CHANNEL_MAPPING_DF = pd.read_excel("../../channel_mapping.xlsx")
TONE_TIMESTAMP_DF = pd.read_excel("../../rce_tone_timestamp.xlsx", index_col=0)
VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.read_excel("./video_to_frame_and_subject.xlsx")
SLEAP_DIR = "/scratch/back_up/reward_competition_extention/proc/id_corrected"
OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs


## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

In [4]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names

def calc_bmi(weight, height):
    """
    Compute a body-mass index from a weight and a height.

    Serves as the template example for how functions in this notebook
    are laid out and documented.

    Parameters:
    ----------
    weight : float
        Body weight. Units are not checked; the conventional BMI formula uses kilograms.
    height : float
        Body height. Units are not checked; the conventional BMI formula uses metres.

    Returns:
    -------
    float
        ``weight`` divided by the square of ``height``.
    """
    height_squared = height ** 2
    return weight / height_squared


In [5]:
def get_sleap_tracks_from_h5(filename):
    """
    Load the pose tracking array from a SLEAP-generated h5 file.

    Written so it can be passed straight to Pandas' apply on a column of file paths.

    Parameters:
    ----------
    filename : str
        Path to the SLEAP h5 file that holds the pose tracking data.

    Returns:
    -------
    np.ndarray
        The file's 'tracks' dataset with its axes reversed (``.T``).
        Cells in this notebook index the result as ``[:, :, :, track_index]``.

    Example:
    --------
    df['tracks'] = df['filename_column'].apply(get_sleap_tracks_from_h5)

    """
    with h5py.File(filename, "r") as h5_file:
        # Slicing with [:] reads the whole dataset into memory before the file closes.
        tracks = h5_file["tracks"][:]
    return tracks.T

In [6]:
def get_sleap_track_names_from_h5(filename):
    """
    Read the track names stored in a SLEAP-generated h5 file.

    Written so it can be passed straight to Pandas' apply on a column of file paths.

    Parameters:
    ----------
    filename : str
        Path to the SLEAP h5 file that holds the pose tracking data.

    Returns:
    -------
    list of str
        One UTF-8 decoded string per entry of the file's 'track_names' dataset,
        in the order they are stored.

    Example:
    --------
    df['track_names'] = df['filename_column'].apply(get_sleap_track_names_from_h5)

    """
    track_names = []
    with h5py.File(filename, "r") as h5_file:
        for raw_name in h5_file["track_names"][:]:
            # Each entry is converted to raw bytes first, then decoded to text.
            track_names.append(raw_name.tobytes().decode('utf-8'))
    return track_names


## Combining the h5 files between recordings

In [8]:
# Drop recordings without a start frame. `subset` is passed as a list because older
# pandas versions reject a bare string, and the result is copied explicitly because
# later cells assign new columns to this frame (avoids SettingWithCopyWarning).
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF.dropna(subset=["start_frame"]).copy()

In [9]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.head()

Unnamed: 0,file_path,start_frame,stop_frame,individual_subj,all_subj
0,/scratch/back_up/reward_competition_extention/...,32792.0,68495.0,1.4,1.1_1.4
1,/scratch/back_up/reward_competition_extention/...,0.0,32316.0,1.1_1.4,1.1_1.4
2,/scratch/back_up/reward_competition_extention/...,32792.0,68495.0,1.1,1.1_1.4
4,/scratch/back_up/reward_competition_extention/...,32860.0,68288.0,1.2,1.1_1.2
5,/scratch/back_up/reward_competition_extention/...,2027.0,32240.0,1.1_1.2,1.1_1.2


In [10]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["start_frame"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["start_frame"].astype(int)
VIDEO_TO_FRAME_AND_SUBJECT_DF["stop_frame"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["stop_frame"].astype(int)

In [11]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["recording_name"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["file_path"].apply(lambda x: os.path.basename(x).split(".")[0])

In [12]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["file_path"].apply(lambda x: get_sleap_tracks_from_h5(x))
VIDEO_TO_FRAME_AND_SUBJECT_DF["track_names"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["file_path"].apply(lambda x: get_sleap_track_names_from_h5(x))

In [13]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["individual_subj"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["individual_subj"].astype(str)

In [14]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["track_names"]

0                                                [1.4]
1    [1.1, 1.4, track_867, track_868, track_869, tr...
2                                                [1.1]
4                                                [1.2]
5    [1.1, 1.2, track_16, track_18, track_40, track...
6                                                [1.1]
Name: track_names, dtype: object

In [15]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_index"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {k: x["track_names"].index(k) for k in x["individual_subj"].split("_")}, axis=1)

In [16]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_tracks"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {k: x["locations"][:,:,:,v] for k, v in x["subject_to_index"].items()}, axis=1)

In [18]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_tracks"]

0    {'1.4': [[[340.27038574 388.14276123], [324.34...
1    {'1.1': [[[340.27038574 388.14276123], [324.34...
2    {'1.1': [[[nan nan], [nan nan], [nan nan], [na...
4    {'1.2': [[[303.96658325 368.13119507], [299.83...
5    {'1.1': [[[303.96658325 368.13119507], [299.83...
6    {'1.1': [[[nan nan], [nan nan], [nan nan], [na...
Name: subject_to_tracks, dtype: object

In [17]:
# Presumably a deliberate hard stop so that "Run All" does not continue past this point.
# NOTE(review): cells further down use `tone_start_df`, which is not defined anywhere in
# the visible notebook - confirm where it should come from before removing this guard.
raise ValueError()

ValueError: 

In [None]:
all_trials_df = TONE_TIMESTAMP_DF.dropna(subset="condition").reset_index(drop=True)

In [None]:
sorted(all_trials_df["recording_dir"].unique())

In [None]:
all_trials_df["resampled_index"] = all_trials_df["time_stamp_index"] // 20

In [None]:
all_trials_df["recording_dir"] = all_trials_df["recording_dir"].apply(lambda x: x if "2023" in x else "subj" + "_".join(x.split("_")[-5:]))

In [None]:
all_trials_df["all_subjects"] = all_trials_df["recording_dir"].apply(lambda x: sorted([num.strip("_").replace("_",".") for num in x.replace("-", "_").split("subj")[-1].strip("_").split("and")]))

In [None]:
all_trials_df["current_subject"] = all_trials_df["subject_info"].apply(lambda x: ".".join(x.replace("-","_").split("_")[:2]))

In [None]:
all_trials_df

In [None]:
all_trials_df["trial_outcome"] = all_trials_df.apply(
    lambda x: "win" if str(x["condition"]).strip() == str(x["current_subject"]) 
             else ("lose" if str(x["condition"]) in x["all_subjects"] 
                   else x["condition"]), axis=1)

In [None]:
all_trials_df

# Get Coordinates of all the corners

- Reading in all the files with the corner coordinates and the pose tracking

In [None]:
all_corner_h5 = glob.glob(os.path.join(SLEAP_DIR, "*/*.corner.h5"))

In [None]:
all_pose_estimation_h5 = glob.glob(os.path.join(SLEAP_DIR, "*/*.id_corrected.h5"))

In [None]:
len(all_corner_h5)

In [None]:
len(all_pose_estimation_h5)

- Matching all the video file names to the corresponding corner and pose tracking coordinate file

In [None]:
file_name_to_corner = {item.split("/")[-1].split(".")[0].strip(): item for item in all_corner_h5}

In [None]:
file_name_to_pose_estimation = {item.split("/")[-1].split(".")[0].strip(): item for item in all_pose_estimation_h5}

In [None]:
tone_start_df["corner_file"] = tone_start_df["File Name"].map(file_name_to_corner)
tone_start_df["pose_estimation_file"] = tone_start_df["File Name"].map(file_name_to_pose_estimation)

In [None]:
tone_start_df.head()

- Function that gets the SLEAP data

In [None]:
def get_sleap_coordinates(filename):
    """
    Load the tracks, node names and track names from a SLEAP-generated h5 file.

    Parameters:
    ----------
    filename : str
        Path to the SLEAP h5 file to read.

    Returns:
    -------
    tuple
        ``(locations, node_names, track_names)`` where ``locations`` is the
        'tracks' dataset with its axes reversed (``.T``), and ``node_names`` /
        ``track_names`` are lists of str decoded from the datasets of the same name.
    """
    with h5py.File(filename, "r") as f:
        # [:] reads each dataset fully into memory, so the values remain
        # usable after the file is closed.
        locations = f["tracks"][:].T
        node_names = [n.decode() for n in f["node_names"][:]]
        track_names = [n.decode() for n in f["track_names"][:]]
    return locations, node_names, track_names

In [None]:
corner_node_names = get_sleap_coordinates(all_corner_h5[0])[1]

In [None]:
corner_node_names

- Getting the coordinates of each corner and the reward port

In [None]:
for index, node in enumerate(corner_node_names):
    tone_start_df["{}_coordinates".format(node)] = tone_start_df["corner_file"].apply(lambda x: get_sleap_coordinates(x)[0][0,index,:,0])

In [None]:
tone_start_df.head()

In [None]:
corner_columns = [col for col in tone_start_df.columns if "coordinates" in col]

In [None]:
fig, ax = plt.subplots()

# Scatter every corner coordinate of the first row only, one labelled point per corner column.
for _, first_row in tone_start_df.head(1).iterrows():
    for corner in corner_columns:
        corner_x, corner_y = first_row[corner][0], first_row[corner][1]
        ax.scatter(corner_x, corner_y, label=corner)
ax.legend()

# Getting the distances between corners

- Getting the average width and height so that we can convert pixels to cm

In [None]:
tone_start_df["bottom_width"] = tone_start_df.apply(lambda row: row["box_bottom_right_coordinates"][0] - row["box_bottom_left_coordinates"][0], axis=1)
tone_start_df["top_width"] = tone_start_df.apply(lambda row: row["box_top_right_coordinates"][0] - row["box_top_left_coordinates"][0], axis=1)

In [None]:
tone_start_df["right_height"] = tone_start_df.apply(lambda row: row["box_bottom_right_coordinates"][1] - row["box_top_right_coordinates"][1], axis=1)
tone_start_df["left_height"] = tone_start_df.apply(lambda row: row["box_bottom_left_coordinates"][1] - row["box_top_left_coordinates"][1], axis=1)

In [None]:
tone_start_df["average_height"] = tone_start_df.apply(lambda row: (row["right_height"] + row["left_height"])/2, axis=1)
tone_start_df["average_width"] = tone_start_df.apply(lambda row: (row["bottom_width"] + row["top_width"])/2, axis=1)

- Getting the pixel-to-cm conversion ratio (cm per pixel)

In [None]:
tone_start_df["width_ratio"] = 29.5 / tone_start_df["average_width"]
tone_start_df["height_ratio"] = 24 / tone_start_df["average_height"]


In [None]:
tone_start_df["reward_port_scaled"] = tone_start_df.apply(lambda row: np.array([row["reward_port_coordinates"][0] * row["width_ratio"], row["reward_port_coordinates"][1] * row["height_ratio"]]), axis=1)

In [None]:
tone_start_df["reward_port_scaled"]

# Getting the coordinates of each mouse

- Function that fills missing coordinates by interpolating

In [None]:
from scipy.interpolate import interp1d

def fill_missing(Y, kind="linear"):
    """Fills missing values independently along each dimension after the first.

    Interior NaN gaps are interpolated along the first axis with
    ``scipy.interpolate.interp1d``; leading and trailing NaNs are then filled
    with the nearest valid value. A slice that contains no valid value at all
    is left as NaN instead of raising.

    Parameters:
    ----------
    Y : np.ndarray
        Array whose first axis is the one interpolated along. When the internal
        reshape returns a view (e.g. for a contiguous array) the input itself
        is modified in place.
    kind : str
        Interpolation kind forwarded to ``interp1d``.

    Returns:
    -------
    np.ndarray
        Array of the same shape as ``Y`` with the gaps filled.
    """

    # Store initial shape.
    initial_shape = Y.shape

    # Nothing to fill; also avoids reshaping a zero-size array with -1.
    if Y.size == 0:
        return Y

    # Flatten after first dim.
    Y = Y.reshape((initial_shape[0], -1))

    # Interpolate along each slice.
    for i in range(Y.shape[-1]):
        y = Y[:, i]

        missing = np.isnan(y)
        if not missing.any():
            continue

        x = np.flatnonzero(~missing)
        if x.size == 0:
            # No valid sample to interpolate from: leave the slice as NaN.
            continue

        if x.size > 1:
            # Build interpolant (needs at least two valid samples).
            f = interp1d(x, y[x], kind=kind, fill_value=np.nan, bounds_error=False)

            # Fill missing
            xq = np.flatnonzero(missing)
            y[xq] = f(xq)

        # Fill leading or trailing NaNs with the nearest non-NaN values
        mask = np.isnan(y)
        y[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), y[~mask])

        # Save slice
        Y[:, i] = y

    # Restore to initial shape.
    Y = Y.reshape(initial_shape)

    return Y


In [None]:
body_part_node_names = get_sleap_coordinates(all_pose_estimation_h5[0])[1]

In [None]:
body_part_node_names

- Function that scales each coordinate

In [None]:
def scale_coordinates(row, coordinate_col, height_ratio_col="height_ratio", width_ratio_col="width_ratio"):
    """
    Scale the two coordinate components of one row by that row's ratios.

    Parameters:
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide the coordinate array and both ratio values by key.
    coordinate_col : str
        Key of the coordinate array; it is indexed as ``[:, :, 0]`` and ``[:, :, 1]``.
    height_ratio_col : str
        Key of the factor applied to component 1.
    width_ratio_col : str
        Key of the factor applied to component 0.

    Returns:
    -------
    np.ndarray
        The two scaled components stacked depth-wise with ``np.dstack``.
    """
    coordinates = row[coordinate_col]
    scaled_x = coordinates[:, :, 0] * row[width_ratio_col]
    scaled_y = coordinates[:, :, 1] * row[height_ratio_col]
    return np.dstack((scaled_x, scaled_y))


In [None]:
for num in range(2):
    tone_start_df["pose_estimation_subj_{}_original_coordinates".format(num + 1)] = tone_start_df["pose_estimation_file"].apply(lambda x: fill_missing(get_sleap_coordinates(x)[0][:,:,:,num]))
    tone_start_df["pose_estimation_subj_{}_scaled_coordinates".format(num + 1)] = tone_start_df.apply(lambda row: scale_coordinates(row, "pose_estimation_subj_{}_original_coordinates".format(num + 1)), axis=1)     
    tone_start_df["pose_estimation_subj_{}_thorax_coordinates".format(num + 1)] = tone_start_df["pose_estimation_subj_{}_scaled_coordinates".format(num + 1)].apply(lambda x: x[:,4,:])
    tone_start_df["pose_estimation_subj_{}_name".format(num + 1)] = tone_start_df["pose_estimation_file"].apply(lambda x: get_sleap_coordinates(x)[2][num])


In [None]:
tone_start_df.head()

In [None]:
tone_start_df["pose_estimation_subj_1_thorax_coordinates"].iloc[0].shape

# Calculating the distance from thorax to reward port

In [None]:
tone_start_df["thorax_to_reward_port_distance_subj_1"] = tone_start_df.apply(lambda row: np.linalg.norm(row["pose_estimation_subj_1_thorax_coordinates"] - row["reward_port_scaled"], axis=1), axis=1)
tone_start_df["thorax_to_reward_port_distance_subj_2"] = tone_start_df.apply(lambda row: np.linalg.norm(row["pose_estimation_subj_2_thorax_coordinates"] - row["reward_port_scaled"], axis=1), axis=1)



In [None]:
frame_range = 20 * 30

In [None]:
tone_start_df["thorax_to_reward_port_tone_slices_subj_1"] = tone_start_df.apply(lambda row:  np.vstack([row["thorax_to_reward_port_distance_subj_1"][tone_frame-frame_range:tone_frame+frame_range] for tone_frame in row["all_tone_frame"] if tone_frame <= row["thorax_to_reward_port_distance_subj_1"].shape[0]]), axis=1)
tone_start_df["thorax_to_reward_port_tone_slices_subj_2"] = tone_start_df.apply(lambda row:  np.vstack([row["thorax_to_reward_port_distance_subj_2"][tone_frame-frame_range:tone_frame+frame_range] for tone_frame in row["all_tone_frame"] if tone_frame <= row["thorax_to_reward_port_distance_subj_2"].shape[0]]), axis=1)


In [None]:
# Iterate over each row in the array
for i, row in enumerate(tone_start_df["thorax_to_reward_port_tone_slices_subj_1"].iloc[4]):
    # Plot the row data
    plt.plot(row, label=f'Line {i+1}')



# Averaging across trials based on strain

In [None]:
tone_start_df["thorax_to_reward_port_tone_slices_subj_1"].iloc[0].shape

In [None]:
tone_start_df["thorax_to_reward_port_tone_trial_average_subj_1"] = tone_start_df["thorax_to_reward_port_tone_slices_subj_1"].apply(lambda x: np.mean(x, axis=0))
tone_start_df["thorax_to_reward_port_tone_trial_average_subj_2"] = tone_start_df["thorax_to_reward_port_tone_slices_subj_2"].apply(lambda x: np.mean(x, axis=0))

In [None]:
tone_start_df.head()

In [None]:
trial_average_col = "thorax_to_reward_port_tone_trial_average_subj_1"

# Stack each strain's per-row trial averages into a 2-D array:
# one row per dataframe row, one column per frame of the tone-centred window.
c57_trial_averages = np.stack(tone_start_df.loc[tone_start_df["Strain"] == "C57", trial_average_col].to_list())
cd1_trial_averages = np.stack(tone_start_df.loc[tone_start_df["Strain"] == "CD1", trial_average_col].to_list())

c57_mean_distance = c57_trial_averages.mean(axis=0)
cd1_mean_distance = cd1_trial_averages.mean(axis=0)

# Standard error of the mean: spread across rows divided by sqrt(number of rows).
# The divisor has to be the number of observations being averaged (rows),
# not the number of frames in a single trace.
cd1_sem_distance = cd1_trial_averages.std(axis=0) / np.sqrt(cd1_trial_averages.shape[0])
c57_sem_distance = c57_trial_averages.std(axis=0) / np.sqrt(c57_trial_averages.shape[0])

In [None]:
tone_start_df[tone_start_df["Strain"] == "CD1"]["thorax_to_reward_port_tone_trial_average_subj_1"].iloc[0].shape

In [None]:
np.sqrt(tone_start_df[tone_start_df["Strain"] == "CD1"]["thorax_to_reward_port_tone_trial_average_subj_1"].shape[0])  

In [None]:
# Add shading for the standard error
plt.plot(cd1_mean_distance, color="#ffaf00", label="CD1")
plt.fill_between(range(len(cd1_mean_distance)), cd1_mean_distance - cd1_sem_distance, cd1_mean_distance + cd1_sem_distance, color="#ffaf00", alpha=0.2)

# Add shading for the standard error
plt.plot(c57_mean_distance, color="#15616f", label="C57")
plt.fill_between(range(len(c57_mean_distance)), c57_mean_distance - c57_sem_distance, c57_mean_distance + c57_sem_distance, color="#15616f", alpha=0.2)
plt.ylabel("Distance of thorax to reward port (cm)")
plt.xlabel("Time from tone onset (seconds)")

xticks = plt.xticks()[0]  # Get current x-axis ticks
plt.xticks(xticks, xticks // 30 - 20)  # Set new x-axis ticks
plt.xlim(0, 1200)
plt.ylim(0, 12)
plt.title("C57 are closer to the reward port")

plt.legend()

# Separating Rows into the two subjects

In [None]:
tone_start_df.columns

- Getting all the columns with subject 1 or subject 2 in the name

In [None]:
subj_1_col = [col for col in tone_start_df.columns if "subj_1" in col or "Strain" in col or "all_subj" in col or "date" in col]
subj_2_col = [col for col in tone_start_df.columns if "subj_2" in col or "Strain" in col or "all_subj" in col or "date" in col]


In [None]:
subj_1_df = tone_start_df[subj_1_col].copy()
subj_2_df = tone_start_df[subj_2_col].copy()

- Standardizing all the columns so it's just "subj"

In [None]:
subj_1_df.columns = [col.replace("subj_1", "subj") for col in subj_1_df.columns]
subj_2_df.columns = [col.replace("subj_2", "subj") for col in subj_2_df.columns]

In [None]:
subj_1_df.head()

- Putting the dataframes for subject 1 and subject 2 together

In [None]:
combined_subj_df = pd.concat([subj_1_df, subj_2_df])

In [None]:
combined_subj_df["agent"] = combined_subj_df.apply(lambda x: list(set(x["all_subj"]) - set([x["subj"]]))[0], axis=1)

In [None]:
combined_subj_df.head()

In [None]:
combined_subj_df.tail()

In [None]:
combined_subj_df.shape

In [None]:
len(combined_subj_df["subj"].unique())

- Calculating the average distance

In [None]:
c57_combined_subj_df = combined_subj_df[combined_subj_df["Strain"] == "C57"]
cd1_combined_subj_df = combined_subj_df[combined_subj_df["Strain"] == "CD1"]

In [None]:
c57_combined_subj_df

In [None]:
c57_mean_distance = np.mean(c57_combined_subj_df["thorax_to_reward_port_tone_trial_average_subj"], axis=0)
cd1_mean_distance = np.mean(cd1_combined_subj_df["thorax_to_reward_port_tone_trial_average_subj"], axis=0)

In [None]:

cd1_std_distance = np.std(cd1_combined_subj_df["thorax_to_reward_port_tone_trial_average_subj"].to_list(), axis=0) 
c57_std_distance = np.std(c57_combined_subj_df["thorax_to_reward_port_tone_trial_average_subj"].to_list(), axis=0)

In [None]:
len(cd1_combined_subj_df["thorax_to_reward_port_tone_trial_average_subj"])

In [None]:
len(c57_combined_subj_df["thorax_to_reward_port_tone_trial_average_subj"])

In [None]:

cd1_sem_distance = cd1_std_distance / np.sqrt(len(cd1_combined_subj_df["thorax_to_reward_port_tone_trial_average_subj"]))  
c57_sem_distance = c57_std_distance / np.sqrt(len(c57_combined_subj_df["thorax_to_reward_port_tone_trial_average_subj"])) 

In [None]:
# Add shading for the standard error
plt.plot(cd1_mean_distance, color="#15616f", label="CD1")
plt.fill_between(range(len(cd1_mean_distance)), cd1_mean_distance - cd1_sem_distance, cd1_mean_distance + cd1_sem_distance, color="#15616f", alpha=0.2)

# Add shading for the standard error
plt.plot(c57_mean_distance, color="#ffaf00", label="C57")
plt.fill_between(range(len(c57_mean_distance)), c57_mean_distance - c57_sem_distance, c57_mean_distance + c57_sem_distance, color="#ffaf00", alpha=0.2)
plt.ylabel("Distance of thorax to reward port (cm)")
plt.xlabel("Time from tone onset (seconds)")

xticks = plt.xticks()[0]  # Get current x-axis ticks
plt.xticks(xticks, xticks // 30 - 20)  # Set new x-axis ticks
plt.xlim(0, 1200)
plt.ylim(0, 12)
plt.title("C57 are closer to the reward port")

plt.legend()

# Adding the win and loss information

# Incorporating Winning and Losing

In [None]:
reward_comp_scoring_df = pd.read_csv("./data/scoring/pilot_3_reward_competition_all_competition_cage_1_2_3_4_5_6_date_20221003_20221004.csv")

In [None]:
reward_comp_scoring_df.columns

- Updating the date so that we can merge using it

In [None]:
# Normalise the date to a string with surrounding whitespace removed so it can be used
# as a merge key. (`.strip("")` removes nothing; `.strip()` is presumably what was intended.)
reward_comp_scoring_df["date"] = reward_comp_scoring_df["rc_date"].apply(lambda x: str(x).strip())

In [None]:
reward_comp_scoring_df["date"].iloc[0]

- Updating the IDs so we can merge using it

In [None]:
reward_comp_scoring_df["all_subj"] = reward_comp_scoring_df["rc_animal_ids"].apply(lambda x: ast.literal_eval(x))

In [None]:
reward_comp_scoring_df["all_subj"].iloc[0]

In [None]:
reward_comp_scoring_df["rc_winner"] = reward_comp_scoring_df["rc_winner"].apply(lambda x: ast.literal_eval(x))

In [None]:
reward_comp_scoring_df["rc_averaged_winner"] = reward_comp_scoring_df["rc_averaged_winner"].astype(str)
reward_comp_scoring_df["rc_averaged_loser"] = reward_comp_scoring_df["rc_averaged_loser"].astype(str)

In [None]:
for num in reward_comp_scoring_df["rc_winner"]:
    print(num)

- Merging the dataframes

In [None]:
combined_subj_df["date"]

In [None]:
reward_comp_scoring_df["date"]

In [None]:
distance_and_scoring_df = combined_subj_df.merge(reward_comp_scoring_df, on=['date', 'all_subj'], how='outer')

In [None]:
distance_and_scoring_df = distance_and_scoring_df.dropna(subset=["rc_winner", "subj"])#.dropna(subset="subj")

- Getting all the winning and losing trials based on matching IDs

In [None]:
distance_and_scoring_df["subj"].unique()

In [None]:
distance_and_scoring_df["per_trial_winning_indexes"] = distance_and_scoring_df.apply(lambda x: [i for i, trial in enumerate(x["rc_winner"]) if trial == x["subj"] and i < x["thorax_to_reward_port_tone_slices_subj"].shape[0]], axis=1)
distance_and_scoring_df["per_trial_losing_indexes"] = distance_and_scoring_df.apply(lambda x: [i for i, trial in enumerate(x["rc_winner"]) if trial == x["agent"] and i < x["thorax_to_reward_port_tone_slices_subj"].shape[0]], axis=1)


In [None]:
distance_and_scoring_df["all_subj"].head()

In [None]:
distance_and_scoring_df["rc_winner"].head()

In [None]:
distance_and_scoring_df["per_trial_winning_indexes"].head()

In [None]:
distance_and_scoring_df["per_trial_losing_indexes"].head()

# TODO: Check the winner vs. loser standard error (SEM) calculations

In [None]:
distance_and_scoring_df["thorax_to_reward_port_all_winning_trial_distances"] = distance_and_scoring_df.apply(lambda x: x["thorax_to_reward_port_tone_slices_subj"][x["per_trial_winning_indexes"]], axis=1)
distance_and_scoring_df["thorax_to_reward_port_all_losing_trial_distances"] = distance_and_scoring_df.apply(lambda x: x["thorax_to_reward_port_tone_slices_subj"][x["per_trial_losing_indexes"]], axis=1)


In [None]:
distance_and_scoring_df["thorax_to_reward_port_all_winning_trial_distances"].iloc[0].shape

In [None]:
distance_and_scoring_df["thorax_to_reward_port_all_losing_trial_distances"].iloc[0].shape

In [None]:
distance_and_scoring_df["thorax_to_reward_port_all_winning_trial_distances"].iloc[1].shape

In [None]:
distance_and_scoring_df["thorax_to_reward_port_all_losing_trial_distances"].iloc[1].shape

- Averaging across winning and losing trials

In [None]:
distance_and_scoring_df["thorax_to_reward_port_tone_winning_trial_average"] = distance_and_scoring_df["thorax_to_reward_port_all_winning_trial_distances"].apply(lambda x: np.mean(x, axis=0))
distance_and_scoring_df["thorax_to_reward_port_tone_losing_trial_average"] = distance_and_scoring_df["thorax_to_reward_port_all_losing_trial_distances"].apply(lambda x: np.mean(x, axis=0))

In [None]:
distance_and_scoring_df["thorax_to_reward_port_tone_winning_trial_average"]

- Filtering out rows that have NANs in the average distance array

In [None]:
distance_and_scoring_df[~distance_and_scoring_df["thorax_to_reward_port_tone_winning_trial_average"].apply(lambda x: np.isnan(x).any())]

In [None]:
distance_and_scoring_df[~distance_and_scoring_df["thorax_to_reward_port_tone_losing_trial_average"].apply(lambda x: np.isnan(x).any())]

In [None]:
c57_all_distance_and_scoring_df = distance_and_scoring_df[distance_and_scoring_df["Strain"] == "C57"]
cd1_all_distance_and_scoring_df = distance_and_scoring_df[distance_and_scoring_df["Strain"] == "CD1"]

In [None]:
# Define a function to check if a NumPy array contains any NaN values
def contains_nan(arr):
    """Report whether at least one element of ``arr`` is NaN (per ``np.isnan``)."""
    nan_mask = np.isnan(arr)
    return nan_mask.any()

In [None]:
# Apply `contains_nan` to each trial-average array and keep only the rows
# whose winning (or losing) trial average contains no NaN values
c57_winning_distance_and_scoring_df = c57_all_distance_and_scoring_df[~c57_all_distance_and_scoring_df['thorax_to_reward_port_tone_winning_trial_average'].apply(contains_nan)]
c57_losing_distance_and_scoring_df = c57_all_distance_and_scoring_df[~c57_all_distance_and_scoring_df['thorax_to_reward_port_tone_losing_trial_average'].apply(contains_nan)]
cd1_winning_distance_and_scoring_df = cd1_all_distance_and_scoring_df[~cd1_all_distance_and_scoring_df['thorax_to_reward_port_tone_winning_trial_average'].apply(contains_nan)]
cd1_losing_distance_and_scoring_df = cd1_all_distance_and_scoring_df[~cd1_all_distance_and_scoring_df['thorax_to_reward_port_tone_losing_trial_average'].apply(contains_nan)]


In [None]:
# Stack each group's per-row trial averages into 2-D arrays:
# one row per dataframe row, one column per frame of the tone-centred window.
c57_winning_trial_averages = np.stack(c57_winning_distance_and_scoring_df["thorax_to_reward_port_tone_winning_trial_average"].to_list())
c57_losing_trial_averages = np.stack(c57_losing_distance_and_scoring_df["thorax_to_reward_port_tone_losing_trial_average"].to_list())
cd1_winning_trial_averages = np.stack(cd1_winning_distance_and_scoring_df["thorax_to_reward_port_tone_winning_trial_average"].to_list())
cd1_losing_trial_averages = np.stack(cd1_losing_distance_and_scoring_df["thorax_to_reward_port_tone_losing_trial_average"].to_list())

# Standard error of the mean = std across rows / sqrt(number of rows).
# The divisor is the number of observations averaged, not the number of frames per trace.
c57_winning_mean_distance = c57_winning_trial_averages.mean(axis=0)
c57_winning_sem_distance = c57_winning_trial_averages.std(axis=0) / np.sqrt(c57_winning_trial_averages.shape[0])
c57_losing_mean_distance = c57_losing_trial_averages.mean(axis=0)
c57_losing_sem_distance = c57_losing_trial_averages.std(axis=0) / np.sqrt(c57_losing_trial_averages.shape[0])

cd1_winning_mean_distance = cd1_winning_trial_averages.mean(axis=0)
cd1_winning_sem_distance = cd1_winning_trial_averages.std(axis=0) / np.sqrt(cd1_winning_trial_averages.shape[0])
cd1_losing_mean_distance = cd1_losing_trial_averages.mean(axis=0)
cd1_losing_sem_distance = cd1_losing_trial_averages.std(axis=0) / np.sqrt(cd1_losing_trial_averages.shape[0])


Colors are the website colors (yellow #FFAF00; teal #15616F), but I've added these: light yellow #FFDB91 (r 255 g 219 b 145) and light teal #C2DBDC (r 194 g 219 b 220). Thoughts on making all titles, axes, and numbers gray instead of black? Specifically #666666 (r 102 g 102 b 102).

In [None]:
# Add shading for the standard error
plt.plot(cd1_winning_mean_distance, color="#15616F", label="CD1 Winning Trials")
plt.fill_between(range(len(cd1_winning_mean_distance)), cd1_winning_mean_distance - cd1_winning_sem_distance, cd1_winning_mean_distance + cd1_winning_sem_distance, color="#15616f", alpha=0.2)

# Add shading for the standard error
plt.plot(cd1_losing_mean_distance, color="#C2DBDC", label="CD1 Lost Trials")
plt.fill_between(range(len(cd1_losing_mean_distance)), cd1_losing_mean_distance - cd1_losing_sem_distance, cd1_losing_mean_distance + cd1_losing_sem_distance, color="#C2DBDC", alpha=0.2)

# Add shading for the standard error
plt.plot(c57_losing_mean_distance, color="#FFDB91", label="C57 Lost Trials")
plt.fill_between(range(len(c57_losing_mean_distance)), c57_losing_mean_distance - c57_losing_sem_distance, c57_losing_mean_distance + c57_losing_sem_distance, color="#FFDB91", alpha=0.2)

# Add shading for the standard error
plt.plot(c57_winning_mean_distance, color="#FFAF00", label="C57 Winning Trials")
plt.fill_between(range(len(c57_winning_mean_distance)), c57_winning_mean_distance - c57_winning_sem_distance, c57_winning_mean_distance + c57_winning_sem_distance, color="#ffaf00", alpha=0.2)


plt.ylabel("Distance of thorax to reward port (cm)")
plt.xlabel("Time from tone onset (seconds)")

xticks = plt.xticks()[0]  # Get current x-axis ticks
plt.xticks(xticks, xticks // 30 - 20)  # Set new x-axis ticks
plt.xlim(0, 1200)

plt.title("C57 have closer competitions than CD1")

plt.legend()

# Separating by overall winner and loser

In [None]:
# Keep only the rows that are not flagged as a tie.
# The flag is cast to bool before negating: on an object-dtype column `~` inverts each
# value bitwise (True -> -2) instead of producing a boolean mask.
# NOTE(review): a missing flag (NaN) is cast to True and therefore treated as a tie - confirm.
# The result is copied because a later cell adds a column to it.
is_tie = distance_and_scoring_df["rc_is_win_to_win_and_loss_ratio_tie"].astype(bool)
no_tie_distance_and_scoring_df = distance_and_scoring_df[~is_tie].copy()

In [None]:
no_tie_distance_and_scoring_df["is_winner"] = no_tie_distance_and_scoring_df["subj"] == no_tie_distance_and_scoring_df["rc_averaged_winner"]

In [None]:
c57_winner_distance_and_scoring_df = no_tie_distance_and_scoring_df[(no_tie_distance_and_scoring_df["Strain"] == "C57") & (no_tie_distance_and_scoring_df["is_winner"])]
c57_loser_distance_and_scoring_df = no_tie_distance_and_scoring_df[(no_tie_distance_and_scoring_df["Strain"] == "C57") & ~(no_tie_distance_and_scoring_df["is_winner"])]

cd1_winner_distance_and_scoring_df = no_tie_distance_and_scoring_df[(no_tie_distance_and_scoring_df["Strain"] == "CD1") & (no_tie_distance_and_scoring_df["is_winner"])]
cd1_loser_distance_and_scoring_df = no_tie_distance_and_scoring_df[(no_tie_distance_and_scoring_df["Strain"] == "CD1") & ~(no_tie_distance_and_scoring_df["is_winner"])]

In [None]:
# Stack each group's per-row trial averages into 2-D arrays:
# one row per dataframe row, one column per frame of the tone-centred window.
c57_winner_trial_averages = np.stack(c57_winner_distance_and_scoring_df["thorax_to_reward_port_tone_trial_average_subj"].to_list())
c57_loser_trial_averages = np.stack(c57_loser_distance_and_scoring_df["thorax_to_reward_port_tone_trial_average_subj"].to_list())
cd1_winner_trial_averages = np.stack(cd1_winner_distance_and_scoring_df["thorax_to_reward_port_tone_trial_average_subj"].to_list())
cd1_loser_trial_averages = np.stack(cd1_loser_distance_and_scoring_df["thorax_to_reward_port_tone_trial_average_subj"].to_list())

# Standard error of the mean = std across rows / sqrt(number of rows).
# The divisor is the number of observations averaged, not the number of frames per trace.
c57_winner_mean_distance = c57_winner_trial_averages.mean(axis=0)
c57_winner_sem_distance = c57_winner_trial_averages.std(axis=0) / np.sqrt(c57_winner_trial_averages.shape[0])
c57_loser_mean_distance = c57_loser_trial_averages.mean(axis=0)
c57_loser_sem_distance = c57_loser_trial_averages.std(axis=0) / np.sqrt(c57_loser_trial_averages.shape[0])

cd1_winner_mean_distance = cd1_winner_trial_averages.mean(axis=0)
cd1_winner_sem_distance = cd1_winner_trial_averages.std(axis=0) / np.sqrt(cd1_winner_trial_averages.shape[0])
cd1_loser_mean_distance = cd1_loser_trial_averages.mean(axis=0)
cd1_loser_sem_distance = cd1_loser_trial_averages.std(axis=0) / np.sqrt(cd1_loser_trial_averages.shape[0])


In [None]:
import seaborn as sns

In [None]:
# Add shading for the standard error
plt.plot(cd1_winner_mean_distance, color="#15616F", label="CD1 Overall Winner")
plt.fill_between(range(len(cd1_winner_mean_distance)), cd1_winner_mean_distance - cd1_winner_sem_distance, cd1_winner_mean_distance + cd1_winner_sem_distance, color="#15616f", alpha=0.2)

# Add shading for the standard error
plt.plot(cd1_loser_mean_distance, color="#C2DBDC", label="CD1 Overall Loser")
plt.fill_between(range(len(cd1_loser_mean_distance)), cd1_loser_mean_distance - cd1_loser_sem_distance, cd1_loser_mean_distance + cd1_loser_sem_distance, color="#C2DBDC", alpha=0.2)

# Add shading for the standard error
plt.plot(c57_loser_mean_distance, color="#FFDB91", label="C57 Overall Loser")
plt.fill_between(range(len(c57_loser_mean_distance)), c57_loser_mean_distance - c57_loser_sem_distance, c57_loser_mean_distance + c57_loser_sem_distance, color="#FFDB91", alpha=0.2)

# Add shading for the standard error
plt.plot(c57_winner_mean_distance, color="#FFAF00", label="C57 Overall Winner")
plt.fill_between(range(len(c57_winner_mean_distance)), c57_winner_mean_distance - c57_winner_sem_distance, c57_winner_mean_distance + c57_winner_sem_distance, color="#ffaf00", alpha=0.2)


plt.ylabel("Distance of thorax to reward port (cm)")
plt.xlabel("Time from tone onset (seconds)")

xticks = plt.xticks()[0]  # Get current x-axis ticks
plt.xticks(xticks, xticks // 30 - 20)  # Set new x-axis ticks
plt.xlim(0, 1200)

plt.title("C57 have closer competitions than CD1")

plt.legend()

In [None]:
sns.histplot(distance_and_scoring_df[distance_and_scoring_df["strain"] == "C57"]["rc_tie_count"], alpha=0.5, color="#FFAF00", binwidth=1, label="C57")
sns.histplot(distance_and_scoring_df[distance_and_scoring_df["strain"] == "CD1"]["rc_tie_count"], alpha=0.5, color="#15616F", binwidth=1, label="CD1")
plt.title("C57 have more ties")
plt.xlabel("Number of ties")
plt.legend()

In [None]:
distance_and_scoring_df[distance_and_scoring_df["strain"] == "C57"]["rc_tie_count"]

In [None]:
distance_and_scoring_df[distance_and_scoring_df["strain"] == "CD1"]["rc_tie_count"]

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a4490980-3f6a-4f44-80eb-ebd789a5b21f' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>