# SLEAP Distance Calculation

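Computes pose-derived features from SLEAP tracking for each subject: positions are rescaled from pixels to cm using the labeled box corners, and then velocities, distances (to the other animal and to the reward port) and orientation angles are calculated for every frame.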

In [None]:
#calling it a second time may prevent some graphics errors
import matplotlib.pyplot as plt

In [None]:
import os
import glob
import git
import sys


In [None]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
# import seaborn as sns
import matplotlib as mpl
# import matplotlib.pyplot as plt
import h5py
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter


In [None]:
import umap
from sklearn.preprocessing import StandardScaler
import sklearn.cluster as cluster


In [None]:
import cv2 
import os 

In [None]:
# Locate the root of the enclosing git repository so that project paths do not
# depend on the directory the notebook is launched from.
git_repo = git.Repo(".", search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")

In [None]:
git_root

In [None]:
# Put the repository's `src` folder first on the import path
# (presumably where the `utilities` and `sleap.process_pose` modules live).
sys.path.insert(0, os.path.join(git_root, 'src'))

In [None]:
import utilities.helper
import sleap.process_pose

In [None]:
from pathlib import Path
import imageio
import hdbscan

In [None]:
# sns.set('notebook', 'ticks', font_scale=1.2)
# Default figure size for every matplotlib figure created in this notebook
mpl.rcParams['figure.figsize'] = [15,6]

# Functions

In [None]:
def calculate_angles_from_arrays(A, B, C, D):
    """
    Calculate the angles between vectors AB and CD for arrays of 2D points.

    Parameters:
    - A, B, C, D: Each is a 2D array-like where each row represents a point in 2D space.
                  A and B represent points defining the first vector, AB, and C and D represent points defining the second vector, CD.

    Returns:
    - A numpy array of angles in degrees (range [0, 180]) between the vectors AB and CD for each set of points.
      Rows where either vector has zero length (or contains NaN) yield NaN, because the angle is undefined there.
    """
    # asarray avoids copying inputs that already are arrays; a float dtype keeps
    # the masked division below well defined for integer coordinates.
    A, B, C, D = (np.asarray(points, dtype=float) for points in (A, B, C, D))

    # Calculate vectors AB and CD
    AB = B - A
    CD = D - C

    # Row-wise dot products and the product of the two vector lengths
    dot_products = np.einsum('ij,ij->i', AB, CD)
    norm_products = np.linalg.norm(AB, axis=1) * np.linalg.norm(CD, axis=1)

    # Divide only where both vectors have a length; the remaining rows stay NaN
    # instead of triggering a 0/0 RuntimeWarning.
    cos_angles = np.full(dot_products.shape, np.nan)
    np.divide(dot_products, norm_products, out=cos_angles, where=norm_products > 0)

    # Clip values to prevent domain errors due to numerical issues
    cos_angles = np.clip(cos_angles, -1.0, 1.0)

    # Calculate angles in radians and then convert to degrees
    return np.degrees(np.arccos(cos_angles))


In [None]:
def extract_frames_and_make_gif(video_path, frame_numbers, output_folder, gif_name="output.gif", fps=10, max_width=640):
    """
    Extracts frames from a video at specific frame numbers, resizes them, and creates a GIF from those frames.

    Every requested frame is written to ``output_folder`` as ``frame_<n>.png``; the GIF is
    assembled from those files in the order the frames occur in the video.

    Parameters:
        video_path (str): Path to the video file.
        frame_numbers (list): List of (0-based) frame numbers to extract.
        output_folder (str): Directory to save the frames and GIF.
        gif_name (str): Filename for the GIF.
        fps (int): Frames per second for the GIF.
        max_width (int): Maximum width of the frames in the GIF. Height is adjusted proportionally.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Initialize video capture
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    # Prepare to extract frames
    frames = []
    frame_ids = set(frame_numbers)  # Convert list to set for faster lookup
    # Nothing after the highest requested frame is needed, so decoding stops there
    last_wanted = max(frame_ids, default=-1)
    current_frame = 0

    try:
        while current_frame <= last_wanted:
            ret, frame = cap.read()
            if not ret:
                break

            if current_frame in frame_ids:
                # Resize frame to reduce GIF size
                height, width = frame.shape[:2]
                if width > max_width:  # Only resize if the image is wider than the max width
                    scaling_factor = max_width / float(width)
                    new_dim = (max_width, int(height * scaling_factor))
                    frame = cv2.resize(frame, new_dim, interpolation=cv2.INTER_AREA)

                frame_path = os.path.join(output_folder, f"frame_{current_frame}.png")
                cv2.imwrite(frame_path, frame)
                frames.append(frame_path)
                print(f"Extracted frame {current_frame}")

            current_frame += 1
    finally:
        # Release the video file even if reading or writing a frame fails
        cap.release()

    # Create GIF
    if frames:
        gif_path = os.path.join(output_folder, gif_name)
        with imageio.get_writer(gif_path, mode='I', fps=fps) as writer:
            for filename in frames:
                image = imageio.imread(filename)
                writer.append_data(image)
        print(f"GIF created at {gif_path}")
    else:
        print("No frames extracted, GIF not created.")

In [None]:
def compute_velocity(node_loc, window_size=25, polynomial_order=3):
    """
    Calculate the velocity of tracked nodes from pose data.
    
    The function utilizes the Savitzky-Golay filter to smooth the data and compute the velocity.
    
    Parameters:
    ----------
    node_loc : numpy.ndarray
        The location of nodes, represented as an array of shape [frames, 2]. 
        Each row represents x and y coordinates for a particular frame.
        Integer input is converted to float before filtering.
        
    window_size : int, optional
        The size of the window used for the Savitzky-Golay filter. 
        Represents the number of consecutive data points used when smoothing the data.
        Default is 25.
        
    polynomial_order : int, optional
        The order of the polynomial fit to the data within the Savitzky-Golay filter window.
        Default is 3.

    Returns:
    -------
    numpy.ndarray
        The speed (magnitude of the velocity vector) for each frame, in position units per frame,
        calculated from the smoothed x and y coordinates.
    
    """
    node_loc = np.asarray(node_loc)
    # An integer array would previously receive the derivatives through
    # `zeros_like`, silently truncating them; work in floating point instead.
    if not np.issubdtype(node_loc.dtype, np.floating):
        node_loc = node_loc.astype(float)

    # Smooth and differentiate every coordinate column along the frame axis
    node_loc_vel = savgol_filter(node_loc, window_size, polynomial_order, deriv=1, axis=0)

    # Calculate the magnitude of the velocity vectors for each frame
    return np.linalg.norm(node_loc_vel, axis=1)

In [None]:
def rolling_average(arr, window_size):
    """
    Moving average of a 1D array over a fixed-size window.

    Only positions where the window fits completely inside the input are
    returned, so the result has ``len(arr) - window_size + 1`` elements.

    Parameters:
        arr (numpy.array): The input array to compute the rolling average for.
        window_size (int): The size of the rolling window.

    Returns:
        numpy.array: The rolling average of the input array.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("Window size must be at least 1.")

    # Equal weights that sum to one turn the convolution into a plain mean
    kernel = np.full(window_size, 1.0 / window_size)
    return np.convolve(arr, kernel, mode='valid')



In [None]:
def chunked_average(arr, chunk_size):
    """
    Computes the average for non-overlapping chunks of the input array.

    Trailing elements that do not fill a complete chunk are ignored.
    
    Parameters:
        arr (array-like): The 1D input array (lists are accepted).
        chunk_size (int): The size of each chunk.

    Returns:
        numpy.array: The averages of the non-overlapping chunks
        (empty when the input is shorter than one chunk).

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("Chunk size must be at least 1.")

    # Accept plain sequences as well as arrays
    arr = np.asarray(arr)

    # Number of complete chunks
    num_chunks = len(arr) // chunk_size

    # One row per chunk, then average along each row
    return arr[:num_chunks * chunk_size].reshape(num_chunks, chunk_size).mean(axis=1)

In [None]:
def sliding_window_average(arr, window_size, step=1):
    """
    Apply a sliding window to a 1D numpy array, returning the average of windows of a specified size.

    :param arr: Input 1D array (lists are accepted).
    :param window_size: Size of the window.
    :param step: The step size or number of elements to slide the window by. Default is 1.
    :return: A 1D numpy array where each element is the average of a window from the input.
             Empty when the input is shorter than one window.
    :raises ValueError: If ``window_size`` or ``step`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("Window size must be at least 1.")
    if step < 1:
        raise ValueError("Step must be at least 1.")

    arr = np.asarray(arr)

    # Number of complete windows; clamp at zero so that an input much shorter
    # than the window yields an empty result instead of a negative array size.
    num_windows = max((arr.size - window_size) // step + 1, 0)

    # Output array for averages
    averages = np.zeros(num_windows)

    for i in range(num_windows):
        start = i * step
        averages[i] = np.mean(arr[start:start + window_size])

    return averages

In [None]:
def calculate_all_window_indices(original_index, window_size, step, array_length):
    """
    Calculate all the start and stop indices for sliding windows based on an original start index.

    Windows are generated while they fit entirely inside the span
    ``[original_index, original_index + array_length]``.

    :param original_index: The original index from which the first window should start.
    :param window_size: The size of each sliding window.
    :param step: The step size or number of elements to slide the window by. Must be positive.
    :param array_length: The total number of elements in the array.
    :return: A list of tuples, each containing the start and stop indices for a sliding window.
    :raises ValueError: If ``step`` is not positive (the loop would otherwise never terminate).
    """
    if step <= 0:
        raise ValueError("Step must be greater than 0.")

    # Initialize the list to hold the start and stop indices for all windows
    windows = []

    # Exclusive upper bound that no window may cross
    span_end = original_index + array_length

    current_start_index = original_index
    while current_start_index + window_size <= span_end:
        windows.append((current_start_index, current_start_index + window_size))
        current_start_index += step

    return windows

In [None]:
def calculate_angle(ax, ay, bx, by, cx, cy):
    """
    Return the unsigned angle at vertex B formed by the rays B->A and B->C.

    Parameters:
        ax, ay (float): Coordinates of point A.
        bx, by (float): Coordinates of point B, the vertex of the angle.
        cx, cy (float): Coordinates of point C.

    Returns:
        float: The smallest angle between vectors BA and BC, in radians, within the range [0, pi].
    """
    full_turn = 2 * np.pi

    # Direction of each ray measured from the positive x-axis
    heading_to_a = np.arctan2(ay - by, ax - bx)
    heading_to_c = np.arctan2(cy - by, cx - bx)

    # Counter-clockwise sweep from BA to BC, wrapped into [0, 2*pi)
    sweep = (heading_to_c - heading_to_a + full_turn) % full_turn

    # A sweep larger than half a turn is shorter when measured the other way round
    return full_turn - sweep if sweep > np.pi else sweep

# Example usage: A lies straight above the vertex B and C lies up and to the
# left of it, so the printed angle should be pi/4 radians (45 degrees).
# NOTE: these are notebook-level names; `ax` is rebound later by the
# `fig, ax = plt.subplots()` plotting cell.
ax, ay = 0, 1  # Coordinates for point A
bx, by = 0, 0  # Coordinates for point B (origin)
cx, cy = -0.5, 0.5  # Coordinates for point C

angle = calculate_angle(ax, ay, bx, by, cx, cy)
print("Angle in radians:", angle)
print("Angle in degrees:", np.degrees(angle))

In [None]:
def calculate_time_series_angles(A, B, C):
    """
    Vectorised, per-time-step version of the angle at vertex B between the rays B->A and B->C.

    Parameters:
        A, B, C (np.array): Each is a 2D numpy array of shape (T, 2) where T is the number of time steps.
                            Each array holds the x and y coordinates of points A, B, and C over time.

    Returns:
        np.array: Array of smallest angles between vectors BA and BC, in radians, within the range [0, pi].
    """
    full_turn = 2 * np.pi

    # Direction of each ray measured from the positive x-axis
    heading_to_a = np.arctan2(A[:, 1] - B[:, 1], A[:, 0] - B[:, 0])
    heading_to_c = np.arctan2(C[:, 1] - B[:, 1], C[:, 0] - B[:, 0])

    # Counter-clockwise sweep from BA to BC, wrapped into [0, 2*pi)
    sweep = (heading_to_c - heading_to_a + full_turn) % full_turn

    # Fold sweeps larger than half a turn back into [0, pi]
    return np.where(sweep > np.pi, full_turn - sweep, sweep)

In [None]:
# Concatenating and stacking arrays from all columns
def concat_arrays(row):
    """Join all arrays contained in ``row`` into one array with ``np.hstack``."""
    stacked = np.hstack(row)
    return stacked



In [None]:
def calculate_speed_from_distances(distances, dt):
    """
    Calculate the speed from a numpy array of distances measured at regular time intervals.

    The result is signed: it is negative while the distance shrinks.

    Parameters:
        distances (np.array): 1D Numpy array where each element represents a distance measured at a specific time.
        dt (float): Time interval between consecutive distance measurements.

    Returns:
        np.array: Array of speeds calculated as the change in distance divided by the time interval.
                  It has the same length as ``distances``; the first element repeats the first
                  computed speed. Inputs with fewer than two samples return zeros.
    """
    distances = np.asarray(distances)

    # With fewer than two samples there is no change to measure
    if distances.ndim == 1 and distances.size < 2:
        return np.zeros(distances.size)

    # Calculate speeds as change in distance divided by change in time
    speeds = np.diff(distances) / dt

    # Repeat the first speed at the front so the output keeps the input length
    return np.concatenate([speeds[:1], speeds])

## Inputs & Data

Explanation of each input and where it comes from.

In [None]:
# Inputs and required data loading
# Input variable names are in all-caps snake case.
# Whenever an input is changed or used for processing,
# the derived variables are in lowercase snake case.
# Index of the thorax node; used by the quick-look plotting cells
THORAX_INDEX = 1

# LFP_SPECTRAL_DF = pd.read_pickle("./proc/rce_pilot_2_03_spectral_bands.pkl")
# LFP_SPECTRAL_DF["video_name"] = LFP_SPECTRAL_DF["video_name"].apply(lambda x: x.strip(".videoTimeStamps.cameraHWSync"))

# SLEAP_DIR = os.path.join(git_root, "proc/sleap") 
# SLEAP_DIR = "/scratch/back_up/reward_competition_extention/final_proc/id_corrected"
# Folder that is searched for the SLEAP .h5 files
SLEAP_DIR = "./data"

OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs
# Box dimensions used for the pixel-to-cm conversion (presumably in cm)
MED_PC_WIDTH = 29.5
MED_PC_HEIGHT = 24
# Video frame rate; multiplies the per-frame velocity (presumably frames per second)
FRAME_RATE = 22
# NOTE(review): WINDOW_SIZE and DISTANCE_THRESHOLD are not referenced in the
# visible part of this notebook.
WINDOW_SIZE = 25
DISTANCE_THRESHOLD = 2

In [None]:
# Spreadsheet with the SLEAP file path and the start/stop frames for each subject
START_STOP_FRAME_DF = pd.read_excel("./data/rce_pilot_3_long_comp_per_subject_start_stop_video_frame.xlsx")


## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

In [None]:
# Output locations
# Input variable names are in all-caps snake case.
# Whenever an input is changed or used for processing,
# the derived variables are in lowercase snake case.
# NOTE: this rebinds OUTPUT_DIR from the inputs cell (same folder, trailing slash added)
OUTPUT_DIR = r"./proc/" # where data is saved should always be shown in the inputs
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_PREFIX = "rce_pilot_3"

In [None]:
# File name for the combined output pickle
# NOTE(review): nothing in the visible part of the notebook writes this file.
FULL_LFP_TRACES_PKL = "{}_04_spectral_and_sleap.pkl".format(OUTPUT_PREFIX)

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

# Getting the videos where the subject is in the recording

### Looking at when each subject was in each video

In [None]:
# Rows without a SLEAP file path cannot be processed, so drop them
START_STOP_FRAME_DF = START_STOP_FRAME_DF.dropna(subset=["file_path"])

- Getting the names of the SLEAP and video files that each subject appears in

In [None]:
# File name of the SLEAP output, without its directory
START_STOP_FRAME_DF["sleap_name"] = START_STOP_FRAME_DF["file_path"].apply(lambda x: os.path.basename(x))
# Video name = the first two dot-separated pieces of that file name
START_STOP_FRAME_DF["video_name"] = START_STOP_FRAME_DF["file_path"].apply(lambda x: ".".join(os.path.basename(x).split(".")[:2]))
# Store the frame bounds as integers
START_STOP_FRAME_DF["start_frame"] = START_STOP_FRAME_DF["start_frame"].astype(int)
START_STOP_FRAME_DF["stop_frame"] = START_STOP_FRAME_DF["stop_frame"].astype(int)

In [None]:
# The raw path and free-text notes are no longer needed (ignored if absent)
START_STOP_FRAME_DF = START_STOP_FRAME_DF.drop(columns=["file_path", "notes"], errors="ignore")

In [None]:
START_STOP_FRAME_DF["video_name"].unique()

In [None]:
START_STOP_FRAME_DF.head()

- Splitting each row into a separate row for each subject in the video

In [None]:
# "a_b" -> ["a", "b"]: the list of all subjects tracked in the file
START_STOP_FRAME_DF["tracked_subject"] = START_STOP_FRAME_DF["tracked_subject"].apply(lambda x: str(x).split("_"))
# Copy of the list that is exploded below; `tracked_subject` keeps the full list
START_STOP_FRAME_DF["current_subject"] = START_STOP_FRAME_DF["tracked_subject"]

In [None]:
# One row per (file, subject) pair
START_STOP_FRAME_DF = START_STOP_FRAME_DF.explode("current_subject")

In [None]:
START_STOP_FRAME_DF.head()

In [None]:
START_STOP_FRAME_DF.shape

# Reading in the h5 files between recordings

In [None]:
# `**` only descends through nested folders when recursive=True; without it the
# pattern behaves like `*` and matches exactly one directory level.
# Sorting makes the match that is picked afterwards deterministic.
START_STOP_FRAME_DF["sleap_glob"] = START_STOP_FRAME_DF["sleap_name"].apply(lambda x: sorted(glob.glob(os.path.join(SLEAP_DIR, "**", x), recursive=True)))


In [None]:
# List the SLEAP files that could not be found under SLEAP_DIR
for name in START_STOP_FRAME_DF[START_STOP_FRAME_DF["sleap_glob"].apply(lambda x: len(x) == 0)]["sleap_name"]:
    print(name)

In [None]:
# Keep only rows with at least one matching file and renumber the rows
START_STOP_FRAME_DF = START_STOP_FRAME_DF[START_STOP_FRAME_DF['sleap_glob'].apply(lambda x: len(x) >= 1)]
START_STOP_FRAME_DF = START_STOP_FRAME_DF.reset_index(drop=True)




In [None]:
# Use the first match when several files share the same name
START_STOP_FRAME_DF["sleap_path"] = START_STOP_FRAME_DF["sleap_glob"].apply(lambda x: x[0])

In [None]:
# Load the SLEAP payload of every file through the project helper
START_STOP_FRAME_DF["all_sleap_data"] = START_STOP_FRAME_DF["sleap_path"].apply(lambda x: sleap.process_pose.extract_sleap_data(x))


In [None]:
# Node (body part) names stored in each file
START_STOP_FRAME_DF["body_parts"] = START_STOP_FRAME_DF["sleap_path"].apply(lambda x: sleap.process_pose.get_node_names_from_sleap(x))

In [None]:
START_STOP_FRAME_DF["body_parts"].iloc[0]

In [None]:
# Pull the pose coordinates out of the loaded payload
START_STOP_FRAME_DF["locations"] = START_STOP_FRAME_DF["all_sleap_data"].apply(lambda x: x["locations"])

In [None]:
# Names of the tracks; their positions are used to index `locations` below
START_STOP_FRAME_DF["track_names"] = START_STOP_FRAME_DF["all_sleap_data"].apply(lambda x: x["track_names"])

In [None]:
START_STOP_FRAME_DF["locations"].iloc[0].shape

In [None]:
START_STOP_FRAME_DF.head()

In [None]:
# Getting the indexes of each subject from the track list
# (subjects that have no track with their name are left out of the mapping)
START_STOP_FRAME_DF["subject_to_index"] = START_STOP_FRAME_DF.apply(lambda x: {k: x["track_names"].index(k) for k in x["tracked_subject"] if k in x["track_names"]}, axis=1)

In [None]:
START_STOP_FRAME_DF["subject_to_index"].head()

In [None]:
# For every subject found in the file, slice that subject's track out of the
# locations array (the last axis is indexed by track position).
# The earlier cell that merely copied `subject_to_index` into this column was
# overwritten straight away, so it has been folded into this one.
START_STOP_FRAME_DF["subject_to_tracks"] = START_STOP_FRAME_DF.apply(lambda x: {k: x["locations"][:,:,:,v] for k, v in x["subject_to_index"].items()}, axis=1)

In [None]:
# Sanity checks: which subjects received a track in each file
START_STOP_FRAME_DF["subject_to_tracks"].head()

In [None]:
START_STOP_FRAME_DF["subject_to_tracks"].apply(lambda x: x.keys()).head()

In [None]:
START_STOP_FRAME_DF.head()

## Getting the coordinates of the corners

In [None]:
START_STOP_FRAME_DF["sleap_path"].iloc[0]

In [None]:
# Each corner file is in the same folder and has the same base name as the pose tracking file.
# The path is derived by swapping the "id_corrected.h5" suffix for "corner.h5"
# and removing the processing tags from the name.
START_STOP_FRAME_DF["corner_path"] = START_STOP_FRAME_DF["sleap_path"].apply(lambda x: x.replace("id_corrected.h5", "corner.h5").replace(".fixed", "").replace(".round_1", "").replace(".1_subj", "").replace(".2_subj", ""))


In [None]:
START_STOP_FRAME_DF["corner_path"].iloc[0]

In [None]:
# Getting the node names of the corner file; their order is used to index the corner coordinates
START_STOP_FRAME_DF["corner_parts"] = START_STOP_FRAME_DF["corner_path"].apply(lambda x: sleap.process_pose.get_node_names_from_sleap(x))

In [None]:
START_STOP_FRAME_DF["corner_parts"]

In [None]:
# TODO: Remove this once corner files are fixed
# Keeps only files whose corner labels include the reward port.
# NOTE: the index is not reset here, so row labels may have gaps afterwards.
START_STOP_FRAME_DF = START_STOP_FRAME_DF[START_STOP_FRAME_DF["corner_parts"].apply(lambda x: "reward_port" in x)]

In [None]:
# Getting the coordinates of all the corners
START_STOP_FRAME_DF["corner_to_coordinate"] = START_STOP_FRAME_DF["corner_path"].apply(lambda x: sleap.process_pose.get_sleap_tracks_from_h5(x))

In [None]:
# Parsing out each corner and creating a dictionary of name to coordinates
# (the second axis of the array is indexed by node position)
START_STOP_FRAME_DF["corner_to_coordinate"] = START_STOP_FRAME_DF.apply(lambda x: {part: x["corner_to_coordinate"][:,index,:,:] for index, part in enumerate(x["corner_parts"])}, axis=1)

In [None]:
START_STOP_FRAME_DF["corner_to_coordinate"]

In [None]:
# Filtering out all the NaNs because there's only one labeled frame;
# the first two remaining values are kept (presumably the x and y of that frame)
START_STOP_FRAME_DF["corner_to_coordinate"] = START_STOP_FRAME_DF.apply(lambda x: {k: v[~np.isnan(v)][:2] for k, v in x["corner_to_coordinate"].items()}, axis=1)

In [None]:
START_STOP_FRAME_DF["corner_to_coordinate"]

# Getting the distances between corners

- Getting the average width and height so that we can convert pixels to cm

In [None]:
# Using the x-coordinates for the width (right minus left, for both box edges)
START_STOP_FRAME_DF["bottom_width"] = START_STOP_FRAME_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_right"][0] - x["box_bottom_left"][0])
START_STOP_FRAME_DF["top_width"] = START_STOP_FRAME_DF["corner_to_coordinate"].apply(lambda x: x["box_top_right"][0] - x["box_top_left"][0])


In [None]:
# Using the y-coordinates for the height (bottom minus top, for both box sides)
START_STOP_FRAME_DF["right_height"] = START_STOP_FRAME_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_right"][1] - x["box_top_right"][1])
START_STOP_FRAME_DF["left_height"] = START_STOP_FRAME_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_left"][1] - x["box_top_left"][1])


In [None]:
# Averaging the width and height by adding both sides and then taking the mean
START_STOP_FRAME_DF["average_height"] = START_STOP_FRAME_DF.apply(lambda row: (row["right_height"] + row["left_height"])/2, axis=1)
START_STOP_FRAME_DF["average_width"] = START_STOP_FRAME_DF.apply(lambda row: (row["bottom_width"] + row["top_width"])/2, axis=1)

- Getting the pixel-to-cm ratio

In [None]:
# Known box size divided by its size in pixels gives the scale factor per axis
START_STOP_FRAME_DF["width_ratio"] = MED_PC_WIDTH / START_STOP_FRAME_DF["average_width"]
START_STOP_FRAME_DF["height_ratio"] = MED_PC_HEIGHT / START_STOP_FRAME_DF["average_height"]

In [None]:
START_STOP_FRAME_DF["height_ratio"]

In [None]:
START_STOP_FRAME_DF["width_ratio"]

## Converting Pixels to cm

In [None]:
# "a_b" -> ["a", "b"]
# NOTE(review): unlike `tracked_subject`, the value is not passed through str(), so a missing (NaN) entry would fail here.
START_STOP_FRAME_DF["in_video_subjects"] = START_STOP_FRAME_DF["in_video_subjects"].apply(lambda x: x.split("_"))

In [None]:
# Keep only the tracks of subjects listed in `in_video_subjects`
START_STOP_FRAME_DF["subject_to_tracks"] = START_STOP_FRAME_DF.apply(lambda x: {k: v for k, v in x["subject_to_tracks"].items() if k in x["in_video_subjects"]}, axis=1)

- Converting the X-dimension

In [None]:
START_STOP_FRAME_DF["subject_to_tracks"].head()

In [None]:
# Scale dimension 0 (x) of every track by the width ratio, then pass the result
# through the project's fill_missing helper (presumably interpolates untracked frames).
START_STOP_FRAME_DF["rescaled_locations"] = START_STOP_FRAME_DF.apply(lambda x: {key: sleap.process_pose.fill_missing(sleap.process_pose.rescale_dimension_in_array(value, dimension=0, ratio=x["width_ratio"])) for key, value in x["subject_to_tracks"].items()}, axis=1)

- Converting the Y-dimension

In [None]:
# Scale dimension 1 (y) of the already x-scaled tracks by the height ratio
START_STOP_FRAME_DF["rescaled_locations"] = START_STOP_FRAME_DF.apply(lambda x: {key: sleap.process_pose.rescale_dimension_in_array(value, dimension=1, ratio=x["height_ratio"]) for key, value in x["rescaled_locations"].items()}, axis=1)

In [None]:
START_STOP_FRAME_DF["corner_to_coordinate"]

In [None]:
START_STOP_FRAME_DF.head()

In [None]:
# Normalize dictionary column
normalized = pd.json_normalize(START_STOP_FRAME_DF["corner_to_coordinate"])

In [None]:
normalized.head()

In [None]:


# Drop the original column and concat the normalized DataFrame
START_STOP_FRAME_DF = pd.concat([START_STOP_FRAME_DF.drop(["corner_to_coordinate"], axis=1), normalized], axis=1)


In [None]:
START_STOP_FRAME_DF.head()

In [None]:
# Rows without reward-port coordinates cannot be used for the port features
START_STOP_FRAME_DF = START_STOP_FRAME_DF.dropna(subset=["reward_port"])

- Converting the corner coordinates into cms

In [None]:
# Scale every corner's x by the width ratio and y by the height ratio.
# NOTE: the corner names are taken from the first row only, i.e. all files are
# assumed to label the same set of corners.
for corner in START_STOP_FRAME_DF["corner_parts"].iloc[0]:
    START_STOP_FRAME_DF[corner] = START_STOP_FRAME_DF.apply(lambda x: [x[corner][0]*x["width_ratio"], x[corner][1]*x["height_ratio"]], axis=1)

## Looking over the tracks

In [None]:
# Row of START_STOP_FRAME_DF that is inspected in the cells below
FILE_INDEX = 0

In [None]:
START_STOP_FRAME_DF["sleap_path"].iloc[FILE_INDEX]

In [None]:
START_STOP_FRAME_DF["rescaled_locations"].head()

In [None]:
START_STOP_FRAME_DF.columns

In [None]:
# The HDF5 file is opened only to list its datasets and node names;
# the locations come from the rescaled tracks computed above.
with h5py.File(START_STOP_FRAME_DF["sleap_path"].iloc[FILE_INDEX], "r") as f:
    dset_names = list(f.keys())
    current_subject = START_STOP_FRAME_DF["current_subject"].iloc[FILE_INDEX]
    locations = START_STOP_FRAME_DF["rescaled_locations"].iloc[FILE_INDEX][current_subject]
    # Node names are stored as bytes and decoded to str here
    node_names = [n.decode() for n in f["node_names"][:]]
    
print("===HDF5 datasets===")
print(dset_names)
print()

print("===locations data shape===")
print(locations.shape)
print()

print("===nodes===")
for i, name in enumerate(node_names):
    print(f"{i}: {name}")
print()

In [None]:
# Thorax coordinates of the inspected subject for every frame
thorax_loc = locations[:, THORAX_INDEX, :]

In [None]:
# X and Y position of the thorax over time
fig, ax = plt.subplots()

plt.plot(thorax_loc[:,0],label='X-coordinates')
# Converting to negative so that we can see both x and y track
plt.plot(-1*thorax_loc[:,1], label='Y-coordinates')

plt.legend(loc="center right")
plt.title('Thorax locations')
plt.xlabel("Time in frames")
plt.ylabel("Coordinate Position")

In [None]:
# Path of the thorax in the x-y plane
plt.figure(figsize=(7,7))
plt.plot(thorax_loc[:,0],thorax_loc[:,1])


plt.title('Thorax tracks')
plt.xlabel("X-Coordinates")
plt.ylabel("Y-Coordinates")


## Creating an individual column for each pose tracking

In [None]:
START_STOP_FRAME_DF = START_STOP_FRAME_DF.dropna(subset="current_subject")

In [None]:
START_STOP_FRAME_DF.head()

In [None]:
# The agent is every tracked subject other than the row's current subject
START_STOP_FRAME_DF["agent"] = START_STOP_FRAME_DF.apply(lambda x: list((set(x["tracked_subject"]) - set([x["current_subject"]]))), axis=1)

In [None]:
# Rows that do not have exactly one other subject
START_STOP_FRAME_DF[START_STOP_FRAME_DF["agent"].apply(lambda x: len(x) != 1)]

In [None]:
# Unwrap the single agent name; rows without exactly one agent get None
START_STOP_FRAME_DF["agent"] = START_STOP_FRAME_DF["agent"].apply(lambda x: x[0] if len(x) == 1 else None)

In [None]:
# NOTE: raises KeyError when the current subject has no rescaled track
START_STOP_FRAME_DF["subject_locations"] = START_STOP_FRAME_DF.apply(lambda x: x["rescaled_locations"][x["current_subject"]] , axis=1)

In [None]:
# NaN marks rows without an agent or without a rescaled track for the agent
START_STOP_FRAME_DF["agent_locations"] = START_STOP_FRAME_DF.apply(lambda x: x["rescaled_locations"].get(x["agent"], np.nan) if x["agent"] else np.nan, axis=1)

## Getting all the timestamps

In [None]:
START_STOP_FRAME_DF.columns

In [None]:
# 1-based frame numbers, one per row of the subject's location array
START_STOP_FRAME_DF["frame_index"] = START_STOP_FRAME_DF["subject_locations"].apply(lambda x: np.arange(0, x.shape[0]) + 1)

## Removing unnecessary columns

In [None]:
# Drop the intermediate columns; names that are already gone are ignored
START_STOP_FRAME_DF = START_STOP_FRAME_DF.drop(["sleap_glob", "subject_to_index", "subject_to_tracks", "corner_parts", "corner_to_coordinate", "bottom_width", "top_width", "right_height", "left_height", "average_height", "average_width", "width_ratio", "height_ratio", 'locations', 'track_names', 'sleap_path', 'corner_path', 'all_sleap_data', 'rescaled_locations'], errors="ignore", axis=1)

In [None]:
START_STOP_FRAME_DF.columns

In [None]:
START_STOP_FRAME_DF.head()

# Calculate relevant features

## Features to calculate

1. velocity of the mice
2. distance between thoraxes
3. distances to port of mice
4. angles of orientation of mice to port




Original features to calculate
1. distance between thoraxes
2. velocity of mouse 1 + velocity of mouse 2
3. | velocity of mouse 1 - velocity of mouse 2 |
4. sum of angles of orientation of mice to port
5. | difference of angles of orientation of mice to port |
6. sum of distances to port of mice
7. | differences of distances to port of mice |

In [None]:
# Columns available for the feature calculations below
START_STOP_FRAME_DF.columns

# Getting relevant body parts

In [None]:
START_STOP_FRAME_DF["body_parts"].apply(lambda x: x.index("thorax"))

In [None]:
# Slice one body part out of the subject's locations; the node position is
# looked up by name in each row's `body_parts` list.
START_STOP_FRAME_DF["subject_thorax"] = START_STOP_FRAME_DF.apply(lambda x: x["subject_locations"][:,x["body_parts"].index("thorax"),:], axis=1)
START_STOP_FRAME_DF["subject_nose"] = START_STOP_FRAME_DF.apply(lambda x: x["subject_locations"][:,x["body_parts"].index("nose"),:], axis=1)
START_STOP_FRAME_DF["subject_tail_base"] = START_STOP_FRAME_DF.apply(lambda x: x["subject_locations"][:,x["body_parts"].index("tail_base"),:], axis=1)


In [None]:
START_STOP_FRAME_DF["agent_thorax"] = START_STOP_FRAME_DF.apply(lambda x: x["agent_locations"][:,x["body_parts"].index("thorax"),:], axis=1)
START_STOP_FRAME_DF["agent_nose"] = START_STOP_FRAME_DF.apply(lambda x: x["agent_locations"][:,x["body_parts"].index("nose"),:], axis=1)
START_STOP_FRAME_DF["agent_tail_base"] = START_STOP_FRAME_DF.apply(lambda x: x["subject_locations"][:,x["body_parts"].index("tail_base"),:], axis=1)


In [None]:
START_STOP_FRAME_DF.head()

# Calculate velocity

In [None]:
# Speed of the thorax: the per-frame result of compute_velocity is multiplied by
# FRAME_RATE; the smoothing window spans FRAME_RATE*3 frames.
# NOTE(review): FRAME_RATE*3 = 66 is an even window length; some SciPy releases
# only accept an odd `window_length` for savgol_filter — TODO confirm against the installed version.
START_STOP_FRAME_DF["subject_thorax_velocity"] = START_STOP_FRAME_DF.apply(lambda x: compute_velocity(x["subject_thorax"], window_size=FRAME_RATE*3) * FRAME_RATE, axis=1)
# Stored as float32; NaN placeholders are passed through unchanged
START_STOP_FRAME_DF["subject_thorax_velocity"] = START_STOP_FRAME_DF["subject_thorax_velocity"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

In [None]:
START_STOP_FRAME_DF["agent_locations"]

In [None]:
# Same for the agent; rows without agent locations get NaN
START_STOP_FRAME_DF["agent_thorax_velocity"] = START_STOP_FRAME_DF.apply(lambda x: compute_velocity(x["agent_thorax"], window_size=FRAME_RATE*3) * FRAME_RATE if x["agent_locations"] is not np.nan else np.nan, axis=1)
START_STOP_FRAME_DF["agent_thorax_velocity"] = START_STOP_FRAME_DF["agent_thorax_velocity"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)


In [None]:
START_STOP_FRAME_DF["subject_thorax_velocity"].iloc[0].shape

## Calculate relevant distances

1. distance between thoraxes

In [None]:
# Per-frame Euclidean distance between the two thoraxes.
# Subtracting a NaN placeholder broadcasts, so rows without an agent yield an all-NaN array.
START_STOP_FRAME_DF["subject_thorax_to_agent_thorax"] = START_STOP_FRAME_DF.apply(lambda x: np.linalg.norm(x["subject_thorax"] - x["agent_thorax"], axis=1),  axis=1)
START_STOP_FRAME_DF["subject_thorax_to_agent_thorax"] = START_STOP_FRAME_DF["subject_thorax_to_agent_thorax"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

In [None]:
# Subject nose to agent tail base
START_STOP_FRAME_DF["subject_nose_to_agent_tail_base"] = START_STOP_FRAME_DF.apply(lambda x: np.linalg.norm(x["subject_nose"] - x["agent_tail_base"], axis=1),  axis=1)
START_STOP_FRAME_DF["subject_nose_to_agent_tail_base"] = START_STOP_FRAME_DF["subject_nose_to_agent_tail_base"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

In [None]:
# Subject tail base to agent nose
START_STOP_FRAME_DF["subject_tail_base_to_agent_nose"] = START_STOP_FRAME_DF.apply(lambda x: np.linalg.norm(x["subject_tail_base"] - x["agent_nose"], axis=1),  axis=1)
START_STOP_FRAME_DF["subject_tail_base_to_agent_nose"] = START_STOP_FRAME_DF["subject_tail_base_to_agent_nose"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

2. distances to port of mice

In [None]:
START_STOP_FRAME_DF["subject_thorax_to_reward_port"] = START_STOP_FRAME_DF.apply(lambda x: np.linalg.norm(x["subject_thorax"] - x["reward_port"], axis=1),  axis=1)
START_STOP_FRAME_DF["subject_thorax_to_reward_port"] = START_STOP_FRAME_DF["subject_thorax_to_reward_port"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

In [None]:
START_STOP_FRAME_DF["agent_thorax_to_reward_port"] = START_STOP_FRAME_DF.apply(lambda x: np.linalg.norm(x["agent_thorax"] - x["reward_port"], axis=1) if x["agent_locations"] is not np.nan else np.nan,  axis=1)
START_STOP_FRAME_DF["agent_thorax_to_reward_port"] = START_STOP_FRAME_DF["agent_thorax_to_reward_port"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

In [None]:
START_STOP_FRAME_DF["subject_nose_to_reward_port"] = START_STOP_FRAME_DF.apply(lambda x: np.linalg.norm(x["subject_nose"] - x["reward_port"], axis=1),  axis=1)
START_STOP_FRAME_DF["subject_nose_to_reward_port"] = START_STOP_FRAME_DF["subject_nose_to_reward_port"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

In [None]:
START_STOP_FRAME_DF["agent_nose_to_reward_port"] = START_STOP_FRAME_DF.apply(lambda x: np.linalg.norm(x["agent_nose"] - x["reward_port"], axis=1),  axis=1)
START_STOP_FRAME_DF["agent_nose_to_reward_port"] = START_STOP_FRAME_DF["agent_nose_to_reward_port"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

## Calculate speed from distances

In [None]:
dt = 5

In [None]:
START_STOP_FRAME_DF["subject_speed_to_reward_port"] = START_STOP_FRAME_DF.apply(lambda x: calculate_speed_from_distances(x["subject_thorax_to_reward_port"], dt),  axis=1)
START_STOP_FRAME_DF["subject_speed_to_reward_port"] = START_STOP_FRAME_DF["subject_speed_to_reward_port"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

In [None]:
START_STOP_FRAME_DF["agent_speed_to_reward_port"] = START_STOP_FRAME_DF.apply(lambda x: calculate_speed_from_distances(x["agent_thorax_to_reward_port"], dt),  axis=1)
START_STOP_FRAME_DF["agent_speed_to_reward_port"] = START_STOP_FRAME_DF["agent_speed_to_reward_port"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

In [None]:
START_STOP_FRAME_DF["subject_to_agent_speed"] = START_STOP_FRAME_DF.apply(lambda x: calculate_speed_from_distances(x["subject_thorax_to_agent_thorax"], dt),  axis=1)
START_STOP_FRAME_DF["subject_to_agent_speed"] = START_STOP_FRAME_DF["subject_to_agent_speed"].apply(lambda x: x.astype(np.float32) if x is not np.nan else np.nan)

## Calculating orientation of the mouse

4. Angles of orientation of the mice relative to the reward port and to each other

In [None]:
def _subject_port_angle(row):
    """Angle series from the subject's thorax, nose and the reward port (one port row per nose row)."""
    nose = row["subject_nose"]
    # Repeat the reward-port coordinates so there is one row per nose row.
    port_per_frame = np.tile(row["reward_port"], (nose.shape[0], 1))
    return calculate_time_series_angles(row["subject_thorax"], nose, port_per_frame)


START_STOP_FRAME_DF["subject_to_reward_port_angle"] = START_STOP_FRAME_DF.apply(
    _subject_port_angle, axis=1
)
# Store as float32; NaN placeholders are passed through untouched.
START_STOP_FRAME_DF["subject_to_reward_port_angle"] = START_STOP_FRAME_DF[
    "subject_to_reward_port_angle"
].apply(lambda angle: np.nan if angle is np.nan else angle.astype(np.float32))

In [None]:
def _agent_port_angle(row):
    """Angle series from the agent's thorax, nose and the reward port (one port row per nose row)."""
    nose = row["agent_nose"]
    # Repeat the reward-port coordinates so there is one row per nose row.
    port_per_frame = np.tile(row["reward_port"], (nose.shape[0], 1))
    return calculate_time_series_angles(row["agent_thorax"], nose, port_per_frame)


START_STOP_FRAME_DF["agent_to_reward_port_angle"] = START_STOP_FRAME_DF.apply(
    _agent_port_angle, axis=1
)
# Store as float32; NaN placeholders are passed through untouched.
START_STOP_FRAME_DF["agent_to_reward_port_angle"] = START_STOP_FRAME_DF[
    "agent_to_reward_port_angle"
].apply(lambda angle: np.nan if angle is np.nan else angle.astype(np.float32))

In [None]:
def _subject_agent_orientation(row):
    """Angle between the subject's thorax->nose vector and the agent's thorax->nose vector."""
    return calculate_angles_from_arrays(
        row["subject_thorax"],
        row["subject_nose"],
        row["agent_thorax"],
        row["agent_nose"],
    )


START_STOP_FRAME_DF["subject_to_agent_orientation"] = START_STOP_FRAME_DF.apply(
    _subject_agent_orientation, axis=1
)
# Store as float32; NaN placeholders are passed through untouched.
START_STOP_FRAME_DF["subject_to_agent_orientation"] = START_STOP_FRAME_DF[
    "subject_to_agent_orientation"
].apply(lambda angle: np.nan if angle is np.nan else angle.astype(np.float32))

In [None]:
START_STOP_FRAME_DF.head()

# Making social features (sums and differences between subject and agent)

In [None]:
def _thorax_velocity_sum(row):
    """Subject plus agent thorax velocity."""
    subject, agent = row["subject_thorax_velocity"], row["agent_thorax_velocity"]
    return subject + agent


def _thorax_velocity_diff(row):
    """Absolute difference between subject and agent thorax velocity."""
    subject, agent = row["subject_thorax_velocity"], row["agent_thorax_velocity"]
    return np.abs(subject - agent)


START_STOP_FRAME_DF["thorax_velocity_sum"] = START_STOP_FRAME_DF.apply(_thorax_velocity_sum, axis=1)
START_STOP_FRAME_DF["thorax_velocity_diff"] = START_STOP_FRAME_DF.apply(_thorax_velocity_diff, axis=1)

In [None]:
def _thorax_to_port_sum(row):
    """Subject plus agent thorax-to-reward-port distance."""
    subject, agent = row["subject_thorax_to_reward_port"], row["agent_thorax_to_reward_port"]
    return subject + agent


def _thorax_to_port_diff(row):
    """Absolute difference between subject and agent thorax-to-reward-port distance."""
    subject, agent = row["subject_thorax_to_reward_port"], row["agent_thorax_to_reward_port"]
    return np.abs(subject - agent)


START_STOP_FRAME_DF["thorax_to_reward_port_sum"] = START_STOP_FRAME_DF.apply(_thorax_to_port_sum, axis=1)
START_STOP_FRAME_DF["thorax_to_reward_port_diff"] = START_STOP_FRAME_DF.apply(_thorax_to_port_diff, axis=1)

In [None]:
def _nose_to_port_sum(row):
    """Subject plus agent nose-to-reward-port distance."""
    subject, agent = row["subject_nose_to_reward_port"], row["agent_nose_to_reward_port"]
    return subject + agent


def _nose_to_port_diff(row):
    """Absolute difference between subject and agent nose-to-reward-port distance."""
    subject, agent = row["subject_nose_to_reward_port"], row["agent_nose_to_reward_port"]
    return np.abs(subject - agent)


START_STOP_FRAME_DF["nose_to_reward_port_sum"] = START_STOP_FRAME_DF.apply(_nose_to_port_sum, axis=1)
START_STOP_FRAME_DF["nose_to_reward_port_diff"] = START_STOP_FRAME_DF.apply(_nose_to_port_diff, axis=1)

In [None]:
def _port_angle_sum(row):
    """Subject plus agent angle to the reward port."""
    subject, agent = row["subject_to_reward_port_angle"], row["agent_to_reward_port_angle"]
    return subject + agent


def _port_angle_diff(row):
    """Absolute difference between subject and agent angle to the reward port."""
    subject, agent = row["subject_to_reward_port_angle"], row["agent_to_reward_port_angle"]
    return np.abs(subject - agent)


START_STOP_FRAME_DF["to_reward_port_angle_sum"] = START_STOP_FRAME_DF.apply(_port_angle_sum, axis=1)
START_STOP_FRAME_DF["to_reward_port_angle_diff"] = START_STOP_FRAME_DF.apply(_port_angle_diff, axis=1)

In [None]:
def _nose_to_tail_sum(row):
    """Sum of the two nose-to-tail-base distances (agent nose to subject tail, subject nose to agent tail)."""
    agent_nose_side = row["subject_tail_base_to_agent_nose"]
    subject_nose_side = row["subject_nose_to_agent_tail_base"]
    return agent_nose_side + subject_nose_side


def _nose_to_tail_diff(row):
    """Absolute difference of the two nose-to-tail-base distances."""
    agent_nose_side = row["subject_tail_base_to_agent_nose"]
    subject_nose_side = row["subject_nose_to_agent_tail_base"]
    return np.abs(agent_nose_side - subject_nose_side)


START_STOP_FRAME_DF["nose_to_tail_sum"] = START_STOP_FRAME_DF.apply(_nose_to_tail_sum, axis=1)
START_STOP_FRAME_DF["nose_to_tail_diff"] = START_STOP_FRAME_DF.apply(_nose_to_tail_diff, axis=1)

In [None]:
def _port_speed_sum(row):
    """Subject plus agent speed toward the reward port."""
    subject, agent = row["subject_speed_to_reward_port"], row["agent_speed_to_reward_port"]
    return subject + agent


def _port_speed_diff(row):
    """Absolute difference between subject and agent speed toward the reward port."""
    subject, agent = row["subject_speed_to_reward_port"], row["agent_speed_to_reward_port"]
    return np.abs(subject - agent)


START_STOP_FRAME_DF["speed_to_reward_port_sum"] = START_STOP_FRAME_DF.apply(_port_speed_sum, axis=1)
START_STOP_FRAME_DF["speed_to_reward_port_diff"] = START_STOP_FRAME_DF.apply(_port_speed_diff, axis=1)

In [None]:
START_STOP_FRAME_DF.columns

In [None]:
# # based on each subjects individually
# features_columns = ['subject_thorax_velocity', 'agent_thorax_velocity',
#        'subject_thorax_to_agent_thorax', 'subject_thorax_to_reward_port',
#        'agent_thorax_to_reward_port', 'subject_to_reward_port_angle',
#        'agent_to_reward_port_angle']

In [None]:
# Pairwise (subject + agent) feature set: velocity, distances, port angles,
# nose-to-tail distances and mutual orientation.
features_columns = [
    "frame_index",
    "thorax_velocity_sum",
    "thorax_velocity_diff",
    "subject_thorax_to_agent_thorax",
    "thorax_to_reward_port_sum",
    "thorax_to_reward_port_diff",
    "to_reward_port_angle_sum",
    "to_reward_port_angle_diff",
    "nose_to_tail_sum",
    "nose_to_tail_diff",
    "subject_to_agent_orientation",
]

In [None]:
# Pairwise (subject + agent) feature set: velocity, distances and port angles only.
features_columns = [
    "frame_index",
    "thorax_velocity_sum",
    "thorax_velocity_diff",
    "subject_thorax_to_agent_thorax",
    "thorax_to_reward_port_sum",
    "thorax_to_reward_port_diff",
    "to_reward_port_angle_sum",
    "to_reward_port_angle_diff",
]

In [None]:
# Pairwise (subject + agent) feature set without the thorax-to-thorax distance.
features_columns = [
    "frame_index",
    "thorax_velocity_sum",
    "thorax_velocity_diff",
    "thorax_to_reward_port_sum",
    "thorax_to_reward_port_diff",
    "to_reward_port_angle_sum",
    "to_reward_port_angle_diff",
    "nose_to_tail_sum",
    "nose_to_tail_diff",
    "subject_to_agent_orientation",
]

In [None]:
# Pairwise feature set plus per-animal nose-to-port distances.
features_columns = [
    "frame_index",
    "thorax_velocity_sum",
    "thorax_velocity_diff",
    "thorax_to_reward_port_sum",
    "thorax_to_reward_port_diff",
    "to_reward_port_angle_sum",
    "to_reward_port_angle_diff",
    "nose_to_tail_sum",
    "nose_to_tail_diff",
    "subject_to_agent_orientation",
    "subject_nose_to_reward_port",
    "agent_nose_to_reward_port",
    # NOTE(review): no visible cell creates a "speed_to_reward_port" column
    # (only subject_/agent_ prefixed and _sum/_diff suffixed variants) -- confirm.
    "speed_to_reward_port",
]

In [None]:
# Pairwise feature set including nose-to-port and speed-to-port sums/differences.
features_columns = [
    "frame_index",
    "thorax_velocity_sum",
    "thorax_velocity_diff",
    "thorax_to_reward_port_sum",
    "thorax_to_reward_port_diff",
    "to_reward_port_angle_sum",
    "to_reward_port_angle_diff",
    "nose_to_tail_sum",
    "nose_to_tail_diff",
    "subject_to_agent_orientation",
    "nose_to_reward_port_sum",
    "nose_to_reward_port_diff",
    "speed_to_reward_port_sum",
    "speed_to_reward_port_diff",
]

In [None]:
# Pairwise feature set with the thorax velocity / thorax-to-port entries switched off
# (left commented so they can be toggled back on).
features_columns = [
    "frame_index",
    # "thorax_velocity_sum",
    # "thorax_velocity_diff",
    # "thorax_to_reward_port_sum",
    # "thorax_to_reward_port_diff",
    "to_reward_port_angle_sum",
    "to_reward_port_angle_diff",
    "nose_to_tail_sum",
    "nose_to_tail_diff",
    "subject_to_agent_orientation",
    "nose_to_reward_port_sum",
    "nose_to_reward_port_diff",
    "speed_to_reward_port_sum",
    "speed_to_reward_port_diff",
    "subject_to_agent_speed",
]

In [None]:
# Pairwise feature set with the velocity, port-angle and orientation entries switched off
# (left commented so they can be toggled back on).
features_columns = [
    "frame_index",
    # "thorax_velocity_sum",
    # "thorax_velocity_diff",
    "thorax_to_reward_port_sum",
    "thorax_to_reward_port_diff",
    # "to_reward_port_angle_sum",
    # "to_reward_port_angle_diff",
    "nose_to_tail_sum",
    "nose_to_tail_diff",
    # "subject_to_agent_orientation",
    "nose_to_reward_port_sum",
    "nose_to_reward_port_diff",
    "speed_to_reward_port_sum",
    "speed_to_reward_port_diff",
    "subject_to_agent_speed",
]

In [None]:
# Pairwise feature set with thorax-to-thorax distance and mutual orientation switched off
# (left commented so they can be toggled back on).
features_columns = [
    "frame_index",
    # "subject_thorax_to_agent_thorax",
    "thorax_velocity_sum",
    "thorax_velocity_diff",
    "thorax_to_reward_port_sum",
    "thorax_to_reward_port_diff",
    "to_reward_port_angle_sum",
    "to_reward_port_angle_diff",
    "nose_to_tail_sum",
    "nose_to_tail_diff",
    # "subject_to_agent_orientation",
    "nose_to_reward_port_sum",
    "nose_to_reward_port_diff",
    "speed_to_reward_port_sum",
    "speed_to_reward_port_diff",
    "subject_to_agent_speed",
]

In [None]:
# Full pairwise (subject + agent) feature set, grouped by kind.
# Every entry ends with a comma: adjacent string literals without one are
# silently concatenated into a single, non-existent column name.
features_columns = [
    'frame_index',
    # Distance metrics
    'subject_thorax_to_agent_thorax',
    'thorax_to_reward_port_sum',
    'thorax_to_reward_port_diff',
    'nose_to_tail_sum',
    'nose_to_tail_diff',
    'nose_to_reward_port_sum',
    'nose_to_reward_port_diff',
    # Velocity and speed metrics
    'thorax_velocity_sum',
    'thorax_velocity_diff',
    'speed_to_reward_port_sum',
    'speed_to_reward_port_diff',
    'subject_to_agent_speed',
    # Orientation and angle
    'to_reward_port_angle_sum',
    'to_reward_port_angle_diff',
    'subject_to_agent_orientation',
]

In [None]:
# based on both subjects together
# features_columns = ['frame_index', 'thorax_velocity_sum', 'thorax_velocity_diff',
#        'subject_thorax_to_agent_thorax', 'thorax_to_reward_port_sum',
#        'thorax_to_reward_port_diff', 'to_reward_port_angle_sum',
#        'to_reward_port_angle_diff']

In [None]:
# # based on each subject individually and both subjects together
# features_columns  = ['subject_thorax_velocity',
#        'subject_thorax_to_reward_port', 'subject_thorax_to_agent_thorax',
#        'subject_to_reward_port_angle',
#        'thorax_velocity_sum', 'thorax_velocity_diff',
#        'thorax_to_reward_port_sum',
#        'thorax_to_reward_port_diff', 'to_reward_port_angle_sum',
#        'to_reward_port_angle_diff']

In [None]:
# # based on just main subject
# features_columns = ['subject_thorax_velocity',
#        'subject_thorax_to_reward_port', 'subject_thorax_to_agent_thorax',
#        'subject_to_reward_port_angle']

In [None]:
# Per-video trial labels spreadsheet.
_trial_labels_xlsx = "./data/rce_pilot_3_long_comp_per_video_trial_labels.xlsx"
trial_labels = pd.read_excel(_trial_labels_xlsx)

In [None]:
# Keep only trials that have a condition label.
# The column name really does end with a space ("condition ").
_required_label_columns = ["condition "]
trial_labels = trial_labels.dropna(subset=_required_label_columns)

In [None]:
# Strip the camera-sync suffix from the video names (video_name is the merge key used below).
# Literal (non-regex) replacement: the dots must not act as wildcards. Unlike a
# per-element lambda, the vectorised .str accessor leaves missing names as NaN
# instead of raising AttributeError.
trial_labels["video_name"] = trial_labels["video_name"].str.replace(
    ".videoTimeStamps.cameraHWSync", "", regex=False
)

In [None]:
START_STOP_FRAME_DF["video_name"]


In [None]:
# Inner join (pandas default) of the trial labels with the per-video pose features.
merged_trial_labels_df = trial_labels.merge(START_STOP_FRAME_DF, on=["video_name"])

In [None]:
features_columns

In [None]:
def _tone_window(row, column):
    """Slice one array-valued column to the row's [tone_start_frame, tone_stop_frame) window."""
    first_frame, last_frame = row["tone_start_frame"], row["tone_stop_frame"]
    return row[column][first_frame:last_frame]


# Restrict every feature trace to the frames of its tone.
for col in features_columns:
    merged_trial_labels_df[col] = merged_trial_labels_df.apply(_tone_window, axis=1, args=(col,))

In [None]:
# "frame_index" is bookkeeping, not a behavioural feature: drop it from the feature list.
features_columns = list(filter(lambda name: name != "frame_index", features_columns))

In [None]:
features_columns

In [None]:
merged_trial_labels_df.head()

In [None]:
merged_trial_labels_df.columns

In [None]:
# One row per (video, tone): keep the first occurrence of each pair.
_trial_key_columns = ["video_name", "tone_stop_frame"]
merged_trial_labels_df = merged_trial_labels_df.drop_duplicates(subset=_trial_key_columns)

In [None]:
# merged_trial_labels_df = merged_trial_labels_df.dropna(subset=["competition_closeness"])

In [None]:
merged_trial_labels_df.head()

# UMAP Clustering

In [None]:
merged_trial_labels_df.columns

In [None]:
def _tone_frame_offsets(row):
    """0-based frame offsets covering the row's tone window."""
    n_tone_frames = row["tone_stop_frame"] - row["tone_start_frame"]
    return np.arange(n_tone_frames)


merged_trial_labels_df["tone_frame"] = merged_trial_labels_df.apply(_tone_frame_offsets, axis=1)

In [None]:
# Trial-level columns carried along unchanged ("frame_index" stays an array per row).
_trial_columns = [
    "frame_index", "tone_frame", "session_dir", "tone_start_frame", "reward_start",
    "reward_dispensed", "tone_stop_frame", "condition ",
    "competition_closeness", "notes", "experiment", "sleap_name", "video_name", "current_subject",
]
# Array-valued columns unrolled in lockstep so each output row is one frame.
_per_frame_columns = features_columns + ["tone_frame"]

exploded_columns = (
    merged_trial_labels_df[_trial_columns + features_columns]
    .explode(_per_frame_columns)
    .reset_index(drop=True)
)

In [None]:
merged_trial_labels_df["subject_to_agent_orientation"].iloc[0]

In [None]:
exploded_columns.head()

In [None]:
def _frame_at_offset(row):
    """Look up the row's tone_frame offset in its (un-exploded) frame_index array."""
    return row["frame_index"][row["tone_frame"]]


exploded_columns["current_frame"] = exploded_columns.apply(_frame_at_offset, axis=1)

In [None]:
exploded_columns.tail()

In [None]:
# Frames x features matrix, then standardised per feature column.
frame_data = exploded_columns.loc[:, features_columns].values

_feature_scaler = StandardScaler()
scaled_frame_data = _feature_scaler.fit_transform(frame_data)

In [None]:
# UMAP with default parameters (fixed seed); this embedding is the one plotted below.
_standard_reducer = umap.UMAP(random_state=42)
standard_embedding = _standard_reducer.fit_transform(scaled_frame_data)

# Clustering

In [None]:
# Second UMAP embedding, used as input to the clustering step
# (more neighbours and min_dist=0 than the default embedding).
_clustering_umap_params = dict(
    n_neighbors=100,
    min_dist=0,
    n_components=2,
    random_state=42,
)
clusterable_embedding = umap.UMAP(**_clustering_umap_params).fit_transform(scaled_frame_data)

In [None]:
# kmeans_label_zscore = cluster.KMeans(n_clusters=5, random_state=42).fit_predict(clusterable_embedding)

In [None]:
# K-means (8 clusters, fixed seed) on the clustering-oriented embedding; one label per frame.
_kmeans_model = cluster.KMeans(n_clusters=8, random_state=42)
kmeans_label_zscore = _kmeans_model.fit_predict(clusterable_embedding)

In [None]:
# kmeans_label_zscore = hdbscan.HDBSCAN(
#     min_samples=100,
#     min_cluster_size=5000,
# ).fit_predict(clusterable_embedding)

In [None]:
# Attach the cluster label of each frame to its row; the labels are in row order because
# the embedding was computed from exploded_columns[features_columns].
exploded_columns["kmeans_cluster"] = kmeans_label_zscore

In [None]:
exploded_columns.head()

In [None]:
kmeans_label_zscore.shape

In [None]:
kmeans_label_zscore

In [None]:
# Unlabelled scatter of the default UMAP embedding.
_embedding_x = standard_embedding[:, 0]
_embedding_y = standard_embedding[:, 1]
plt.scatter(_embedding_x, _embedding_y, s=0.1)
plt.gca().set_aspect('equal', 'datalim')

In [None]:
# NOTE(review): `axs` and `subsample_cluster_df` are not defined anywhere in the visible
# part of this notebook -- this cell looks carried over from another analysis; confirm
# before running.
# Scatter of the 'embedding_zscore_*_standard' columns on the second axes, colored by the
# 'standard_hdbscan_labels_zscore' column, with a legend placed at the upper-right anchor.
scatter2 = axs[1].scatter(subsample_cluster_df['embedding_zscore_x_standard'],
                             subsample_cluster_df['embedding_zscore_y_standard'],
                             c=subsample_cluster_df['standard_hdbscan_labels_zscore'],
                             s=0.1,
                             cmap='Spectral')
axs[1].set_title('Behavioral Clusters (standard HDBScan)')
axs[1].legend(*scatter2.legend_elements(), bbox_to_anchor=(1, 1))

In [None]:
# Default UMAP embedding colored by k-means cluster label, with one legend entry per cluster.
_embedding_x = standard_embedding[:, 0]
_embedding_y = standard_embedding[:, 1]
umap_cluster = plt.scatter(_embedding_x, _embedding_y, s=0.1, c=kmeans_label_zscore, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
_legend_handles, _legend_labels = umap_cluster.legend_elements()
plt.legend(_legend_handles, _legend_labels)


## Looking at metrics

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize

In [None]:
# Colormap passed to the per-feature scatter plots in the loop below.
cmap = cm.viridis  # Choose a colormap

In [None]:
features_columns

In [None]:
# One figure per feature: the default UMAP embedding colored by that feature's value.
for featured in features_columns:
    if featured == "frame_index":
        continue
    print(featured)

    # Convert once and reuse for both the color scale and the point colors.
    feature_values = exploded_columns[featured].astype(float)

    fig, ax = plt.subplots()
    # Color scale spans this feature's own min..max.
    norm = Normalize(vmin=np.min(feature_values), vmax=np.max(feature_values))
    plt.scatter(
        standard_embedding[:, 0],
        standard_embedding[:, 1],
        c=feature_values,
        s=0.005,
        cmap=cmap,
        norm=norm,
    )

    ax.set_aspect('equal', 'datalim')
    ax.set_title(featured, fontsize=16)
    plt.show()

In [None]:
exploded_columns

In [None]:
# NOTE(review): `subsample_cluster_df` is not defined in the visible part of this notebook,
# and `ax` is indexed as a 2-D grid of axes here although the loop above creates a single
# Axes -- this cell looks carried over from another analysis; confirm before running.
# Template: assign a feature column to `c` to color the embedding by that feature
# (color scale capped at vmax=400).
scatter4 = ax[0,3].scatter(subsample_cluster_df['embedding_zscore_x_standard'],
            subsample_cluster_df['embedding_zscore_y_standard'],
            c=subsample_cluster_df['thorax distances'].astype(float),
            s=0.005, cmap = 'afmhot', vmax = 400)  
ax[0,3].set_title('Distance between mice', fontsize= 16)

# Looking at the clusters

In [None]:
exploded_columns

In [None]:
# Export one GIF per (cluster, video) pair from the frames assigned to that cluster.
#
# The loop variable is deliberately NOT named `cluster`: that name is bound to the
# `sklearn.cluster` module imported at the top of the notebook, and rebinding it here
# would break re-running the KMeans cell afterwards.
_source_video_dir = "/scratch/back_up/reward_competition_extention/in_progress/rce3/sleap_id_correction/to_be_checked"

for cluster_id in exploded_columns["kmeans_cluster"].unique():
    cluster_df = exploded_columns[exploded_columns["kmeans_cluster"] == cluster_id]
    for vid in cluster_df["video_name"].unique():
        video_df = cluster_df[cluster_df["video_name"] == vid]
        video_name = "{}.fixed.mp4".format(vid)
        video_path = os.path.join(_source_video_dir, video_name)

        frame_numbers = video_df["current_frame"].to_list()
        # Output layout: ./proc/<cluster>/<video file name>/
        output_dir = "./proc/{}/{}".format(cluster_id, video_name)
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        extract_frames_and_make_gif(
            video_path,
            frame_numbers,
            output_dir,
            gif_name="cluster_{}_{}.gif".format(cluster_id, video_name),
            fps=25,
        )

In [None]:
raise ValueError()

In [None]:

def calculate_angle(vector_a, vector_b):
    """
    Calculate the angle between two vectors.

    Parameters:
    - vector_a, vector_b: 1D array-likes of equal length.

    Returns:
    - Angle in degrees, in the range [0, 180]. NaN if either vector has zero length.
    """
    vector_a = np.asarray(vector_a, dtype=float)
    vector_b = np.asarray(vector_b, dtype=float)

    norm_product = np.linalg.norm(vector_a) * np.linalg.norm(vector_b)
    if norm_product == 0:
        # The angle to a zero-length vector is undefined.
        return np.nan

    # Floating-point round-off can push the cosine slightly outside [-1, 1],
    # which would make arccos return NaN for (anti)parallel vectors.
    cos_angle = np.clip(np.dot(vector_a, vector_b) / norm_product, -1.0, 1.0)

    return np.degrees(np.arccos(cos_angle))

In [None]:
raise ValueError()

In [None]:
def calculate_angle_from_points(a, b, c, d):
    """
    Calculate the angle between vectors AB and CD given points A, B, C, D.

    Parameters:
    - a, b: Tuples/lists representing points A(x1, y1) and B(x2, y2).
    - c, d: Tuples/lists representing points C(x3, y3) and D(x4, y4).

    Returns:
    - Angle in degrees (0 to 180) between the vectors AB and CD.
      NaN if either vector has zero length.
    """
    # Convert points to numpy arrays
    a, b, c, d = (np.asarray(point, dtype=float) for point in (a, b, c, d))

    # Compute vectors
    ab = b - a
    cd = d - c

    norm_product = np.linalg.norm(ab) * np.linalg.norm(cd)
    if norm_product == 0:
        # The angle to a zero-length vector is undefined.
        return np.nan

    # Clip to guard against round-off pushing the cosine outside arccos' domain.
    cos_angle = np.clip(np.dot(ab, cd) / norm_product, -1.0, 1.0)

    # Return degrees, as documented above.
    return np.degrees(np.arccos(cos_angle))




In [None]:
# Example usage: AB points along -x and CD along +x, i.e. the two vectors are anti-parallel.
a = (0, 0)
b = (-1, 0)
c = (0, 0)
d = (1, 0)
angle = calculate_angle_from_points(a, b, c, d)
print(f"The angle between the vectors is {angle:.2f} degrees")

In [None]:
raise ValueError()

# TODO: Add frame number for the video
- Do this by making a list of frame numbers and exploding that

In [None]:
cluster_df

In [None]:
exploded_columns

In [None]:
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import numpy as np

# Demo: show a thumbnail image next to whichever scatter point the mouse hovers over.
np.random.seed(42)

# Generate data x, y for scatter and an array of images.
x = np.arange(20)
y = np.random.rand(len(x))
arr = np.empty((len(x), 10, 10))
for i in range(len(x)):
    # Build a 10x10 image by mirroring a random 5x5 tile into the four quadrants.
    f = np.random.rand(5, 5)
    arr[i, 0:5, 0:5] = f
    arr[i, 5:, 0:5] = np.flipud(f)
    arr[i, 5:, 5:] = np.fliplr(np.flipud(f))
    arr[i, 0:5:, 5:] = np.fliplr(f)

# create figure and plot scatter
fig = plt.figure()
ax = fig.add_subplot(111)
line, = ax.plot(x, y, ls="", marker="o")

# create the annotations box
im = OffsetImage(arr[0, :, :], zoom=5)
xybox = (50., 50.)
ab = AnnotationBbox(im, (0, 0), xybox=xybox, xycoords='data',
        boxcoords="offset points", pad=0.3, arrowprops=dict(arrowstyle="->"))
# add it to the axes and make it invisible
ax.add_artist(ab)
ab.set_visible(False)


def hover(event):
    """Mouse-move callback: show the image of the hovered point, hide it otherwise."""
    # Hit-test once; `details["ind"]` holds the indices of all points under the cursor.
    is_over_point, details = line.contains(event)
    if is_over_point:
        # Several points can overlap under the cursor; use the first one instead of
        # unpacking a single index (which raises ValueError for more than one hit).
        ind = details["ind"][0]
        # get the figure size in pixels
        w, h = fig.get_size_inches() * fig.dpi
        # if event occurs in the top or right quadrant of the figure,
        # flip the annotation box to the other side of the mouse.
        ws = -1 if event.x > w / 2. else 1
        hs = -1 if event.y > h / 2. else 1
        ab.xybox = (xybox[0] * ws, xybox[1] * hs)
        # make annotation box visible
        ab.set_visible(True)
        # place it at the position of the hovered scatter point
        ab.xy = (x[ind], y[ind])
        # set the image corresponding to that point
        im.set_data(arr[ind, :, :])
    else:
        # the mouse is not over a scatter point
        ab.set_visible(False)
    fig.canvas.draw_idle()


# add callback for mouse moves
fig.canvas.mpl_connect('motion_notify_event', hover)
plt.show()

In [None]:
raise ValueError()

In [None]:

# Importing all necessary libraries
import cv2
import os

# Dump every frame of one video into ./data as numbered JPEG files.

# Read the video from specified path
cam = cv2.VideoCapture("C:\\Users\\Admin\\PycharmProjects\\project_1\\openCV.mp4")
if not cam.isOpened():
    # Without this check an unreadable path just produces no output at all.
    print('Error: Could not open video')

try:
    # creating a folder named data (no error if it already exists)
    os.makedirs('data', exist_ok=True)

# if not created then report the error
except OSError:
    print('Error: Creating directory of data')

# frame counter, also used in the output file names
currentframe = 0

try:
    while True:
        # reading from frame
        ret, frame = cam.read()

        # no frame returned: end of video (or the video could not be read)
        if not ret:
            break

        # if video is still left continue creating images
        name = './data/frame' + str(currentframe) + '.jpg'
        print('Creating...' + name)

        # writing the extracted images
        cv2.imwrite(name, frame)

        # increasing counter so that it will
        # show how many frames are created
        currentframe += 1
finally:
    # Release all space and windows once done, even if the loop raised
    cam.release()
    cv2.destroyAllWindows()


In [None]:
def encode_strings_to_numbers(strings):
    """
    Encodes an array of strings to an array of unique integers.

    Each distinct string is mapped to its position in the sorted list of
    distinct values, so equal strings always receive the same code.

    Parameters:
        strings (numpy.array): Numpy array (or array-like) of string values. May be empty.

    Returns:
        numpy.array: An integer array with the same shape as `strings`, where each
        integer represents a unique string.
    """
    values = np.asarray(strings)

    # `return_inverse` yields, for every element, its index into the sorted unique
    # values -- the same mapping a string->index dictionary would give, without a
    # Python-level lookup per element and without failing on empty input.
    _, codes = np.unique(values, return_inverse=True)

    # The shape of the inverse for N-d input differs between NumPy versions; normalise it.
    return codes.reshape(values.shape)

In [None]:
encode_strings_to_numbers(exploded_columns["competition_closeness"].values)

In [None]:
# Default UMAP embedding colored by the integer-encoded "competition_closeness" label.
_closeness_codes = encode_strings_to_numbers(exploded_columns["competition_closeness"].values)
plt.scatter(
    standard_embedding[:, 0],
    standard_embedding[:, 1],
    s=0.1,
    c=_closeness_codes,
    cmap='Spectral',
)
plt.gca().set_aspect('equal', 'datalim')

In [None]:
standard_embedding

In [None]:
raise ValueError

# Plotting

In [None]:
raise ValueError()

In [None]:
### Figure plotting for Paper
# plotting code for paper ready plots
# NOTE(review): `subsample_cluster_df` is not defined in the visible part of this notebook -- confirm its source.
fig, ax = plt.subplots(figsize=(8, 8))
scatter = ax.scatter(subsample_cluster_df['embedding_zscore_x_standard'],
                     subsample_cluster_df['embedding_zscore_y_standard'],
                     c=subsample_cluster_df['raw_kmeans_labels_zscore'],
                     s=0.1,
                     cmap='Spectral')
ax.set_title('Behavior Clusters', fontsize = 24)
# One legend entry per cluster label, enlarged so the tiny markers stay readable.
legend = ax.legend(*scatter.legend_elements(),
                   bbox_to_anchor=(.94, 1),
                   frameon = False,
                   fontsize = 22,
                   markerscale = 2,
                   ncol = 1,
                   handletextpad = -0.2,
                   columnspacing = 0.2)
# Thicken the frame on all four sides.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_linewidth(2)
# The embedding axes carry no interpretable units: hide the ticks.
ax.set_xticks([])
ax.set_yticks([])

In [None]:
raise ValueError()

In [None]:
# NOTE(review): this cell looks like an unfinished, trimmed-down draft of the next cell
# and cannot run as written:
#   - f1, f2, f3, f6, f7, f8, f9 are stacked below but never computed in this cell;
#   - `strain_labels` and `trial_length` are used but not initialised in this cell;
#   - the list passed to np.column_stack is never closed (syntax error);
#   - `column_names` still lists 'Strain' and the *_elo columns, which are not stacked.
# Confirm whether it should be completed or deleted.
is_first = True
recording_labels = []
frame_indice_labels = []

for file, recording in info.items():
    #creating arrays for recording name, strain, frame indice, 
    # and trial indice (0 for iti, 0-10 for tone) that are as long as there are frames 
    recording_labels += [file] * recording.locations.shape[0]
    file_row = tone_times_df[tone_times_df['File Name'] == file]

    frame_indice_labels = np.concatenate([frame_indice_labels,np.arange(0, recording.locations.shape[0])], axis = 0)
    temp_trial_indices = np.zeros(recording.locations.shape[0])
    #loading in normalization_factor since not all the videos are the same size / resolution
    distance_normalization_factor = box_setup[file]['distance_normalization_factor']
    #loading in reward_point (x,y)
    reward_port = box_setup[file]['reward_port']
    #creating the tone snippets from 0-10 for the frames during the tone
    for trial in recording.tones:
        try:
            temp_trial_indices[trial:trial+trial_length] = np.linspace(0,10,300)
        except ValueError:
            # presumably a tone window that runs past the end of the recording; it is skipped
            pass
    if is_first:
        trial_indices = temp_trial_indices
    else: 
        trial_indices = np.concatenate([trial_indices, temp_trial_indices])

    if is_first:
        features = np.stack([f1, f2, f3, f6, f7, f8, f9])
    else:
        temp_features =  np.stack([f1, f2, f3, f6, f7, f8, f9])
        features = np.concatenate([features,temp_features], axis = 1)
    is_first = False
recording_labels = np.array(recording_labels)
strain_labels = np.array(strain_labels)
zscored_features = []
# z score each feature 
for i in range(features.shape[0]):
    mean = np.mean(features[i])
    std_dev = np.std(features[i])
    normalized = (features[i]-mean)/std_dev
    zscored_features.append(normalized)
# stack zscored features onto the feature calculations 
features = np.concatenate([features, np.stack(zscored_features)], axis = 0)
# name features 
feature_names = ['thorax distances', 'mouse velocity sum',
                'mouse velocity diff', 
                #'orientation b/w mice sum',
                #'orientation b/w mice diff', 
                'orientation to port sum',
                'orientation to port diff',
                'distance to port sum',
                'distance to port diff']
#name z score features feature name + _zscore
for name in range(len(feature_names)):
    zscore_name = feature_names[name]+'_zscore'
    feature_names.append(zscore_name)
#name the non-feature columns
new_columns = ['Strain',
               'Recording',
               'frame indice',
               'trial_indice',
               'tube_test_elo',
               'urine_marking_elo',
               'home_cage_observation_elo',
               'reward_comp_elo']
# add both lists to create a master list of all column names 
column_names = feature_names + new_columns
# append them all into an array of features as columns and frames as rows
# NOTE(review): the bracket opened on the next line is never closed.
data = np.column_stack([features.T, 
                        recording_labels[:, None],
                        frame_indice_labels[:, None],
                        trial_indices[:,None],
# turn array into a Dataframe
df = pd.DataFrame(data, columns = column_names)
reduced_frames = len(df) 
#and subsample for every third frame (otherwise my computer crashes)
every_third_index = np.arange(0, reduced_frames, 3)
subsample_df = df.iloc[every_third_index]


In [None]:
# Build one row per video frame: pairwise behavioural features (raw and z-scored) plus
# recording-level labels, then keep every third frame.
# Per-recording pieces are collected in lists and concatenated once after the loop.
recording_labels = []
strain_labels = []
tube_test_elo_labels = []
urine_marking_elo_labels = []
home_cage_observation_elo_labels = []
reward_comp_elo_labels = []
frame_indices_per_recording = []
trial_indices_per_recording = []
features_per_recording = []
#trial length = 30 fps * 10 second tone
trial_length = 10*30
for file, recording in info.items():
    n_frames = recording.locations.shape[0]
    # labels repeated once per frame: recording name and strain
    recording_labels += [file] * n_frames
    strain_labels += [recording.strain] * n_frames
    file_row = tone_times_df[tone_times_df['File Name'] == file]
    # grab elo score information (0 when the recording has no row in tone_times_df)
    if not file_row.empty:
        tube_test_elo_diff = file_row['tube_test_RD'].values[0]
        urine_marking_elo_diff = file_row['urine_marking_RD'].values[0]
        home_cage_elo_diff = file_row['home_cage_observation_RD'].values[0]
        reward_comp_elo_diff = file_row['reward_comp_RD'].values[0]
    else:
        tube_test_elo_diff = 0
        urine_marking_elo_diff = 0
        home_cage_elo_diff = 0
        reward_comp_elo_diff = 0
    # repeat each score once per frame
    tube_test_elo_labels += [tube_test_elo_diff] * n_frames
    urine_marking_elo_labels += [urine_marking_elo_diff] * n_frames
    home_cage_observation_elo_labels += [home_cage_elo_diff] * n_frames
    reward_comp_elo_labels += [reward_comp_elo_diff] * n_frames
    # frame index within the recording (kept as float, as before)
    frame_indices_per_recording.append(np.arange(0, n_frames, dtype=float))
    # trial indice: 0 outside tones, ramping 0-10 across each tone
    temp_trial_indices = np.zeros(n_frames)
    #loading in normalization_factor since not all the videos are the same size / resolution
    distance_normalization_factor = box_setup[file]['distance_normalization_factor']
    #loading in reward_point (x,y)
    reward_port = box_setup[file]['reward_port']
    #creating the tone snippets from 0-10 for the frames during the tone
    for trial in recording.tones:
        try:
            temp_trial_indices[trial:trial+trial_length] = np.linspace(0, 10, trial_length)
        except ValueError:
            # Deliberate best-effort: presumably a tone window that runs past the end of
            # the recording (slice shorter than the ramp); those frames stay 0.
            pass
    trial_indices_per_recording.append(temp_trial_indices)
    # feature 1: distance between thoraxes
    f1 = recording.distances_between_mice('thorax', distance_normalization_factor) # distances between mice works 
    #features 2-3: sum and difference of the thorax velocities of the two mice
    velocities = recording.node_velocity('thorax', normalization_factor = distance_normalization_factor)
    f2 = velocities[0] + velocities[1]# this one is from sleap so assuming this works
    f3 = np.abs(velocities[0] - velocities[1])
    #features 4-5 (disabled): angle of orientation between mice, 2pi is facing each other
    # 0 radians is not facing each other (or parallel)
    #orientations = recording.orientation()
    #f4 = orientations[0] + orientations [1]# this works
    #f5 = np.abs(orientations[0] - orientations [1])
    #feature 6-7: angle from nose to thorax to reward port
    angle_to_port = recording.point_angles('nose', 'thorax', reward_port)
    distance_to_port = recording.distances_to_point('nose', reward_port, distance_normalization_factor)
    f6 = angle_to_port[0] + angle_to_port[1]# this works with thorax
    f7 = np.abs(angle_to_port[0] - angle_to_port[1])
    #feature 8-9: distance to reward point for each mouse
    f8 = distance_to_port[0] + distance_to_port[1] # this works
    f9 = np.abs(distance_to_port[0] - distance_to_port[1])
    # one (n_features, n_frames) block per recording
    features_per_recording.append(np.stack([f1, f2, f3, f6, f7, f8, f9]))

# turn all lists into np.arrays once, after the loop
recording_labels = np.array(recording_labels)
strain_labels = np.array(strain_labels)
tube_test_elo_array = np.array(tube_test_elo_labels)
urine_marking_elo_array = np.array(urine_marking_elo_labels)
home_cage_observation_elo_array = np.array(home_cage_observation_elo_labels)
reward_comp_elo_array = np.array(reward_comp_elo_labels)
frame_indice_labels = np.concatenate(frame_indices_per_recording)
trial_indices = np.concatenate(trial_indices_per_recording)
# rows are features, columns are frames across all recordings
features = np.concatenate(features_per_recording, axis = 1)

# z score each feature across all frames
zscored_features = [(row - np.mean(row)) / np.std(row) for row in features]
# stack zscored features onto the feature calculations 
features = np.concatenate([features, np.stack(zscored_features)], axis = 0)
# name features 
feature_names = ['thorax distances', 'mouse velocity sum',
                'mouse velocity diff', 
                #'orientation b/w mice sum',
                #'orientation b/w mice diff', 
                'orientation to port sum',
                'orientation to port diff',
                'distance to port sum',
                'distance to port diff']
#name z score features feature name + _zscore
feature_names += [name + '_zscore' for name in feature_names]
#name the non-feature columns
new_columns = ['Strain',
               'Recording',
               'frame indice',
               'trial_indice',
               'tube_test_elo',
               'urine_marking_elo',
               'home_cage_observation_elo',
               'reward_comp_elo']
# add both lists to create a master list of all column names 
column_names = feature_names + new_columns
# append them all into an array of features as columns and frames as rows
# NOTE(review): stacking string labels with numeric columns makes NumPy coerce the whole
# array to a string dtype, so numeric columns need an explicit cast before use -- confirm
# this is intended.
data = np.column_stack([features.T, 
                        strain_labels[:, None],
                        recording_labels[:, None],
                        frame_indice_labels[:, None],
                        trial_indices[:,None],
                        tube_test_elo_array[:, None],
                        urine_marking_elo_array[:, None],
                        home_cage_observation_elo_array[:, None],
                        reward_comp_elo_array[:,None]])
# turn array into a Dataframe
df = pd.DataFrame(data, columns = column_names)
reduced_frames = len(df) 
#and subsample for every third frame (otherwise my computer crashes)
every_third_index = np.arange(0, reduced_frames, 3)
subsample_df = df.iloc[every_third_index]


## Putting together LFP and video start/stop

In [None]:
# Show the first five distinct values of the "video_name" column in
# START_STOP_FRAME_DF.
START_STOP_FRAME_DF["video_name"].unique()[:5]

In [None]:
# Show the first five distinct values of the "video_name" column in
# LFP_SPECTRAL_DF.
LFP_SPECTRAL_DF["video_name"].unique()[:5]

In [None]:
# List every distinct value of "current_subject" in LFP_SPECTRAL_DF.
LFP_SPECTRAL_DF["current_subject"].unique()

In [None]:
# List every distinct value of "current_subject" in START_STOP_FRAME_DF.
START_STOP_FRAME_DF["current_subject"].unique()

In [None]:
# Inner-join the LFP spectral table with the start/stop frame table, keeping
# only rows whose (video_name, current_subject) pair appears in both.
LFP_AND_SLEAP_DF = LFP_SPECTRAL_DF.merge(
    START_STOP_FRAME_DF,
    how="inner",
    on=["video_name", "current_subject"],
)

In [None]:
# Show the `.shape` of each "video_timestamps" entry for the first five rows
# (each entry must expose a `.shape` attribute).
LFP_AND_SLEAP_DF["video_timestamps"].apply(lambda x: x.shape).head()

In [None]:
# Preview the first rows of the merged DataFrame.
LFP_AND_SLEAP_DF.head()

- Checking whether any of the velocity arrays contain NaNs

In [None]:
# Select the rows whose "subject_thorax_velocity" entry contains at least one
# NaN; an empty result means no entry contains a NaN.
LFP_AND_SLEAP_DF[LFP_AND_SLEAP_DF["subject_thorax_velocity"].apply(lambda x: np.isnan(x).any())]

## Exporting

In [None]:
# Display the column index of the merged DataFrame.
LFP_AND_SLEAP_DF.columns

In [None]:
# Display the value of FULL_LFP_TRACES_PKL, which is joined onto OUTPUT_DIR to
# form the pickle path when the DataFrame is exported.
FULL_LFP_TRACES_PKL

In [None]:
# Pickle the merged DataFrame to OUTPUT_DIR/FULL_LFP_TRACES_PKL.
# NOTE(review): to_pickle does not create missing directories; presumably
# OUTPUT_DIR is created earlier in the notebook -- confirm.
LFP_AND_SLEAP_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

In [None]:
# Preview the first rows of the DataFrame that was just exported.
LFP_AND_SLEAP_DF.head()

In [None]:
# Print each column label of the merged DataFrame on its own line.
for col in LFP_AND_SLEAP_DF.columns:
    print(col)

In [None]:
# NOTE(review): unconditional raise with no message -- presumably a deliberate
# stop so that "Run All" halts at this point; confirm before removing.
raise ValueError()