In [1]:
import h5py
import numpy as np
import math
import glob
import os
import umap
import hdbscan
import seaborn as sns
import pandas as pd
import pickle
import openpyxl
import cv2
from scipy import ndimage as ndi

from functools import reduce
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
import matplotlib.pyplot as plt
from scipy.stats import linregress
from scipy.optimize import curve_fit
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score
import sklearn.cluster as cluster
from moviepy.editor import *
import imageio

In [2]:
def get_running_total(lst: list) -> list:
    """
    Compute the running total for a list of numbers.
    
    Given a list of numbers, this function returns a new list 
    where each element is the cumulative sum of the elements 
    up to that point in the original list.

    Parameters:
    - lst (list): A list of numbers.

    Returns:
    - list: A list containing the running total.

    Example:
    >>> get_running_total([1, 2, 3, 4])
    [1, 3, 6, 10]
    """

    def accumulator(acc, x):
        if acc:
            return acc + [acc[-1] + x]
        else:
            return [x]
        
    return reduce(accumulator, lst, [])

def make_dict(my_list):
    my_dict = {}
    for i in range(len(my_list)):
        my_dict[my_list[i]] = i
    return my_dict

def fill_missing(Y, kind="linear"):
    """Fills missing values independently along each dimension after the first."""

    # Store initial shape.
    initial_shape = Y.shape

    # Flatten after first dim.
    Y = Y.reshape((initial_shape[0], -1))

    # Interpolate along each slice.
    for i in range(Y.shape[-1]):
        y = Y[:, i]

        # Build interpolant.
        x = np.flatnonzero(~np.isnan(y))
        f = interp1d(x, y[x], kind=kind, fill_value=np.nan, bounds_error=False)

        # Fill missing
        xq = np.flatnonzero(np.isnan(y))
        y[xq] = f(xq)
        
        # Fill leading or trailing NaNs with the nearest non-NaN values
        mask = np.isnan(y)
        y[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), y[~mask])

        # Save slice
        Y[:, i] = y

    # Restore to initial shape.
    Y = Y.reshape(initial_shape)

    return Y

def smooth_diff(node_loc, deriv, win=25, poly=3):
    """
    node_loc is a [frames, 2] array
    
    win defines the window to smooth over
    
    poly defines the order of the polynomial
    to fit with
    
    """
    node_vel = np.zeros_like(node_loc)
    for c in range(node_loc.shape[-1]):
        node_vel[:, c] = savgol_filter(node_loc[:, c], win, poly, deriv)
    if deriv != 0:
        node_vel = np.linalg.norm(node_vel,axis=1)
    return node_vel

def mmss_to_ms(mmss):
    try:
        minutes, seconds = map(int, mmss.split(':'))
        total_seconds = minutes * 60 + seconds
        milliseconds = int(total_seconds * 1000)
        return milliseconds
    except ValueError:
        return None
    
def mmss_to_frames(mmss, fps = 30):
    try:
        minutes, seconds = map(int, mmss.split(':'))
        total_seconds = minutes * 60 + seconds
        frame_no = int(total_seconds * 30)
        return frame_no
    except ValueError:
        return None
class sleap_vid():

    def __init__(self, filename, track_no):
        self.name = filename
        self.features = {}
        self.mice = track_no
        self.get_info(filename)

    def get_info(self, filename):
        with h5py.File(filename, "r") as f:
            tracks= [n.decode() for n in f["track_names"][:]]
            locations = f["tracks"][:].T
            locations = locations[:,:,:,:self.mice]
            self.locations = fill_missing(locations)
            nodes = [n.decode() for n in f["node_names"][:]]
            self.tracks = tracks
            self.nodes = nodes
            self.track_dict = make_dict(tracks)
            self.node_dict = make_dict(nodes)

    def smooth_locations(self, win=25, poly=3):
        for node in self.node_dict.values():
            for track in range(self.mice):
                nodeloc = self.locations[:,node,:,track]
                smoothed_node = smooth_diff(nodeloc, deriv=0, win=win, poly=poly)
                self.locations[:,node,:,track] = smoothed_node
        

    def node_velocity(self, node, win=25, poly=3, normalization_factor=None, track_order = None):
        """
        takes in node and returns the velocity of that node 
        for each mouse

        Args: 
            node: string, name of node
        
        Returns:
            velocities: 2d np.array of floats (d = track_no x # of frames)
                where each element is the velocity for that node
                distances[i] = velocities for mouse i
        """
        velocities = []
        if track_order is not None:
            order = []
            for i in track_order:
                order.append(self.track_dict[i])
        else:
            order = range(self.mice)
        print(order)
        for i in order:
            print(i)
            node_loc = self.locations[:, self.node_dict[node], :, i]
            if normalization_factor != None:
                node_loc = node_loc * normalization_factor
            vel = smooth_diff(node_loc, deriv = 1, win=win, poly=poly)
            velocities.append(vel)
        velocities = np.array(velocities)
        return velocities 
   
    def distances_between_mice(self, node, normalization_factor=None):
        """
        takes in node name
        returns a list of distances between the nodes of the two mice

        Args:
            node: string, name of node
            mice: list of ints, default = [0,1], track numbers for mice 
            normalization_factor
        Returns:
            c_list: 1D np.array of floats (d = # of frames) between mice
        """
        locations = self.locations 
        node_index = self.node_dict[node]
        x1 = locations[:,node_index,0,0]
        y1 = locations[:,node_index,1,0]
        # x , y coordinate of nose for mouse 1
        x2 = locations[:,node_index,0,1]
        y2 =  locations[:,node_index,1,1]
        # x and y coordinate of nose of mouse 2
        # solve for c using pythagroean theory
        distances = np.sqrt(((x1 - x2)**2) + ((y1 - y2)**2))
        if normalization_factor != None: 
            distances = distances * normalization_factor
            distances = distances.T.flatten()
        return distances
    
    def distances_between_nodes(self, node1, node2, normalization_factor = None):
        """
        takes in two nodes and returns the distances between those nodes 
        for each mouse

        Args: 
            node1: string, name of node 1
            node2: string, name of node 2 
        
        Returns:
            distances: 2d np.array of floats (d = 2 x #of frames)
                where each element is the distance between node1 and node2 
                distances[0] = distances for mouse1
                distances[1] = distances for mouse2
        """
        locations = self.locations
        node_index1 = self.node_dict[node1]
        node_index2 = self.node_dict[node2]
        x1,y1 = locations[:, node_index1,0,0], locations[:,node_index1,1,0]
        # x , y coordinate of node 1 for mouse 1
        x2,y2 = locations[:,node_index2,0,0], locations[:,node_index2,1,0]
        # x, y coordiantes of node 2 for mouse 1
        x3, y3 = locations[:,node_index1,0,1], locations[:,node_index1,1,1]
        # x and y coordinate of node 1 of mouse 2
        x4, y4 = locations[:,node_index2,0,1], locations[:,node_index2,1,1]
        # solve for c using pythagroean theory
        c2 = np.sqrt(((x3 -x4)**2)+ ((y3 - y4)**2))
        c1 = np.sqrt(((x1 - x2)**2) + ((y1 - y2)**2))
        if normalization_factor != None:
            c2 = (c2*normalization_factor).T.flatten()
            c1 = (c1*normalization_factor).T.flatten()
        distances = np.array[c1, c2]
        return distances
    
    def distances_to_point(self, node, point, track_order = None, normalization_factor = None):
        # CONFIRMED THAT IT WORKS in terms of doing the math by hand
        """
        takes in two nodes and returns the distances between those nodes 
        for each mouse

        Args: 
            node1: string, name of node 1
            point: tuple or list of two floats,
                x and y coordinate of desired point

        Returns:
            distances: 2d np.array of floats (d = no of tracks x #of frames)
                where each element is the distance between node and point 
                distances[i] = distances for mouse i
        """
        locations = self.locations
        node_index = self.node_dict[node]
        px, py = point[0], point[1]
        distances = []
        if track_order is not None:
            order = []
            for i in track_order:
                order.append(self.track_dict[i])
        else:
            order = range(self.mice)
        for i in order:
            x,y = locations[:, node_index,0,i], locations[:,node_index,1,i]
            # x , y coordinate of node 1 for mouse i
            px = np.full_like(x, fill_value=px)
            py = np.full_like(y, fill_value=py)
            # solve for c using pythagroean theory
            c = np.sqrt(((x - px)**2) + (( - py)**2))
            if normalization_factor != None:
                c = (c*normalization_factor).T.flatten()
            distances.append(c)
        distances = np.array(distances)
        return distances


    def node_angles(self, node1, node2, node3, track_order = None):
        """
        takes in locations and three nodes, calculates angle between the three points 
        with the second node being the center point
        
        Args:  
            node1: string, name of node 1
            node2: string, name of node 2 
            node3: string, name of node 3

        Returns:
            ang: 2d np. array (d = 2 x # of frames)
                where each element is the angle between 
                node1 and node3 with node2 as center point 
                anles_all_mice[0] = angles for mouse1
                anles_all_mice[1] = angles for mouse2
        """
        locations = self.locations  
        node_index_1 = self.node_dict[node1]
        node_index_2 = self.node_dict[node2]
        node_index_3 = self.node_dict[node3]
        if track_order is not None:
            order = []
            for i in track_order:
                order.append(self.track_dict[i])
        else:
            order = list(range(self.mice))
        ax = locations[:,node_index_1, 0, :]
        ay = locations[:,node_index_1, 1, :]
        bx = locations[:,node_index_2, 0, :]
        by = locations[:,node_index_2, 1, :]
        cx = locations[:,node_index_3,0,:]
        cy = locations[:,node_index_3, 1, :]
        ang = np.arctan2(cy-by, cx-bx) - np.arctan2(ay-by, ax-bx) 
        ang_swapped = np.arctan2(cy-by, cx-bx) - np.arctan2(cy-by, cx-bx) 
        ang = np.maximum(ang, ang_swapped)
        # re order this 
        angles = ang.T
        return angles[order]
    
    def point_angles(self, node1, node2, point, track_order):
        """
        takes in locations and three nodes, calculates angle between the three points 
        with the second node being the center point
        
        Args:  
            node1: string, name of node 1
            node2: string, name of node 2 
            point: tuple or list of two floats,
                x and y coordinate of desired point

        Returns:
            ang: 2d np. array (d = 2 x # of frames)
                where each element is the angle between 
                node1 and point with node2 as center point 
                anles_all_mice[0] = angles for mouse1
                anles_all_mice[1] = angles for mouse2
        """
        if track_order is not None:
            order = []
            for i in track_order:
                order.append(self.track_dict[i])
        else:
            order = list(range(self.mice))
        locations = self.locations  
        node_index_1 = self.node_dict[node1]
        node_index_2 = self.node_dict[node2]
        px, py = point[0], point[1]
        ax = locations[:,node_index_1, 0, :]
        ay = locations[:,node_index_1, 1, :]
        bx = locations[:,node_index_2, 0, :]
        by = locations[:,node_index_2, 1, :]
        px = np.full_like(ax, fill_value=px)
        py = np.full_like(ay, fill_value=py)
        ang = np.arctan2(py-by, px-bx) - np.arctan2(ay-by, ax-bx) 
        ang_swapped = np.arctan2(ay-by, ax-bx) - np.arctan2(py-by, px-bx) 
        ang = np.maximum(ang, ang_swapped)
        ang = ang.T
        return ang[order]

    def orientation(self, track_order = None):
        """
        Takes in locations and nose and thorax node index to calculate the angle of orientation
        between mice where two mice facing each other results in pi
        theta = 0 means they are not facing each other 
        
        """
        locations = self.locations
        nose_node = self.node_dict['nose']
        thorax_node = self.node_dict['thorax']
        if track_order is not None:
            order = []
            for i in track_order:
                order.append(self.track_dict[i])
        else:
            order = list(range(self.mice))
        ax = locations[:, nose_node, 0, order[0]]
        ay = locations[:, nose_node, 1, order[0]]
        bx = locations[:,thorax_node, 0, order[0]]
        by = locations[:,thorax_node, 1, order[0]]
        cx = locations[:, nose_node, 0, order[1]]
        cy = locations[:,nose_node, 1, order[1]]
        ang_m1 = np.arctan2(cy-by, cx-bx) - np.arctan2(ay-by, ax-bx) 
        ang_m1_swapped = np.arctan2(ay-by, ax-bx) - np.arctan2(cy-by, cx-bx)
        ax = locations[:, nose_node, 0, order[1]]
        ay = locations[:, nose_node, 1, order[1]]
        bx = locations[:,thorax_node, 0, order[1]]
        by = locations[:,thorax_node, 1, order[1]]
        cx = locations[:, nose_node, 0, order[0]]
        cy = locations[:,nose_node, 1, order[0]]
        ang_m2 = np.arctan2(cy-by, cx-bx) - np.arctan2(ay-by, ax-bx) 
        ang_m2_swapped = np.arctan2(ay-by, ax-bx) - np.arctan2(cy-by, cx-bx) 
        ang_m1 = np.maximum(ang_m1,ang_m1_swapped)
        ang_m2 = np.maximum(ang_m2,ang_m2_swapped)
        return np.array([ang_m1, ang_m2])
    
    def create_events_array(thresh, feature, greater_than = False, frame_rate = 30):
        """
        This function will create a 2D array of start and stop times (ms) for which 
        a given feature passes a given threshold. The output is an array of two
        element arrays, where the first value is the start time and the second 
        value is the stop time. 

        Args (4 total, 2 required):
            thresh: float, threshold value to filter frames
            feature: 1D numpy array, values of some feature for each frame
            greater_than: Bool, default = False, True if finding frames where feature >= thresh, False for <=
            frame-rate: int, default = 30 fps, frame rate in frames per second

        Return (1):
            output: 2D numpy array, Pairs of start and stop frames for which the value of the feature agrees with your filter    
        """

        #{feature_name: [x,y]} x -> greater than y -> less than 
        if greater_than:
            ind1 = feature.index[feature >= thresh].tolist()
        else:
            ind1 = feature.index[feature <= thresh].tolist()
        start = []
        start.append(ind1[0])

        end = []

        for i in range(len(ind1)-1):
            if (ind1[i]-ind1[i-1])>3:
                start.append(ind1[i])
            
            if (ind1[i+1]-ind1[i])>3:
                end.append(ind1[i])

        end.append(ind1[-1])

        output = np.column_stack((start/frame_rate*1000, end/frame_rate*1000))
        return output
    

#TODO:
figure out how you are going to handle dead time or ITI's from the master excel 


In [3]:

p2_master_excel = pd.read_excel("ephys_socialmem_vidinfo.xlsx", sheet_name = "Phase 2")
p2_master_excel = p2_master_excel.dropna(subset=['Name of h5 file '])
p2_master_excel

Unnamed: 0,Name of Recording,Name of h5 file,Familiarization Trial,Mouse taken out,ITI,Social Type 1,Mouse put in,Social 1,Social Type 2,Mouse taken out/put in,Social 2,Social Type 3,Mouse taken out/put in.1
0,20230817_100823_1.1,20230817 100823 1.1,0:00-10: 30,10:30-10:33,10:33-20:26,Cagemate,20:26-20:28,20:28-25:26,Novel,25:26-25:31,25:31-30:28,Familiar,30:28-30:32
2,20230803 101331 1.2,20230803_101331_1.2,,,0:00-6:14,Familiar,6:14-6:17,6:17-11:11,Cagemate,11:11-11:17,11:17-16:10,Novel,16:10-16:16
3,20230803 121318 1.1,20230803_121318_1.1,0:05-9:59,9:59-10:04,10:04-17:01,,,,,,,,
5,20230804_141009_1.2_2t2bL_NFC,20230804 141009 1.1,0:08-10:00,10:00-10:06,10:06-19:59,Novel,20:00-20:02,20:02-25:00,Familiar,25:00-25:08,25:08-30:00,Cagemate,30:00-30:06
7,20230818 115728 1.1,20230818 115728 1.1,0:00-1:51,,,,,,,,,,
8,20230818 115728 1.2,20230818 115728 1.2,0:00-8:47,8:47-8:53,8:53-19:07,Novel,19:07-19:11,19:11-24:06,Familiar,24:06-24:14,24:14-29:14,Cagemate,29:14-29:22
9,20230803_141047_1.2_2t2bL_CNF,20230803 141047 1.1,0:00-10:00,10:00-10:05,10:05-20:00,Cagemate,20:00-20:03,20:03-25:00,Novel,25:00-25:06,25:06-30:00,Familiar,30:00-30:06
10,20230804_121600_1.4_3t3bL_FCN,20230804 121600,0:00-10:00,10:00-10:07,10:07-20:03,Familiar,20:03-20:06,20:06-24:59,Cagemate,24:59-25:05,25:05-29:59,Novel,29:59-30:07
11,20230818_133620_1.4_3t3bL_CNF,20230818 133620 1.1,0:04-9:59,9:59-10:05,10:05-19:58,Cagemate,19:58-20:02,20:02-24:57,Novel,24:57-25:03,25:03-30:00,Familiar,30:00-30:06


In [4]:
p2_master_excel = pd.read_excel("ephys_socialmem_vidinfo.xlsx", sheet_name = "Phase 2")
p2_master_excel = p2_master_excel.dropna(subset=['Name of h5 file '])

columns = ['Mouse taken out', 
            'ITI', 
           'Mouse put in', 
           'Social 1',
           'Mouse taken out/put in', 
           'Social 2',
           'Mouse taken out/put in.1']
def try_split(x):
    try:
       return(x.split("-")[1])
    except IndexError:
        return 'NaN'
times = []
for column_name in columns:
    p2_master_excel[column_name] = p2_master_excel[column_name].astype(str)
    start = p2_master_excel[column_name].apply(lambda x: x.split("-")[0])
    end = p2_master_excel[column_name].apply(lambda x: try_split(x))
    start = start.apply(lambda x: mmss_to_frames(x))
    end = end.apply(lambda x: mmss_to_frames(x))
    new_column = np.column_stack((start, end))
    times.append(new_column.tolist())
    p2_master_excel[column_name + ' ms'] = new_column.tolist()
print(len(times))
print(len(times[0]))
reshaped = np.array(times)
reshaped_2 = np.transpose(reshaped, (1, 0, 2))
print(reshaped_2[0])
p2_master_excel['combinedtimes'] = reshaped_2.tolist()
p2_master_excel = p2_master_excel[['Name of Recording',
 'Name of h5 file ',
 'Social Type 1',
 'Social Type 2',
 'Social Type 3',
 'combinedtimes',
 'Mouse taken out ms',
 'ITI ms',
 'Social 1 ms',
 'Mouse put in ms',
 'Social 2 ms',
 'Mouse taken out/put in ms',
 'Mouse taken out/put in.1 ms']]

p2_master_excel


7
9
[[18900. 18990.]
 [18990. 36780.]
 [36780. 36840.]
 [36840. 45780.]
 [45780. 45930.]
 [45930. 54840.]
 [54840. 54960.]]


Unnamed: 0,Name of Recording,Name of h5 file,Social Type 1,Social Type 2,Social Type 3,combinedtimes,Mouse taken out ms,ITI ms,Social 1 ms,Mouse put in ms,Social 2 ms,Mouse taken out/put in ms,Mouse taken out/put in.1 ms
0,20230817_100823_1.1,20230817 100823 1.1,Cagemate,Novel,Familiar,"[[18900.0, 18990.0], [18990.0, 36780.0], [3678...","[18900.0, 18990.0]","[18990.0, 36780.0]","[36840.0, 45780.0]","[36780.0, 36840.0]","[45930.0, 54840.0]","[45780.0, 45930.0]","[54840.0, 54960.0]"
2,20230803 101331 1.2,20230803_101331_1.2,Familiar,Cagemate,Novel,"[[nan, nan], [0.0, 11220.0], [11220.0, 11310.0...","[nan, nan]","[0.0, 11220.0]","[11310.0, 20130.0]","[11220.0, 11310.0]","[20310.0, 29100.0]","[20130.0, 20310.0]","[29100.0, 29280.0]"
3,20230803 121318 1.1,20230803_121318_1.1,,,,"[[17970.0, 18120.0], [18120.0, 30630.0], [nan,...","[17970.0, 18120.0]","[18120.0, 30630.0]","[nan, nan]","[nan, nan]","[nan, nan]","[nan, nan]","[nan, nan]"
5,20230804_141009_1.2_2t2bL_NFC,20230804 141009 1.1,Novel,Familiar,Cagemate,"[[18000.0, 18180.0], [18180.0, 35970.0], [3600...","[18000.0, 18180.0]","[18180.0, 35970.0]","[36060.0, 45000.0]","[36000.0, 36060.0]","[45240.0, 54000.0]","[45000.0, 45240.0]","[54000.0, 54180.0]"
7,20230818 115728 1.1,20230818 115728 1.1,,,,"[[nan, nan], [nan, nan], [nan, nan], [nan, nan...","[nan, nan]","[nan, nan]","[nan, nan]","[nan, nan]","[nan, nan]","[nan, nan]","[nan, nan]"
8,20230818 115728 1.2,20230818 115728 1.2,Novel,Familiar,Cagemate,"[[15810.0, 15990.0], [15990.0, 34410.0], [3441...","[15810.0, 15990.0]","[15990.0, 34410.0]","[34530.0, 43380.0]","[34410.0, 34530.0]","[43620.0, 52620.0]","[43380.0, 43620.0]","[52620.0, 52860.0]"
9,20230803_141047_1.2_2t2bL_CNF,20230803 141047 1.1,Cagemate,Novel,Familiar,"[[18000.0, 18150.0], [18150.0, 36000.0], [3600...","[18000.0, 18150.0]","[18150.0, 36000.0]","[36090.0, 45000.0]","[36000.0, 36090.0]","[45180.0, 54000.0]","[45000.0, 45180.0]","[54000.0, 54180.0]"
10,20230804_121600_1.4_3t3bL_FCN,20230804 121600,Familiar,Cagemate,Novel,"[[18000.0, 18210.0], [18210.0, 36090.0], [3609...","[18000.0, 18210.0]","[18210.0, 36090.0]","[36180.0, 44970.0]","[36090.0, 36180.0]","[45150.0, 53970.0]","[44970.0, 45150.0]","[53970.0, 54210.0]"
11,20230818_133620_1.4_3t3bL_CNF,20230818 133620 1.1,Cagemate,Novel,Familiar,"[[17970.0, 18150.0], [18150.0, 35940.0], [3594...","[17970.0, 18150.0]","[18150.0, 35940.0]","[36060.0, 44910.0]","[35940.0, 36060.0]","[45090.0, 54000.0]","[44910.0, 45090.0]","[54000.0, 54180.0]"


#TODO handle subject vs social agent typing 

## Uploading h5 files 

In [5]:
path = r'D:\social_ephys_pilot2_cum\proc\ID_corrected_phase2'
ending = "*.h5"
search_path = os.path.join(path, ending)
info ={}
total_frames = 0
for filename in glob.glob(search_path):
    file = os.path.basename(filename)
    file = file[:file.index('.analysis')]
    info[file] = sleap_vid(filename, track_no=2)
    total_frames = info[file].locations.shape[0] + total_frames
    info[file].smooth_locations()
    print(file)
    info[file].social1 = p2_master_excel[
        p2_master_excel['Name of h5 file '] == file]['Social Type 1'].values[0]
    info[file].social2 = p2_master_excel[
        p2_master_excel['Name of h5 file '] == file]['Social Type 2'].values[0]
    info[file].social3 = p2_master_excel[
        p2_master_excel['Name of h5 file '] == file]['Social Type 3'].values[0]
    info[file].markers = p2_master_excel[
        p2_master_excel['Name of h5 file '] == file]['combinedtimes'].values[0]
    print(info[file].tracks)
print(info.keys())

20230803 141047 1.1
['subject', 'social_agent']
20230803_101331_1.2
['subject', 'social_agent']
20230803_121318_1.1
['subject', 'social_agent']


20230818 115728 1.1
['social_agent', 'subject']
20230818 115728 1.2
['social_agent', 'subject']
20230818 133620 1.1
['social_agent', 'subject']
dict_keys(['20230803 141047 1.1', '20230803_101331_1.2', '20230803_121318_1.1', '20230818 115728 1.1', '20230818 115728 1.2', '20230818 133620 1.1'])


## Feature Extraction


### Feature List

1. orientation of subject to social agent
2. orientation of social agent to subject
3. subject velocity
4. social agent velocity
5. distance between subject and social agent (thorax) 

In [6]:
# markers
# 0 - 5 ignore 
# 6-7 = social 1
# 8-9 = ignore
# 10-11 = social 2
# 12-13 = ignore
# rest social 3
is_first = True
recording_labels = []
frame_indice_labels = []
for file, recording in info.items():
    recording_labels += [file] * recording.locations.shape[0]
    frame_indice_labels = np.concatenate([frame_indice_labels,np.arange(0, recording.locations.shape[0])], axis = 0)
    frame_label = list(frame_indice_labels)
    try:
        stop = int(recording.markers[2][0]-1)
        frame_label[:stop] = ['acquisition'] * (stop)
    except ValueError:
        pass
    try:
        start = int(recording.markers[0][0])
        stop = int(recording.markers[2][1])
        frame_label[start:stop] = ['ignore'] * (stop - start)
    except ValueError:
        pass
    try:
        start = int(recording.markers[4][0])
        stop = int(recording.markers[4][1])
        frame_label[start:stop] = ['ignore'] * (stop -start)
    except ValueError:
        pass
    try:
        start = int(recording.markers[6][0])
        stop = int(recording.markers[6][1])
        frame_label[start:stop] = ['ignore'] * (stop -start)
    except ValueError:
        pass
    try:
        start = int(recording.markers[3][0])
        stop = int(recording.markers[3][1])
        frame_label[start:stop] = [recording.social1] * (stop -start)
    except ValueError:
        pass
    try:
        start = int(recording.markers[5][0])
        stop = int(recording.markers[5][1])
        frame_label[start:stop] = [recording.social2] * (stop -start)
    except ValueError:
        pass
    try:
        start = int(recording.markers[6][1] + 1)
        stop = len(frame_indice_labels)
        frame_label[start:] = [recording.social3] * (stop -start)
    except ValueError:
        pass
    orientations = recording.orientation(track_order = ['subject','social_agent'])
    f1 = orientations[0]
    f2 = orientations [1]
    print('hello')
    velocities = recording.node_velocity('thorax', track_order = ['subject','social_agent'])
    f3 = velocities[0] 
    f4 = velocities[1]
    f5 = recording.distances_between_mice('thorax')
    if is_first:
        features = np.stack([f1, f2, f3, f4, f5])
    else:
        temp_features =  np.stack([f1, f2, f3, f4, f5])
        features = np.concatenate([features,temp_features], axis = 1)
    is_first = False
recording_labels = np.array(recording_labels)
frame_labels = np.array(frame_label)
frame_indice_labels = np.array(frame_indice_labels)
zscored_features = []
for i in range(features.shape[0]):
    mean = np.mean(features[i])
    std_dev = np.std(features[i])
    normalized = (features[i]-mean)/std_dev
    zscored_features.append(normalized)
features = np.concatenate([features, np.stack(zscored_features)], axis = 0)
feature_names = ['sub to SA orientation',
                 'SA  to sub orientation',
                 'subject velocity',
                 'SA velocity', 
                 'distance between thoraxes']
for name in range(len(feature_names)):
    zscore_name = feature_names[name]+'_zscore'
    feature_names.append(zscore_name)
new_columns = ['Recording', 'frame indice', 'frame label']
column_names = feature_names + new_columns
data = np.column_stack([features.T, 
                        recording_labels[:, None],
                        frame_indice_labels[:, None],
                        frame_labels[:, None]])
df = pd.DataFrame(data, columns = column_names)
reduced_frames = len(df) 
every_third_index = np.arange(0, reduced_frames, 3)
subsample_df = df.iloc[every_third_index]

hello
[0, 1]


hello
[0, 1]
hello
[0, 1]
hello
[1, 0]
hello
[1, 0]
hello
[1, 0]


In [7]:
list(range(2))

[0, 1]

In [8]:
subsample_df

Unnamed: 0,sub to SA orientation,SA to sub orientation,subject velocity,SA velocity,distance between thoraxes,sub to SA orientation_zscore,SA to sub orientation_zscore,subject velocity_zscore,SA velocity_zscore,distance between thoraxes_zscore,Recording,frame indice,frame label
0,1.8882366260963088,0.8187823798791216,3.945460525833626,2.9356640814443745e-13,304.7037554798004,0.02380815007022507,-0.6173114872728572,1.5697659649205256,-0.6043072429175396,1.6630645047151913,20230803 141047 1.1,0.0,acquisition
3,1.902844949329502,0.7845643289149815,5.139088125575896,9.353505050993743e-14,312.2648921771925,0.03332164078559875,-0.6391308718274334,2.2576866265877396,-0.6043072429176506,1.739515346988425,20230803 141047 1.1,3.0,acquisition
6,1.939786423715329,0.7441784674792962,5.974139667536427,5.751368354282084e-14,321.6590806865943,0.057379321326458246,-0.664883204774641,2.7389499698580795,-0.6043072429176706,1.834500221982817,20230803 141047 1.1,6.0,acquisition
9,1.9846968524562936,0.7015141606224136,6.4388634776621325,1.3896078858708857e-13,332.6049329799632,0.08662668531411985,-0.692088404715466,3.0067831793307933,-0.6043072429176254,1.9451740083085012,20230803 141047 1.1,9.0,acquisition
12,2.0240887042230002,0.6596440616845278,6.529326795687585,1.4971007678225683e-13,344.6377207004057,0.11228014396654191,-0.71878717230516,3.058919696286432,-0.6043072429176194,2.0668378259247246,20230803 141047 1.1,12.0,acquisition
...,...,...,...,...,...,...,...,...,...,...,...,...,...
260646,2.680220375351813,0.39846203788788515,2.1609482174213253,0.241310650536071,189.61627017914333,0.5395778195247586,-0.8853317549271719,0.541302071976576,-0.47041680668683095,0.4994120621693083,20230818 133620 1.1,62996.0,Familiar
260649,2.656705954634155,0.8789878498431072,2.513605092513178,0.5439395196012053,183.15645213924506,0.5242643424333887,-0.578921039412169,0.7445480013661628,-0.30250415465438635,0.4340966803891604,20230818 133620 1.1,62999.0,Familiar
260652,2.741594511946337,1.2734117638837499,2.6887806828645098,0.9556570852255327,176.93568955587156,0.5795469706268763,-0.32741381397162733,0.8455065482768183,-0.07406399114360257,0.37119839430940293,20230818 133620 1.1,63002.0,Familiar
260655,2.966592811740249,1.6006959237382608,2.8413686418100546,1.4641919610805927,172.37930667198694,0.7260743442271896,-0.11871873224500365,0.9334472172953076,0.20809494471599932,0.3251286930021286,20230818 133620 1.1,63005.0,Familiar


# Phase 3

#TODO 
edit functions to handle one mouse - i think this is done 

## uploading h5 files

In [9]:
path = r""
ending = "*.h5"
search_path = os.path.join(path, ending)
info ={}
total_frames = 0
for filename in glob.glob(search_path):
    file = os.path.basename(filename)
    file = file[:file.index('.')]
    info[file] = sleap_vid(filename, track_no=2)
    total_frames = info[file].locations.shape[0] + total_frames
    info[file].smooth_locations()

## feature extraction

### feature list

1. subject orientation to cup 1
2. subject distance to cup 1
3. subject orientation to cup 2 
4. subject distance to cup 2
5. subject orientation to cup 3
6. subject distance to cup 3
7. subject orientation to cup 4
8. subject distance to cup 4
9. subject velocity

In [10]:
is_first = True
recording_labels = []
frame_indice_labels = []
for file, recording in info.items():
    recording_labels += [file] * recording.locations.shape[0]
    frame_indice_labels = np.concatenate([frame_indice_labels,np.arange(0, recording.locations.shape[0])], axis = 0)
    f1 = recording.point_angles('nose', 'thorax', cup1)
    f2 = recording.distances_to_point('nose', cup1)
    f3 = recording.point_angles('nose', 'thorax', cup2)
    f4 = recording.distances_to_point('nose', cup2)
    f5 = recording.point_angles('nose', 'thorax', cup3)
    f6 = recording.distances_to_point('nose', cup3)
    f7 = recording.point_angles('nose', 'thorax', cup4)
    f8 = recording.distances_to_point('nose', cup4)
    f9 = recording.node_velocity('thorax')
    if is_first:
        features = np.stack([f1, f2, f3, f4, f5])
    else:
        temp_features =  np.stack([f1, f2, f3, f4, f5])
        features = np.concatenate([features,temp_features], axis = 1)
    is_first = False
recording_labels = np.array(recording_labels)
zscored_features = []
for i in range(features.shape[0]):
    mean = np.mean(features[i])
    std_dev = np.std(features[i])
    normalized = (features[i]-mean)/std_dev
    zscored_features.append(normalized)
features = np.concatenate([features, np.stack(zscored_features)], axis = 0)
feature_names = ['sub to SA orientation',
                 'SA  to sub orientation',
                 'subject velocity',
                 'SA velocity', 
                 'distance between thoraxes']
for name in range(len(feature_names)):
    zscore_name = feature_names[name]+'_zscore'
    feature_names.append(zscore_name)
new_columns = ['Recording', 'frame indice']
column_names = feature_names + new_columns
data = np.column_stack([features.T, 
                        recording_labels[:, None],
                        frame_indice_labels[:, None]])
df = pd.DataFrame(data, columns = column_names)
reduced_frames = len(df) 
every_third_index = np.arange(0, reduced_frames, 3)
subsample_df = df.iloc[every_third_index]

TypeError: list indices must be integers or slices, not tuple