DLC_JAABA_pipeline Step 1 
Confirm that all identity swaps between males (M) and females (F) have been corrected, based on the fact that healthy females are larger than males, i.e. they have longer abdomens.

Related to Cell manuscript “Male-Male Interactions Shape Mate Selection in Drosophila” 
by Tom Hindmarsh Sten, Rufei Li, Florian Hollunder, Shade Eleazer, and Vanessa Ruta

Description: 
This script was provided along with other scripts to show how we converted DeepLabCut pose tracking output to JAABA-classified behavioral epochs.

Input: 
.h5 files from DeepLabCut pose tracking 

Output: 
Plots of abdomen lengths of three individuals in the MMF assay

Dependencies: the Python packages imported below (pandas, numpy, scipy, plotly); no specific versions are required

Last updated on 2024-12-09

In [10]:
import pandas as pd
import numpy as np
from scipy import signal
import os
import plotly.io as pio
from plotly.subplots import make_subplots
pio.renderers.default = "plotly_mimetype+notebook"

In [11]:
def getBodypartDistance(dataframe, partname1, partname2):
    """Return the per-row Euclidean distance between two bodyparts.

    dataframe: frame whose columns carry a 'bodyparts' level.
    partname1, partname2: labels selected from that level.

    Only the first two columns of each selection are used; they are assumed
    to be the x and y coordinates (in pixels, per the original comment).
    Returns a 1-D numpy array with one distance per row.
    """
    firstPart = dataframe.xs(partname1, level='bodyparts', axis=1).to_numpy()
    secondPart = dataframe.xs(partname2, level='bodyparts', axis=1).to_numpy()

    # column 0 and column 1 of each selection, differenced separately
    deltaX = firstPart[:, 0] - secondPart[:, 0]
    deltaY = firstPart[:, 1] - secondPart[:, 1]

    return np.sqrt(np.square(deltaX) + np.square(deltaY))


def _markJumpSpans(mask, starts, ends, maxJumpLength):
    """Flag, in place, the samples belonging to each jump that begins in `starts`.

    For every index in `starts`, look for the first index in `ends` that lies
    strictly after it and less than `maxJumpLength` samples away. If one
    exists, mask[start:end] is set to True; otherwise only mask[start] is set.
    """
    for start in starts:
        matched = ends[(ends > start) & (ends < start + maxJumpLength)]
        if matched.size:
            mask[start:matched[0]] = True
        else:
            mask[start] = True


def removeJumps(dataframe, maxJumpLength, jumpThreshold=200):
    """Blank out large jumps in the x/y position of bodyparts.

    Such jumps usually result from swaps between animals (per the original
    comment). For every bodypart, peaks in the frame-to-frame x and y
    differences mark a jump; a jump followed within `maxJumpLength` samples by
    a jump in the opposite direction is treated as one excursion and blanked
    as a whole, while an unmatched jump blanks a single frame.

    dataframe: frame with three column levels (scorer, bodypart, coordinate);
        the coordinate level must contain 'x' and 'y'. Only the first scorer
        label is processed.
    maxJumpLength: maximum distance, in difference samples, between a jump and
        the opposite jump that closes it.
    jumpThreshold: `threshold` handed to scipy.signal.find_peaks, i.e. how far
        a difference sample must stand above both neighbours to count as a jump.
        Defaults to 200, the value that was previously hard-coded.

    Returns a copy of `dataframe` in which every coordinate column of the
    affected bodypart is NaN on the flagged frames; the input is not modified.
    """
    columns = dataframe.columns
    scorer = columns.get_level_values(0)[0]
    # Read the labels that are actually present: `columns.levels` can still
    # list entries that a previous selection (e.g. `xs`) removed, which would
    # make the lookups below raise KeyError.
    bps = list(columns.get_level_values(1).unique())
    params = list(columns.get_level_values(2).unique())
    dataframeMod = dataframe.copy()

    for partName in bps:
        # frame-to-frame differences, with NaN gaps filled in once per axis
        diffs = [
            pd.Series(np.diff(dataframe[scorer][partName][coord])).interpolate()
            for coord in ('x', 'y')
        ]

        toKill = np.zeros((len(diffs[0]),), dtype=bool)

        for diff in diffs:
            jumpsPositive = signal.find_peaks(diff, threshold=jumpThreshold)[0]
            jumpsNegative = signal.find_peaks(diff * -1, threshold=jumpThreshold)[0]

            # an excursion may start with a jump in either direction
            _markJumpSpans(toKill, jumpsPositive, jumpsNegative, maxJumpLength)
            _markJumpSpans(toKill, jumpsNegative, jumpsPositive, maxJumpLength)

        # diff[k] describes the step from frame k to frame k + 1, so shift the
        # mask by one frame to line it up with the rows. find_peaks never
        # reports index 0, so prepending here gives the same mask as the
        # previous insert at position 1, and also works for a one-row frame.
        toKill = np.insert(toKill, 0, False)

        dataframeMod.loc[toKill, (scorer, partName, params)] = np.nan

    return dataframeMod



In [14]:
# change pathname and filename to the assay you are checking
pathname = '/Users/rufeili/Documents/Test/Example_MMF/assay3'
filename = '030322_Canton-S_age4_m_sh_f_gh_2DLC_dlcrnetms5_MMFFeb15shuffle1_200000_el_IDcorrected.h5'

tracks = pd.read_hdf(os.path.join(pathname, filename))
scorer = tracks.columns.get_level_values(0)[0]
sampleRate = 60  # Hz
maxJump = 6  # passed to removeJumps as maxJumpLength
nFrames = len(tracks)
# time of each frame in seconds: frame i is recorded at i / sampleRate
# (np.linspace(0, nFrames / sampleRate, nFrames) would stretch the step to
# nFrames / ((nFrames - 1) * sampleRate))
tstamp = np.arange(nFrames) / sampleRate

ind1Name = 'ind1'  # can be M or F
ind2Name = 'ind2'  # can be M or F
ind3Name = 'ind3'  # can be M or F


def _abdomenLength(individualName):
    # returns the jump-filtered positions of one individual together with its
    # per-frame distance between the 'abdomenTop' and 'genitalia' bodyparts
    positions = tracks.xs(individualName, level='individuals', axis=1)
    positions = removeJumps(positions, maxJump)
    return positions, getBodypartDistance(positions, 'abdomenTop', 'genitalia')


# get abdomen length for each individual
ind1Positions, ind1_abdomenlength = _abdomenLength(ind1Name)
ind2Positions, ind2_abdomenlength = _abdomenLength(ind2Name)
ind3Positions, ind3_abdomenlength = _abdomenLength(ind3Name)

# Plot abdomen length of three individuals (x axis is the frame index)
fig = make_subplots(specs=[[{"secondary_y": True}]])

for label, abdomenLength in (('ind1', ind1_abdomenlength),
                             ('ind2', ind2_abdomenlength),
                             ('ind3', ind3_abdomenlength)):
    fig.add_scatter(y=abdomenLength, mode='lines',
                    name=f'{label} abdomen length (px)', secondary_y=False)

fig.show()
