In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from scipy.stats import ttest_ind

from shared.helper_functions import *
from settings_general import subject_id_batch_cage_dict

Get the path to the folder that holds the behaviour data exported from the BORIS software.

In [2]:
# Folder with the BORIS behaviour exports; the loading loop further down reads the Excel files inside it.
# select_folder is not defined in this notebook (presumably provided by shared.helper_functions) -- it is expected to return the chosen path.
behaviour_data_dir = select_folder("Please select the sociability behaviour data directory")

If you want to save plots, select a directory you want to save them to.

In [3]:
# Base folder for saved figures; the plotting cell at the end joins its output path onto this directory.
plot_output_dir = select_or_create_folder("Please select or create a folder you wish to save the plots")

## Load the animal metadata

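First, let's load the metadata for all animals. In this notebook it is used to look up the genotype of each animal by its id (`mouseId`); the mapping from batch/cage combination to animal id comes from `subject_id_batch_cage_dict`.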

In [4]:
# `all_animals_metadata` is whatever select_file returns (presumably the path of the chosen spreadsheet);
# `metadata` is the table loaded from it. Later cells read its `mouseId` and `genotype` columns.
all_animals_metadata = select_file("Please select the file holding metadata for all animals")
metadata = pd.read_excel(all_animals_metadata)
# bare expression so the notebook renders the loaded table
metadata

## Data pre-processing

The data that is exported from the BORIS tracking application provides two rows per state-event. One represents the start of the event, and one the stop, each having a single timestamp.

We want to merge these rows and immediately calculate the duration of the (non)social cup state-event interaction. Let's do this first.

To do so, we define a function that merges some information from the START and the STOP rows.

In [5]:
def merge_event_rows(beh_data):
    """Collapse alternating START/STOP rows into one row per event.

    Rows are paired purely by position: rows 0, 2, 4, ... are treated as the
    START rows and rows 1, 3, 5, ... as the matching STOP rows. The caller is
    responsible for passing rows that alternate in that order.

    Parameters
    ----------
    beh_data : pd.DataFrame
        Event rows of a single behaviour. Must contain 'Image index', 'Time'
        and the bookkeeping columns that are dropped below.

    Returns
    -------
    pd.DataFrame
        The remaining columns of each START row plus 'Frame start',
        'Frame stop' and 'Interaction duration' (STOP time minus START time).
    """
    # positional split; the fresh 0..n-1 index lets both halves align row by row
    start_rows = beh_data.iloc[::2].reset_index(drop=True)
    stop_rows = beh_data.iloc[1::2].reset_index(drop=True)

    frame_start = start_rows['Image index'].rename('Frame start')
    frame_stop = stop_rows['Image index'].rename('Frame stop')
    duration = (stop_rows['Time'] - start_rows['Time']).rename('Interaction duration')

    merged_df = pd.concat([start_rows, frame_start, frame_stop, duration], axis=1)

    # bookkeeping columns that are superseded by the derived ones or not needed downstream
    unwanted_columns = [
        'Image index', 'Time', 'Observation type', 'Source', 'Time offset (s)', 'Subject',
        'Comment', 'Image file path', 'Description', 'Behavioral category', 'Behavior type',
    ]
    return merged_df.drop(columns=unwanted_columns)

Generate a DataFrame that holds a single row per event (the START and STOP rows merged).

In [43]:
# Collect one merged frame per (file, behaviour) and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop re-copies all earlier rows every time.
event_frames = []

# for each behaviour file (which holds behaviour data on one animal only);
# sorted so the row order does not depend on the file system's listing order
for file in sorted(os.listdir(behaviour_data_dir)):
    # a file must be an Excel export AND carry 'batch' in its name,
    # so it is skipped as soon as either condition fails
    if not file.endswith('.xlsx') or 'batch' not in file:
        continue

    beh_dat = pd.read_excel(os.path.join(behaviour_data_dir, file))

    # the file name up to the first dot is the batch/cage label; translate it to the subject id
    batch_cage = file.split('.')[0]
    subject_id = next(
        (sid for sid, label in subject_id_batch_cage_dict.items() if label == batch_cage),
        None,
    )
    if subject_id is None:
        raise ValueError(f'No subject id is registered for batch/cage "{batch_cage}" (file: {file})')

    subject_metadata = metadata[metadata.mouseId == subject_id]
    if subject_metadata.empty:
        raise ValueError(f'Subject {subject_id} ({batch_cage}) is missing from the animal metadata')
    genotype = subject_metadata['genotype'].iloc[0]

    for event_type in beh_dat['Behavior'].unique():
        beh_dat_event = beh_dat[beh_dat['Behavior'] == event_type]
        starts = beh_dat_event[beh_dat_event['Behavior type'] == 'START']
        stops = beh_dat_event[beh_dat_event['Behavior type'] == 'STOP']

        # a trailing START without STOP (or a leading STOP without START) cannot be paired: trim it
        if len(stops) < len(starts):
            print(f'({batch_cage}, {subject_id}) Number of STOPs is smaller than number of STARTs for {event_type}')
            if beh_dat_event.iloc[-1]['Behavior type'] == 'START':
                print('Removing last row because it is of type START')
                beh_dat_event = beh_dat_event.drop(beh_dat_event.index[-1])
        if len(starts) < len(stops):
            print(f'({batch_cage}, {subject_id}) Number of STARTs is smaller than number of STOPs for {event_type}')
            if beh_dat_event.iloc[0]['Behavior type'] == 'STOP':
                print('Removing first row because it is of type STOP')
                beh_dat_event = beh_dat_event.drop(beh_dat_event.index[0])

        # merge_event_rows pairs rows purely by position (even rows = START, odd rows = STOP),
        # so make it visible when the rows still do not alternate after the trimming above
        row_types = beh_dat_event['Behavior type']
        rows_alternate = (
            len(row_types) % 2 == 0
            and (row_types.iloc[::2] == 'START').all()
            and (row_types.iloc[1::2] == 'STOP').all()
        )
        if not rows_alternate:
            print(f'({batch_cage}, {subject_id}) WARNING: START/STOP rows for {event_type} do not alternate; '
                  f'the merged durations for this behaviour may be wrong')

        # merge the start and stop rows and calculate some stuff (interaction duration etc)
        beh_dat_event = merge_event_rows(beh_dat_event)
        beh_dat_event.insert(1, 'subject_id', subject_id)
        beh_dat_event.insert(2, 'genotype', genotype)
        event_frames.append(beh_dat_event)

# pd.concat raises on an empty list, so fall back to an empty frame when no file matched
beh_df = pd.concat(event_frames, axis=0) if event_frames else pd.DataFrame()

Let's inspect the dataframe

In [44]:
# Display the merged event table (one row per START/STOP pair, tagged with subject_id and genotype)
beh_df

In [45]:
# Report which animals ended up in the merged event table (unique ids computed once, reused for count and listing)
present_subject_ids = np.unique(beh_df["subject_id"])
print(f'Subjects/animals present (n={len(present_subject_ids)}):\n{present_subject_ids} ')

## Sociability metric calculations

In [46]:
# One summary row per (subject, behaviour). Rows are gathered as plain dicts and turned into a
# DataFrame once, instead of re-concatenating a growing frame on every iteration.
summary_rows = []

# for each subject/animal present in the merged event table
for subject_id in np.unique(beh_df["subject_id"]):

    subject_data = beh_df[beh_df["subject_id"] == subject_id]
    # every row of a subject carries the same genotype, so the first one is representative
    genotype = subject_data['genotype'].iloc[0]

    for behaviour in np.unique(subject_data["Behavior"]):

        behaviour_data = subject_data[subject_data["Behavior"] == behaviour]
        durations = behaviour_data['Interaction duration']

        summary_rows.append({
            'subject_id': subject_id,
            'genotype': genotype,
            'event_type': behaviour,
            'event_count': len(behaviour_data),
            'total_event_duration': np.sum(durations),
            # mean over the events of THIS behaviour only; averaging over subject_data would
            # mix all behaviours of the animal and give every event_type the same value
            'average_event_duration': np.mean(durations),
        })

behaviour_stats = pd.DataFrame(summary_rows)

In [47]:
# Display the summary table (one row per subject and behaviour)
behaviour_stats

In [48]:
# Number of summary rows per genotype. behaviour_stats has one row per subject AND behaviour,
# so this equals the number of animals only when a single behaviour was scored per animal.
np.unique(behaviour_stats.genotype, return_counts=True)

In [49]:
# Descending sort on the genotype label puts 'DRD2-WT' rows before 'DRD2-KO' rows
# (presumably so that WT is drawn first in the plots below -- confirm).
behaviour_stats = behaviour_stats.sort_values(by='genotype', ascending=False)
# Every column from position 3 onwards is treated as a metric to plot; with the column order used when
# behaviour_stats is built these are event_count, total_event_duration and average_event_duration.
# NOTE(review): positional selection silently changes if columns are added or reordered.
boxplot_cols = behaviour_stats.columns[3:]
boxplot_cols

In [51]:
# Genotype -> colour lookups: one set for the individual data points, one for the boxes
swarmplot_palette = {'DRD2-WT':'#AEC2B3', 'DRD2-KO':'#C68083'}
violin_palette = {'DRD2-WT':'#6A8D73', 'DRD2-KO':'#984447'}

# One figure per scored behaviour, one panel per summary metric
for event_type in behaviour_stats.event_type.unique():

    # rows of the current behaviour, plus a mask for each genotype group
    event_type_behaviour_stats = behaviour_stats[behaviour_stats.event_type == event_type]
    is_wt = event_type_behaviour_stats.genotype == 'DRD2-WT'
    is_ko = event_type_behaviour_stats.genotype == 'DRD2-KO'

    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(20, 7))
    axs = axs.ravel()

    for i, metric in enumerate(boxplot_cols):
        ax = axs[i]

        # two-sample t-test (scipy defaults) between the WT and the KO animals
        wt_metric_data = event_type_behaviour_stats.loc[is_wt, metric]
        ko_metric_data = event_type_behaviour_stats.loc[is_ko, metric]
        t_statistic, p_val = ttest_ind(wt_metric_data, ko_metric_data)

        # box plot with the individual animals overlaid; both layers share data, axes and grouping
        shared_kwargs = dict(data=event_type_behaviour_stats, x='genotype', hue='genotype', y=metric, ax=ax)
        sns.boxplot(linewidth=2, palette=violin_palette, **shared_kwargs)
        sns.swarmplot(color="white", edgecolor="auto", s=6, palette=swarmplot_palette, **shared_kwargs)

        # the y-limits are read AFTER plotting so the label sits 8 % of the final axis height below the top
        y_min, y_max = ax.get_ylim()
        stats_label = f'T-stat: {round(t_statistic, 3)}\nP-value: {round(p_val, 3)}'
        ax.text(0.5, y_max - 0.08 * (y_max - y_min), stats_label, ha='center', va='top', fontsize=9, fontweight='bold')

    fig.tight_layout()
    plt.suptitle(f'Comparison of {event_type} metrics in DRD2-WT and DRD2-KO Mice', fontsize=14, fontweight='bold', fontstyle='italic', y=1.025)
    # NOTE(review): the file name says 'violin' although box plots are drawn; whether save_figure creates
    # the 'behavioural_analysis' sub-folder cannot be seen from this notebook -- confirm.
    save_figure(os.path.join(plot_output_dir, f'behavioural_analysis/violin_grid_{event_type}.pdf'))