In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from scipy.stats import ttest_ind

from shared.helper_functions import *
from settings_general import subject_id_batch_cage_dict

Get the path to the folder that holds the behaviour data exported from BORIS software.

In [2]:
behaviour_data_dir = select_folder("Please select the sociability behaviour data directory")

2024-05-06 09:25:02.357 python[1072:15056] +[CATransaction synchronize] called within transaction


If you want to save plots, select a directory you want to save them to.

In [3]:
plot_output_dir = select_or_create_folder("Please select or create a folder you wish to save the plots")

2024-05-06 09:25:13.439 python[1072:15056] +[CATransaction synchronize] called within transaction
2024-05-06 09:25:13.527 python[1072:15056] +[CATransaction synchronize] called within transaction
2024-05-06 09:25:16.941 python[1072:15056] +[CATransaction synchronize] called within transaction


## Load the animal metadata

First, let's load the metadata that tells us which batch/cage combination is which animal (id).

In [4]:
all_animals_metadata = select_file("Please select the file holding metadata for all animals")
metadata = pd.read_excel(all_animals_metadata)
metadata

2024-05-06 09:25:30.709 python[1072:15056] +[CATransaction synchronize] called within transaction
2024-05-06 09:25:30.737 python[1072:15056] +[CATransaction synchronize] called within transaction
2024-05-06 09:25:39.034 python[1072:15056] +[CATransaction synchronize] called within transaction


Unnamed: 0,mouseId,genotype,mouseName,arena,batch,RFID,birthday,weight,sex,species,type,notes
0,78211,DRD2-KO,1.1,1.0,1,4518,2023-04-03,26.6,M,MusMusculus,experimental,
1,78212,DRD2-HET,1.2,1.0,1,4513,2023-04-03,25.6,M,MusMusculus,stimulus,
2,78213,CreWT,1.3,1.0,1,4517,2023-04-03,27.7,M,MusMusculus,stimulus,
3,78210,CreWT,1.4,1.0,1,4521,2023-04-03,26.1,M,MusMusculus,stimulus,
4,78233,DRD2-WT,1.5,2.0,1,4507,2023-04-05,24.6,M,MusMusculus,experimental,
...,...,...,...,...,...,...,...,...,...,...,...,...
91,39498,CreWT,6.4,,6,4553,2023-08-16,,M,MusMusculus,stimulus,
92,39508,DRD2-WT,6.5,,6,4564,2023-08-15,,M,MusMusculus,experimental,
93,39499,DRD-HET,6.6,,6,4557,2023-08-15,,M,MusMusculus,stimulus,
94,39507,CreWT,6.7,,6,4571,2023-08-16,,M,MusMusculus,stimulus,


## Data pre-processing

The data that is exported from the BORIS tracking application provides two rows per state-event. One represents the start of the event, and one the stop, each having a single timestamp.

We want to merge these rows and immediately calculate the duration of the (non)social cup state-event interaction. Let's do this first.

To do so, we define a function that merges some information from the START and the STOP rows.

In [5]:
def merge_event_rows(beh_data):
    merged_df = pd.concat([
        beh_data.iloc[::2].reset_index(drop=True),  # only keep each start row
        beh_data.iloc[::2].reset_index(drop=True)['Image index'].rename('Frame start'),  # interaction start frame, name it 'Frame start'
        beh_data.iloc[1::2].reset_index(drop=True)['Image index'].rename('Frame stop'),  # interaction stop frame, name it 'Frame stop'
        beh_data.iloc[1::2].reset_index(drop=True)['Time'] - beh_data.iloc[::2]['Time'].reset_index(drop=True),  # interaction duration
    ], axis=1)
    # rename the last column as it represents the duration of the interaction
    merged_df = merged_df.set_axis([*merged_df.columns[:-1], 'Interaction duration'], axis=1)
    # drop the columns we don't need
    cols_to_drop = ['Image index', 'Time', 'Observation type', 'Source', 'Time offset (s)', 'Subject', 'Comment', 'Image file path', 'Description', 'Behavioral category', 'Behavior type']
    return merged_df.drop(columns=cols_to_drop)

Generate a df holding a single event line per event

In [43]:
beh_df = pd.DataFrame()

# for each behaviour file (which holds behaviour data on one animal only)
for file in os.listdir(behaviour_data_dir):
    if not file.endswith('.xlsx') and 'batch' not in file:
        continue
        
    beh_dat = pd.read_excel(os.path.join(behaviour_data_dir, file))
    
    # get the subject id
    batch_cage = file.split('.')[0]
    subject_id = [x for x in subject_id_batch_cage_dict.keys() if subject_id_batch_cage_dict[x] == batch_cage][0]
    genotype = metadata[metadata.mouseId == subject_id]['genotype'].iloc[0]

    for event_type in beh_dat['Behavior'].unique():
        beh_dat_event = beh_dat[beh_dat['Behavior'] == event_type]
        starts = beh_dat_event[beh_dat_event['Behavior type'] == 'START']
        stops = beh_dat_event[beh_dat_event['Behavior type'] == 'STOP']
        
        if len(stops) < len(starts):
            print(f'({batch_cage}, {subject_id}) Number of STOPs is smaller than number of STARTs for {event_type}')
            if beh_dat_event.iloc[-1]['Behavior type'] == 'START':
                print('Removing last row because it is of type START')
                beh_dat_event = beh_dat_event.drop(beh_dat_event.index[-1])
        if len(starts) < len(stops):
            print(f'({batch_cage}, {subject_id}) Number of STARTs is smaller than number of STOPs for {event_type}')
            if beh_dat_event.iloc[0]['Behavior type'] == 'STOP':
                print('Removing first row because it is of type STOP')
                beh_dat_event = beh_dat_event.drop(beh_dat_event.index[0])

        beh_dat_event = merge_event_rows(beh_dat_event)
        # merge the start and stop rows and calculate some stuff (interaction duration etc)
        beh_dat_event.insert(1, 'subject_id', subject_id)
        beh_dat_event.insert(2, 'genotype', genotype)
        beh_df = pd.concat([beh_df, beh_dat_event], axis=0)

(batch2_cage3, 79604) Number of STOPs is smaller than number of STARTs for sniff
Removing last row because it is of type START
(batch2_cage2, 79592) Number of STARTs is smaller than number of STOPs for sniff
Removing first row because it is of type STOP
(batch1_cage1, 78211) Number of STARTs is smaller than number of STOPs for sniff
Removing first row because it is of type STOP
(batch2_cage4, 79602) Number of STOPs is smaller than number of STARTs for follow
Removing last row because it is of type START
(batch5b_cage1, 81217) Number of STARTs is smaller than number of STOPs for sniff
Removing first row because it is of type STOP
(batch3_cage3, 80108) Number of STARTs is smaller than number of STOPs for sniff
Removing first row because it is of type STOP


Let's inspect the dataframe

In [44]:
beh_df

Unnamed: 0,Observation id,subject_id,genotype,Observation date,Observation duration,Media duration (s),FPS,Behavior,Media file name,Frame start,Frame stop,Interaction duration
0,B5C4 redo,81193,DRD2-KO,2024-02-16 12:23:23.147,274.502,1629.924,30.0,sniff,C:/Users/rebek/Downloads/drd2_batch5_social-in...,39276,39288,0.400
1,B5C4 redo,81193,DRD2-KO,2024-02-16 12:23:23.147,274.502,1629.924,30.0,sniff,C:/Users/rebek/Downloads/drd2_batch5_social-in...,39802,39823,0.701
2,B5C4 redo,81193,DRD2-KO,2024-02-16 12:23:23.147,274.502,1629.924,30.0,sniff,C:/Users/rebek/Downloads/drd2_batch5_social-in...,39990,39996,0.199
3,B5C4 redo,81193,DRD2-KO,2024-02-16 12:23:23.147,274.502,1629.924,30.0,sniff,C:/Users/rebek/Downloads/drd2_batch5_social-in...,40587,40596,0.300
4,B5C4 redo,81193,DRD2-KO,2024-02-16 12:23:23.147,274.502,1629.924,30.0,sniff,C:/Users/rebek/Downloads/drd2_batch5_social-in...,41150,41157,0.234
...,...,...,...,...,...,...,...,...,...,...,...,...
11,B1C3,78227,DRD2-KO,2024-02-15 13:41:11.468,284.706,1607.620,30.0,follow,D:/BORIS videos/B1C3.mp4,43053,43073,0.667
12,B1C3,78227,DRD2-KO,2024-02-15 13:41:11.468,284.706,1607.620,30.0,follow,D:/BORIS videos/B1C3.mp4,43078,43093,0.500
13,B1C3,78227,DRD2-KO,2024-02-15 13:41:11.468,284.706,1607.620,30.0,follow,D:/BORIS videos/B1C3.mp4,43110,43117,0.233
14,B1C3,78227,DRD2-KO,2024-02-15 13:41:11.468,284.706,1607.620,30.0,follow,D:/BORIS videos/B1C3.mp4,43190,43243,1.767


In [45]:
print(f'Subjects/animals present (n={len(np.unique(beh_df["subject_id"]))}):\n{np.unique(beh_df["subject_id"])} ')

Subjects/animals present (n=19):
[39489 39508 78211 78227 78233 78244 79592 79593 79602 79604 80108 80620
 80625 80630 81175 81193 81207 81217 81218] 


## Sociability metric calculations

In [46]:
behaviour_stats = pd.DataFrame()

# for each behaviour file (which holds behaviour data on one animal only)
for subject_id in np.unique(beh_df["subject_id"]):
    
    subject_data = beh_df[beh_df["subject_id"] == subject_id]
    genotype = subject_data['genotype'].iloc[0]
    
    for behaviour in np.unique(subject_data["Behavior"]):
        
        behaviour_data = subject_data[subject_data["Behavior"] == behaviour]
        
        temp = pd.DataFrame({
            'subject_id': subject_id,
            'genotype': genotype,
            'event_type': behaviour,
            'event_count' : len(behaviour_data),
            'total_event_duration': np.sum(behaviour_data['Interaction duration']),
            'average_event_duration': np.mean(subject_data['Interaction duration'])
        }, index=[0])
        behaviour_stats = pd.concat([behaviour_stats, temp], ignore_index=True)

In [47]:
behaviour_stats

Unnamed: 0,subject_id,genotype,event_type,event_count,total_event_duration,average_event_duration
0,39489,DRD2-KO,follow,4,5.435,0.633645
1,39489,DRD2-KO,sniff,27,14.208,0.633645
2,39508,DRD2-WT,follow,1,1.234,0.57616
3,39508,DRD2-WT,sniff,24,13.17,0.57616
4,78211,DRD2-KO,follow,12,14.772,1.024163
5,78211,DRD2-KO,groom,8,10.836,1.024163
6,78211,DRD2-KO,sniff,29,24.576,1.024163
7,78227,DRD2-KO,follow,16,15.075,0.861662
8,78227,DRD2-KO,groom,13,18.039,0.861662
9,78227,DRD2-KO,sniff,39,25.479,0.861662


In [48]:
np.unique(behaviour_stats.genotype, return_counts=True)

(array(['DRD2-KO', 'DRD2-WT'], dtype=object), array([26, 23]))

In [49]:
behaviour_stats = behaviour_stats.sort_values(by='genotype', ascending=False)
boxplot_cols = behaviour_stats.columns[3:]
boxplot_cols

Index(['event_count', 'total_event_duration', 'average_event_duration'], dtype='object')

In [51]:
swarmplot_palette = {'DRD2-WT':'#AEC2B3', 'DRD2-KO':'#C68083'}
violin_palette = {'DRD2-WT':'#6A8D73', 'DRD2-KO':'#984447'}

for event_type in behaviour_stats.event_type.unique():
        
    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(20, 7))
    axs = axs.ravel()
    
    event_type_behaviour_stats = behaviour_stats[behaviour_stats.event_type == event_type]
    
    for i, metric in enumerate(boxplot_cols):
        wt_metric_data = event_type_behaviour_stats[event_type_behaviour_stats.genotype == 'DRD2-WT'][metric]
        ko_metric_data = event_type_behaviour_stats[event_type_behaviour_stats.genotype == 'DRD2-KO'][metric]
    
        t_statistic, p_val = ttest_ind(wt_metric_data, ko_metric_data)
        
        sns.boxplot(data=event_type_behaviour_stats, x='genotype', hue='genotype', y=metric, ax=axs[i], linewidth=2, palette=violin_palette)
        sns.swarmplot(data=event_type_behaviour_stats, x='genotype', hue='genotype', y=metric, ax=axs[i], color="white", edgecolor="auto", s=6, palette=swarmplot_palette)
        
        y_range = axs[i].get_ylim()
        offset = 0.08 * (y_range[1] - y_range[0])
        
        axs[i].text(0.5, y_range[1] - offset, f'T-stat: {round(t_statistic, 3)}\nP-value: {round(p_val, 3)}', ha='center', va='top', fontsize=9, fontweight='bold')
        
    fig.tight_layout()
    plt.suptitle(f'Comparison of {event_type} metrics in DRD2-WT and DRD2-KO Mice', fontsize=14, fontweight='bold', fontstyle='italic', y=1.025)
    save_figure(os.path.join(plot_output_dir, f'behavioural_analysis/violin_grid_{event_type}.pdf'))