# Pipeline analysis for supervised data
Let's start simply by analyzing the data from a single individual, considering total distance, look-around, sniffing, inner-zone time, wall-climbing, and huddle. We begin by importing the necessary packages and pointing to the project directory.

In [None]:
import os
import pandas as pd
import pickle
import deepof.data
import deepof.visuals
import visuals_customized
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Folder that holds the existing DeepOF project and its companion files.
# Keep the trailing slash: paths are built by appending directly to this string.
directory_output = '//folder/becell/Lab Collaborative Projects/Cancer+Mood - IGM/Behaviour/3rd round/SOC INT/'

# Re-open the existing project rather than creating a new one
my_deepof_project = deepof.data.load_project(
    f"{directory_output}deepof_tutorial_project"
)

In [None]:
# Load the previously saved supervised analysis from the output directory.
# NOTE: unpickling can execute arbitrary code; only load files you created
# yourself or otherwise trust.
annotation_path = directory_output + 'supervised_annotation.pkl'
with open(annotation_path, 'rb') as pickle_file:
    supervised_annotation = pickle.load(pickle_file)

In [None]:
# Load the experimental conditions from conditions.csv into the project
conditions_csv = directory_output + 'conditions.csv'
my_deepof_project.load_exp_conditions(conditions_csv)

In [None]:
# Named hex colour codes: neutrals first, then the two accent colours
white, grey_soft, grey_stark = '#FFFFFF', '#D3D3D3', '#636466'
blue, red = '#194680', '#801946'

## Plot PCA between conditions

In [None]:
# Label settings per condition: condition key -> [label text, label coordinates].
# The names and coordinates below are placeholders to be adapted to the plot.
coords_dict = dict(
    hc_ind=['name1', [4, 5]],
    hc_ee=['name2', [4, 5]],
)

In [None]:
# Colour for each experimental condition, taken from seaborn's "Set1" palette.
# (Fixed: a doubled comma after the first entry made this cell a SyntaxError.)
set1_palette = sns.color_palette("Set1")
color_dict = {
    'hc_ind': set1_palette[0],
    'hc_ee': set1_palette[1],
}

In [None]:
# PCA embedding of the supervised annotations for the first time bin
# (bin_index=0 with bin_size=120). The returned objects are reused by the
# statistics and plotting cells further down.
(
    ax,
    embedding_dataset,
    rotated_loading_scores,
    dataframe_for_titles,
) = visuals_customized.plot_embeddings(
    my_deepof_project,
    supervised_annotations=supervised_annotation,
    bin_size=120,
    bin_index=0,
    my_title='PCA first time bin',
    my_color_dict=color_dict,
    my_coords_dict=coords_dict,
)

### Analyze individual PC and rotated_loading_scores

In [None]:
# Perform statistics on one principal component, compared between conditions
import pingouin as pg

# Assumption checks. They are printed explicitly because a notebook cell only
# auto-displays the value of its last expression; as bare expressions these
# results were computed and then silently discarded.
print(pg.normality(embedding_dataset, dv='PCA-2', group='experimental condition'))
print(pg.homoscedasticity(embedding_dataset, dv='PCA-2', group='experimental condition'))

# Two-group comparison of the component between the two conditions
x = embedding_dataset.loc[embedding_dataset['experimental condition'] == 'hc_ee', 'PCA-2']
y = embedding_dataset.loc[embedding_dataset['experimental condition'] == 'hc_ind', 'PCA-2']
pg.ttest(x, y).round(3)

# Alternative when more than two groups are compared:
# pg.anova(embedding_dataset, 'PCA-1', 'experimental condition')
# pg.pairwise_tukey(embedding_dataset, 'PCA-1', 'experimental condition')

In [None]:
# Record the significant comparison by hand:
# significance label -> the two conditions that were compared
stats_dict = {'*': ['hc_ee', 'hc_ind']}

In [None]:
# Box plot of one principal component, passing the colours and stats_dict.
# NOTE(review): the statistics cell above tests 'PCA-2' while this plots
# 'PCA-1' - confirm that stats_dict describes the plotted component.
visuals_customized.boxplot(
    embedding_dataset,
    color_dict,
    'PCA-1',
    stats_dict,
)

In [None]:
# Lollipop plot of the rotated loading scores for the chosen component
visuals_customized.lollipop(
    dataframe_for_titles,
    rotated_loading_scores,
    'PCA-1',
)

## Plot PCA between time points

In [None]:
# Label settings per time bin: 'bin0'..'bin3' -> [label text, label coordinates].
# The names ('name1'..'name4') and coordinates are placeholders to be adapted.
coords_dict = {
    f'bin{i}': [f'name{i + 1}', [4, 5]]
    for i in range(4)
}

In [None]:
# Colour for each time bin: the first four entries of seaborn's "Set1" palette
set1_palette = sns.color_palette("Set1")
color_dict = {f'bin{i}': set1_palette[i] for i in range(4)}

In [None]:
# PCA embedding across time bins 0-3 (bin_size=60), restricted to the 'hc_ee'
# condition. The returned objects are reused by the cells further down.
(
    ax,
    embedding_dataset,
    rotated_loading_scores,
    dataframe_for_titles,
    concat_hue,
) = visuals_customized.plot_embeddings_timelapse(
    my_deepof_project,
    supervised_annotations=supervised_annotation,
    bin_size=60,
    bin_index_list=[0, 1, 2, 3],
    my_title='',
    my_color_dict=color_dict,
    my_coords_dict=coords_dict,
    specific_condition='hc_ee',
)

### Analyze individual PC and rotated_loading_scores (across time)

In [None]:
# Perform statistics on one principal component, compared across groups
import pingouin as pg

# NOTE(review): stats_dict below refers to 'bin0'..'bin3', so the
# 'experimental condition' column presumably holds the bin labels for this
# dataset - confirm against visuals_customized.plot_embeddings_timelapse.

# Assumption checks and the omnibus test. They are printed explicitly because
# a notebook cell only auto-displays the value of its last expression; as bare
# expressions these results were computed and then silently discarded.
print(pg.normality(embedding_dataset, dv='PCA-1', group='experimental condition'))
print(pg.homoscedasticity(embedding_dataset, dv='PCA-1', group='experimental condition'))

# Alternative when only two groups are compared:
# x = embedding_dataset[embedding_dataset['experimental condition'] == 'hc_ee']['PCA-2']
# y = embedding_dataset[embedding_dataset['experimental condition'] == 'hc_ind']['PCA-2']
# pg.ttest(x, y).round(3)

print(pg.anova(embedding_dataset, dv='PCA-1', between='experimental condition'))

# Post-hoc pairwise comparisons; shown as the cell output
pg.pairwise_tukey(embedding_dataset, dv='PCA-1', between='experimental condition')

In [None]:
# Record the significant comparisons by hand:
# significance label -> the two bins that were compared.
# NOTE: dictionary keys are unique, so two comparisons sharing a label (such
# as both '***' lines below) cannot be enabled together - the later entry
# would silently replace the earlier one.
stats_dict = {
    # '**': ['bin0', 'bin1'],
    # '***': ['bin0', 'bin2'],
    '***': ['bin0', 'bin3'],
}

In [None]:
# Box plot of one principal component, passing the colours and stats_dict
visuals_customized.boxplot(
    embedding_dataset,
    color_dict,
    'PCA-1',
    stats_dict,
)

In [None]:
# Lollipop plot of the rotated loading scores for the chosen component
visuals_customized.lollipop(
    dataframe_for_titles,
    rotated_loading_scores,
    'PCA-1',
)

In [None]:
# Plot a single behaviour over the time bins
visuals_customized.timelapse(
    dataframe_for_titles,
    concat_hue,
    'colortail_nocolor_nose2nose',
)

In [None]:
# Plot several behaviours over the time bins, one panel per behaviour
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(9, 3))

# Pair each panel with the behaviour drawn on it (left to right)
panels = [
    (ax1, 'colortail_nocolor_sidereside'),
    (ax2, 'colortail_nocolor_nose2nose'),
    (ax3, 'colortail_speed'),
]
for panel_ax, behavior in panels:
    visuals_customized.timelapse(dataframe_for_titles, concat_hue, behavior, ax=panel_ax)

# Optional panel titles:
# ax1.set_title("supervised embeddings of full videos")
# ax2.set_title("supervised embeddings of first two minutes")

plt.tight_layout()
plt.show()