# Pipeline analysis for supervised data
Let's start simple by analyzing the data from a single individual, considering the following measures: total distance, look-around, sniffing, inner-zone time, wall-climbing, and huddle. We begin by importing the necessary packages and setting the project directory.

In [4]:
import os
import pandas as pd
import pickle
import deepof.data
import deepof.visuals
import visuals_customized

In [6]:
# Reopen a DeepOF project that was created earlier.
# directory_output is the shared base folder; it ends with '/' so file names can be appended directly.
directory_output = '//folder/becell/Lab Collaborative Projects/Cancer+Mood - IGM/Behaviour/3rd round/SOC INT/'
project_path = directory_output + "deepof_tutorial_project"
my_deepof_project = deepof.data.load_project(project_path)

In [7]:
# Restore the supervised annotations that were pickled into the output folder.
# NOTE(review): pickle.load can execute arbitrary code, so only open files produced by a trusted run.
annotation_path = directory_output + 'supervised_annotation.pkl'
with open(annotation_path, 'rb') as pkl_file:
    supervised_annotation = pickle.load(pkl_file)

In [8]:
# Register the experimental conditions stored in conditions.csv with the project
conditions_csv = directory_output + 'conditions.csv'
my_deepof_project.load_exp_conditions(conditions_csv)

In [9]:
# Hex colour codes used for plotting: neutrals first, then the two accent colours
white, grey_soft, grey_stark = '#FFFFFF', '#D3D3D3', '#636466'
blue, red = '#194680', '#801946'

## Plot PCA

In [10]:
# Label position for each experimental condition, given as a two-element list
# (presumably [x, y] in the embedding plot's coordinates — confirm against plot_embeddings)
coords_dict = dict(
    hc_ind=[1.5, 3],
    hc_ee=[3, -1],
)

In [11]:
# Colour assigned to each experimental condition (uses the hex codes defined earlier)
color_dict = dict(
    hc_ind=blue,
    hc_ee=red,
)

In [None]:
# Options for the customized embedding plot. bin_index=0 together with the title
# suggests that only the first time bin is embedded — confirm in visuals_customized.
embedding_plot_options = dict(
    supervised_annotations=supervised_annotation,
    bin_size=120,
    bin_index=0,
    my_title='PCA first time bin',
    my_color_dict=color_dict,
    my_coords_dict=coords_dict,
)

# The call returns four objects; the later cells reuse embedding_dataset,
# rotated_loading_scores and dataframe_for_titles.
plot_outputs = visuals_customized.plot_embeddings(my_deepof_project, **embedding_plot_options)
ax, embedding_dataset, rotated_loading_scores, dataframe_for_titles = plot_outputs

## Analyze individual PCs and their rotated loading scores

In [None]:
# Perform statistics: compare one principal component between the two experimental conditions
import pingouin as pg

# Column holding the component under test and the column holding the group labels.
# NOTE(review): the tests here run on 'PCA-2', while the plotting cells further down
# use 'PCA-1' — confirm that the tested and the plotted component are meant to differ.
pc_column = 'PCA-2'
group_column = 'experimental condition'

# Assumption checks. A notebook cell only auto-displays its last expression by default,
# so these results are printed explicitly instead of being computed and silently dropped.
print(pg.normality(embedding_dataset, pc_column, group_column))
print(pg.homoscedasticity(embedding_dataset, pc_column, group_column))

# t-test between the two conditions; as the last expression of the cell, its result is displayed
x = embedding_dataset[embedding_dataset[group_column] == 'hc_ee'][pc_column]
y = embedding_dataset[embedding_dataset[group_column] == 'hc_ind'][pc_column]
pg.ttest(x, y).round(3)

# Disabled alternative kept from the original analysis (ANOVA followed by Tukey post-hoc):
# pg.anova(embedding_dataset, 'PCA-1', 'experimental condition')
# pg.pairwise_tukey(embedding_dataset, 'PCA-1', 'experimental condition')

In [None]:
# Store the statistical result for the plots: the key is the annotation symbol,
# the value is the pair of conditions it refers to (presumably drawn as a significance bracket)
significant_pair = ['hc_ee', 'hc_ind']
stats_dict = {'*': significant_pair}

In [None]:
# Draw the customized boxplot for one principal component, passing the condition colours and stats_dict.
# NOTE(review): this plots 'PCA-1', whereas the t-test in the statistics cell uses 'PCA-2' —
# confirm that the annotation in stats_dict belongs to the component shown here.
pc_to_plot = 'PCA-1'
visuals_customized.boxplot(embedding_dataset, color_dict, pc_to_plot, stats_dict)

In [None]:
# Draw the customized lollipop plot of the rotated loading scores for one principal component
pc_for_loadings = 'PCA-1'
visuals_customized.lollipop(dataframe_for_titles, rotated_loading_scores, pc_for_loadings)

## 