In [377]:
import yaml
import os
from towbintools.foundation import file_handling as file_handling
import pandas as pd
import shutil

import numpy as np
import argparse
import matplotlib.pyplot as plt

from scipy import interpolate
import seaborn as sns
import starbars
from itertools import combinations
from scipy.stats import ttest_ind, mannwhitneyu

from scipy.signal import savgol_filter, medfilt
from towbintools.foundation.utils import interpolate_nans

from plotting_functions import combine_experiments
from plotting_functions import plot_aggregated_series, plot_correlation, plot_correlation_at_ecdysis, boxplot_at_molt, plot_growth_curves_individuals, plot_deviation_from_model_at_ecdysis, plot_normalized_proportions, plot_deviation_from_model, get_most_average_deviations_at_ecdysis, get_most_average_proportions_at_ecdysis, get_most_average_size_at_ecdysis, plot_developmental_success, plot_arrests, plot_heterogeneity_at_ecdysis, build_legend
from towbintools.data_analysis.growth_rate import compute_instantaneous_growth_rate_classified

from towbintools.data_analysis import (
    compute_larval_stage_duration,
    compute_series_at_time_classified,
    filter_series_with_classification,
    rescale_and_aggregate,
    rescale_series,
)

# make warnings happen only once
import warnings
warnings.filterwarnings("once")

In [None]:
# ---------------------------------------------------------------------------
# Experiment inputs
# ---------------------------------------------------------------------------
# One filemap and one conditions file per experiment. The two lists are paired
# by position (see the backup loop below), so they must have the same length
# and the same ordering.
filemap_paths = [
    # "/mnt/towbin.data/shared/kstojanovski/20240202_Orca_10x_yap-1del_col-10-tir_wBT160-186-310-337-380-393_25C_20240202_171239_051/analysis_sacha/report/analysis_filemap.csv",
    "/mnt/towbin.data/shared/kstojanovski/20240212_Orca_10x_yap-1del_col-10-tir_wBT160-186-310-337-380-393_25C_20240212_164059_429/analysis_sacha/report/analysis_filemap_annotated.csv",
]

conditions_paths = [
    # "/mnt/towbin.data/shared/kstojanovski/20240202_Orca_10x_yap-1del_col-10-tir_wBT160-186-310-337-380-393_25C_20240202_171239_051/doc/conditions.yaml",
    "/mnt/towbin.data/shared/kstojanovski/20240212_Orca_10x_yap-1del_col-10-tir_wBT160-186-310-337-380-393_25C_20240212_164059_429/doc/conditions.yaml",
]

# Passed to combine_experiments as `organ_channels`.
# NOTE(review): presumably one dict per experiment mapping organ name -> channel index; confirm.
organ_channels = [{'body': 2, 'pharynx' : 1}]

# Root directory that receives a copy of the inputs, of this working directory
# and of the computed structure.
backup_path = "/mnt/towbin.data/shared/spsalmon/plotting_backups/yap1_deletion/"

# filemap_paths = [
#     "/mnt/towbin.data/shared/kstojanovski/20240429_Orca_10x_20h_IAA_1_min_sampling_wBT160-182-186-190-25C_20240429_115434_078/analysis_sacha/report/analysis_filemap_annotated.csv",
# ]

# conditions_paths = [
#     "/mnt/towbin.data/shared/kstojanovski/20240429_Orca_10x_20h_IAA_1_min_sampling_wBT160-182-186-190-25C_20240429_115434_078/doc/conditions.yaml",
# ]

# organ_channels = [{'body': 2, 'pharynx' : 1}]

# backup_path = "/mnt/towbin.data/shared/spsalmon/plotting_backups/1min_sampling/"

# ---------------------------------------------------------------------------
# Backup of the inputs and of the current working directory
# ---------------------------------------------------------------------------
# zip() stops at the shorter list, so a mismatch would silently skip some
# experiments while still reporting a successful backup. Fail loudly instead.
if len(filemap_paths) != len(conditions_paths):
    raise ValueError(
        f"filemap_paths ({len(filemap_paths)}) and conditions_paths "
        f"({len(conditions_paths)}) must contain one entry per experiment."
    )

filemap_backup_path = os.path.join(backup_path, "filemaps")
conditions_backup_path = os.path.join(backup_path, "conditions")
structure_backup_path = os.path.join(backup_path, "structure")

for backup_dir in (backup_path, filemap_backup_path, conditions_backup_path, structure_backup_path):
    os.makedirs(backup_dir, exist_ok=True)

for i, (filemap_path, config_path) in enumerate(zip(filemap_paths, conditions_paths)):
    # The experiment index keeps the copies of different experiments apart.
    output_name = f"analysis_filemap_{i}.csv"
    output_name_config = f"config_{i}.yaml"

    shutil.copy(filemap_path, os.path.join(filemap_backup_path, output_name))
    shutil.copy(config_path, os.path.join(conditions_backup_path, output_name_config))

# Copy the current working directory to the backup path, under its own name.
# dirs_exist_ok=True lets a re-run overwrite a previous backup instead of failing.
current_dir = os.getcwd()
current_dir_name = os.path.basename(current_dir)
destination = os.path.join(backup_path, current_dir_name)

shutil.copytree(current_dir, destination, dirs_exist_ok=True)

print("Backup completed successfully!")

In [None]:
import pickle
# Build the list of per-condition dictionaries from every filemap/conditions pair.
# NOTE(review): recompute_values_at_molt=True presumably forces the "*_at_ecdysis"
# values to be recomputed rather than reused; confirm in plotting_functions.
conditions_struct = combine_experiments(
    filemap_paths,
    conditions_paths,
    organ_channels=organ_channels,
    recompute_values_at_molt=True,
)

# Store a pickled snapshot of the structure next to the other backups.
with open(os.path.join(structure_backup_path, "plotting_struct.pkl"), "wb") as backup_file:
    pickle.dump(conditions_struct, backup_file)

In [None]:
# Print the id and the description of every condition.
# Each value gets its own replacement field: the previous single field evaluated
# to a tuple whose second element was a set wrapping the description.
for condition in conditions_struct:
    print(f"Condition: {condition['condition_id']}, {condition['description']}")

In [381]:
# Notebook-wide matplotlib defaults (change to your liking):
# figure size, thicker lines and a bigger font.
plt.rcParams.update({
    'figure.figsize': [12, 10],
    'lines.linewidth': 2,
    'font.size': 12,
})

In [382]:
def combine_series(conditions_struct, series_one, series_two, operation, new_series_name):
    """
    Combine two series of every condition element-wise and store the result under a new key.

    Parameters:
        conditions_struct (iterable of dict): The conditions; each dictionary is modified in place.
        series_one (str): Key of the first operand in each condition.
        series_two (str): Key of the second operand in each condition.
        operation (str): One of 'add', 'subtract', 'multiply' or 'divide'.
        new_series_name (str): Key under which the result is stored in each condition.

    Returns:
        The same conditions_struct object that was passed in.

    Raises:
        ValueError: If operation is not one of the supported names.
        KeyError: If a condition lacks series_one or series_two.
    """
    # All four operations go through NumPy ufuncs so that plain lists are
    # handled the same way as arrays, whatever the operation.
    operations = {
        'add': np.add,
        'subtract': np.subtract,
        'multiply': np.multiply,
        'divide': np.divide,
    }

    # Validate once, before touching any condition, so that an unsupported
    # name cannot leave the structure half-updated.
    if operation not in operations:
        raise ValueError(
            f"Unknown operation {operation!r}; expected one of {sorted(operations)}."
        )
    combine = operations[operation]

    for condition in conditions_struct:
        condition[new_series_name] = combine(condition[series_one], condition[series_two])
    return conditions_struct

# Aspect ratio at ecdysis: mean width divided by length.
conditions_struct = combine_series(
    conditions_struct,
    series_one='body_seg_str_width_mean_at_ecdysis',
    series_two='body_seg_str_length_at_ecdysis',
    operation='divide',
    new_series_name='body_seg_str_aspect_ratio_at_ecdysis',
)
# Aspect ratio over the whole time series: mean width divided by length.
conditions_struct = combine_series(
    conditions_struct,
    series_one='body_seg_str_width_mean',
    series_two='body_seg_str_length',
    operation='divide',
    new_series_name='body_seg_str_aspect_ratio',
)
# Difference between the maximum and the mean width at ecdysis.
conditions_struct = combine_series(
    conditions_struct,
    series_one='body_seg_str_width_max_at_ecdysis',
    series_two='body_seg_str_width_mean_at_ecdysis',
    operation='subtract',
    new_series_name='body_seg_str_width_max_minus_mean_at_ecdysis',
)

In [438]:
import statsmodels.api as sm
from scipy.ndimage import uniform_filter1d
from towbintools.data_analysis.time_series import correct_series_with_classification

def compute_instantaneous_growth_rate(series, time, smoothing_method = "savgol", savgol_filter_window=15, savgol_filter_order=3, moving_average_window=15, lowess_frac=0.1, show_plots=False):
    """
    Compute the instantaneous growth rate of a time series.

    The series is first passed through a 3-point median filter, then optionally
    smoothed, and finally differentiated with respect to time with np.gradient.

    Parameters:
        series (np.ndarray): The series of values to compute the growth rate of.
        time (np.ndarray): The time data corresponding to the series.
        smoothing_method (str): The smoothing applied after the median filter: 'savgol', 'moving_average' or 'none'.
        savgol_filter_window (int): The window size of the Savitzky-Golay filter (used when smoothing_method is 'savgol').
        savgol_filter_order (int): The order of the Savitzky-Golay filter (used when smoothing_method is 'savgol').
        moving_average_window (int): The window size of the moving average filter (used when smoothing_method is 'moving_average').
        lowess_frac (float): Currently unused; no LOWESS smoothing is implemented. Kept so existing calls keep working.
        show_plots (bool): If True, plot the raw and the smoothed series (debugging aid). Off by default,
            because this function is called once per individual and would otherwise open one figure per call.

    Returns:
        np.ndarray: The instantaneous growth rate of the time series.

    Raises:
        AssertionError: If series and time have different lengths.
        ValueError: If smoothing_method is not one of the supported names.
    """

    # Assert that the series and time have the same length
    assert len(series) == len(time), "The series and time must have the same length."

    # A misspelled method name used to skip the smoothing silently; reject it instead.
    if smoothing_method not in ("savgol", "moving_average", "none"):
        raise ValueError(
            f"Unknown smoothing_method {smoothing_method!r}; expected 'savgol', 'moving_average' or 'none'."
        )

    raw_series = series

    # Remove extreme outliers with a small median filter.
    # NOTE: scipy's medfilt zero-pads the input, so the first and last samples can be altered.
    series = medfilt(series, 3)

    if smoothing_method == "savgol":
        # Smooth the series a bit more with a Savitzky-Golay filter
        series = savgol_filter(series, savgol_filter_window, savgol_filter_order)
    elif smoothing_method == "moving_average":
        # Smooth the series a bit more with a moving average filter
        series = uniform_filter1d(series, size=moving_average_window)

    if show_plots:
        plt.plot(raw_series)
        plt.plot(series)
        plt.show()

    # Compute the instantaneous growth rate
    growth_rate = np.gradient(series, time)

    return growth_rate

def new_compute_instantaneous_growth_rate_classified(series, time, worm_type, smoothing_method = "savgol", savgol_filter_window=15, savgol_filter_order=3, moving_average_window=15):
    """
    Growth rate of a classified time series.

    The series is first corrected with correct_series_with_classification (which,
    per the original description, removes the non-worm points and interpolates
    them back), then handed to compute_instantaneous_growth_rate.

    Parameters:
        series (np.ndarray): The time series of values.
        time (np.ndarray): The time data corresponding to the series.
        worm_type (np.ndarray): The classification of each point ('worm', 'egg' or 'error').
        smoothing_method (str): Smoothing method forwarded to compute_instantaneous_growth_rate.
        savgol_filter_window (int): Window size of the Savitzky-Golay filter, forwarded as is.
        savgol_filter_order (int): Order of the Savitzky-Golay filter, forwarded as is.
        moving_average_window (int): Window size of the moving average filter, forwarded as is.

    Returns:
        np.ndarray: The instantaneous growth rate of the corrected series.
    """

    # The three inputs describe the same time points, so their lengths must agree.
    assert len(series) == len(time) == len(worm_type), "The series, time, and worm_type must have the same length."

    corrected_series = correct_series_with_classification(series, worm_type)

    return compute_instantaneous_growth_rate(
        corrected_series,
        time,
        smoothing_method=smoothing_method,
        savgol_filter_window=savgol_filter_window,
        savgol_filter_order=savgol_filter_order,
        moving_average_window=moving_average_window,
    )

def compute_growth_rate(conditions_struct, series_name, gr_series_name, experiment_time=True):
    """
    Add the growth rate of a series to every condition.

    For each condition, new_compute_instantaneous_growth_rate_classified is called
    row by row (Savitzky-Golay smoothing, window of 7) and the stacked result is
    stored under gr_series_name. The condition dictionaries are modified in place.

    Parameters:
        conditions_struct (iterable of dict): The conditions to process.
        series_name (str): Key of the series to differentiate; its first axis is iterated.
        gr_series_name (str): Key under which the growth rate is stored.
        experiment_time (bool): Use the 'experiment_time' entry if truthy, the 'time' entry otherwise.

    Returns:
        The same conditions_struct object that was passed in.
    """
    time_key = 'experiment_time' if experiment_time else 'time'

    for condition in conditions_struct:
        values = condition[series_name]

        # TEMPORARY, ONLY WORKS WITH SINGLE CLASSIFICATION, FIND A WAY TO GENERALIZE
        # The first key containing "worm_type" is taken as the classification.
        classification_keys = [name for name in condition.keys() if "worm_type" in name]
        classification = condition[classification_keys[0]]

        timestamps = condition[time_key]

        per_row_growth_rate = [
            new_compute_instantaneous_growth_rate_classified(
                values[row],
                timestamps[row],
                classification[row],
                smoothing_method = 'savgol',
                savgol_filter_window = 7,
            )
            for row in range(values.shape[0])
        ]

        condition[gr_series_name] = np.array(per_row_growth_rate)

    return conditions_struct

def rescale(conditions_struct, series_name, rescaled_series_name, experiment_time=True, n_points=100):
    """
    Add a rescaled, flattened version of a series to every condition.

    For each condition the series is passed to rescale_series together with the
    time, the 'ecdysis_time_step' entry and the classification; the second value
    it returns is flattened to two dimensions and stored under rescaled_series_name.
    The condition dictionaries are modified in place.

    Parameters:
        conditions_struct (iterable of dict): The conditions to process.
        series_name (str): Key of the series to rescale.
        rescaled_series_name (str): Key under which the rescaled series is stored.
        experiment_time (bool): Use the 'experiment_time' entry if truthy, the 'time' entry otherwise.
        n_points (int): Forwarded to rescale_series as n_points.

    Returns:
        The same conditions_struct object that was passed in.
    """
    time_key = 'experiment_time' if experiment_time else 'time'

    for condition in conditions_struct:
        values = condition[series_name]

        # TEMPORARY, ONLY WORKS WITH SINGLE CLASSIFICATION, FIND A WAY TO GENERALIZE
        # The first key containing "worm_type" is taken as the classification.
        classification_keys = [name for name in condition.keys() if "worm_type" in name]
        classification = condition[classification_keys[0]]

        ecdysis_steps = condition["ecdysis_time_step"]
        timestamps = condition[time_key]

        # Per the original author's note, the rescaled output has shape
        # (n_worms, 4, n_points); the first returned value is not needed here.
        _, per_stage = rescale_series(
            values, timestamps, ecdysis_steps, classification, n_points=n_points
        )

        # Keep the first axis and merge all the others: (n_worms, 4*n_points).
        n_rows = per_stage.shape[0]
        condition[rescaled_series_name] = per_stage.reshape(n_rows, -1)

    return conditions_struct

# The aspect-ratio growth rate is always recomputed.
conditions_struct = compute_growth_rate(conditions_struct, 'body_seg_str_aspect_ratio', 'body_seg_str_aspect_ratio_growth_rate')

# The other growth rates are rescaled below, so they have to exist. They are
# only computed when at least one condition lacks them: on a fresh kernel the
# rescale calls would otherwise fail with a KeyError, and values that are
# already present are left untouched.
for base_series in ('body_seg_str_volume', 'body_seg_str_width_mean', 'body_seg_str_length'):
    growth_rate_series = f"{base_series}_growth_rate"
    if any(growth_rate_series not in condition for condition in conditions_struct):
        conditions_struct = compute_growth_rate(conditions_struct, base_series, growth_rate_series)

# Rescale every growth rate and store it under "<name>_rescaled".
for growth_rate_series in (
    'body_seg_str_volume_growth_rate',
    'body_seg_str_width_mean_growth_rate',
    'body_seg_str_length_growth_rate',
    'body_seg_str_aspect_ratio_growth_rate',
):
    conditions_struct = rescale(conditions_struct, growth_rate_series, f"{growth_rate_series}_rescaled")

# Quick visual check: rescaled aspect-ratio growth rate of the first row of the first condition.
plt.plot(conditions_struct[0]['body_seg_str_aspect_ratio_growth_rate_rescaled'][0])
plt.show()
# conditions_to_plot = [4]
# colors = None
# plot_aggregated_series(conditions_struct, ['body_seg_str_aspect_ratio_growth_rate'], conditions_to_plot, colors=colors, experiment_time = True, aggregation='mean', n_points=100, time_step = 14, log_scale = False, legend = {'description': ''})

In [None]:
import numpy
import matplotlib.pyplot as plt

def autocorr1(x,lags):
    '''
    Autocorrelation of x at the given lags, computed with numpy.corrcoef.

    For a lag l different from 0, the value is the correlation coefficient
    between x[l:] and x[:-l]; for a lag of 0 it is 1 by definition.

    Parameters:
        x: Sequence supporting slicing (e.g. a 1-D numpy array).
        lags: Iterable of integer lags.

    Returns:
        numpy.ndarray: One coefficient per lag, in the order of lags.
    '''
    coefficients = []
    for lag in lags:
        if lag == 0:
            # x[:-0] would be empty, so the zero lag is handled explicitly.
            coefficients.append(1.)
        else:
            shifted = x[lag:]
            reference = x[:-lag]
            coefficients.append(numpy.corrcoef(shifted, reference)[0][1])
    return numpy.array(coefficients)

def plot_autocorrelation(conditions_struct, series_name, conditions_to_plot, lags,
    colors=None,
    legend=None,
    x_axis_label=None,
    y_axis_label=None,
):
    """
    Plot the mean autocorrelation of a series for several conditions.

    For each selected condition, autocorr1 is applied to every row of the series.
    The mean over rows is drawn as a line, surrounded by a band of
    +/- 1.96 standard errors. Rows whose autocorrelation is NaN at a given lag
    are ignored for that lag.

    Parameters:
        conditions_struct: Indexable collection of condition dictionaries.
        series_name (str): Key of the series in each condition; its first axis is iterated.
        conditions_to_plot: Indices into conditions_struct of the conditions to draw.
        lags: Lags passed to autocorr1 and used as x values.
        colors: One color per plotted condition; a seaborn "colorblind" palette is used if None.
        legend: Passed to build_legend together with the condition to build the line label.
        x_axis_label (str): Label of the x axis.
        y_axis_label (str): Label of the y axis.
    """
    if colors is None:
        color_palette = sns.color_palette("colorblind", len(conditions_to_plot))
    else:
        color_palette = colors

    for i, condition_id in enumerate(conditions_to_plot):
        condition = conditions_struct[condition_id]
        series = condition[series_name]

        # One row of autocorrelation coefficients per row of the series.
        autocorr = np.array([autocorr1(series[j], lags) for j in range(series.shape[0])])

        autocorr_mean = np.nanmean(autocorr, axis=0)
        autocorr_std = np.nanstd(autocorr, axis=0)

        # The mean and the std skip NaN values, so the standard error has to use
        # the number of values actually present at each lag, not the total number
        # of rows (which would make the band too narrow).
        n_valid = np.count_nonzero(~np.isnan(autocorr), axis=0)
        # Where nothing is valid the std is already NaN; max(..., 1) only avoids dividing by zero.
        autocorr_ste = autocorr_std / np.sqrt(np.maximum(n_valid, 1))

        label = build_legend(condition, legend)

        plt.fill_between(lags, autocorr_mean - 1.96 * autocorr_ste, autocorr_mean + 1.96 * autocorr_ste, alpha=0.5, color=color_palette[i])
        plt.plot(lags, autocorr_mean, label=label, color=color_palette[i])

    plt.xlabel(x_axis_label)
    plt.ylabel(y_axis_label)
    plt.legend()
    plt.show()

# Autocorrelation of the rescaled aspect-ratio growth rate for lags 0..124.
lags = np.arange(0, 125, 1)
plot_autocorrelation(
    conditions_struct,
    'body_seg_str_aspect_ratio_growth_rate_rescaled',
    [0, 4],
    lags,
    x_axis_label='Lag (0.25% of development)',
    # The plotted series is the aspect-ratio growth rate, not the length growth rate.
    y_axis_label='Autocorrelation of aspect ratio growth rate',
    colors=None,
    legend={'description': ''},
)

In [404]:
# conditions_to_plot = [0, 2, 4]
# colors = None
# # colors = ['blue']
# # colors += sns.color_palette("Blues", len(conditions_to_plot) - 1)

# plot_correlation(conditions_struct, 'body_seg_str_length', 'body_seg_str_width_mean', conditions_to_plot, log_scale = (False, True), legend = {'description': ''}, colors = colors)

In [None]:
# Width against length at ecdysis for the selected conditions.
conditions_to_plot = [0, 2, 4, 5]
colors = None

plot_correlation_at_ecdysis(
    conditions_struct,
    'body_seg_str_length_at_ecdysis',
    'body_seg_str_width_mean_at_ecdysis',
    conditions_to_plot,
    colors=colors,
    log_scale=(False, True),
    legend={'description': ''},
    y_axis_label='Width at ecdysis',
    x_axis_label='Length at ecdysis',
)

In [406]:
# boxplot_at_molt(conditions_struct, 'larval_stage_durations_time_step', [0, 1, 2, 3], log_scale = False, plot_significance=True, legend = {'description': ''}, y_axis_label="Larval stage duration (time step)", titles = ['L1', 'L2', 'L3', 'L4'])

In [None]:
# Developmental success of conditions 0, 1, 4 and 5, labelled by their description.
plot_developmental_success(
    conditions_struct,
    [0, 1, 4, 5],
    legend={'description': ''},
)

In [408]:
# plot_arrests(conditions_struct, [0, 2, 4, 5], legend = {'description': ''})

In [None]:
# Deviation of the mean width at ecdysis from the model, as a function of length,
# with condition 0 passed as the control.
conditions_to_plot = [0, 2, 4]
control_condition = 0
colors = None

plot_deviation_from_model_at_ecdysis(
    conditions_struct,
    "body_seg_str_length_at_ecdysis",
    "body_seg_str_width_mean_at_ecdysis",
    control_condition,
    conditions_to_plot,
    colors=colors,
    remove_hatch=True,
    exclude_arrests=True,
    log_scale=False,
    legend={'description': ''},
    x_axis_label='Length at ecdysis (µm)',
    y_axis_label='Deviation from modeled width at ecdysis (%)',
)

In [None]:
# Deviation of the max width at ecdysis from the model, as a function of the
# mean width, with condition 0 passed as the control.
conditions_to_plot = [0, 2, 4]
control_condition = 0
colors = None

plot_deviation_from_model_at_ecdysis(
    conditions_struct,
    "body_seg_str_width_mean_at_ecdysis",
    "body_seg_str_width_max_at_ecdysis",
    control_condition,
    conditions_to_plot,
    colors=colors,
    remove_hatch=True,
    exclude_arrests=True,
    log_scale=False,
    legend={'description': ''},
    x_axis_label='Mean width at ecdysis (µm)',
    y_axis_label='Deviation from modeled max width at ecdysis (%)',
)

In [None]:
# Mean aspect ratio over time.
# conditions_to_plot and colors are the values set by a previous cell.
plot_aggregated_series(
    conditions_struct,
    ['body_seg_str_aspect_ratio'],
    conditions_to_plot,
    colors=colors,
    experiment_time=True,
    aggregation='mean',
    n_points=100,
    time_step=14,
    log_scale=False,
    legend={'description': ''},
)

In [None]:
# Difference between max and mean width at hatch and at each molt, conditions 0 and 4.
boxplot_at_molt(
    conditions_struct,
    'body_seg_str_width_max_minus_mean_at_ecdysis',
    [0, 4],
    log_scale=False,
    plot_significance=True,
    legend={'description': ''},
    y_axis_label="Difference between max and mean width (µm)",
    titles=['Hatch', 'M1', 'M2', 'M3', 'M4'],
)

In [None]:
# Heterogeneity of the aspect ratio at ecdysis for conditions 0, 1 and 4 (arrests included).
plot_heterogeneity_at_ecdysis(
    conditions_struct,
    'body_seg_str_aspect_ratio_at_ecdysis',
    [0, 1, 4],
    legend={'description': ''},
    exclude_arrests=False,
    y_axis_label='CV of aspect ratio at ecdysis',
)

In [None]:
# Heterogeneity of the length at ecdysis for conditions 0, 1 and 4 (arrests included).
# The y label now names the plotted series (it previously said "aspect ratio").
plot_heterogeneity_at_ecdysis(
    conditions_struct,
    'body_seg_str_length_at_ecdysis',
    [0, 1, 4],
    legend={'description': ''},
    exclude_arrests=False,
    y_axis_label='CV of length at ecdysis',
)

In [None]:
# Heterogeneity of the mean width at ecdysis for conditions 0, 1 and 4 (arrests included).
# The y label now names the plotted series (it previously said "aspect ratio").
plot_heterogeneity_at_ecdysis(
    conditions_struct,
    'body_seg_str_width_mean_at_ecdysis',
    [0, 1, 4],
    legend={'description': ''},
    exclude_arrests=False,
    y_axis_label='CV of mean width at ecdysis',
)

In [416]:
# def plot_heterogeneity_at_ecdysis(conditions_struct, column, conditions_to_plot, remove_hatch = True, legend = None, x_axis_label = None, y_axis_label = None, keep_only_complete_development = False):
#     for condition in conditions_to_plot:
#         condition_dict = conditions_struct[condition]

#         values = condition_dict[column]

#         base_shape = values.shape[0]

#         if remove_hatch:
#             values = values[:, 1:]

#         if keep_only_complete_development:
#             values = values[~np.isnan(values).any(axis=1)]
#             print(f"Removed {base_shape - values.shape[0]} worms that did not complete development")


#         cvs = []
#         for i in range(values.shape[1]):
#             values_at_ecdysis = values[:, i]
#             cv = np.nanstd(values_at_ecdysis) / np.nanmean(values_at_ecdysis)
#             cvs.append(cv)

#         plt.plot(cvs, label = condition_dict['description'], marker = 'o')
#     plt.show()

# plot_heterogeneity_at_ecdysis(conditions_struct, 'body_seg_str_volume_at_ecdysis', [3, 2, 6, 10], legend = {'strain': ''}, keep_only_complete_development = True)   
# plot_heterogeneity_at_ecdysis(conditions_struct, 'body_seg_str_volume_at_ecdysis', [3, 2, 6, 10], legend = {'strain': ''}, keep_only_complete_development = True)

In [417]:
# get_most_average_size_at_ecdysis(
#     conditions_struct,
#     'body_seg_str_width_mean_at_ecdysis',
#     ['analysis_sacha/ch2_seg_str'],
#     [4],
#     remove_hatch=False,
#     exclude_arrests=True,
#     dpi=600,
#     nb_per_condition=2,
#     cmap=['pure_red'],
# )

In [418]:
# get_most_average_size_at_ecdysis(
#     conditions_struct,
#     'body_seg_str_volume_at_ecdysis',
#     ['analysis/ch2_raw_str', 'analysis/ch1_raw_str'],
#     [4],
#     remove_hatch=False,
#     exclude_arrests=True,
#     dpi=600,
#     nb_per_condition=2,
#     cmap=['pure_red', 'pure_green'],
# )

In [419]:
# get_most_average_proportions_at_ecdysis(
#     conditions_struct,
#     'body_seg_str_length_at_ecdysis',
#     'body_seg_str_mean_width_at_ecdysis',
#     ['analysis/ch2_raw_str', 'analysis/ch1_raw_str'],
#     [0],
#     remove_hatch=False,
#     exclude_arrests=True,
#     dpi=600,
#     nb_per_condition=2,
#     cmap=['pure_red', 'pure_green'],
# )

In [420]:
# get_most_average_proportions_at_ecdysis(
#     conditions_struct,
#     'body_seg_str_length_at_ecdysis',
#     'body_seg_str_volume_at_ecdysis',
#     ['analysis/ch2_raw_str', 'analysis/ch1_raw_str'],
#     [5],
#     remove_hatch=False,
#     exclude_arrests=True,
#     dpi=600,
#     nb_per_condition=2,
#     cmap=['pure_red', 'pure_green'],
# )