# Deviation of development between groups of embryos

# Table of Contents
* [General](#first-bullet)
* [Load paths](#second-bullet)
* [Load model](#third-bullet)
* [Calculate embeddings](#fourth-bullet)
* [(Optional) Save embeddings](#fifth-bullet)
* [Calculate similarities](#sixth-bullet)
* [(Optional) Save similarities](#sixth-bullet)
* [Overview of plot parameters ](#seventh-bullet)
* [Plot similarities of batch comparisons with batch of untreated embryos ](#eigth-bullet)
* [Plot similarities of all comparisons as individual curves ](#ninth-bullet)

## General <a class="anchor" id="first-bullet"></a>

General imports and class definitions

In [None]:
%load_ext autoreload
%autoreload 2

Load paths from config file

In [None]:
from twinnet_tools.tnconfig import ProjectConfig

# Instantiate the project configuration named "twinnet_config".
# NOTE(review): `config` is not referenced again in the visible cells; the
# paths used below are built from `data_path` instead - confirm it is needed.
config = ProjectConfig("twinnet_config")

Import Python packages

In [None]:
import cv2
import glob
import json
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pathlib
import shutil
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_io as tfio
from tensorflow.keras import applications, layers, models
from scipy.stats import mannwhitneyu
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm  # Import tqdm for the progress bar
import matplotlib.pylab as pl

from twinnet_tools.tngeneral import TNToolsGeneral
from twinnet_tools.tnbatchcomparison import TNToolsBatchComparison
from twinnet_tools.tninference import TNToolsEmbeddings
from twinnet_tools.tninference import TNToolsSimilarities
from twinnet_tools.tnmodel import TNToolsNetwork
from twinnet_tools.tnplot import TNToolsPlot
from twinnet_tools.tnplotcompare import TNToolsPlotCompare

from pathlib import Path

# TensorFlow environment switches (log verbosity and GPU memory growth).
# NOTE(review): both are set after `import tensorflow` above; presumably
# TF_CPP_MIN_LOG_LEVEL has to be set before the import to silence start-up
# messages - confirm and move above the imports if so.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

Prepare class instances

In [None]:
# One instance of every twinnet_tools helper class used by this notebook.
# The image sizes passed to TNToolsEmbeddings (224 / 250) are the same values
# used as defaults by fn_image_tiff_parse below.
tools_general = TNToolsGeneral()
tools_embeddings = TNToolsEmbeddings(size_img=224, size_img_min=250)
tools_model = TNToolsNetwork()
tools_similarities = TNToolsSimilarities()
tools_plot = TNToolsPlot()
tools_plot_compare = TNToolsPlotCompare()
tools_batchcomparison = TNToolsBatchComparison()

In [None]:
def fn_image_tiff_parse(path_img, img_size=224, img_size_min=250):
    """Read one TIFF file and turn it into a ResNet50-preprocessed RGB tensor.

    Steps: read the file, decode it as TIFF, centre crop/pad to a square of
    ``img_size_min`` pixels, force a 4-channel shape, resize to
    ``img_size`` x ``img_size``, drop the alpha channel and apply the
    ResNet50 input preprocessing.

    Args:
        path_img: Path of the TIFF file to load.
        img_size: Edge length of the returned image.
        img_size_min: Edge length used for the intermediate crop/pad step.

    Returns:
        The preprocessed image tensor.
    """
    raw_bytes = tf.io.read_file(path_img)
    decoded = tfio.experimental.image.decode_tiff(raw_bytes)

    # Crop or pad around the centre, then pin the static shape to RGBA.
    square = tf.image.resize_with_crop_or_pad(decoded, img_size_min, img_size_min)
    square_rgba = tf.reshape(square, (img_size_min, img_size_min, 4))

    resized_rgba = tf.image.resize(square_rgba, (img_size, img_size))
    resized_rgb = tfio.experimental.color.rgba_to_rgb(resized_rgba)
    return tf.keras.applications.resnet50.preprocess_input(resized_rgb)

def fn_images_tiff_parse(paths_images, **kwargs):
    """Load multiple TIFF images from paths into one numpy array.

    Args:
        paths_images: Sequence of image paths, loaded in the given order.
        **kwargs: Forwarded unchanged to ``fn_image_tiff_parse``
            (e.g. ``img_size``, ``img_size_min``).

    Returns:
        ``np.array`` built from the successfully loaded images. Images that
        raise ``cv2.error`` are left out, so the result can be shorter than
        ``paths_images``; every skipped path is reported on stdout.
    """
    image_segments = []
    num_images = len(paths_images)

    for position, path_image in enumerate(paths_images, start=1):
        print(f'[LOADING] Image arrays {position}/{num_images} ...'.ljust(50), end='\r')
        try:
            image_segments.append(fn_image_tiff_parse(path_image, **kwargs))
        except cv2.error as err:
            # Best-effort loading is kept, but a dropped frame is no longer
            # silent: it shortens the sequence used by later comparisons.
            print(f'[WARNING] Skipping image {path_image}: {err}')
    return np.array(image_segments)

def list_to_embeddings(list_embryos_images, model_embedding):
    """Compute the embeddings of one embryo's images.

    The image paths are loaded with ``fn_images_tiff_parse`` and the resulting
    array is passed, together with ``model_embedding``, to
    ``tools_embeddings.imgs_to_embeddings``.
    """
    return tools_embeddings.imgs_to_embeddings(
        model_embedding,
        fn_images_tiff_parse(list_embryos_images),
    )


def get_all_embeddings(dir_segments, tn_model):
    """Compute embeddings for every subfolder of ``dir_segments``.

    Entries of ``dir_segments`` are visited in sorted name order; entries that
    are not directories are ignored. For each subfolder the ``*.tif`` files are
    collected in sorted order and converted with ``list_to_embeddings``.

    Returns:
        A list with one embeddings entry per subfolder, in sorted folder order.
    """
    embeddings_reference = []

    for folder_name in sorted(os.listdir(dir_segments)):
        subfolder_path = os.path.join(dir_segments, folder_name)
        if not os.path.isdir(subfolder_path):
            continue  # plain files next to the subfolders are skipped

        print("Processing subfolder:", folder_name)
        imgs_src = glob.glob(f'{subfolder_path}/*.tif')
        imgs_src.sort()
        embeddings_reference.append(list_to_embeddings(imgs_src, tn_model))

    return embeddings_reference

def cosine_similarity(A, B):
    """Return the cosine similarity between matching rows of ``A`` and ``B``.

    Row ``r`` of the result is ``dot(A[r], B[r]) / (|A[r]| * |B[r]|)``.
    """
    # Row-wise dot products
    row_dots = np.sum(A * B, axis=1)

    # Product of the row-wise Euclidean norms
    row_norms = np.linalg.norm(A, axis=1) * np.linalg.norm(B, axis=1)

    return row_dots / row_norms

def get_similarities_to_reference(embeddings_reference, embeddings_test):
    """Compare one embryo's embeddings with every reference embryo.

    A reference entry that is element-wise equal to ``embeddings_test`` is
    left out, so an embryo is never compared with itself.

    Returns:
        The cosine-similarity rows stacked with ``np.vstack``, one row per
        reference entry that was compared.
    """
    rows = [
        cosine_similarity(reference, embeddings_test)
        for reference in embeddings_reference
        if not np.array_equal(reference, embeddings_test)
    ]
    return np.vstack(rows)

# Estimates, for each evaluated embryo, the median similarity to the reference embryos
def get_median_similarities_to_reference(embeddings_reference, embeddings_evaluation):
    """Return one median-similarity row per evaluated embryo.

    For every entry of ``embeddings_evaluation`` the similarities to all
    reference entries are computed with ``get_similarities_to_reference`` and
    reduced with the median across the reference entries (axis 0).

    Returns:
        Array built with ``np.vstack``; row ``k`` belongs to
        ``embeddings_evaluation[k]``.
    """
    per_embryo_medians = [
        np.median(get_similarities_to_reference(embeddings_reference, embryo), axis=0)
        for embryo in embeddings_evaluation
    ]
    return np.vstack(per_embryo_medians)


# Keeps every reference-vs-evaluated similarity row (no median), stacked over all evaluated embryos
def get_ensamble_similarities_to_reference(embeddings_reference, embeddings_evaluation):
    """Return all similarity rows of all evaluated embryos, without reduction.

    For every entry of ``embeddings_evaluation`` the full block returned by
    ``get_similarities_to_reference`` is kept; the blocks are joined along
    axis 0 in the order of ``embeddings_evaluation``.
    """
    blocks = [
        get_similarities_to_reference(embeddings_reference, embryo)
        for embryo in embeddings_evaluation
    ]
    return np.concatenate(blocks, axis=0)


# Helper functions to draw random samples without replacement
def generate_unique_integers(N, M):
    """Draw ``N`` distinct random integers from ``0 .. M`` (both inclusive).

    Uses the global ``np.random`` state, so results are reproducible after
    ``np.random.seed``.

    Raises:
        ValueError: If more integers are requested than the range contains.
    """
    pool_size = M + 1
    if N > pool_size:
        raise ValueError("N must be less than or equal to M + 1 to ensure unique integers.")

    candidates = np.arange(pool_size)
    return np.random.choice(candidates, N, replace=False)

def randomly_choose_elements(original_list, N):
    """Pick ``N`` distinct elements of ``original_list`` at random.

    Returns:
        Tuple ``(chosen_elements, indices)`` where ``indices`` are the
        positions in ``original_list`` as returned by
        ``generate_unique_integers``, and ``chosen_elements`` follows the same
        order.
    """
    last_index = len(original_list) - 1
    indices = generate_unique_integers(N, last_index)
    return [original_list[position] for position in indices], indices




def calculate_statistics(ground_truth, predicted_classes):
    """Return the accuracy of ``predicted_classes`` against ``ground_truth``.

    Thin wrapper around ``accuracy_score``: the fraction of samples whose
    predicted class equals the ground-truth class.
    """
    return accuracy_score(ground_truth, predicted_classes)


Adjust matplotlib parameters to save plots as .svg files.

In [None]:
# Matplotlib settings applied globally for all figures of this notebook:
# no LaTeX text rendering, and 'svg.fonttype' = 'none', which per the
# matplotlib rcParams documentation writes SVG text as text elements
# instead of converting the glyphs to paths.
new_rc_params = {'text.usetex': False,
                'svg.fonttype': 'none'}
mpl.rcParams.update(new_rc_params)

## Load paths <a class="anchor" id="second-bullet"></a>

In [None]:
# Input data: dataset to analyze and reference information.
# Information on trained example models and download links
# can be found in the Twin Network GitHub repository
# (https://github.com/mueller-lab/TwinNet/tree/main/data)

# Change the name below to fit the Twin Network model version to be used.
# NOTE(review): the original comment named the example model
# 'model_trained_more_data' (zebrafish embryos), but the value set here is
# 'ModelTN_v2' - confirm which model file is intended.
modelName = 'ModelTN_v2'
data_to_analyze = 'segments_EN_Acquifer'
# NOTE(review): placeholder - a single space. Every path below is built by
# string concatenation with this value, so it must be set to the real data
# root (some paths expect a trailing separator, see `outpath`).
data_path = ' '

# Name of the reference group and of the groups compared against it;
# each is expected as a folder 'segments_<name>' inside `srcdir`.
ref_name = 'NORMAL'
eval_names = ['BMP', 'NODAL', 'FGF','PCP', 'RA', 'SSH', 'WNT']

# For classification
N_samples = 20  # number of random samples drawn per sample size
alpha = 0.01    # p-value threshold: a position counts as deviating when p < alpha
min_frac_diff = 0.3 # a sample is classified abnormal when the fraction of deviating positions exceeds this
max_N_embryos = 44  # largest number of embryos per sample
n_rep = 5 # number of repetitions


# Set input paths
srcdir                 = data_path + data_to_analyze
dir_segments_ref       = srcdir + '/segments_'+ref_name+'/'



## Output paths
outName = data_to_analyze + '_' + modelName
# NOTE(review): this path inserts '/' after data_path while `outpath` below
# does not - confirm the intended form of data_path.
dir_dst_embeddings =  Path(data_path + '/embeddings_new/embeddings_' + outName + '/')

outpath       = data_path + 'Results/'
outpathDetail = Path(outpath + 'AllGraphs/' + outName + '/')
outNameSufix  = '_' + outName + '.svg'
outFileNameV1 = outpath + 'results' + outNameSufix


# Folder with the image sequences used for the comparison videos
srcVideopath = srcdir+'/Representative/aligned/'

np.random.seed(1) # for reproducibility


Prepare directory to save output files to

In [None]:
# Create the output folders (including parents); existing folders are kept.
outpathDetail.mkdir(parents=True, exist_ok=True)
dir_dst_embeddings.mkdir(parents=True, exist_ok=True)


## Print analysis description
modelpath = os.path.join(data_path, modelName+'.h5')

# A space was missing before 'with model', which glued the dataset name to it.
print('Analizing ' + data_to_analyze + ' with model: ' + modelName)
print(data_to_analyze + ': ' + srcdir)
print(modelName + ' : ' + modelpath)
print('Saving detail results at : ' +str(outpathDetail))

## Load model <a class="anchor" id="third-bullet"></a>

In [None]:
# Load the embedding model from `modelpath` (data_path/<modelName>.h5) using
# the project helper tools_model.tn_embedding_load.
tn_model_embedding = tools_model.tn_embedding_load(modelpath)

## Calculate embeddings <a class="anchor" id="fourth-bullet"></a>

In [None]:
# Get reference embeddings: one entry per embryo subfolder of the reference group
All_embeddings_reference  = get_all_embeddings(dir_segments_ref,  tn_model_embedding)

# One list of per-embryo embeddings for each group in `eval_names` (same order)
All_embeddings_evaluation = []

for eval_name in eval_names:
    dir_segments_eval = srcdir + '/segments_'+eval_name+'/'
    embeddings_group = get_all_embeddings(dir_segments_eval, tn_model_embedding)
    All_embeddings_evaluation.append(embeddings_group)
    # Report the number of embryos in THIS group; the previous message printed
    # len(All_embeddings_evaluation), i.e. the number of groups processed so far.
    print(f"Analyzing : {len(embeddings_group)} embryos against {len(All_embeddings_reference)} reference ones")

## Calculate similarities <a class="anchor" id="sixth-bullet"></a>

In [None]:
# Detection accuracy as a function of the number of embryos per sample.
#
# For every evaluated group, every repetition and every sample size
# N_embryos = 3 .. max_N_embryos_i, `N_samples` random samples are drawn.
# Each sample is classified as abnormal (1) when the fraction of positions
# whose one-sided Mann-Whitney U p-value is below `alpha` exceeds
# `min_frac_diff`. The accuracy for sample size N is stored in column N-1.
All_accuracy = []

# Every sample comes from a treated group, so the expected class is always
# 1 (abnormal); the array is never modified and can be shared by all iterations.
ground_truth = np.ones((N_samples,), dtype=int)

for All_embeddings_eval, name in zip(All_embeddings_evaluation, eval_names):

    # Largest sample size both groups can supply. The reference group is part
    # of the bound because the same number of embryos is drawn from it; asking
    # for more than it holds makes randomly_choose_elements raise ValueError.
    max_N_embryos_i = min(max_N_embryos,
                          len(All_embeddings_eval),
                          len(All_embeddings_reference))

    # Columns that are never filled (sizes 1, 2 and > max_N_embryos_i) stay NaN
    accuracy = np.full((n_rep, max_N_embryos), np.nan)

    for i in tqdm(range(n_rep), desc='Processing '+name):
        for N_embryos in range(3, max_N_embryos_i + 1):

            predicted_classes = np.zeros((N_samples,), dtype=int)  # 0 normal, 1 abnormal

            for itt in range(N_samples):
                # get a random sample of N_embryos (reference first, then evaluated,
                # to keep the sequence of random draws unchanged)
                embeddings_reference, indexes_ref = randomly_choose_elements(All_embeddings_reference, N_embryos)
                embeddings_evaluation, indexes_eval = randomly_choose_elements(All_embeddings_eval, N_embryos)

                # median similarity of each embryo to the sampled reference embryos
                similarities     = get_median_similarities_to_reference(embeddings_reference, embeddings_evaluation)
                similarities_ref = get_median_similarities_to_reference(embeddings_reference, embeddings_reference)

                # Mann-Whitney U test per position (axis 0 runs over embryos);
                # 'less' tests whether the evaluated similarities are lower
                statistic, p_value = mannwhitneyu(similarities, similarities_ref, alternative='less', axis=0)

                # fraction of positions that differ from the control
                frac_diff = sum(1 for x in p_value if x < alpha) / len(p_value)

                # classify the sample
                if frac_diff > min_frac_diff:
                    predicted_classes[itt] = 1

            # accuracy of the classification for this sample size
            accuracy[i, N_embryos-1] = calculate_statistics(ground_truth, predicted_classes)

    # Store data
    All_accuracy.append(accuracy)

    # Plotting: only the evaluated sample sizes 3 .. max_N_embryos_i
    N_values = np.arange(3, max_N_embryos_i + 1)
    accuracies = np.nanmean(accuracy[:, 2:max_N_embryos_i], axis=0)
    std_accuracies = np.nanstd(accuracy[:, 2:max_N_embryos_i], axis=0)

    fig, axs = plt.subplots(dpi=300, figsize=(4,3))
    plt.plot(N_values, accuracies, label='Acuracy '+ name)
    plt.fill_between(N_values, accuracies-std_accuracies, accuracies+std_accuracies, alpha=0.2)

    plt.xlabel('N_embryos')
    plt.ylabel('Metric Value')
    plt.title('Metrics vs N_embryos')
    plt.legend()
    plt.show()
    

In [None]:
# Plot all accuracy
name2plot  = ['BMP', 'PCP', 'FGF', 'SSH',  'NODAL',  'RA',  'WNT']
indexesT = [eval_names.index(name) for name in name2plot]

print(indexesT)

#colors = pl.cm.tab20(np.linspace(0,1,len(indexesT)))
colors = pl.cm.viridis(np.linspace(0,1,len(indexesT)+1))


fig, axs = plt.subplots(dpi=300, figsize=(5,3))
N_values = np.arange(1, max_N_embryos+1, 1)
for i, (idx) in enumerate(indexesT, 0):
    accuracy = All_accuracy[idx]
    name = eval_names[idx]
    
    print(name)
#for i, (accuracy, name) in enumerate(zip(All_accuracy, eval_names), 0):

    accuracies = np.nanmean(accuracy, axis=0)
    std_accuracies = np.nanstd(accuracy, axis=0)
    plt.plot(N_values, accuracies, label=name, color=colors[i+1])
    plt.fill_between(N_values,accuracies-std_accuracies, accuracies+std_accuracies, facecolor=colors[i+1], alpha=0.2)

    
plt.xlim(0, max_N_embryos)
plt.ylim(0, 1)
plt.legend(fontsize='xx-small')
plt.xlabel('# embryos per sample',  fontsize=10)
plt.ylabel('Detection accuracy', fontsize=10)
#plt.xticks(np.arange(0, max_N_embryos, 4), fontsize=6)          
#plt.yticks(np.arange(0, 1, 10), fontsize=6)

outname =  os.path.join(outpath, data_to_analyze+'_accuracy.svg')
plt.savefig(outname, bbox_inches='tight', dpi=300)      
#plt.show()  
plt.close()  

In [None]:
# Plot some examples: for every group, the mean +/- std of the per-embryo
# median similarities to a random reference sample, plus the per-position
# p-values of a one-sided Mann-Whitney U test against the reference sample.
# Uses `indexesT` and `colors` defined in the accuracy overview cell.
np.random.seed(1) # for reproducibility

# Never request more reference embryos than exist: asking for too many makes
# randomly_choose_elements raise ValueError.
N_embryos_ref = min(max_N_embryos, len(All_embeddings_reference))
embeddings_reference, index_ref = randomly_choose_elements(All_embeddings_reference, N_embryos_ref)

# The reference-vs-reference statistics depend only on the reference sample,
# so they are computed once instead of once per evaluated group.
similarities_ref = get_median_similarities_to_reference(embeddings_reference, embeddings_reference)
average_ref = np.mean(similarities_ref, axis=0)
std_dev_ref = np.std(similarities_ref, axis=0)
# NOTE: despite the names, *_up is the lower edge (mean - std) and *_down the
# upper edge of the shaded band; fill_between does not care about the order.
vals_up_ref = average_ref - std_dev_ref
vals_down_ref = average_ref + std_dev_ref

for i, (idx) in enumerate(indexesT, 0):
    All_embeddings_eval = All_embeddings_evaluation[idx]
    name = eval_names[idx]

    # Use at most max_N_embryos embryos of this group
    N_embryos = min(max_N_embryos, len(All_embeddings_eval))

    embeddings_evaluation, index_eval = randomly_choose_elements(All_embeddings_eval, N_embryos)

    print(f"Analyzing {name}: {len(embeddings_evaluation)} embryos against {len(embeddings_reference)} reference ones")
    print(f"    from  : {len(All_embeddings_eval)} embryos against {len(All_embeddings_reference)} reference ones")

    similarities = get_median_similarities_to_reference(embeddings_reference, embeddings_evaluation)

    average = np.mean(similarities, axis=0)
    std_dev = np.std(similarities, axis=0)
    vals_up = average - std_dev
    vals_down = average + std_dev

    # Perform the Mann-Whitney U test per position (axis 0 runs over embryos)
    statistic, p_value = mannwhitneyu(similarities, similarities_ref, alternative='less', axis=0)

    # Percentage of positions with p < alpha
    frac_diff = sum(1 for x in p_value if x < alpha)
    frac_diff = 100 * frac_diff / len(p_value)

    print(f"    frac_diff: {frac_diff:.3f} for {name}")

    # x axis: position index times 2 (plotted as minutes, see the axis label)
    x = np.arange(similarities.shape[1]) * 2.0

    # --- similarity curve of this group ---
    fig, axs = plt.subplots(dpi=300, figsize=(4,2))

    plt.scatter(x, average, label=f"{name}: {frac_diff:.2f} % diff", color=colors[i+1], marker='.', s=1)
    plt.fill_between(x, vals_up, vals_down, alpha=0.2, facecolor=colors[i+1])
    plt.ylim(0.5, 1.0)
    plt.xlim(0, 1440)
    plt.xticks(np.arange(0, 1600, 200), fontsize=6)
    plt.yticks(np.arange(0.5, 1.01, 0.1), fontsize=6)
    plt.legend()
    plt.xlabel('Time (min)',  fontsize=8)
    plt.ylabel('Cosine similarity ϕ', fontsize=8)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)

    outname =  os.path.join(outpath, data_to_analyze+'_'+name+'_similarity.svg')
    plt.savefig(outname, bbox_inches='tight', dpi=300)
    plt.close()

    # --- p-values of this group ---
    fig, axs = plt.subplots(dpi=300, figsize=(4,1))
    plt.scatter(x, np.log10(p_value), color=colors[i+1], marker='.', s=1)
    plt.axhline(y=np.log10(alpha), color='r', linestyle='--', label=f"p-value: {alpha:.3f}")
    plt.xlabel('Time (min)',  fontsize=8)
    # The plotted values are base-10 logarithms, so the label says log10
    # (it previously read 'ln(p-value)').
    plt.ylabel('log10(p-value)', fontsize=8)
    plt.ylim(-20, 0)
    plt.xlim(0, 1440)
    plt.xticks(np.arange(0, 1600, 200), fontsize=6)
    plt.yticks(np.arange(-20, 0, 5), fontsize=6)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)

    outname =  os.path.join(outpath, data_to_analyze+'_'+name+'_p_values.svg')
    plt.savefig(outname, bbox_inches='tight', dpi=300)
    plt.close()

    # --- reference curve, saved once (it is the same for every group) ---
    if i == 0:
        fig, axs = plt.subplots(dpi=300, figsize=(4,2))
        plt.scatter(x, average_ref, label='Untreated', color="#c2df23", marker='.', s=1)
        plt.fill_between(x, vals_up_ref, vals_down_ref, alpha=0.2, color="#c2df23")
        plt.ylim(0.5, 1.0)
        plt.xlim(0, 1440)
        plt.xticks(np.arange(0, 1600, 200), fontsize=6)
        plt.yticks(np.arange(0.5, 1.01, 0.1), fontsize=6)
        plt.legend()
        plt.xlabel('Time (min)',  fontsize=8)
        plt.ylabel('Cosine similarity', fontsize=8)
        plt.gca().spines['top'].set_visible(False)
        plt.gca().spines['right'].set_visible(False)
        outname =  os.path.join(outpath, data_to_analyze+'_Untreated_similarity.svg')
        plt.savefig(outname, bbox_inches='tight', dpi=300)
        plt.close()
        
        

In [None]:
from matplotlib.animation import FuncAnimation

# Render one video per group: similarity curve with a moving time marker (left),
# the untreated example image (middle) and the group's example image (right).
# Uses `indexesT`, `colors` and `embeddings_reference` from earlier cells.

# Titles shown above the group image, in the same order as `eval_names`
eval_names_Title = ['-BMP', '-NODAL', '-FGF','-PCP', '+RA', '-SSH', '-WNT']

# Calculate similarities for N randomly chosen embryos
np.random.seed(1) # for reproducibility

# For each phenotype
for i, (idx) in enumerate(indexesT, 0):

    All_embeddings_eval = All_embeddings_evaluation[idx]
    nameId = eval_names[idx]
    titleId = eval_names_Title[idx]

    # The image folder and the output video use 'SHH' where eval_names has 'SSH'
    # NOTE(review): presumably the folder on disk is spelled 'SHH' - confirm.
    if nameId == 'SSH':
        nameId = 'SHH'

    # Use at most max_N_embryos embryos of this group
    N_embryos = min(max_N_embryos, len(All_embeddings_eval))

    embeddings_evaluation, indexes = randomly_choose_elements(All_embeddings_eval, N_embryos)

    print(f"Analyzing {nameId}: {len(embeddings_evaluation)} embryos against {len(embeddings_reference)} reference ones")
    print(f"    from  : {len(All_embeddings_eval)} embryos against {len(All_embeddings_reference)} reference ones")

    similarities = get_median_similarities_to_reference(embeddings_reference, embeddings_evaluation)

    average = np.mean(similarities, axis=0)
    std_dev = np.std(similarities, axis=0)
    vals_up = average - std_dev
    vals_down = average + std_dev

    # x axis computed from this group's own data (position index times 2)
    # instead of relying on an `x` left over from a previously executed cell.
    x = np.arange(similarities.shape[1]) * 2.0

    folder_path = srcVideopath+nameId
    folder_path_ref = srcVideopath+'NORMAL/'
    out_name_video = outpath+nameId+'_aligned.mp4'

    fig, axs = plt.subplots(1, 3, dpi=300, figsize=(12, 4))

    # Sorted lists of the TIFF images of the group and of the reference
    tiff_images_list = sorted([os.path.join(folder_path, filename) for filename in os.listdir(folder_path) if filename.lower().endswith((".tiff", ".tif"))])
    tiff_images_list_ref = sorted([os.path.join(folder_path_ref, filename) for filename in os.listdir(folder_path_ref) if filename.lower().endswith((".tiff", ".tif"))])

    # Both lists are indexed with the same frame number, so the video can only
    # be as long as the shorter one (a longer group list raised IndexError).
    n_frames = min(len(tiff_images_list), len(tiff_images_list_ref))

    def update(frame):
        """Redraw all three panels for animation frame number `frame`."""
        axs[0].clear()
        axs[0].scatter(x, average, color=colors[i+1], marker='.', s=1)
        axs[0].fill_between(x, vals_up, vals_down, alpha=0.2, facecolor=colors[i+1])
        axs[0].set_ylim(0.5, 1.0)
        axs[0].set_xlim(0, 1440)
        axs[0].set_xticks(np.arange(0, 1600, 200))
        axs[0].set_yticks(np.arange(0.5, 1.01, 0.1))
        axs[0].set_xlabel('Time (min)', fontsize=8)
        axs[0].set_ylabel('Cosine similarity ϕ', fontsize=8)
        axs[0].spines['top'].set_visible(False)
        axs[0].spines['right'].set_visible(False)
        # Vertical marker at the current frame, on the same scale as `x`
        axs[0].axvline(x=2*frame, color='red', linestyle='--', label='Perpendicular Line')

        # Load the images of this frame; OpenCV reads BGR, imshow expects RGB
        image = cv2.imread(tiff_images_list[frame])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_ref = cv2.imread(tiff_images_list_ref[frame])
        image_ref = cv2.cvtColor(image_ref, cv2.COLOR_BGR2RGB)

        axs[1].clear()
        axs[1].imshow(image_ref)
        axs[1].set_title('Untreated', fontsize=10)

        axs[2].clear()
        axs[2].imshow(image)
        # Only the display title is set; an earlier set_title(nameId) call was
        # immediately overwritten by this one and has been dropped.
        axs[2].set_title(titleId, fontsize=10, style='italic')

        # Remove ticks from the image subplots
        axs[1].set_xticks([])
        axs[1].set_yticks([])
        axs[2].set_xticks([])
        axs[2].set_yticks([])

        # Adjust the layout to fill the upper panel
        plt.subplots_adjust(top=0.8, bottom=0.2)

    animation = FuncAnimation(fig, update, frames=n_frames, interval=50)

    # Save the animation as a video (requires ffmpeg)
    animation.save(out_name_video, writer='ffmpeg', dpi=300)

    plt.show()

    plt.close()

In [None]:
# get representative example: for every group, find the sampled embryo whose
# median-similarity curve is closest (Euclidean norm) to the group average and
# plot it on top of the group's mean +/- std band.
# Uses `indexesT`, `colors` and `embeddings_reference` from earlier cells.
np.random.seed(1) # for reproducibility

# Recompute the reference-vs-reference similarities for the current reference
# sample instead of relying on a `similarities_ref` left over from an earlier
# cell, which may be stale or missing when cells are run out of order.
similarities_ref = get_median_similarities_to_reference(embeddings_reference, embeddings_reference)

for i, (idx) in enumerate(indexesT, 0):
    All_embeddings_eval = All_embeddings_evaluation[idx]
    name = eval_names[idx]

    # Use at most max_N_embryos embryos of this group
    N_embryos = min(max_N_embryos, len(All_embeddings_eval))

    embeddings_evaluation, indexes = randomly_choose_elements(All_embeddings_eval, N_embryos)

    print(f"Analyzing {name}: {len(embeddings_evaluation)} embryos against {len(embeddings_reference)} reference ones")
    print(f"    from  : {len(All_embeddings_eval)} embryos against {len(All_embeddings_reference)} reference ones")

    similarities = get_median_similarities_to_reference(embeddings_reference, embeddings_evaluation)

    average = np.mean(similarities, axis=0)
    std_dev = np.std(similarities, axis=0)
    vals_up = average - std_dev
    vals_down = average + std_dev

    # Row (= sampled embryo) with the smallest distance to the average curve
    errors = np.linalg.norm(similarities - average, axis=1)
    closest_row_index = np.argmin(errors)

    # Map the row back to its folder: `indexes` holds the positions of the
    # sampled embryos in the sorted subfolder list used by get_all_embeddings.
    dir_segments_eval = srcdir + '/segments_'+name+'/'
    subfolders = sorted(item for item in os.listdir(dir_segments_eval) if os.path.isdir(os.path.join(dir_segments_eval, item)))

    print(f"    Closest embryo to average: {subfolders[indexes[closest_row_index]]}")

    # Perform the Mann-Whitney U test per position (axis 0 runs over embryos)
    statistic, p_value = mannwhitneyu(similarities, similarities_ref, alternative='less', axis=0)

    # Percentage of positions with p < alpha
    frac_diff = sum(1 for x in p_value if x < alpha)
    frac_diff = 100 * frac_diff / len(p_value)

    print(f"    frac_diff: {frac_diff:.3f} for {name}")

    # x axis: position index times 2 (plotted as minutes, see the axis label)
    x = np.arange(similarities.shape[1]) * 2.0

    fig, axs = plt.subplots(dpi=300, figsize=(4,2))

    plt.scatter(x, average, label=f"{name}: {frac_diff:.2f} % diff", color=colors[i+1], marker='.', s=1)
    plt.fill_between(x, vals_up, vals_down, alpha=0.2, facecolor=colors[i+1])
    # Curve of the representative embryo
    plt.plot(x, similarities[closest_row_index], lw=0.5, color='black')
    plt.ylim(0.5, 1.0)
    plt.xlim(0, 1440)
    plt.xticks(np.arange(0, 1600, 200), fontsize=6)
    plt.yticks(np.arange(0.5, 1.01, 0.1), fontsize=6)
    plt.legend()
    plt.xlabel('Time (min)',  fontsize=8)
    plt.ylabel('Cosine similarity', fontsize=8)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)

    # Saving is disabled here (the figure is only shown); the file name is the
    # same one used by the example-plot cell.
    outname =  os.path.join(outpath, data_to_analyze+'_'+name+'_similarity.svg')
    #plt.savefig(outname, bbox_inches='tight', dpi=300)
    plt.show()
    #plt.close()
        
   