# Evaluations of variability in self-similarity matrices: Zebrafish (Danio rerio)

# Table of Contents
* [General](#first-bullet)
* [Load paths](#second-bullet)
* [Load models](#third-bullet)
* [Calculate embeddings and similarities](#fourth-bullet)

## General <a class="anchor" id="first-bullet"></a>

General imports and class definitions

In [None]:
# Load IPython's autoreload extension and, with mode 2, reload imported
# modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

Import Python packages

In [None]:
import glob
from IPython.display import HTML, Image,SVG,display
import matplotlib as mpl
import pandas as pd
from pathlib import Path
import sys
import os
import time
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable


from twinnet_tools.tnautoregression import TNToolsAutoregression
from twinnet_tools.tngeneral import TNToolsGeneral
from twinnet_tools.tninference import TNToolsEmbeddings
from twinnet_tools.tninference import TNToolsImages
from twinnet_tools.tninference import TNToolsSimilarities
from twinnet_tools.tnmodel import TNToolsNetwork
from twinnet_tools.tnplot import TNToolsPlot

from twinnet_tools.tnconfig import ProjectConfig

Load config file and paths from config file

In [None]:
# Project configuration object; its `json` attribute is indexed by key when
# the paths are assembled.
config = ProjectConfig("twinnet_config")

In [None]:
# Location of the TwinNet sources; the name of the scripts directory is read
# from the config.
srcpath = '../TwinNet-main/'
dir_root_scripts = os.path.join(srcpath, config.json["dir_scripts"])

# Directory holding the trained models and the base names of the model files
# (the ".h5" extension is appended when the models are loaded).
path_models_zebrafish = '../temperature_zFish/models/'
model_names = ['Model_zfish_var01', 'Model_zfish_var02', 'Model_zfish_var03','Model_zfish_var04','Model_zfish_var05','Model_zfish_var06', 'Model_zfish_var07', 'Model_zfish_var08', 'Model_zfish_var09', 'Model_zfish_var10']

# "Autoregression_zebrafish" entry of the config.
# NOTE(review): not referenced in the cells shown here - confirm it is still needed.
config_paths_script = config.json["Autoregression_zebrafish"]
# Test data directory, the JSON file listing the test embryos, and the
# directory that receives the exported figures.
dir_data = '../temperature_zFish/testData/'
path_src_data_test_json = os.path.join(dir_data,'test_embryos_sorted.json')
path_dst = '../temperature_zFish/results_Autoregression_variability/'


# Number of images a sequence must have; sequences with a different count are
# reported and left out when the 2D similarity arrays are built.
num_images = 720
time_interval = 2  # acquisition interval in minutes; used for the axis tick labels

Prepare class instances

In [None]:
# Helper instances from twinnet_tools used by the cells of this notebook.
tools_autoregression = TNToolsAutoregression()  # self-similarity calculation, conversion to 2D arrays
tools_general = TNToolsGeneral()  # JSON loading
tools_model = TNToolsNetwork()  # loading of the embedding models
tools_plot = TNToolsPlot()  # NOTE(review): not referenced in the cells shown here

Adjust matplotlib parameters to save plots as .svg files.

In [None]:
# 'svg.fonttype': 'none' makes matplotlib write text as SVG text elements
# rather than converting glyphs to paths; 'text.usetex': False renders text
# without LaTeX.
new_rc_params = {'text.usetex': False,
                'svg.fonttype': 'none'}
mpl.rcParams.update(new_rc_params)

## Load paths <a class="anchor" id="second-bullet"></a>

Load test data paths

In [None]:
# Build one directory path per test embryo listed under the
# 'normal_bright_complete' key of the test-data JSON (entries sorted by name).
#
# The '//' and './' clean-up is applied to the JSON entry only. Running
# `.replace('./', '')` over the joined string also matches the "./" inside a
# leading "../" of `dir_data`, which turned '../temperature_zFish/...' into
# '.temperature_zFish/...'.
# NOTE(review): entries are assumed to start with "./" (hence p[2:]) - confirm
# against the JSON file.
embryos_test = [
    dir_data.rstrip('/') + '/'
    + f"{p[2:]}/".replace('//', '/').replace('./', '').lstrip('/')
    for p in sorted(tools_general.fn_json_load(
        path_src_data_test_json)['normal_bright_complete'])
]

In [None]:
#embryo_test = embryos_test[2]
#print(embryo_test1)

Specify the output directory (currently disabled)

In [None]:
#dir_dst_similarities = f"{path_dst}/similarities"
#dir_dst_similarities_path = Path( dir_dst_similarities )
#dir_dst_similarities_path.mkdir(parents=True, exist_ok=True)

## Load models <a class="anchor" id="third-bullet"></a>

In [None]:
# Load one embedding model per entry of `model_names` (iterated in sorted
# order) from "<path_models_zebrafish>/<model name>.h5". The position of a
# model in `all_models` is the model index used in later cells.
all_models = []

for model_name in sorted(model_names):
    path_model_zebrafish = os.path.join(path_models_zebrafish, model_name+".h5")
    print(path_model_zebrafish)
    tn_model_embedding = tools_model.tn_embedding_load(path_model_zebrafish)
    all_models.append(tn_model_embedding)

## Calculate embeddings and similarities <a class="anchor" id="fourth-bullet"></a>

In [None]:
# Self-similarities and image paths for every (embryo, model) pair.
# Both results are nested lists indexed as [embryo][model].
all_similarities = []
all_paths_imgs = []

# Wall-clock timing of the whole calculation (time.time() is in seconds).
start_time = time.time()

for embryo_test in embryos_test:
    # Results of all models for the current embryo
    sims = []
    paths_i = []
    for counter, tn_model_embedding in enumerate(all_models):
        print(f"Calculating: {embryo_test} with model {counter}")
        similarities_test, paths_imgs = tools_autoregression.similarities_self_calculate(embryo_test, tn_model_embedding)
        sims.append(similarities_test)
        paths_i.append(paths_imgs)

    all_similarities.append(sims)
    all_paths_imgs.append(paths_i)

end_time = time.time()
processing_time = end_time - start_time
print(processing_time)

Define helper functions for plotting and summary statistics, then transform similarities to 2D arrays

In [None]:
# Function to plot a 2D matrix and export it as SVG

def plot_similarity_2d(xs_grid, ys_grid, zs_grid_plot, time_interval, outpath, maxVal):
    """
    Plot similarities of images from a time-series image sequence
    of an embryo as two-dimensional matrix and save the figure.

    Parameters
    ----------

    xs_grid, ys_grid: array-like
        Not used by this function; kept so that existing calls
        keep working.

    zs_grid_plot: 2D array-like
        Matrix of values to display. The length of its first row
        determines the axis limits and the tick positions.

    time_interval: int/float
        Time interval (min), at which test embryo images
        were acquired. Tick positions (image indices) are multiplied
        by this value so that the axes are labelled with minutes.

    outpath: string
        Path to save the image. Missing parent directories are created.

    maxVal: int/float
        Upper limit of the color scale; the lower limit is fixed at 0.
    """
    # NOTE(review): one more than the matrix width - presumably because the
    # matrix covers num_images - 1 time points; confirm.
    num_imgs_test = len(zs_grid_plot[0]) + 1
    plot_xy_ticks = list(range(0, num_imgs_test, 100))
    plot_xy_ticklabels = [str(a * time_interval) for a in plot_xy_ticks]

    plot_colors_hlf = plt.cm.viridis
    plot_fig_dpi = 300
    plot_fig_size_square = (4, 4)
    plot_fontsize_ticks = 8
    plot_fontsize_labels = 6

    # savefig does not create missing directories, so make sure the
    # destination exists before any plotting work is done.
    if isinstance(outpath, (str, os.PathLike)):
        out_dir = os.path.dirname(outpath)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # Figure
    f, ax = plt.subplots(dpi=plot_fig_dpi,
                         figsize=plot_fig_size_square)
    f.tight_layout(rect=[0.0, 0.0, 0.8, 1.0])

    # Plot
    im = ax.imshow(zs_grid_plot,
                   cmap=plot_colors_hlf,
                   vmin=0, vmax=maxVal)

    # Labels and titles
    ax.tick_params(axis='both',
                   which='both',
                   labelsize=plot_fontsize_ticks)
    ax.set_xlabel('Time (min)',
                  fontsize=plot_fontsize_labels)
    ax.set_ylabel('Time (min)',
                  fontsize=plot_fontsize_labels)
    ax.set_xlim(left=0,
                right=num_imgs_test-1)
    # bottom=0 makes the y axis increase upwards
    ax.set_ylim(bottom=0,
                top=num_imgs_test-1)
    ax.set_xticks(plot_xy_ticks,
                  plot_xy_ticklabels,
                  fontsize=plot_fontsize_ticks)
    ax.set_yticks(plot_xy_ticks,
                  plot_xy_ticklabels,
                  fontsize=plot_fontsize_ticks)

    # Colorbar in its own axes to the right of the matrix. It is built from
    # the image itself so that it always shows the colormap and limits that
    # were actually used for the plot.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right",
                              size="5%",
                              pad=0.05)
    cax.tick_params(axis='both',
                    which='both',
                    labelsize=plot_fontsize_ticks)
    cbar = f.colorbar(im, cax=cax)
    cbar.ax.set_ylabel('Cosine similarity ϕ',
                       fontsize=plot_fontsize_labels)

    # Save and close this figure explicitly instead of relying on the
    # "current figure" state of pyplot.
    f.savefig(outpath)
    plt.close(f)
        
        
def get_arrays_stats(similarities):
    """
    Compute element-wise summary statistics over a collection of arrays.

    The arrays are stacked along a new leading axis and reduced along
    that axis with NumPy's NaN-ignoring functions. The shape of the
    stacked array is printed.

    Parameters
    ----------

    similarities: sequence of arrays
        Arrays of identical shape (required by np.stack).

    Returns
    -------

    tuple of six arrays
        Mean, median, minimum, maximum, standard deviation and
        coefficient of variation, in this order. The coefficient of
        variation is the standard deviation divided by the mean, i.e.
        a ratio that is not multiplied by 100.
    """
    stack = np.stack(similarities, axis=0)
    print(f"Embryos similarities: {stack.shape} ")

    # One NaN-aware reduction per statistic, all along the stacking axis
    reducers = (np.nanmean, np.nanmedian, np.nanmin, np.nanmax, np.nanstd)
    mean_arr, median_arr, min_arr, max_arr, std_arr = (
        reduce_fn(stack, axis=0) for reduce_fn in reducers
    )

    return mean_arr, median_arr, min_arr, max_arr, std_arr, std_arr / mean_arr

def export_images_stats(xs_grid, ys_grid, embryo_similarities_list, path_dst, name, time_interval):
    """
    Compute summary statistics of a list of similarity matrices and
    export each statistic as an SVG image.

    Parameters
    ----------

    xs_grid, ys_grid:
        Passed through unchanged to plot_similarity_2d.

    embryo_similarities_list: sequence of arrays
        Matrices handed to get_arrays_stats.

    path_dst: string
        Directory for the exported images.

    name: string
        Prefix of the file names; the files are named
        "<name>_<statistic>_similarities.svg".

    time_interval: int/float
        Passed through unchanged to plot_similarity_2d.

    Returns
    -------

    tuple of six arrays
        Mean, median, minimum, maximum, standard deviation and
        coefficient of variation, as returned by get_arrays_stats.
    """
    average_array, median_array, min_array, max_array, std_array, cv_array = get_arrays_stats(embryo_similarities_list)

    # (file name tag, matrix, upper limit of the color scale)
    panels = (
        ("avg", average_array, 1.0),
        ("median", median_array, 1.0),
        ("min", min_array, 1.0),
        ("max", max_array, 1.0),
        ("std", std_array, 1.0),
        ("cv", cv_array, 2.0),
    )

    # Export images
    for tag, stat_array, color_max in panels:
        path_save = f"{path_dst}/{name}_{tag}_similarities.svg"
        plot_similarity_2d(xs_grid, ys_grid, stat_array, time_interval, path_save, color_max)

    return average_array, median_array, min_array, max_array, std_array, cv_array


def get_closest_element_array(stacked_arrays, average_array):
    """
    Return the index of the 2D array that is closest to a reference array.

    The distance of each array is its mean absolute element-wise
    difference to the reference. Positions where the difference is NaN
    (NaN in the array or in the reference) are left out of that mean, in
    line with the NaN-ignoring statistics used elsewhere in this notebook.
    Without this, a single NaN made the whole distance NaN and np.argmin
    returned the first such array. For NaN-free input the distances are
    the sum of absolute differences divided by the number of elements.

    Parameters
    ----------

    stacked_arrays: array-like, three-dimensional
        The candidate 2D arrays stacked along the first axis. A list of
        equally shaped 2D arrays is accepted as well.

    average_array: array-like
        Reference array; must broadcast against one candidate array.

    Returns
    -------

    closest_index: integer
        Index along the first axis of the array with the smallest
        distance. An array without any valid position gets an infinite
        distance.
    """
    stacked_arrays = np.asarray(stacked_arrays, dtype=float)

    # Absolute differences between each stacked array and the reference
    differences = np.abs(stacked_arrays - average_array)

    # Sum over both matrix axes, ignoring NaN positions
    valid_counts = (~np.isnan(differences)).sum(axis=(1, 2))
    summed = np.nansum(differences, axis=(1, 2))

    # Normalize by the number of valid elements of each array; arrays
    # without valid elements keep the initial infinite distance
    distances = np.full(summed.shape, np.inf)
    np.divide(summed, valid_counts, out=distances, where=valid_counts > 0)

    # Find the index of the array that is closest to the reference
    closest_index = np.argmin(distances)

    return closest_index



In [None]:
print(f"Similarities calculated for: {len(all_similarities)} embryo with {len(all_similarities[0])} models")

# 2D similarity matrices, indexed as [embryo][model]. A sequence whose image
# count differs from `num_images` is reported and contributes no matrix.
# NOTE: a triangle-threshold step (skimage.filters.threshold_triangle on the
# non-NaN values, setting everything below the threshold to zero) was tried
# at this point and is disabled.
all_similarities_array = []

for counterEmbryo, embryo_sim in enumerate(all_similarities):
    simsEmbryo = []
    for counterModel, similarities_test in enumerate(embryo_sim):
        num_imgs = len(all_paths_imgs[counterEmbryo][counterModel])
        #print(f"Embryo: {counterEmbryo} model {counterModel} : {num_imgs} images")
        if num_imgs != num_images:
            print("ERROR in number of images!!!!!!!!!!!!!!!")
            continue

        xs_grid, ys_grid, similarities_test_array = (
            tools_autoregression.fn_2d_sims_to_arrays(
                similarities_test, num_imgs, square=True)
        )
        simsEmbryo.append(np.array(similarities_test_array))

    all_similarities_array.append(simsEmbryo)

print(f"2D Similarities calculated for: {len(all_similarities_array)} embryo with {len(all_similarities_array[0])} models")

In [None]:
# Get average ensemble similarities for each embryo:
# for every embryo the matrices of all models are reduced to element-wise
# statistics, which are exported as images and collected per embryo.

avg_similarities = []
std_similarities = []
cv_similarities = []

xs = list(range(1, num_images))
ys = list(range(1, num_images))
xs_grid, ys_grid = np.meshgrid(xs, ys)

out_path_AllEmbryos = f"{path_dst}/AllEmbryos/"
# The images are written with savefig, which does not create missing
# directories, so create the output directory (and its parents) first.
os.makedirs(out_path_AllEmbryos, exist_ok=True)

for counterEmbryo, embryo_similarities_list in enumerate(all_similarities_array):

    # Calculate stats and export images
    embryoId = f"embryo_{counterEmbryo:03d}"
    average_array, median_array, min_array, max_array, std_array, cv_array = export_images_stats(xs_grid, ys_grid, embryo_similarities_list, out_path_AllEmbryos, embryoId, time_interval)

    # store data
    avg_similarities.append(average_array)
    std_similarities.append(std_array)
    cv_similarities.append(cv_array)
    # NaN-ignoring min/max, consistent with the NaN-ignoring statistics;
    # np.amin/np.amax would report nan as soon as one element is NaN.
    print(f"Embryo: {counterEmbryo}: min {np.nanmin(std_array)}, max {np.nanmax(std_array)} ")
    

    
    #print(f"Embryo: {counterEmbryo}: {average_array.shape} ")
 

In [None]:
# Get average similarities over all embryos
# NOTE(review): `stacked_arrays` is not referenced again in the cells shown
# here; export_images_stats receives the list `avg_similarities` directly.
stacked_arrays = np.stack(avg_similarities, axis=0)
# The statistics are taken over the per-embryo ensemble means, so the std/cv
# computed here describe variation between embryos. The images are written
# to `path_dst` with the prefix "all_embryos".
average_array, median_array, min_array, max_array, std_array, cv_array = export_images_stats(xs_grid, ys_grid, avg_similarities, path_dst, "all_embryos", time_interval)
