In [1]:
import matplotlib.pyplot as plt
from natsort import natsorted
from glob import glob
from utils import *
import pandas as pd
import numpy as np
import os

# Upper bound on the time points to keep.
# NOTE(review): the unit (frame index vs. minutes) is not visible here - confirm.
keep_until = 200

# Duration of one frame (one data point), in minutes
time_per_frame = 2


In [2]:

# Path to store reports
report_repository_dir = './report_repository'

# List of genotypes
genotype_list = ['FTD', 'WT']

# Name fragments marking entries that are not raw acquisition folders: the
# '<acquisition>_figures' / '<acquisition>_data' folders created by this very
# loop on a previous run, and 'DS_Store' files.
non_acquisition_markers = ('_figures', '_data', 'DS_Store')

for genotype in genotype_list:
    genotype_dir = os.path.join(report_repository_dir, genotype)

    for aquisition in os.listdir(genotype_dir):
        # Skip generated output folders and system files
        if any(marker in aquisition for marker in non_acquisition_markers):
            continue

        # os.listdir also returns plain files; without this check a stray file
        # would get its own '<file>_figures' and '<file>_data' folders.
        acquisition_dir = os.path.join(genotype_dir, aquisition)
        if not os.path.isdir(acquisition_dir):
            continue

        # Loading csv files for all scenes in this acquisition folder
        scenes_csv_paths = natsorted(glob(os.path.join(acquisition_dir, 'scene_*.csv')))

        # Nothing to aggregate or plot: skip before creating empty output
        # folders or handing an empty list to the helpers.
        if not scenes_csv_paths:
            print('Skipping ' + acquisition_dir + ': no scene_*.csv file found')
            continue

        # Preparing the output paths to store data
        all_figures_output_dir = os.path.join(report_repository_dir, genotype, aquisition+'_figures', 'all_scenes')
        mean_figures_output_dir = os.path.join(report_repository_dir, genotype, aquisition+'_figures', 'scenes_mean')
        data_output_dir = os.path.join(report_repository_dir, genotype, aquisition+'_data')

        # Creating the directories to store data and figures
        for output_dir in (all_figures_output_dir, mean_figures_output_dir, data_output_dir):
            os.makedirs(output_dir, exist_ok=True)

        # Computing the mean and std data frames for the current acquisition
        # (helper from utils; presumably also writes them to data_output_dir - confirm)
        acquisition_mean, acquisition_std = csv_list_to_mean_std(scenes_csv_paths, aquisition, data_output_dir)

        # Plotting all scenes per acquisition
        plot_all_scenes(scenes_csv_paths, all_figures_output_dir)

        # Plotting the scenes mean data per acquisition
        plot_mean_scenes(acquisition_mean, acquisition_std, aquisition, mean_figures_output_dir)

In [3]:

# Genotypes to aggregate
genotype_list = ['FTD', 'WT']

for genotype in genotype_list:
    # Genotype-level outputs live next to the genotype folder:
    # '<genotype>_figures/{all_scenes,scenes_mean}' and '<genotype>_data'
    genotype_figures_dir = os.path.join(report_repository_dir, genotype+'_figures')
    all_figures_output_dir = os.path.join(genotype_figures_dir, 'all_scenes')
    mean_figures_output_dir = os.path.join(genotype_figures_dir, 'scenes_mean')
    data_output_dir = os.path.join(report_repository_dir, genotype+'_data')

    # Make sure every output folder exists
    for output_dir in (all_figures_output_dir, mean_figures_output_dir, data_output_dir):
        os.makedirs(output_dir, exist_ok=True)

    # Collecting the '*_mean.csv' files found in the '*_data' folders of this
    # genotype (presumably the per-acquisition means written earlier - confirm)
    acquisition_pattern = os.path.join(report_repository_dir, genotype, '*_data', '*_mean.csv')
    acquisition_csv_paths = natsorted(glob(acquisition_pattern))

    # Computing the mean and std data frames for the current genotype
    genotype_mean, genotype_std = csv_list_to_mean_std(acquisition_csv_paths, genotype, data_output_dir)

    # Plotting every collected acquisition file for this genotype
    plot_all_scenes(acquisition_csv_paths, all_figures_output_dir)

    # Plotting the genotype mean data
    plot_mean_scenes(genotype_mean, genotype_std, genotype, mean_figures_output_dir)

In [4]:

# List of genotypes
genotype_list = ['FTD', 'WT']

# Line / band colour of each genotype. A genotype missing from this mapping
# fails with a KeyError instead of silently reusing the previous colour.
genotype_colors = {'FTD': 'tab:green', 'WT': 'tab:gray'}

# The WT mean table is the reference for the column names and the x-axis range
csv_sample_file = pd.read_csv( os.path.join(report_repository_dir, 'WT_data', 'WT_mean.csv') )

# Retrieving the column names; the first column is used as the x axis
col_names = csv_sample_file.keys()
time_column = col_names[0]
time_axis_max = np.max(list(csv_sample_file[time_column]))

# Preparing and creating the output directory once, before any plotting
all_figures_output_dir = os.path.join(report_repository_dir, 'genotypes_comparison_figures')
os.makedirs(all_figures_output_dir, exist_ok = True)

# Reading the mean and std tables of every genotype once, instead of
# re-reading both files for each plotted column
genotype_tables = {}
for genotype in genotype_list:
    mean_genotype_csv = os.path.join(report_repository_dir, genotype+'_data',genotype+'_mean.csv')
    std_genotype_csv = os.path.join(report_repository_dir, genotype+'_data',genotype+'_std.csv')
    genotype_tables[genotype] = (pd.read_csv(mean_genotype_csv), pd.read_csv(std_genotype_csv))

# One figure per data column (every column except the first)
for i in range(1, len(col_names)):
    column = col_names[i]
    max_data = 0
    fig, ax = plt.subplots(figsize=(8,6))

    for genotype in genotype_list:
        mean_genotype, std_genotype = genotype_tables[genotype]
        color = genotype_colors[genotype]

        # Looking for max value of the mean curves (sets the y-axis range)
        tmp_max = np.max(list(mean_genotype[column]))
        if tmp_max > max_data:
            max_data = tmp_max

        # Mean curve with a translucent mean +/- std band
        time_points = list(mean_genotype[time_column])
        ax.plot(time_points, list(mean_genotype[column]), color=color, label=genotype)
        ax.fill_between(time_points,
                        list(mean_genotype[column]-std_genotype[column]),
                        list(mean_genotype[column]+std_genotype[column]),
                        alpha=0.1, linewidth=0, color=color)

    ax.set_xlabel("Time [min]")
    ax.set_xlim((0, time_axis_max))
    # 20 % headroom above the highest mean value
    ax.set_ylim((0, max_data+0.2*max_data))

    plt.setp(ax.get_yticklabels(), fontsize=12, alpha=.7)
    ax.set_ylabel(column)
    # Legend placed outside the axes, to the right
    ax.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')

    ax.set_title(column)
    # Lighten borders
    ax.spines["top"].set_alpha(.0)
    ax.spines["bottom"].set_alpha(.3)
    ax.spines["right"].set_alpha(.0)
    ax.spines["left"].set_alpha(.3)
    ax.grid(axis='y', alpha=.3)

    fig.tight_layout()

    # Files are named after the column position: '1.pdf', '2.pdf', ...
    fig.savefig(os.path.join(all_figures_output_dir, str(i)+'.pdf'), dpi=200)
    # Close explicitly so figures do not accumulate in memory across the loop
    plt.close(fig)

In [5]:
# FTD-mutants = GRN + C9
# Glob patterns of the '*_mean.csv' files in the '*_data' folders of each genotype
ftd_acquisitions = os.path.join(report_repository_dir, 'FTD', '*_data','*_mean.csv')
wt_acquisitions = os.path.join(report_repository_dir, 'WT', '*_data','*_mean.csv')

# Each file is labelled by the pattern that found it rather than by searching
# the whole path for 'FTD', which would mislabel a WT file whose folder or
# file name happens to contain that substring. natsorted makes the row order
# reproducible (plain glob order is arbitrary).
labelled_paths = [
    (path, genotype_label)
    for genotype_label, pattern in (('FTD mutant', ftd_acquisitions), ('WT', wt_acquisitions))
    for path in natsorted(glob(pattern))
]
data_paths = [path for path, genotype_label in labelled_paths]

if not data_paths:
    raise FileNotFoundError('No *_mean.csv file found under ' + report_repository_dir)

# Display labels applied, by position, to the first ten data columns.
# NOTE(review): "Total area" is labelled in µm while the other area labels
# use µm^2 - confirm the intended unit.
metric_labels = [
    "$\\text{Aggregates ratio (area eaten/cell count) } (µm^2)$",
    "$\\text{Aggregates ratio (area eaten/cell area)}$",
    "$\\text{Aggregates count } (n)$",
    "$\\text{Aggregates total area } (µm^2)$",
    "$\\text{Mean speed tracking } (µm/min)$",
    "$\\text{Mean area tracking } (µm^2)$",
    "$\\text{Mean cell area } (µm^2)$",
    "$\\text{Total area } (µm)$",
    "$\\text{Cell count } (n)$",
    "$\\text{Total movement tracking } (µm)$",
]

ftd_wt_list = []
for path, genotype_label in labelled_paths:
    # Reading the csv file
    csv_df = pd.read_csv(path)

    # Retrieving the column names, without the first column, plus the label column
    col_names = list(csv_df.keys())[1:]
    col_names.append('Genotype')

    # One row per file: the mean of every column, dropping the first column's value
    tmp_data_list = list(csv_df.mean().values)[1:]
    tmp_data_list.append(genotype_label)

    ftd_wt_list.append(tmp_data_list)

# The labels are positional: with fewer data columns than labels they would
# overwrite the 'Genotype' column name or run past the end of the list.
if len(col_names) - 1 < len(metric_labels):
    raise ValueError(
        'Expected at least ' + str(len(metric_labels)) + ' data columns, found ' + str(len(col_names) - 1)
    )
col_names[:len(metric_labels)] = metric_labels

# One row per acquisition file, one column per metric, plus the genotype label
final_data = pd.DataFrame(data=ftd_wt_list,columns=col_names)
final_data

Unnamed: 0,$\text{Aggregates ratio (area eaten/cell count) } (µm^2)$,$\text{Aggregates ratio (area eaten/cell area)}$,$\text{Aggregates count } (n)$,$\text{Aggregates total area } (µm^2)$,$\text{Mean speed tracking } (µm/min)$,$\text{Mean area tracking } (µm^2)$,$\text{Mean cell area } (µm^2)$,$\text{Total area } (µm)$,$\text{Cell count } (n)$,$\text{Total movement tracking } (µm)$,Genotype
0,6.141787,0.010375,71.064356,170.900605,0.823453,766.203091,593.414058,16306.854807,28.027503,44.221151,FTD mutant
1,5.333506,0.010862,79.325083,126.514391,0.64525,651.028463,491.132621,11760.605293,24.550605,31.295564,FTD mutant
2,4.376694,0.008529,58.478548,158.330904,1.132272,717.905953,519.555495,18420.338319,36.066007,82.069016,FTD mutant
3,5.425472,0.009464,88.58494,212.812781,0.823481,774.466343,568.567576,22062.931476,39.916623,67.541643,FTD mutant
4,5.447428,0.011084,99.658663,162.898002,0.750205,662.917271,492.080582,14327.572964,29.239604,43.618391,FTD mutant
5,2.795431,0.007212,95.776238,71.738175,0.600774,521.075684,390.83959,10004.253471,25.909406,31.627279,WT
6,2.944248,0.008742,131.861634,138.829017,0.828722,479.58663,338.363548,15310.816155,46.182673,81.083953,WT
7,3.688197,0.008613,80.105999,140.656284,0.661341,589.065601,436.073171,16228.965022,37.578334,51.182455,WT
8,3.776721,0.00957,112.079562,139.505335,0.845332,553.607247,401.070466,14292.495849,37.243281,69.178032,WT
9,3.562611,0.008384,57.598267,112.017324,0.768644,573.652624,412.546825,13562.957313,33.50198,53.496401,WT


In [7]:
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import os

# Folder receiving the exported box plots
os.makedirs('./final_results', exist_ok= True)

col_names = list(final_data.keys())

# One box plot per column of final_data except the last one; the traces are
# split on the 'Genotype' column. Files are named after the column position.
for key, metric in enumerate(col_names[:-1]):
    fig = go.Figure(layout=go.Layout(autosize=False, width=400, height=500))

    for geno in ['WT', 'FTD mutant']:
        color = 'rgb(7,40,30)' if geno == 'WT' else '#3D9970'

        # Values of this metric for the rows of the current genotype
        geno_rows = final_data[final_data['Genotype'] == geno]
        geno_values = geno_rows[metric]

        fig.add_trace(go.Box(
            y=geno_values,
            name=geno,
            jitter=0.3,
            pointpos=-1.8,
            boxpoints='all',  # draw every data point next to its box
            marker_color=color,
            line_color=color
        ))

    # Helper from utils, given the trace pair [0, 1]
    # (presumably annotates the p-value between the two boxes - confirm)
    fig = add_p_value_annotation(fig, [[0,1]])

    fig.update_xaxes(tickangle=0)
    fig.update_layout(
        xaxis_title="$\\text{Genotypes}$",
        yaxis_title=metric,
        showlegend=False,
    )

    fig.show()
    fig.write_image('./final_results/'+str(key)+'.pdf')