# Foreword

Assembling supplementary tables:

- S3: raw measurement values for all samples
- S4: aggregated DEA results from all comparisons

# Imports and definitions

In [1]:
import os 
import sys
from pathlib import Path

import pandas as pd
import numpy as np
from dotmap import DotMap



In [2]:
import matplotlib as mpl 
import matplotlib.pyplot as plt
import seaborn as sns

# Load

In [3]:
# Single container for everything loaded in this notebook; later cells assign
# nested entries directly (e.g. DATA['raw']['ivv']['renamer'], DATA['DEA'][...]).
DATA = DotMap()

## Params

In [4]:
# Comparison key -> label with the tissue naming used for the exported tables:
# 'ICM' becomes 'Blastocyst-Polar', 'TE' becomes 'Blastocyst-Mural', while the
# ESC / TSC keys are kept unchanged.
condition_renaming = dict(
    mTORi_ESC='mTORi_ESC',
    mTORi_ICM='mTORi_Blastocyst-Polar',
    diapaused_ICM='diapaused_Blastocyst-Polar',
    mTORi_TSC='mTORi_TSC',
    mTORi_TE='mTORi_Blastocyst-Mural',
    diapaused_TE='diapaused_Blastocyst-Mural',
)

## Raw

In [6]:
path = "../../miRNA_ivv_ivt_DEA/datasets/metadata.tsv"
# NOTE(review): with header=None the file's own header line ends up as data
# row 0 (see the displayed frame); the frame is only shown here for inspection.
samples_codes_and_details = pd.read_csv(path, sep="\t", index_col=None, header=None)

display(samples_codes_and_details)

# The sample codes listed in that file follow a fixed pattern, so the
# code -> readable-name maps are generated from it instead of typed by hand.
# Insertion order matters: it later defines the column order of the count tables.

# In vivo codes: <condition letter><tissue letter><replicate>, e.g. 'di1'
# -> 'Blastocyst-Polar_diapaused_rep1'. WT samples only have two replicates.
ivv_conditions = [('d', 'diapaused', 3), ('m', 'mTORi', 3), ('w', 'WT', 2)]
ivv_tissues = [('i', 'Blastocyst-Polar'), ('t', 'Blastocyst-Mural')]
ivv_codes_to_name = {
    f"{cond_code}{tissue_code}{rep}": f"{tissue}_{cond}_rep{rep}"
    for cond_code, cond, n_reps in ivv_conditions
    for tissue_code, tissue in ivv_tissues
    for rep in range(1, n_reps + 1)
}

# In vitro codes: <cell-type letter><condition letter><replicate>, e.g. 'tp1'
# -> 'TSC_mTORi_rep1'. Every group has three replicates.
ivt_celltypes = [('t', 'TSC'), ('e', 'ESC')]
ivt_conditions = [('p', 'mTORi'), ('w', 'WT')]
ivt_codes_to_name = {
    f"{cell_code}{cond_code}{rep}": f"{celltype}_{cond}_rep{rep}"
    for cell_code, celltype in ivt_celltypes
    for cond_code, cond in ivt_conditions
    for rep in range(1, 4)
}

DATA['raw']['ivv']['renamer'] = ivv_codes_to_name
DATA['raw']['ivt']['renamer'] = ivt_codes_to_name

Unnamed: 0,0,1,2,3,4,5,6,7
0,index,id,sample,celltype,condition,replicate,experiment,cond_ct
1,0,tw1,wtTS1,TS,wt,R1,ivt,wt_TS
2,1,tp1,pausedTS1,TS,paused,R1,ivt,paused_TS
3,2,tw2,wtTS2,TS,wt,R2,ivt,wt_TS
4,3,tp2,pausedTS2,TS,paused,R2,ivt,paused_TS
5,4,tw3,wtTS3,TS,wt,R3,ivt,wt_TS
6,5,tp3,pausedTS3,TS,paused,R3,ivt,paused_TS
7,6,ew1,wtE141,E14,wt,R1,ivt,wt_E14
8,7,ep1,pausedE141,E14,paused,R1,ivt,paused_E14
9,8,ew2,wtE142,E14,wt,R2,ivt,wt_E14


In [8]:
# Read the full raw-count matrix once, then split it into the in vivo and
# in vitro sample sets using the sample codes registered in the renamers.
path = "../../miRNA_ivv_ivt_DEA/datasets/raw_counts_noDup_FULL.tsv"
full_table_counts = pd.read_csv(path, sep="\t", header=0, index_col=None)

# '#miRNA' (the identifier column) is kept first, followed by the sample codes
# in renamer order.
ivt_columns = ['#miRNA'] + list(DATA['raw']['ivt']['renamer'].keys())
ivv_columns = ['#miRNA'] + list(DATA['raw']['ivv']['renamer'].keys())

# .copy() so that later edits to the subsets never touch the full table.
ivt_table_counts = full_table_counts.loc[:, ivt_columns].copy()
ivv_table_counts = full_table_counts.loc[:, ivv_columns].copy()

DATA['raw']['ivv']['cts'] = ivv_table_counts
DATA['raw']['ivt']['cts'] = ivt_table_counts


In [9]:
# Preview the first three rows of the in vivo raw-count table.
DATA['raw']['ivv']['cts'].head(3)

Unnamed: 0,#miRNA,di1,di2,di3,dt1,dt2,dt3,mi1,mi2,mi3,mt1,mt2,mt3,wi1,wi2,wt1,wt2
0,mmu-let-7a-1-3p,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,mmu-let-7a-2-3p,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,mmu-let-7b-5p,71,32,55,35,171,49,18,14,97,11,20,31,10,24,13,28


## DEA

In [10]:
# Comparison key -> differential-expression table ("FC_shrink" results) to load.
dea_paths_esc_icm = {
    'mTORi_ESC': "../../miRNA_dea_transfered/results/ivt_samples/noThreshold/results/differential-expression-tables/paused_E14_VS_wt_E14_FC_shrink.tsv",
    'mTORi_ICM': "../../miRNA_dea_transfered/results/ivv_samples/noThreshold/results/differential-expression-tables/mTor_ICM_VS_wt_ICM_FC_shrink.tsv",
    'diapaused_ICM': "../../miRNA_dea_transfered/results/ivv_samples/noThreshold/results/differential-expression-tables/diapaused_ICM_VS_wt_ICM_FC_shrink.tsv",
}

# Load each table (first column used as index) and register it under its key.
dea_loaded_esc_icm = {}
for comparison_key, path in dea_paths_esc_icm.items():
    dea_loaded_esc_icm[comparison_key] = pd.read_csv(path, sep="\t", header=0, index_col=0)
    DATA['DEA'][comparison_key] = dea_loaded_esc_icm[comparison_key]

# Keep the per-table variable names available, as before.
tmp_dea_es = dea_loaded_esc_icm['mTORi_ESC']
tmp_dea_icm_mtori = dea_loaded_esc_icm['mTORi_ICM']
tmp_dea_icm_diapaused = dea_loaded_esc_icm['diapaused_ICM']

In [11]:
# Comparison key -> differential-expression table ("FC_shrink" results) to load.
# Driving the loading from this table guarantees that every key receives the
# frame read from its own file. The previous copy-pasted version stored the
# TSC table (tmp_dea_ts) under 'mTORi_TE', so the mTORi TE results were never
# registered and the TSC results were exported in their place.
dea_paths_tsc_te = {
    'mTORi_TSC': "../../miRNA_dea_transfered/results/ivt_samples/noThreshold/results/differential-expression-tables/paused_TS_VS_wt_TS_FC_shrink.tsv",
    'mTORi_TE': "../../miRNA_dea_transfered/results/ivv_samples/noThreshold/results/differential-expression-tables/mTor_TE_VS_wt_TE_FC_shrink.tsv",
    'diapaused_TE': "../../miRNA_dea_transfered/results/ivv_samples/noThreshold/results/differential-expression-tables/diapaused_TE_VS_wt_TE_FC_shrink.tsv",
}

# Load each table (first column used as index) and register it under its key.
dea_loaded_tsc_te = {}
for comparison_key, path in dea_paths_tsc_te.items():
    dea_loaded_tsc_te[comparison_key] = pd.read_csv(path, header=0, index_col=0, sep="\t")
    DATA['DEA'][comparison_key] = dea_loaded_tsc_te[comparison_key]

# Keep the per-table variable names available, as before.
tmp_dea_ts = dea_loaded_tsc_te['mTORi_TSC']
tmp_dea_te_mtori = dea_loaded_tsc_te['mTORi_TE']
tmp_dea_te_diapaused = dea_loaded_tsc_te['diapaused_TE']

## clusterings

In [30]:
# Added on the 2023-10-16 : the clusters have been re-indexed so that the
# cluster index reflects the cluster size.
# Clustering name -> TSV file holding the 'old_index' -> 'new_index' mapping.
reindex_files = {
    'IVV-IVT_E14-ICM': "./ICM_ES_CLUSTER_REINDEX_ON_SIZE.tsv",
    'IVV-IVT_TS-TE': "./TS_TE_CLUSTER_REINDEX_ON_SIZE.tsv",
    'IVV': "./IVV_CLUSTER_REINDEX_ON_SIZE.tsv",
}

# reindex_clusters[clustering name] is a plain dict {old cluster index: new cluster index}.
reindex_clusters = {}
for clustering_name, path in reindex_files.items():
    reindex_table = pd.read_csv(path, sep="\t")
    old_to_new = reindex_table.set_index('old_index')['new_index']
    reindex_clusters[clustering_name] = old_to_new.to_dict()

In [35]:
parent_dir = Path("../../miRNA_ivv_ivt_INTEGRATION/results/RNAseq-DEA_IVV-IVT_clustering/latest/")

# One sub-directory per clustering experiment.
experiments = [
    'IVV-IVT_E14-ICM',
    'IVV-IVT_TS-TE',
    'IVV',
]

fname_table_clusters = "table_w_annotated_clusters.raw_logFC.tsv"
fname_summary_clusters = "summary_table.raw_logFC.tsv"

# Key under DATA['clusters'][experiment] -> file name inside the experiment directory.
cluster_files = {
    'table_clusters': fname_table_clusters,
    'summary_clusters': fname_summary_clusters,
}

for experiment in experiments:
    for table_name, fname in cluster_files.items():
        path = os.path.join(parent_dir, experiment, fname)
        DATA['clusters'][experiment][table_name] = (
            pd.read_csv(path, header=0, index_col=0, sep="\t")
            # turn the index read from the first column back into a regular column
            .reset_index()
            # switch to the size-based cluster numbering of this experiment
            .assign(cluster=lambda table: table['cluster'].map(reindex_clusters[experiment]))
            .sort_values(by='cluster')
        )

In [36]:
# Preview the first rows of both tables of the 'IVV' clustering.
for cluster_table in ('table_clusters', 'summary_clusters'):
    display(DATA['clusters']['IVV'][cluster_table].head(3))

Unnamed: 0,index,diapaused_ICM,mTor_ICM,diapaused_TE,mTor_TE,cluster
57,mmu-miR-148a-3p,0.838747,1.161704,1.401338,1.89852,0
2,mmu-let-7d-5p,1.994476,0.39209,2.430239,1.615721,0
15,mmu-miR-10b-5p,1.877927,2.811418,0.788374,1.965641,0


Unnamed: 0,cluster,mean.diapaused_ICM,mean.mTor_ICM,mean.diapaused_TE,mean.mTor_TE,mean_profile,N_tot
2,0,1.436497,1.964691,1.466011,2.0187,positive,10
8,1,-0.345574,1.934089,-0.662244,0.751572,discordant,11
7,2,-1.282754,-1.184942,-1.704106,-0.637479,negative,13


# Assemble tables

## Supp table S3 : raw counts

In [89]:
def _with_sample_names(counts, code_to_name):
    """Return a copy of ``counts`` with sample-code columns renamed.

    Columns found in ``code_to_name`` are replaced by their mapped name; any
    other column (e.g. the miRNA identifier column) keeps its label.
    """
    renamed = counts.copy()
    renamed.columns = [code_to_name.get(column, column) for column in renamed.columns]
    return renamed


# Tables to export, together with the sheet name each one is written under.
tmp_ivv = _with_sample_names(DATA['raw']['ivv']['cts'], DATA['raw']['ivv']['renamer'])
ivv_name = 'counts_invivo'

tmp_ivt = _with_sample_names(DATA['raw']['ivt']['cts'], DATA['raw']['ivt']['renamer'])
ivt_name = 'counts_invitro'

In [23]:
# Switch to True to (re)write the S3 workbook; left off so that re-running the
# notebook does not overwrite the file.
EXPORT = False

output_rawcts = "./SupplementaryTable_S3_sRNAseq_miRNAs_counts.xlsx"

if EXPORT:
    print("Exporting")
    # One sheet per sample set, in vitro first; the row index is not written.
    sheets_rawcts = ((ivt_name, tmp_ivt), (ivv_name, tmp_ivv))
    with pd.ExcelWriter(output_rawcts, date_format=None, mode='w', engine="openpyxl") as writer:
        for sheet_name, counts_table in sheets_rawcts:
            counts_table.to_excel(writer, sheet_name=sheet_name, index=False)


## Supp table S4 : DEA tables

In [24]:
# List the clustering experiments registered under DATA['clusters'].
DATA['clusters'].keys()

odict_keys(['IVV-IVT_E14-ICM', 'IVV-IVT_TS-TE', 'IVV'])

In [None]:

# Same preview as the one shown right after loading the clusterings: first rows
# of both 'IVV' tables. This cell has no execution count in the saved notebook.
display(DATA['clusters']['IVV']['table_clusters'].head(3))
display(DATA['clusters']['IVV']['summary_clusters'].head(3))

In [39]:
# When True, (re)write the S4 workbook: one sheet per DEA comparison followed
# by two sheets (clustering + summary) per clustering experiment.
EXPORT = True

output_dea = "./SupplementaryTable_S4_sRNAseq_differential_expression_results.xlsx"

if EXPORT:
    # DEA comparisons and their sheet names, in vitro (IVT) then in vivo (IVV).
    ivt_comparisons = ['mTORi_ESC', 'mTORi_TSC']
    ivt_comparisons_full_names = {
        'mTORi_ESC': 'IVT_mTORi_ESC_VS_WT_ESC',
        'mTORi_TSC': 'IVT_mTORi_TSC_VS_WT_TSC',
    }

    ivv_comparisons = ['mTORi_ICM', 'mTORi_TE', 'diapaused_ICM', 'diapaused_TE']
    ivv_comparisons_full_names = {
        'mTORi_ICM': 'IVV_mTORi_Bcyst-Polar_VS_WT_Bcyst-Polar',
        'mTORi_TE': 'IVV_mTORi_Bcyst-Mural_VS_WT_Bcyst-Mural',
        'diapaused_ICM': 'IVV_dp_Bcyst-Polar_VS_WT_Bcyst-Polar',
        'diapaused_TE': 'IVV_dp_Bcyst-Mural_VS_WT_Bcyst-Mural',
    }

    # Sheet-name pieces for the clustering tables; the insertion order of these
    # two dicts is also the order in which the sheets are written.
    cluster_experiment_rename = {
        'IVV': 'IVV_Bcyst-both_pausing_DEA',
        'IVV-IVT_E14-ICM': 'Bcyst-Polar_ESC_pausing_DEA',
        'IVV-IVT_TS-TE': 'Bcyst-Mural_TSC_pausing_DEA',
    }

    table_cluster_rename = {
        'table_clusters':'clustering',
        'summary_clusters':'cluster_sum',
    }

    # NOTE(review): several sheet names used below are longer than 31 characters
    # (e.g. 'IVV_mTORi_Bcyst-Polar_VS_WT_Bcyst-Polar'), which is above Excel's
    # worksheet-name limit - confirm the exported workbook opens as expected.
    with pd.ExcelWriter(output_dea, date_format=None, mode='w', engine="openpyxl") as writer:

        dea_groups = (
            (ivt_comparisons, ivt_comparisons_full_names),
            (ivv_comparisons, ivv_comparisons_full_names),
        )
        for comparisons, full_names in dea_groups:
            for comparison in comparisons:
                print(comparison)
                # Missing values are written as 1.0 in every column of the table.
                # NOTE(review): presumably intended for p-value columns - confirm
                # that 1.0 is also the wanted filler for the other columns.
                tmp = DATA['DEA'][comparison].replace(np.nan, 1.0)
                tmp.to_excel(writer, sheet_name=full_names[comparison], index=True, header=True)

        for cluster_experiment, experiment_label in cluster_experiment_rename.items():
            for table, table_label in table_cluster_rename.items():
                name_sheet = "_".join([experiment_label, table_label])
                print(name_sheet)
                tmp = DATA['clusters'][cluster_experiment][table]
                tmp.to_excel(writer, sheet_name=name_sheet, index=False, header=True)



mTORi_ESC
mTORi_TSC
mTORi_ICM
mTORi_TE
diapaused_ICM
diapaused_TE
IVV_Bcyst-both_pausing_DEA_clustering
IVV_Bcyst-both_pausing_DEA_cluster_sum
Bcyst-Polar_ESC_pausing_DEA_clustering
Bcyst-Polar_ESC_pausing_DEA_cluster_sum
Bcyst-Mural_TSC_pausing_DEA_clustering
Bcyst-Mural_TSC_pausing_DEA_cluster_sum
