In [None]:
import pandas as pd
import numpy as np
from BeyondBlooms2024.config import name_dict
from BeyondBlooms2024.config_file import (ABUNDANCES_FILE, CCMN_CON_MAP_PATH, CON_LOUVAIN_META_PATH,CON_LOUVAIN_NETWORK_PATH, ENRICH
,NUM_PERMUTATIONS, NUM_SAMPLES, NUM_CORES, METADATA_FILE, PRUNED_PVAL_CCMN_PATH,PVAL_CCMN_PATH,ENRICHED_META_PATH, RANDOM_PVAL_CCMN_PATH)

In [None]:
# Read the taxa table and keep the entries of its "Nodes" column; they are
# used further down as column labels of the abundance table.
df_taxa = pd.read_csv(
    ENRICH,
    sep=",",
    engine="python",
)
selected_asv = df_taxa["Nodes"].to_list()

In [None]:
# Show which columns the taxa table provides (cell output only, no state change).
df_taxa.columns

In [None]:
def get_dict(col="LouvainID"):
    """Map every ``Nodes`` entry of the global ``df_taxa`` to its value in ``col``.

    Parameters
    ----------
    col : str, default "LouvainID"
        Name of the ``df_taxa`` column whose values become the dict values.

    Returns
    -------
    dict
        ``{node: df_taxa[col] value for that node}``.

    Raises
    ------
    KeyError
        If ``col`` (or ``"Nodes"``) is not a column of ``df_taxa``.
    """
    # Index the frame and pick the column in one non-mutating expression; this
    # avoids calling set_index(inplace=True) on a temporary column slice and
    # converting a whole frame to a nested dict just to read one column.
    return df_taxa.set_index("Nodes")[col].to_dict()

In [None]:
# Load abundance table f4 and total the abundance of every selected ASV.
df_ab_f4 = pd.read_csv(ABUNDANCES_FILE, sep=";", index_col=0)
print(df_ab_f4)
print(df_ab_f4.shape)
# Parse the original index as datetimes and use it as the new index, named "time".
df_ab_f4 = df_ab_f4.assign(time=pd.to_datetime(df_ab_f4.index)).set_index("time")
# Column-wise sum over all rows, restricted to the selected ASV columns.
df_f4_4abu = df_ab_f4.loc[:, selected_asv].sum()
df_f4_4abu

In [None]:
# Turn the per-ASV totals (an unnamed Series) into a one-column frame; the
# column is labelled 0 because the Series carries no name.
df_abu = df_f4_4abu.to_frame()

In [None]:
# Give the default-labelled column 0 a descriptive name.
df_abu = df_abu.rename(columns={0: "raw_abundance"})

In [None]:
# Share of each ASV in the summed raw abundance of all selected ASVs, in percent.
df_abu["raw_procentual_abundance"] = 100 * df_abu["raw_abundance"] / df_abu["raw_abundance"].sum()
df_abu["ASV"] = df_abu.index

# Build the ASV -> cluster-name lookup once instead of once per row.
# Plain dict indexing is kept so an ASV without a cluster still raises KeyError.
asv_to_cluster = get_dict(col="cluster_names")
df_abu["Clu"] = df_abu["ASV"].map(lambda asv: asv_to_cluster[asv])

# Total raw abundance of the cluster each ASV belongs to, computed in a single
# grouped pass rather than re-filtering the whole frame for every row.
# NOTE(review): rows whose cluster label is missing (NaN) get NaN here, whereas
# the per-row filter produced 0 for them — confirm no such rows are expected.
df_abu["raw_clu_abundance"] = df_abu.groupby("Clu")["raw_abundance"].transform("sum")

# Share of each ASV within its own cluster, in percent.
df_abu["raw_procentual_clu_abundance"] = 100 * df_abu["raw_abundance"] / df_abu["raw_clu_abundance"]

In [None]:
# Preview the first 20 rows of the per-ASV abundance table.
print(df_abu.head(20))

In [None]:
# Stack the two Arctic projection tables (files "final03" and "final10") row-wise
# and write the combined table to disk; shapes are printed as a sanity check.
df_pro_arctic3 = pd.read_csv(
    "tables/Taxatable_StableStates_withArcticENV_final03.csv", sep=";"
)
print(df_pro_arctic3.shape)
df_pro_arctic10 = pd.read_csv(
    "tables/Taxatable_StableStates_withArcticENV_final10.csv", sep=";"
)
print(df_pro_arctic10.shape)
df_pro_arctic = pd.concat([df_pro_arctic3, df_pro_arctic10])
print(df_pro_arctic.shape)
df_pro_arctic.to_csv(
    "tables/Taxatable_StableState_Arctic.csv", sep=";", index=False
)

In [None]:
# Stack the two Atlantic projection tables (files "final_03" and "final_10")
# row-wise and write the combined table to disk; shapes are printed as a sanity check.
df_pro_Atlantic3 = pd.read_csv(
    "tables/Taxatable_StableStates_withAtlanticENV_final_03.csv", sep=";"
)
print(df_pro_Atlantic3.shape)
df_pro_Atlantic10 = pd.read_csv(
    "tables/Taxatable_StableStates_withAtlanticENV_final_10.csv", sep=";"
)
print(df_pro_Atlantic10.shape)
df_pro_Atlantic = pd.concat([df_pro_Atlantic3, df_pro_Atlantic10])
print(df_pro_Atlantic.shape)
df_pro_Atlantic.to_csv(
    "tables/Taxatable_StableState_Atlantic.csv", sep=";", index=False
)

In [None]:
# Lookup from a cluster id (given as a string, "1" .. "10") to its display label:
# the zero-padded id followed by a two-letter suffix (TA, TS, HS or LW).
# NOTE(review): the meaning of the suffixes is not stated in this notebook.
cluster_id_dict = {
    "8": "08TS",
    "9": "09HS",
    "1": "01TA",
    "6": "06TS",
    "10": "10HS",
    "5": "05LW",
    "3": "03LW",
    "4": "04LW",
    "2": "02TA",
    "7": "07TS",
}

In [None]:
# Export, for every dataset, the stable-state table joined with the abundance
# figures: one full-precision CSV plus a rounded short table as LaTeX and CSV.
pathes_dict = {
    "NonProjection": "tables/Taxatable_StableState.csv",
    "Projection_Arctic": "tables/Taxatable_StableState_Arctic.csv",
    "Projection_Atlantic": "tables/Taxatable_StableState_Atlantic.csv",
}
csv_name_dict = {
    "NonProjection": "Main_Table_2_NonProjection",
    "Projection_Arctic": "Main_Sup_4_Projection_Arctic",
    "Projection_Atlantic": "Main_Sup_3_Projection_Atlantic",
}

# The CCM edge list is the same for every dataset, so read it once up front.
# Every ASV that appears as either endpoint ("from" / "to") is flagged with 1.
df_ccm = pd.read_csv(PRUNED_PVAL_CCMN_PATH, sep=";")
ccm_asv_list = np.unique(df_ccm["from"].to_list() + df_ccm["to"].to_list()).tolist()
ccm_dict = {asv: 1 for asv in ccm_asv_list}

# Columns rounded to 4 decimals for the short (publication) table.
rounded_columns = ["raw_abundance", "raw_procentual_abundance", "raw_procentual_clu_abundance"]
short_table_columns = [
    "Genus",
    "Species",
    "cluster",
    "raw_abundance",
    "raw_procentual_abundance",
    "raw_procentual_clu_abundance",
    "CCM_Significance",
]

# Attention: cluster ids in the stable-state tables are translated to labels
# such as 01TA via cluster_id_dict; an id missing from it raises KeyError.
for ent in pathes_dict:
    df_ss = pd.read_csv(pathes_dict[ent], sep=";")
    df_ss["cluster"] = df_ss["cluster"].apply(lambda cid: cluster_id_dict[str(cid)])

    # Inner join: only ASVs present in both the stable-state and abundance tables.
    df_all = pd.merge(df_ss, df_abu, on="ASV", how="inner").drop(columns=["Clu"])

    # Assign the filled column back explicitly. Calling fillna(inplace=True) on
    # df_all["CCM_Significance"] acts on a temporary selection and does not
    # update df_all under pandas Copy-on-Write, leaving NaN in the output.
    df_all["CCM_Significance"] = df_all["ASV"].map(ccm_dict).fillna(0)
    df_all = df_all.set_index("ASV")

    # The biomass CSV is written before rounding, i.e. with full precision.
    df_all.to_csv(f"tables/{csv_name_dict[ent]}_stablestate_biomass.csv", sep=";")

    df_all[rounded_columns] = df_all[rounded_columns].round(4)
    df_all_short_ = df_all[short_table_columns]
    df_all_short_new = df_all_short_.sort_values(by=["cluster", "raw_abundance"], ascending=False)
    df_all_short_new.to_latex(f"tables/{csv_name_dict[ent]}_stablestate_latex_table_short.txt")
    df_all_short_new.to_csv(f"tables/{csv_name_dict[ent]}_stablestate_latex_table_short.csv")
    print(df_all_short_new.head())