In [8]:
import pandas as pd
import numpy as np
from ete3 import Tree


from tqdm.auto import tqdm

In [9]:
# Lift the column cap so wide DataFrames are displayed with every column.
pd.options.display.max_columns = None

In [10]:
def get_average_branch_length(tre):
    """
    Compute the mean ``dist`` value over the nodes of a tree.

    The input is handed to ``ete3.Tree`` and every node yielded by
    ``traverse()`` contributes its ``dist`` attribute to the mean.
    NOTE(review): traverse() presumably yields the root node as well, so the
    root's ``dist`` would be part of the mean -- confirm this is intended.
    :param tre: str, .nwk tree, forwarded unchanged to ``Tree(...)``
    :return: float, average of the collected ``dist`` values
    """
    parsed = Tree(tre)

    distances = []
    for node in parsed.traverse():
        distances.append(node.dist)

    return np.average(distances)

In [13]:
# Folder that holds one sub-folder per simulated dataset.
SIM_ROOT = "/workspace/deep_birth_death/simulations/testing_k_sims"

# Dataset sub-folder to process. Sub-folders used with this notebook so far:
# "674_10k", "489_10k", "87_10k". Change this single value to switch dataset
# instead of commenting/uncommenting blocks of path lines.
DATASET = "489_10k"

# File-name prefixes of the simulation CSVs, in processing order.
SIM_PREFIXES = ["BD", "HE", "ME_rho_PGP", "SR", "WW", "SAT"]

# Kept as plain strings (not Path objects) because the processing loop slices
# the ".csv" suffix off each entry to build the output file name.
files = [
    "{}/{}/{}_sim_no_fossil10000.csv".format(SIM_ROOT, DATASET, prefix)
    for prefix in SIM_PREFIXES
]

In [14]:
# For every simulation CSV: read it, derive the per-tree rescale factor and the
# mu/lambda columns, add "norm_"-prefixed rescaled copies, and write the result
# next to the input with a "_rescale.csv" suffix.
pbar = tqdm(files)

for file in pbar:
    # Show which file is currently being processed on the progress bar.
    pbar.set_description(file, refresh=True)

    # Input files are pipe-separated.
    df = pd.read_csv(file, sep="|")

    # One rescale factor per row, computed from that row's tree.
    df["resc_factor"] = df["tree"].apply(get_average_branch_length)

    # mu = a * r / (1 - a) for both index suffixes; mu columns are created
    # before the lambda columns so the output column order stays the same.
    for suffix in ("0", "1"):
        a_col = df["a" + suffix]
        r_col = df["r" + suffix]
        df["mu" + suffix] = (a_col * r_col) / (1 - a_col)

    # lambda = r + mu
    for suffix in ("0", "1"):
        df["lambda" + suffix] = df["r" + suffix] + df["mu" + suffix]

    # Each listed column gets a "norm_<name>" copy multiplied by the factor.
    # NOTE(review): every column here, including a*, frac* and time, is scaled
    # by multiplication with the same factor -- confirm this is intended.
    for column in (
        "r0", "r1",
        "a0", "a1",
        "time",
        "frac0", "frac1",
        "mu0", "mu1",
        "lambda0", "lambda1",
    ):
        df["norm_" + column] = df[column] * df["resc_factor"]

    # Drop the 4-character extension and append the new suffix.
    out_path = file[:-4] + "_rescale.csv"
    df.to_csv(out_path, sep='|', header=True, index=False)

  0%|          | 0/6 [00:00<?, ?it/s]