In [1]:
import pandas as pd
import numpy as np
from ete3 import Tree
from tqdm.auto import tqdm

In [2]:
# Lift the column cap so wide DataFrames are displayed without truncation.
pd.options.display.max_columns = None

In [3]:
def get_average_branch_length(tre):
    """
    Compute the mean of the ``dist`` attribute over all nodes of a tree.

    :param tre: str, .nwk tree (anything accepted by ``Tree(...)``) whose
        branch lengths are averaged
    :return: float, average branch length
    """
    parsed_tree = Tree(tre)

    # NOTE(review): traverse() visits every node, so the root's own `dist`
    # is part of the average as well -- confirm this is intended.
    distances = []
    for node in parsed_tree.traverse():
        distances.append(node.dist)

    return np.average(distances)

In [4]:
# Input CSVs to rescale: one file per file-name prefix, all in the same
# directory and sharing the same suffix.
_sim_dir = "/workspace/coniferas/simulations/simulations_no_fossil/492/"
_file_suffix = "_sim_no_fossil1e+05.csv"
_file_prefixes = ("BD", "HE", "SAT", "SR", "ME_rho")

files = [_sim_dir + prefix + _file_suffix for prefix in _file_prefixes]

In [5]:
# Columns that receive rescaled counterparts. The order here is the order in
# which the new columns are appended to the output CSV.
rescaled_columns = ["r0", "r1", "a0", "a1", "time", "frac0", "frac1",
                    "mu0", "mu1", "lambda0", "lambda1"]

pbar = tqdm(files)

for file in pbar:
    pbar.set_description(file, refresh=True)

    # Read the CSV belonging to *this* iteration. (Previously `files[0]` was
    # read every time, so every output was derived from the first input.)
    df = pd.read_csv(file, sep="|")

    # Rescale factor: average branch length of each row's tree.
    df["resc_factor"] = df["tree"].apply(get_average_branch_length)

    # Derive mu and lambda from a and r: mu = a*r / (1 - a), lambda = r + mu.
    for idx in ("0", "1"):
        df["mu" + idx] = (df["a" + idx] * df["r" + idx]) / (1 - df["a" + idx])
    for idx in ("0", "1"):
        df["lambda" + idx] = df["r" + idx] + df["mu" + idx]

    # Rescaled columns, 1st method: multiply by the rescale factor.
    for col in rescaled_columns:
        df["norm_" + col] = df[col] * df["resc_factor"]

    # Rescaled columns, 2nd method: divide by the rescale factor.
    for col in rescaled_columns:
        df["norm_1_" + col] = df[col] / df["resc_factor"]

    # Save next to the input: "<name>.csv" -> "<name>_rescale.csv".
    df.to_csv(file[:-4] + "_rescale.csv", sep='|', header=True, index=False)

  0%|          | 0/5 [00:00<?, ?it/s]

KeyboardInterrupt: 