In [1]:
# Load (or reload, if already loaded) IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: re-import all modules before each cell execution, so edits to
# local .py files are picked up without restarting the kernel.
# NOTE(review): no project module is imported in the visible cells — confirm
# this is still needed.
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

## Set up

In [3]:
# Directory that holds the inference result files for the empirical trees.
inference_dir = "/workspace/deep_birth_death/MLE/inference_data/empiric"

# One CSV per inference tool.
treepar_file_path = inference_dir + "/TreePar_empiric_inference.csv"
dd_file_path = inference_dir + "/DDD_empiric_inference.csv"

# Diversification scenario labels.
div_scenarios = [
    "BD",
    "HE",
    "ME",
    "SAT",
    "SR",
    "WW",
]

# Candidate likelihood models. The model-selection cell indexes this list by
# the position of the smallest AIC, so the order must match the order in which
# the AIC values are collected there.
likelihood_model_names = [
    "cbd",
    "ME",
    "shift",
    "SAT",
]

## Read the inference data

In [4]:
# Load the TreePar and DDD inference tables.
df_tp = pd.read_csv(treepar_file_path)
df_dd = pd.read_csv(dd_file_path)

# Attach the DDD "AIC" column to the TreePar table. Rows are paired by index
# label, i.e. by row position for freshly read CSVs.
# NOTE(review): this assumes both files list the datasets in the same order —
# confirm, or merge on a shared key column if one exists in the DDD file.
df_merged = pd.merge(
    df_tp,
    df_dd[["AIC"]],
    how="inner",
    left_index=True,
    right_index=True,
    suffixes=("_tp", "_dd"),
)

## Evaluate the AIC

In [6]:
# Value of "estimated_a" above which a winning "cbd" fit is labelled "HE"
# instead of "BD".
# NOTE(review): threshold carried over from the original code — confirm its source.
HE_TURNOVER_THRESHOLD = 0.8

# Datasets for which the DDD fit is left out of the comparison
# (the original note says the DDD likelihood is 0 for conifers).
DDD_EXCLUDED_NAMES = {"conifers"}


def select_scenario(row, model_names):
    """Pick the diversification scenario for one dataset by lowest AIC.

    Parameters
    ----------
    row : pandas.Series
        One row of the merged inference table. Reads "name", "aic_cbd",
        "aic_me", "aic_shift" and "AIC", plus "estimated_a"/"estimated_r"
        or "estimated_r0"/"estimated_r1" depending on the winning model.
    model_names : sequence of str
        Model labels, in the same order as the AIC columns listed above.

    Returns
    -------
    tuple
        (scenario label, smallest AIC value). The label is "unknown" when the
        winning model's parameter estimates match none of the handled cases.
    """
    aic_columns = ["aic_cbd", "aic_me", "aic_shift", "AIC"]
    if row["name"] in DDD_EXCLUDED_NAMES:
        # Drop the DDD entry (last column) for excluded datasets.
        aic_columns = aic_columns[:-1]

    aic_values = np.array([row[column] for column in aic_columns])
    best_model = model_names[np.argmin(aic_values)]

    if best_model == "cbd":
        # A constant-rate fit is split into "BD" / "HE" by the estimated_a value.
        estimated_a = row["estimated_a"]
        estimated_r = row["estimated_r"]
        if estimated_r < 0 or estimated_a < 0:
            best_model = "unknown"
        elif estimated_a > HE_TURNOVER_THRESHOLD:
            best_model = "HE"
        else:
            best_model = "BD"
    elif best_model == "shift":
        # A rate-shift fit is split into "WW" / "SR" by the signs of r0 and r1.
        estimated_r0 = row["estimated_r0"]
        estimated_r1 = row["estimated_r1"]
        if estimated_r0 < 0 and estimated_r1 > 0:
            best_model = "WW"
        elif estimated_r0 > 0 and estimated_r1 > 0:
            best_model = "SR"
        else:
            # Same fallback label as the "cbd" branch (was misspelled "unknow").
            best_model = "unknown"

    return best_model, np.min(aic_values)


for i, row in df_merged.iterrows():
    name = row["name"]
    best_model, best_aic = select_scenario(row, likelihood_model_names)

    print("The selected diversification scenario for " + name +
          " using model selection is : " + best_model + "\n" +
          "AIC value = " + str(best_aic))

[2809.84369842 2771.74634978 2762.52061446 4296.24246007]
The selected diversification scenario for eucalypts using model selection is : WW
AIC value = 2762.52061445991
[3589.71642912 3575.04720436 3573.52898215]
The selected diversification scenario for conifers using model selection is : SR
AIC value = 3573.52898214668
[557.61752687 559.49650456 556.45143832 591.65509601]
The selected diversification scenario for cetaceans using model selection is : SR
AIC value = 556.451438320556


## Estimate the parameters

In [50]:
# Print the estimated_r0 / r1 / a0 / a1 / t values for every dataset.
# The original had one branch per dataset name, but all branches printed
# exactly the same fields, so a single code path produces the same output.
reported_params = ("r0", "r1", "a0", "a1", "t")

for i, row in df_merged.iterrows():
    name = row["name"]
    report_lines = [f"{name} estimated parameters:"]
    for param in reported_params:
        value = row["estimated_" + param]
        report_lines.append(f"{param}: {value}")
    print("\n".join(report_lines))

eucalypts estimated parameters:
r0: -0.261043125239362
r1: 0.0688499208827288
a0: 1.21808419387
a1: 0.962008797205983
t: 5.01415299999999
conifers estimated parameters:
r0: 0.0338362845288278
r1: 0.0125112725326837
a0: 0.763168329460883
a1: 0.703166320545217
t: 24.5506652242002
cetaceans estimated parameters:
r0: 0.0620341733585666
r1: 0.0802823874085986
a0: 0.0919229324451192
a1: 0.520238395964933
t: 2.78828300249999
