In [None]:
#Analysis for comparing results of morphological data in BIN and MULTI representation
#Consider the LH of the tree found with BIN alignment under the MULTI alignment and model
#and the other way around

#Distances of best trees / consensus trees

#Correlations

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from plotly import graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from ete3 import Tree


In [None]:
def rf_distance(t1, t2):
    """Return the RF distance between ``t1`` and ``t2`` divided by its maximum.

    ``t1.robinson_foulds(t2, unrooted_trees=True)`` is expected to return seven
    values, of which only the first two (distance and maximal distance) are used.
    When the maximal distance is 0, "?!" is printed and 0 is returned instead of
    dividing by zero.
    """
    (distance, max_distance, _common_leaves,
     _parts_first, _parts_second,
     _discarded_first, _discarded_second) = t1.robinson_foulds(t2, unrooted_trees=True)
    if max_distance != 0:
        return distance / max_distance
    print("?!")
    return 0

def read_consensus_trees(model):
    """Load the consensus tree of every dataset directory belonging to ``model``.

    Parameters
    ----------
    model : str
        One of "BIN", "GTR" or "MK"; selects the directory that is scanned.

    Returns
    -------
    dict
        Maps ``<first dot-separated part of the directory name>.phy`` to a ``Tree``.
        If a dataset directory has no ``consense.raxml.consensusTreeMR`` file, a
        message is printed and an empty ``Tree()`` is stored for it.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names. (Previously the function
        printed the message and then crashed on an unbound local variable.)
    """
    model_dirs = {
        "BIN": "parquets/morph/BIN/",
        "GTR": "parquets/morph/MULTI/GTR/",
        "MK": "parquets/morph/MULTI/MK/",
    }
    if model not in model_dirs:
        raise ValueError(model + " does not exist!")
    d = model_dirs[model]

    consensus_trees = {}
    with os.scandir(d) as it:
        for entry in it:
            # Only sub-directories are treated as datasets.
            if not entry.is_dir():
                continue
            tree_path = os.path.join(d, entry.name, "consense.raxml.consensusTreeMR")
            if os.path.exists(tree_path):
                tree = Tree(tree_path)
            else:
                print("No consensus tree for " + model + " and " + entry.name)
                tree = Tree()
            #tree.resolve_polytomy(recursive=True)
            name = entry.name.split(".")[0] + ".phy"
            consensus_trees[name] = tree
    return consensus_trees

def rfd_consensus_trees(model1, model2):
    """Compute the normalised RF distance between the consensus trees of two models.

    Returns a dict keyed by the dataset names found for ``model1``. Datasets that
    have no consensus tree entry for ``model2`` get ``nan`` and a message is printed.
    """
    consensus_trees1 = read_consensus_trees(model1)
    consensus_trees2 = read_consensus_trees(model2)
    rfd_dict = {}
    # The loop variable must be ``tree1``: the original bound it as ``tree`` but
    # passed the undefined name ``tree1`` to rf_distance, raising NameError.
    for name, tree1 in consensus_trees1.items():
        if name in consensus_trees2:
            tree2 = consensus_trees2[name]
            rfd_dict[name] = rf_distance(tree1, tree2)
        else:
            rfd_dict[name] = float("nan")
            print("No consensus rf distance for " + name)
    return rfd_dict
    
    
def rfd_eval_trees(morph_data_multistate, morph_data_binarized):
    """Compute the normalised RF distance between each MULTI eval tree and its BIN counterpart.

    For every row of ``morph_data_multistate`` the first dot-separated part of
    ``MULTI_verbose_name`` is taken as the dataset stem; the matching binarized
    row is the first one whose ``BIN_verbose_name`` equals ``<stem>.BIN.phy``.
    Returns a dict mapping the stem to the distance.
    """
    distances = {}
    for _, multi_row in morph_data_multistate.iterrows():
        multi_tree = Tree(multi_row["MULTI_newick_eval"])
        stem = multi_row['MULTI_verbose_name'].split('.')[0]
        is_match = morph_data_binarized['BIN_verbose_name'] == stem + ".BIN.phy"
        matching_rows = morph_data_binarized.loc[is_match]
        bin_tree = Tree(matching_rows.iloc[0]["BIN_newick_eval"])
        distances[stem] = rf_distance(multi_tree, bin_tree)
    return distances

def plot_rfds(rfd_dict):
    """Show two histograms of the values in ``rfd_dict``: first with 100 bins, then with 30."""
    distances = list(rfd_dict.values())
    for bin_count in (100, 30):
        plt.xlabel("RF Distance")
        plt.ylabel("Number of datasets")
        plt.hist(distances, bins=bin_count)
        plt.show()

In [None]:
#Original model: Under this model a best tree was calculated with 100 tree searches
#Cross model: Now determine the likelihood of the best tree under this model

def _read_cross_rows(path):
    """Return the comma-split data rows of ``path``, skipping the header line and blank lines."""
    with open(path, "r") as in_file:
        next(in_file, None)  # first line is the header
        return [line.rstrip("\n").split(",") for line in in_file if line.strip()]


def cross_data_csv():
    """Combine the three pairwise likelihood files into ``temp/morph/lhs/all.csv``.

    Reads ``BIN_GTR.csv``, ``BIN_MK.csv`` and ``GTR_MK.csv`` from ``temp/morph/lhs/``
    and writes one row ``name,original_model,cross_model,lh`` per input row.

    * BIN_GTR / BIN_MK: if field 0 ends with ".BIN.phy" the row is written with
      cross model BIN and name taken from field 1; otherwise the cross model is the
      multistate model and the name is field 0.
    * GTR_MK: if field 2 ends with "GTR" the cross model is GTR, otherwise MK; the
      name is always field 0.

    The likelihood is always field 3. All files are closed on exit, and the last
    data row is kept even when an input file has no trailing newline.
    """
    lhs_dir = "temp/morph/lhs/"
    with open(lhs_dir + "all.csv", "w+") as out_file:
        out_file.write("name,original_model,cross_model,lh\n")

        for multi_model in ("GTR", "MK"):
            for data in _read_cross_rows(lhs_dir + "BIN_" + multi_model + ".csv"):
                if data[0].endswith(".BIN.phy"):
                    record = (data[1], multi_model, "BIN", data[3])
                else:
                    record = (data[0], "BIN", multi_model, data[3])
                out_file.write(",".join(record) + "\n")

        for data in _read_cross_rows(lhs_dir + "GTR_MK.csv"):
            if data[2].endswith("GTR"):
                record = (data[0], "MK", "GTR", data[3])
            else:
                record = (data[0], "GTR", "MK", data[3])
            out_file.write(",".join(record) + "\n")
        
# Regenerate temp/morph/lhs/all.csv, which add_cross_data reads.
cross_data_csv()

In [None]:
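#The column cross_llh_<tree_model> contains the likelihood, evaluated under eval_model, of the best tree found under tree_model.
#The column cross_diff_<tree_model> is that likelihood minus the llh_eval value of the same row.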
def add_cross_data(df, eval_model, tree_model):
    """Add cross-evaluation likelihood columns for ``tree_model`` to ``df``.

    Rows of ``temp/morph/lhs/all.csv`` with ``original_model == tree_model`` and
    ``cross_model == eval_model`` are looked up by ``verbose_name``. Two columns are
    written into ``df`` (which is modified in place and also returned):

    * ``cross_llh_<tree_model>``: the likelihood from the CSV,
    * ``cross_diff_<tree_model>``: that likelihood minus the row's ``llh_eval``.

    Names without a CSV entry get ``nan`` in both columns and a message is printed.
    """
    all_lhs = pd.read_csv("temp/morph/lhs/all.csv")
    wanted = (all_lhs["original_model"] == tree_model) & (all_lhs["cross_model"] == eval_model)
    lh_by_name = {
        record["name"]: float(record["lh"])
        for _, record in all_lhs[wanted].iterrows()
    }

    cross_lhs, diffs = [], []
    for _, record in df.iterrows():
        name = record['verbose_name']
        if name not in lh_by_name:
            print("For " + name + " no cross evaluation with original model " + tree_model + " and cross model " +eval_model)
            cross_lhs.append(float("nan"))
            diffs.append(float("nan"))
            continue
        cross_lh = lh_by_name[name]
        diffs.append(cross_lh - record["llh_eval"])
        cross_lhs.append(cross_lh)

    df["cross_llh_" + tree_model] = cross_lhs
    df["cross_diff_" + tree_model] = diffs
    return df


def merge_dfs(morph_data_bin, morph_data_gtr, morph_data_mk):
    """Inner-join the three per-model frames on ``verbose_name``.

    Every column except ``verbose_name`` is prefixed with ``BIN_``, ``GTR_`` or
    ``MK_``. The prefixing is done on copies, so the frames passed in are left
    untouched (the original overwrote their ``.columns`` in place, which made a
    second call double-prefix the columns).

    Rows in which any of the six ``<MODEL>_cross_llh_<OTHER>`` columns equals 1 are
    removed.
    NOTE(review): 1 is presumably a sentinel for a failed cross evaluation; missing
    evaluations are stored as NaN by add_cross_data and are NOT removed by this
    filter, confirm that this is intended.
    """
    prefixed = []
    for prefix, frame in (("BIN_", morph_data_bin),
                          ("GTR_", morph_data_gtr),
                          ("MK_", morph_data_mk)):
        prefixed.append(
            frame.add_prefix(prefix).rename(columns={prefix + "verbose_name": "verbose_name"})
        )
    bin_frame, gtr_frame, mk_frame = prefixed

    df = pd.merge(bin_frame, gtr_frame, on='verbose_name', how='inner')
    df = pd.merge(df, mk_frame, on='verbose_name', how='inner')

    keep = (
        (df["BIN_cross_llh_GTR"] != 1) & (df["BIN_cross_llh_MK"] != 1)
        & (df["GTR_cross_llh_BIN"] != 1) & (df["GTR_cross_llh_MK"] != 1)
        & (df["MK_cross_llh_BIN"] != 1) & (df["MK_cross_llh_GTR"] != 1)
    )
    return df[keep]


def add_rf_data(df):
    """Add pairwise normalised RF distances between the BIN, GTR and MK trees to ``df``.

    For each row, the consensus trees are looked up by ``verbose_name`` and the
    eval trees are parsed from the ``<MODEL>_newick_eval`` columns. Six columns are
    written (``consensus_dist_<A>_<B>`` and ``eval_dist_<A>_<B>`` for the pairs
    BIN/GTR, BIN/MK and GTR/MK). ``df`` is modified in place and returned. A
    ``KeyError`` is raised if a name has no consensus tree entry.
    """
    models = ("BIN", "GTR", "MK")
    pairs = (("BIN", "GTR"), ("BIN", "MK"), ("GTR", "MK"))

    consensus_by_model = {model: read_consensus_trees(model) for model in models}
    consensus_dists = {pair: [] for pair in pairs}
    eval_dists = {pair: [] for pair in pairs}

    for _, row in df.iterrows():
        name = row["verbose_name"]
        c_trees = {model: consensus_by_model[model][name] for model in models}
        e_trees = {model: Tree(row[model + "_newick_eval"]) for model in models}

        for first, second in pairs:
            consensus_dists[(first, second)].append(rf_distance(c_trees[first], c_trees[second]))
        for first, second in pairs:
            eval_dists[(first, second)].append(rf_distance(e_trees[first], e_trees[second]))

    for first, second in pairs:
        df["consensus_dist_" + first + "_" + second] = consensus_dists[(first, second)]
    for first, second in pairs:
        df["eval_dist_" + first + "_" + second] = eval_dists[(first, second)]

    return df


def get_score_from_string(s, score):
    """Extract one information-criterion value from a comma-separated string.

    ``s`` holds the values in the order AIC, cAIC, BIC. Returns the requested value
    as a float; for an unknown ``score`` name a message is printed and ``None`` is
    returned.
    """
    fields = s.split(",")
    position_of = {"AIC": 0, "cAIC": 1, "BIC": 2}
    if score in position_of:
        return float(fields[position_of[score]])
    print(score + " does not exist!")
    return None
    
def add_aic_scores(df, model, score):
    """Add a column named ``score`` with the information-criterion value of ``model``.

    ``temp/morph/aic.scores`` is read as ``|``-separated lines after a header line:
    field 0 is the dataset name (".phy" is appended to form the key), fields 1, 2
    and 3 hold the score strings for GTR, MK and BIN respectively, each parsed by
    ``get_score_from_string``.

    ``df`` is modified in place and returned. Datasets without an entry get the
    value 0 and a message is printed.
    NOTE(review): add_cross_data marks missing values with NaN whereas 0 is used
    here, confirm whether 0 is intended.

    For an unknown ``model`` a message is printed and ``None`` is returned.
    """
    field_of_model = {"GTR": 1, "MK": 2, "BIN": 3}
    if model not in field_of_model:
        print("Model " + model +  " does not exist")
        return None
    idx = field_of_model[model]

    score_dict = {}
    # ``with`` closes the file; iterating it keeps the last row even when the file
    # has no trailing newline (the former ``split("\n")[1:-1]`` dropped it).
    with open("temp/morph/aic.scores", "r") as score_file:
        next(score_file, None)  # skip the header line
        for line in score_file:
            line = line.rstrip("\n")
            if not line.strip():
                continue
            data = line.split('|')
            score_dict[data[0] + ".phy"] = get_score_from_string(data[idx], score)

    score_column = []
    for name in df["verbose_name"]:
        if name in score_dict:
            score_column.append(score_dict[name])
        else:
            score_column.append(0)
            print("No " + score + " score for model " + model + " and " + name)
    df[score] = score_column
    return df

def add_aic_differences(df, score):
    """Add the pairwise differences of the per-model ``score`` columns to ``df``.

    Writes ``<score>_GTR_minus_BIN``, ``<score>_GTR_minus_MK`` and
    ``<score>_MK_minus_BIN`` (in that order) and returns the modified ``df``.
    """
    for minuend, subtrahend in (("GTR", "BIN"), ("GTR", "MK"), ("MK", "BIN")):
        target = score + "_" + minuend + "_minus_" + subtrahend
        df[target] = df[minuend + "_" + score] - df[subtrahend + "_" + score]
    return df

def add_difficult_differences(df):
    """Add the pairwise differences of the per-model ``difficult`` columns to ``df``.

    Writes ``difficult_GTR_minus_BIN``, ``difficult_GTR_minus_MK`` and
    ``difficult_MK_minus_BIN`` (in that order) and returns the modified ``df``.
    """
    for minuend, subtrahend in (("GTR", "BIN"), ("GTR", "MK"), ("MK", "BIN")):
        target = "difficult_" + minuend + "_minus_" + subtrahend
        df[target] = df[minuend + "_difficult"] - df[subtrahend + "_difficult"]
    return df

def add_avg_col_states(df):
    """Add the ``avg_col_states`` column, read from ``temp/morph/avg_col_states.csv``.

    The file is comma-separated with a header line; field 0 is the dataset name
    (matched against ``verbose_name``) and field 1 the value, parsed as float.
    ``df`` is modified in place and returned.

    Raises ``KeyError`` if a ``verbose_name`` has no entry in the file.
    """
    avg_col_states = {}
    # ``with`` closes the file; iterating it keeps the last row even when the file
    # has no trailing newline (the former ``split("\n")[1:-1]`` dropped it).
    with open("temp/morph/avg_col_states.csv", "r") as states_file:
        next(states_file, None)  # skip the header line
        for line in states_file:
            line = line.rstrip("\n")
            if not line.strip():
                continue
            data = line.split(",")
            avg_col_states[data[0]] = float(data[1])
    df["avg_col_states"] = [avg_col_states[name] for name in df["verbose_name"]]
    return df
    
def get_df():
    """Build the merged analysis frame with one row per dataset.

    Loads the GTR, MK (``state_type == "multistate"`` group only) and binarized
    parquet files, normalises the binarized names to ``<stem>.phy``, adds the
    cross-evaluation and AIC/cAIC/BIC columns to each per-model frame, merges the
    three frames and finally adds the RF-distance and ``avg_col_states`` columns.
    """
    gtr_frame = pd.read_parquet("training_data/morph/MULTI_GTR.parquet")
    mk_all = pd.read_parquet("training_data/morph/full_MK.parquet")
    mk_frame = mk_all.groupby(mk_all.state_type).get_group("multistate")
    bin_frame = pd.read_parquet("training_data/morph/binarized.parquet")
    bin_frame['verbose_name'] = [
        verbose_name.split('.')[0] + '.phy' for verbose_name in bin_frame['verbose_name']
    ]

    frames = {"GTR": gtr_frame, "MK": mk_frame, "BIN": bin_frame}

    # Cross evaluation: each frame against the two other models.
    for eval_model in ("GTR", "MK", "BIN"):
        for tree_model in ("BIN", "GTR", "MK"):
            if tree_model == eval_model:
                continue
            frames[eval_model] = add_cross_data(frames[eval_model], eval_model, tree_model)

    for score in ("AIC", "cAIC", "BIC"):
        for model in ("GTR", "MK", "BIN"):
            frames[model] = add_aic_scores(frames[model], model, score)

    merged = merge_dfs(frames["BIN"], frames["GTR"], frames["MK"])
    merged = add_rf_data(merged)
    merged = add_avg_col_states(merged)
    return merged
# Merged per-dataset frame; the plotting cells below read it as the global ``df``.
df = get_df()

In [None]:
def scatter(df, col1, col2, factor_x=1, factor_y=1, log_x=False, log_y=False):
    """Show a plotly scatter plot of ``col1`` (x) against ``col2`` (y).

    The columns are multiplied by ``factor_x`` / ``factor_y`` on a copy of ``df``
    before plotting (e.g. -1 to make negative values usable on a log axis);
    ``log_x`` / ``log_y`` switch the respective axis to logarithmic scale.
    """
    scaled = df.copy()
    for column, factor in ((col1, factor_x), (col2, factor_y)):
        scaled[column] = factor * scaled[column]
    px.scatter(scaled, x=col1, y=col2, log_x=log_x, log_y=log_y).show()
    

In [None]:
def x_values(df, col, eliminate_outlier):
    """Return the values of ``df[col]`` that lie inside the selected range.

    With ``eliminate_outlier`` the range is the 10% to 90% quantile interval,
    otherwise it spans minimum to maximum. Both bounds are inclusive.
    """
    values = df[col]
    if eliminate_outlier:
        bounds = (values.quantile(0.1), values.quantile(0.9))
    else:
        bounds = (values.min(), values.max())
    in_range = df[col].between(*bounds)
    return df.loc[in_range][col]

def compare_modelwise(df, col, eliminate_outlier):
    """Return a figure with one histogram trace per model for column ``col``.

    The traces (BIN, GTR, MK, in that order) share a single subplot and plot the
    values of ``<MODEL>_<col>`` as selected by ``x_values``.
    """
    fig = make_subplots(rows=1, cols=1, subplot_titles=[col + " for models"])
    for model in ("BIN", "GTR", "MK"):
        histogram = go.Histogram(
            x=x_values(df, model + "_" + col, eliminate_outlier),
            name=model,
            showlegend=True,
        )
        fig.append_trace(histogram, row=1, col=1)
    return fig

def modelwise_correlation(df, col, factor, log):
    """Scatter ``<MODEL>_<col>`` of each model pair (BIN/GTR, BIN/MK, GTR/MK) against each other.

    ``factor`` scales both axes and ``log`` switches both axes to logarithmic scale.
    """
    for model_x, model_y in (("BIN", "GTR"), ("BIN", "MK"), ("GTR", "MK")):
        scatter(df, model_x + "_" + col, model_y + "_" + col,
                factor_x=factor, factor_y=factor, log_x=log, log_y=log)
    
def correlation_of_columns(df, col1, factor1, log1, col2, factor2, log2):
    """Scatter ``<MODEL>_<col1>`` against ``<MODEL>_<col2>`` for BIN, GTR and MK in turn.

    ``factor1`` / ``log1`` apply to the x axis, ``factor2`` / ``log2`` to the y axis.
    """
    for model in ("BIN", "GTR", "MK"):
        scatter(df, model + "_" + col1, model + "_" + col2,
                factor_x=factor1, factor_y=factor2, log_x=log1, log_y=log2)
    

    
# Per-model histograms of llh_eval; True restricts each histogram to the values
# between the 10% and 90% quantiles (see x_values).
compare_modelwise(df, "llh_eval", True).update_layout(template="plotly_white", height=500)

#modelwise_correlation(df, "cAIC", 1, True)

#correlation_of_columns(df, "llh_eval", -1, True, "AIC", 1, True)


In [201]:
#DIFFICULT
# Per-model histograms of the "difficult" column over its full range (no quantile trimming).
compare_modelwise(df, "difficult", False).update_layout(template="plotly_white", height=500)
#scatter(df, "BIN_difficult", "MK_difficult")

#fig = px.scatter(x=df["GTR_difficult"] - df["BIN_difficult"], y=df["GTR_difficult"] - df["MK_difficult"], log_x=False, log_y=False)
#fig.show()
#fig = px.scatter(x=df["BIN_difficult"] - df["MK_difficult"], y=df["BIN_difficult"] - df["GTR_difficult"], log_x=False, log_y=False)
#fig.show()
#fig = px.scatter(x=df["MK_difficult"] - df["BIN_difficult"], y=df["MK_difficult"] - df["GTR_difficult"], log_x=False, log_y=False)
#fig.show()

In [None]:
# AICs
# Per-model histograms of the AIC column, restricted to the 10%-90% quantile range.
compare_modelwise(df, "AIC", True).update_layout(template="plotly_white", height=500)

In [None]:
# TREE DIFFS
# Scatter plots of the pairwise RF-distance columns against each other; only the
# GTR_MK vs. BIN_GTR combination is currently enabled for each tree type.
#scatter(df, "consensus_dist_BIN_GTR", "consensus_dist_BIN_MK")
#scatter(df, "consensus_dist_BIN_MK", "consensus_dist_GTR_MK")
scatter(df, "consensus_dist_GTR_MK", "consensus_dist_BIN_GTR")
# only the last one is correlated --> Indicates similar behaviour of BIN and MK 

#scatter(df, "eval_dist_BIN_GTR", "eval_dist_BIN_MK")
#scatter(df, "eval_dist_BIN_MK", "eval_dist_GTR_MK")
scatter(df, "eval_dist_GTR_MK", "eval_dist_BIN_GTR")
# same as above, correlation even stronger

In [None]:
from plotly import graph_objects as go
from plotly.subplots import make_subplots
# One histogram trace per model pair, all in a single subplot.
# To plot the eval-tree distances instead, use the 'eval_dist_*' columns.
fig = make_subplots(rows=1, cols=1, subplot_titles=["RF Distances between consensus trees"])
for dist_column, trace_name in (
    ("consensus_dist_BIN_GTR", "BIN to GTR"),
    ("consensus_dist_BIN_MK", "BIN to MK"),
    ("consensus_dist_GTR_MK", "GTR to MK"),
):
    fig.append_trace(
        go.Histogram(x=df[dist_column], name=trace_name, showlegend=True),
        row=1,
        col=1,
    )
fig.update_layout(template="plotly_white", height=500)

In [None]:
# CROSS DIFFS
# For each model pair: negated cross difference in one direction against the
# other direction, both axes logarithmic.
for x_column, y_column in (
    ("BIN_cross_diff_GTR", "GTR_cross_diff_BIN"),
    ("BIN_cross_diff_MK", "MK_cross_diff_BIN"),
    ("MK_cross_diff_GTR", "GTR_cross_diff_MK"),
):
    scatter(df, x_column, y_column, factor_x=-1, factor_y=-1, log_x=True, log_y=True)

In [None]:
# For each model: its two negated cross differences against each other, log-log.
for x_column, y_column in (
    ("BIN_cross_diff_MK", "BIN_cross_diff_GTR"),
    ("GTR_cross_diff_MK", "GTR_cross_diff_BIN"),
    ("MK_cross_diff_GTR", "MK_cross_diff_BIN"),
):
    scatter(df, x_column, y_column, factor_x=-1, factor_y=-1, log_x=True, log_y=True)

In [None]:
# Negated cross difference against the negated eval likelihood of the same model,
# log-log. The likelihood column is "<MODEL>_llh_eval"; the former "<MODEL>_eval_llh"
# does not exist in df and raised a KeyError.
scatter(df, "BIN_cross_diff_MK", "BIN_llh_eval", factor_x=-1, factor_y=-1, log_x=True, log_y=True)
scatter(df, "GTR_cross_diff_MK", "GTR_llh_eval", factor_x=-1, factor_y=-1, log_x=True, log_y=True)
scatter(df, "MK_cross_diff_GTR", "MK_llh_eval", factor_x=-1, factor_y=-1, log_x=True, log_y=True)

In [None]:
# ENTROPY / AVG_COL_STATES
# Ratio GTR_entropy / BIN_entropy (x) against avg_col_states (y), linear axes.
fig = px.scatter(x=df["GTR_entropy"] / df["BIN_entropy"], y=df["avg_col_states"], log_x=False, log_y=False)
fig.show()

In [None]:
# AIC difference of BIN resp. MK to GTR (x) against avg_col_states (y), log-log.
for model in ("BIN", "MK"):
    fig = px.scatter(x=df[model + "_AIC"] - df["GTR_AIC"], y=df["avg_col_states"], log_x=True, log_y=True)
    fig.show()

In [None]:
# Negated BIN_cross_diff_GTR (x) against avg_col_states (y), both axes logarithmic.
scatter(df, "BIN_cross_diff_GTR", "avg_col_states", factor_x = -1, log_x = True,log_y=True)

In [None]:
# Overview of pairwise scatter plots. The plots are generated in loops; their order
# is the same as in the former hand-written list of calls.
models = ("BIN", "GTR", "MK")
model_pairs = (("BIN", "GTR"), ("BIN", "MK"), ("GTR", "MK"))
# Every (model, other model) combination: BIN-GTR, BIN-MK, GTR-BIN, GTR-MK, MK-BIN, MK-GTR.
ordered_pairs = [(model, other) for model in models for other in models if other != model]
# Model pairs used as (minuend, subtrahend) for the AIC differences.
aic_pairs = (("BIN", "GTR"), ("BIN", "MK"), ("MK", "GTR"))


def _pair_name(first, second):
    """Return the column suffix of a model pair, e.g. ("MK", "GTR") -> "GTR_MK"."""
    return "_".join(sorted((first, second)))


def _aic_diff(minuend, subtrahend):
    """Return df[<minuend>_AIC] - df[<subtrahend>_AIC]."""
    return df[minuend + "_AIC"] - df[subtrahend + "_AIC"]


def _difficult_diff(minuend, subtrahend):
    """Return df[<minuend>_difficult] - df[<subtrahend>_difficult]."""
    return df[minuend + "_difficult"] - df[subtrahend + "_difficult"]


# Consensus-tree distance against eval-tree distance of the same pair.
for first, second in model_pairs:
    scatter(df, "consensus_dist_" + first + "_" + second, "eval_dist_" + first + "_" + second)

# Consensus-tree distance of a pair against a per-model column of either member.
for suffix, scatter_kwargs in (
    ("_difficult", {}),
    ("_llh_eval", dict(factor_y=-1, log_y=True)),
    ("_AIC", dict(factor_y=1, log_y=True)),
):
    for first, second in model_pairs:
        for model in (first, second):
            scatter(df, "consensus_dist_" + first + "_" + second, model + suffix, **scatter_kwargs)

# Negated cross differences (log y) against difficulty and against the tree distances.
cross_kwargs = dict(factor_x=1, factor_y=-1, log_x=False, log_y=True)
for model, other in ordered_pairs:
    scatter(df, model + "_difficult", model + "_cross_diff_" + other, **cross_kwargs)
for dist_prefix in ("consensus_dist_", "eval_dist_"):
    for model, other in ordered_pairs:
        scatter(df, dist_prefix + _pair_name(model, other), model + "_cross_diff_" + other, **cross_kwargs)

# Negated eval likelihood against negated cross differences, log-log.
for model, other in ordered_pairs:
    scatter(df, model + "_llh_eval", model + "_cross_diff_" + other,
            factor_x=-1, factor_y=-1, log_x=True, log_y=True)

# AIC differences against each other.
for x_pair, y_pair in (
    (("BIN", "GTR"), ("BIN", "MK")),
    (("BIN", "MK"), ("MK", "GTR")),
    (("MK", "GTR"), ("BIN", "GTR")),
):
    fig = px.scatter(x=_aic_diff(*x_pair), y=_aic_diff(*y_pair), log_x=True, log_y=True)
    fig.show()

# AIC differences against the AIC (log y) and the difficulty (linear y) of one member.
aic_diff_vs_model = (
    (("BIN", "GTR"), "GTR"),
    (("BIN", "GTR"), "BIN"),
    (("BIN", "MK"), "MK"),
    (("BIN", "MK"), "BIN"),
    (("MK", "GTR"), "MK"),
    (("MK", "GTR"), "GTR"),
)
for suffix, use_log_y in (("_AIC", True), ("_difficult", False)):
    for x_pair, model in aic_diff_vs_model:
        fig = px.scatter(x=_aic_diff(*x_pair), y=df[model + suffix], log_x=True, log_y=use_log_y)
        fig.show()

# AIC differences against the negated cross differences of the pair (both directions).
for minuend, subtrahend in aic_pairs:
    for model, other in ((subtrahend, minuend), (minuend, subtrahend)):
        fig = px.scatter(x=_aic_diff(minuend, subtrahend), y=-df[model + "_cross_diff_" + other],
                         log_x=True, log_y=True)
        fig.show()

# AIC differences against the consensus-tree distance of the pair.
for minuend, subtrahend in aic_pairs:
    fig = px.scatter(x=_aic_diff(minuend, subtrahend),
                     y=df["consensus_dist_" + _pair_name(minuend, subtrahend)],
                     log_x=True, log_y=False)
    fig.show()

# Difficulty differences against the difficulty of the minuend model.
for minuend, subtrahend in (
    ("GTR", "BIN"), ("BIN", "GTR"),
    ("MK", "BIN"), ("BIN", "MK"),
    ("GTR", "MK"), ("MK", "GTR"),
):
    fig = px.scatter(x=_difficult_diff(minuend, subtrahend), y=df[minuend + "_difficult"],
                     log_x=False, log_y=False)
    fig.show()

# Difficulty differences against the consensus-tree distance of the pair.
for minuend, subtrahend in (("GTR", "BIN"), ("MK", "BIN"), ("GTR", "MK")):
    fig = px.scatter(x=_difficult_diff(minuend, subtrahend),
                     y=df["consensus_dist_" + _pair_name(minuend, subtrahend)],
                     log_x=False, log_y=False)
    fig.show()