In [1]:
# Analysis comparing the results for morphological data in BIN and MULTI representation.
# Evaluate the likelihood (LH) of the tree found with the BIN alignment under the MULTI alignment and model,
# and vice versa.

#Distances of best trees / consensus trees

#Correlations

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from plotly import graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from ete3 import Tree
import numpy as np

In [2]:
from colour import Color

class color_concept:
    """Colour lookup for the plots, selected by data type ('morph' or 'lang').

    The class attributes hold the palette and one lookup table per data type.
    An instance picks the model and model-pair tables that match its
    ``data_type``; for any other data type both tables stay empty.
    """

    # Base palette (hex colour strings).
    kit_green = "#009682"
    kit_orange = "#DF9B1B"
    kit_blue = "#0A64AA"
    kit_maigreen = "#8CB63C"
    kit_yellow = "#FCE500"
    kit_red = "#A22223"
    kit_lila = "#A3107C"
    kit_cyan = "#23A1E0"

    data_type = ""
    # Kept for backward compatibility; the instance tables are `model_colors`
    # and `diff_colors` below.
    model_color = {}
    # Empty defaults so the lookups fail with a KeyError (not an
    # AttributeError) when the data type is neither 'morph' nor 'lang'.
    model_colors = {}
    diff_colors = {}

    morph_model_colors = {
        "BIN": kit_cyan,
        "GTR": kit_orange,
        "MK": kit_lila
    }

    morph_diff_colors = {
        "BIN_GTR": kit_maigreen,
        "BIN_MK": kit_blue,
        "GTR_MK": kit_red
    }

    lang_model_colors = {
        "BIN": kit_red,
        "GTR": kit_maigreen,
        "MK": kit_blue
    }

    lang_diff_colors = {
        "BIN_GTR": kit_orange,
        "BIN_MK": kit_lila,
        "GTR_MK": kit_cyan
    }

    lang_subtype_colors = {
        "cc": kit_lila,
        "sc": kit_orange,
        "ms": kit_maigreen
        # no colour has been assigned to subtype "mp" yet
    }

    def __init__(self, data_type):
        """Select the colour tables for ``data_type`` ('morph' or 'lang')."""
        self.data_type = data_type
        if data_type == 'morph':
            self.model_colors = self.morph_model_colors
            self.diff_colors = self.morph_diff_colors
        elif data_type == 'lang':
            self.model_colors = self.lang_model_colors
            self.diff_colors = self.lang_diff_colors

    def get_model_color(self, model):
        """Return the colour of a single model ("BIN", "GTR" or "MK").

        Raises:
            KeyError: if ``model`` has no entry for the selected data type.
        """
        return self.model_colors[model]

    def get_diff_color(self, model1, model2):
        """Return the colour assigned to the pair of ``model1`` and ``model2``.

        The lookup table stores each pair under one key only (e.g. "BIN_MK"),
        so the reversed key is tried as well and the argument order does not
        matter. An earlier variant, left commented out in the original,
        interpolated between the two model colours via ``Color.range_to``;
        the fixed table is used instead.

        Raises:
            KeyError: if neither ordering of the pair is in the table.
        """
        forward = model1 + "_" + model2
        if forward in self.diff_colors:
            return self.diff_colors[forward]
        backward = model2 + "_" + model1
        if backward in self.diff_colors:
            return self.diff_colors[backward]
        raise KeyError(forward)

    def get_subtype_color(self, subtype):
        """Return the colour of a language-data subtype ("cc", "sc" or "ms").

        Raises:
            KeyError: if ``subtype`` has no entry.
        """
        return self.lang_subtype_colors[subtype]

In [14]:
# Notebook configuration: data set, the models being compared, the colour
# scheme and the input table.
data_type = "morph"
models = ["BIN", "GTR", "MK"]
# Derive the colour scheme from `data_type` so that switching the data set
# switches the colours as well (previously hard-coded to "morph").
cc = color_concept(data_type)
df = pd.read_parquet(os.path.join("training_data", data_type, "extended.parquet"))

In [4]:
    


    

def scatter(df, col1, col2, factor_x=1, factor_y=1, log_x=False, log_y=False):
    """Show a scatter plot of ``col1`` (x) against ``col2`` (y).

    The two columns are multiplied by ``factor_x`` / ``factor_y`` on a copy of
    ``df`` before plotting, so the caller's frame is not modified.
    ``log_x`` / ``log_y`` are passed on to ``px.scatter``. The figure is
    displayed immediately and nothing is returned.
    """
    scaled = df.copy()
    for column, factor in ((col1, factor_x), (col2, factor_y)):
        scaled[column] = factor * scaled[column]
    px.scatter(scaled, x=col1, y=col2, log_x=log_x, log_y=log_y).show()

def print_corr(col1, col2, data=None):
    """Print the correlation of two columns as a tab-separated line.

    The output is ``col1<TAB>col2<TAB>correlation``, using the default method
    of ``Series.corr``.

    Args:
        col1: name of the first column.
        col2: name of the second column.
        data: frame to read the columns from. When omitted, the module-level
            ``df`` is used, which is the original behaviour.
    """
    frame = df if data is None else data
    corr = frame[col1].corr(frame[col2])
    print("\t".join([col1, col2, str(corr)]))

In [5]:
def x_values(df, col, eliminate_outlier):
    """Return the values of ``df[col]`` that lie inside the selected range.

    With ``eliminate_outlier`` set, the range is the 10 % to 90 % quantile
    interval of the column (both ends inclusive); otherwise it is the full
    min-to-max interval.
    """
    column = df[col]
    if eliminate_outlier:
        bounds = (column.quantile(0.1), column.quantile(0.9))
    else:
        bounds = (column.min(), column.max())
    inside = column.between(*bounds)
    return df.loc[inside][col]

def compare_modelwise(df, col, eliminate_outlier, title):
    """Build one figure with a histogram of ``<model>_<col>`` per model.

    Reads the module-level ``models`` list and the colour scheme ``cc``.

    Args:
        df: frame holding one ``<model>_<col>`` column per model.
        col: column suffix shared by all models.
        eliminate_outlier: passed on to ``x_values``.
        title: subplot title.

    Returns:
        The plotly figure (not shown yet, so callers can adjust the layout).
    """
    fig = make_subplots(
        rows=1,
        cols=1,
        subplot_titles=[title]
    )

    for M in models:
        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(
            go.Histogram(
                x=x_values(df, M + "_" + col, eliminate_outlier),
                name=M,
                showlegend=True,
                marker_color=cc.get_model_color(M)
            ),
            row=1,
            col=1
        )
    return fig

def compare_modelwise_log(df, col, factor):
    """Build a bar chart of ``<model>_<col>`` counts in 19 log-spaced bins.

    The 20 bin edges are log-spaced between the smallest and largest pooled
    value of all models. With ``factor == -1`` the values are negated before
    binning, and the bars are flipped and labelled with a leading "-" so the
    axis runs in the order of the original (negative) values.

    Reads the module-level ``models`` list and the colour scheme ``cc``.
    Because ``log10`` is taken of the value range, the values are assumed to
    be strictly positive (strictly negative for ``factor == -1``) — TODO
    confirm against the data.

    Each bar is labelled with the lower signed edge of its bin, rounded to
    three decimals. ``customdata`` carries the ``(lower, upper)`` signed edges
    of each bin. The original stored ``exp(edge) - 1`` there, although the
    edges are already in value space.

    Returns:
        The plotly figure.
    """
    fig = make_subplots(
        rows=1,
        cols=1,
        subplot_titles=[col]
    )
    negate = factor == -1

    vals = np.concatenate([df[M + '_' + col] for M in models])
    print(vals.max())
    magnitudes = -vals if negate else vals
    bins = np.logspace(np.log10(magnitudes.min()), np.log10(magnitudes.max()), 20)
    print(bins)

    # Signed edges in bar order; one label and one (lower, upper) range per bin.
    if negate:
        signed_edges = -np.flip(bins)
        labels = ["-" + str(np.round(edge, 3)) for edge in np.flip(bins)[:-1]]
    else:
        signed_edges = bins
        labels = [str(np.round(edge, 3)) for edge in bins[:-1]]
    ranges = np.stack((signed_edges[:-1], signed_edges[1:]), axis=-1)

    for M in models:
        model_vals = df[M + '_' + col]
        counts, _ = np.histogram(-model_vals if negate else model_vals, bins=bins)
        if negate:
            counts = np.flip(counts)

        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(
            go.Bar(
                y=counts, x=labels,
                customdata=ranges,
                name=M,
                showlegend=True,
                marker_color=cc.get_model_color(M)
            ),
            row=1,
            col=1
        )
    return fig

def scatter_modelwise(df, col, title, log=False):
    """Build a marker plot of ``<model>_<col>`` against the row position, one trace per model.

    Reads the module-level ``models`` list and the colour scheme ``cc``.
    With ``log`` set, the y axis is switched to a logarithmic scale.

    Returns:
        The plotly figure.
    """
    fig = go.Figure(layout_title_text=title)
    for M in models:
        series = df[M + "_" + col]
        trace = go.Scatter(
            x=np.arange(len(series)),
            y=series,
            mode='markers',
            name=M,
            marker_color=cc.get_model_color(M),
        )
        fig.add_trace(trace)
    if log:
        fig.update_yaxes(type="log")
    return fig

def modelwise_correlation(df, col, factor, log):
    """Show scatter plots of ``<model>_<col>`` for each pair of models.

    The pairs are BIN/GTR, BIN/MK and GTR/MK, in that order. ``factor`` and
    ``log`` are applied to both axes.
    """
    for first, second in (("BIN_", "GTR_"), ("BIN_", "MK_"), ("GTR_", "MK_")):
        scatter(
            df,
            first + col,
            second + col,
            factor_x=factor,
            factor_y=factor,
            log_x=log,
            log_y=log,
        )
    
def correlation_of_columns(df, col1, factor1, log1, col2, factor2, log2):
    """Show a scatter plot of ``<model>_<col1>`` against ``<model>_<col2>`` for each model.

    The models are BIN, GTR and MK, in that order. ``factor1`` / ``log1``
    apply to the x axis and ``factor2`` / ``log2`` to the y axis.
    """
    for prefix in ("BIN_", "GTR_", "MK_"):
        scatter(
            df,
            prefix + col1,
            prefix + col2,
            factor_x=factor1,
            factor_y=factor2,
            log_x=log1,
            log_y=log2,
        )
    

    


In [65]:
# DIFFICULTY
# Alternative per-model histograms (swap one in for the active call below):
#compare_modelwise(df, "difficult", False, "Difficulty Score").update_layout(template="plotly_white", height=500)
#compare_modelwise(df, "num_topos_search", False, "Number of different topologies in 100 tree searches").update_layout(template="plotly_white", height=500)
(
    compare_modelwise(
        df,
        "avg_rfdist_search",
        False,
        "Average RF Distance of trees found in 100 tree searches",
    )
    .update_layout(template="plotly_white", height=500)
)
#scatter(df, "BIN_difficult", "MK_difficult")

# Disabled: pairwise difficulty differences plotted against each other.
#fig = px.scatter(x=df["GTR_difficult"] - df["BIN_difficult"], y=df["GTR_difficult"] - df["MK_difficult"], log_x=False, log_y=False)
#fig.show()
#fig = px.scatter(x=df["BIN_difficult"] - df["MK_difficult"], y=df["BIN_difficult"] - df["GTR_difficult"], log_x=False, log_y=False)
#fig.show()
#fig = px.scatter(x=df["MK_difficult"] - df["BIN_difficult"], y=df["MK_difficult"] - df["GTR_difficult"], log_x=False, log_y=False)
#fig.show()

In [78]:
# AIC scores of all models, on a logarithmic y axis.
(
    scatter_modelwise(df, "AIC", "AIC Scores", log=True)
    .update_layout(template="plotly_white", height=500)
)

In [49]:
# Histograms of the pairwise AIC differences, restricted by x_values to the
# 10 %-90 % quantile range of each difference.
fig = make_subplots(
    rows=1,
    cols=1,
    subplot_titles=["Differences of AIC Scores"]
)

df2 = df.copy()
df2["diff1"] = df['BIN_AIC'] - df['MK_AIC']
df2["diff2"] = df['MK_AIC'] - df['GTR_AIC']

# (difference column, legend label, model pair used for the colour lookup)
for _diff_col, _diff_label, _diff_pair in (
    ("diff1", "AIC(BIN) - AIC(MK)", ("BIN", "MK")),
    ("diff2", "AIC(MK) - AIC(GTR)", ("GTR", "MK")),
):
    # add_trace(row=, col=) replaces the deprecated append_trace.
    fig.add_trace(
        go.Histogram(
            x=x_values(df2, _diff_col, True),
            name=_diff_label,
            marker_color=cc.get_diff_color(*_diff_pair),
            showlegend=True
            #, histnorm="percent"
        ),
        row=1,
        col=1
    )
fig.update_layout(template="plotly_white", height=500)

In [68]:
# TREE DIFFS
# Only the GTR_MK vs. BIN_GTR pairing is plotted. The other two pairings are
# kept below for reference:
#scatter(df, "consensus_dist_BIN_GTR", "consensus_dist_BIN_MK")
#scatter(df, "consensus_dist_BIN_MK", "consensus_dist_GTR_MK")
#scatter(df, "eval_dist_BIN_GTR", "eval_dist_BIN_MK")
#scatter(df, "eval_dist_BIN_MK", "eval_dist_GTR_MK")
#
# Observation: of the three pairings only this one is correlated, which
# indicates similar behaviour of BIN and MK. For the eval distances the
# correlation is even stronger than for the consensus distances.
for dist_kind in ("consensus_dist", "eval_dist"):
    scatter(df, dist_kind + "_GTR_MK", dist_kind + "_BIN_GTR")

In [69]:
# Histograms of the RF distances between the best trees of each model pair.
# To plot the consensus-tree distances instead, use the 'consensus_dist_'
# prefix and the commented-out subplot title.
fig = make_subplots(
    rows=1,
    cols=1,
    #subplot_titles=["RF Distances between consensus trees"]
    subplot_titles=["RF Distances between best trees"]
)

for _model_a, _model_b in (("BIN", "GTR"), ("BIN", "MK"), ("GTR", "MK")):
    # add_trace(row=, col=) replaces the deprecated append_trace.
    fig.add_trace(
        go.Histogram(
            x=df['eval_dist_' + _model_a + '_' + _model_b],
            #x=df['consensus_dist_' + _model_a + '_' + _model_b],
            name=_model_a + " to " + _model_b,
            marker_color=cc.get_diff_color(_model_a, _model_b),
            showlegend=True
            #, histnorm="percent"
        ),
        row=1,
        col=1
    )
fig.update_layout(template="plotly_white", height=500)

In [34]:
# Correlations between the RF-distance columns, printed in three groups
# separated by blank lines.
model_pairs = ("BIN_GTR", "BIN_MK", "GTR_MK")
pair_combinations = (
    ("BIN_GTR", "BIN_MK"),
    ("BIN_GTR", "GTR_MK"),
    ("BIN_MK", "GTR_MK"),
)

# Consensus-tree distance vs. eval-tree distance of the same model pair.
for pair in model_pairs:
    print_corr("consensus_dist_" + pair, "eval_dist_" + pair)

# Distances of different model pairs against each other, per distance kind.
for dist_prefix in ("consensus_dist_", "eval_dist_"):
    print("")
    for first_pair, second_pair in pair_combinations:
        print_corr(dist_prefix + first_pair, dist_prefix + second_pair)

consensus_dist_BIN_GTR	eval_dist_BIN_GTR	0.7335613764565035
consensus_dist_BIN_MK	eval_dist_BIN_MK	0.4694731989927272
consensus_dist_GTR_MK	eval_dist_GTR_MK	0.7516775691190171

consensus_dist_BIN_GTR	consensus_dist_BIN_MK	0.3075558880045399
consensus_dist_BIN_GTR	consensus_dist_GTR_MK	0.8402274070523765
consensus_dist_BIN_MK	consensus_dist_GTR_MK	0.38089104815850644

eval_dist_BIN_GTR	eval_dist_BIN_MK	0.3999204002560998
eval_dist_BIN_GTR	eval_dist_GTR_MK	0.8890867121249629
eval_dist_BIN_MK	eval_dist_GTR_MK	0.44796869744144463


In [67]:
# CROSS DIFFS
# For each model pair, plot the cross difference in one direction against the
# opposite direction. The values are negated (factor -1) so both axes can be
# logarithmic.
for model_a, model_b in (("BIN", "GTR"), ("BIN", "MK"), ("MK", "GTR")):
    scatter(
        df,
        model_a + "_cross_diff_" + model_b,
        model_b + "_cross_diff_" + model_a,
        factor_x=-1,
        factor_y=-1,
        log_x=True,
        log_y=True,
    )

In [41]:
# For each reference model, plot its cross differences to the two other
# models against each other (negated values, log-log axes).
for reference, other_x, other_y in (
    ("BIN", "MK", "GTR"),
    ("GTR", "MK", "BIN"),
    ("MK", "GTR", "BIN"),
):
    scatter(
        df,
        reference + "_cross_diff_" + other_x,
        reference + "_cross_diff_" + other_y,
        factor_x=-1,
        factor_y=-1,
        log_x=True,
        log_y=True,
    )

In [73]:

def plot_cross_difference(M_1, bin_min=0.01, bin_max=500, num_edges=10):
    """Build a bar chart of the binned cross differences of ``M_1`` against every other model.

    For each other model ``M_2`` the column ``<M_1>_cross_diff_<M_2>`` of the
    module-level ``df`` is negated and counted in log-spaced bins. The bars
    are flipped and labelled with a leading "-" so the axis runs in the order
    of the original (negative) values. Negated values outside
    ``[bin_min, bin_max]`` fall outside the bin edges and are not counted.

    Reads the module-level ``df``, ``models`` and colour scheme ``cc``.
    Prints the largest raw value, the bin edges and the per-model counts.

    Each bar is labelled with the lower signed edge of its bin, rounded to
    three decimals. ``customdata`` carries the ``(lower, upper)`` signed edges
    of each bin. The original stored ``exp(edge) - 1`` there, although the
    edges are already in value space.

    Args:
        M_1: the reference model.
        bin_min: smallest bin edge, as a magnitude (default 0.01).
        bin_max: largest bin edge, as a magnitude (default 500).
        num_edges: number of log-spaced bin edges (default 10, i.e. 9 bins).

    Returns:
        The plotly figure with the "plotly_white" template and height 500.
    """
    fig = make_subplots(
        rows=1,
        cols=1,
        subplot_titles=["Cross differences M_1=" + M_1]
    )
    others = [M_2 for M_2 in models if M_2 != M_1]

    vals = np.concatenate([df[M_1 + '_cross_diff_' + M_2] for M_2 in others])
    print(vals.max())

    bins = np.logspace(np.log10(bin_min), np.log10(bin_max), num_edges)
    print(bins)

    # Bars are drawn from the most negative bin to the least negative one.
    signed_edges = -np.flip(bins)
    ranges = np.stack((signed_edges[:-1], signed_edges[1:]), axis=-1)
    labels = ['-' + str(np.round(edge, 3)) for edge in np.flip(bins)[:-1]]

    for M_2 in others:
        col = df[M_1 + '_cross_diff_' + M_2]
        counts, _ = np.histogram(-col, bins=bins)
        print(counts)
        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(
            go.Bar(
                y=np.flip(counts), x=labels,
                customdata=ranges,
                name="M_2=" + M_2,
                showlegend=True,
                marker_color=cc.get_model_color(M_2)
            ),
            row=1,
            col=1
        )
    fig.update_layout(template="plotly_white", height=500)

    return fig
                      
# Only one reference model is plotted per run. Alternatives:
#plot_cross_difference("BIN")
#plot_cross_difference("GTR")
plot_cross_difference(M_1="MK")

-9.999999996956888e-07
[1.00000000e-02 3.32742119e-02 1.10717318e-01 3.68403150e-01
 1.22583245e+00 4.07886087e+00 1.35720881e+01 4.51600535e+01
 1.50266519e+02 5.00000000e+02]
[  3   6  23  31 107 117  69  11   6]
[  0   0   4  15  34  71 141  96  11]


In [22]:
#scatter(df, "GTR_cross_diff_MK", "GTR_llh_eval")
# Cross difference GTR -> MK relative to the GTR likelihood (x), plotted
# against the negated likelihood and the negated cross difference (log y).
relative_cross_diff = df["GTR_cross_diff_MK"] / df["GTR_llh_eval"]
for y_column in ("GTR_llh_eval", "GTR_cross_diff_MK"):
    fig = px.scatter(x=relative_cross_diff, y=-df[y_column], log_x=False, log_y=True)
    fig.show()


In [70]:
# Correlations involving the cross differences, printed in groups that are
# separated by blank lines.
correlation_groups = (
    # both directions of the same model pair
    (
        ("BIN_cross_diff_MK", "MK_cross_diff_BIN"),
        ("BIN_cross_diff_GTR", "GTR_cross_diff_BIN"),
        ("GTR_cross_diff_MK", "MK_cross_diff_GTR"),
    ),
    # consensus-tree distance vs. cross difference (one direction)
    (
        ("consensus_dist_BIN_MK", "MK_cross_diff_BIN"),
        ("consensus_dist_BIN_GTR", "GTR_cross_diff_BIN"),
        ("consensus_dist_GTR_MK", "MK_cross_diff_GTR"),
    ),
    # cross difference (opposite direction) vs. consensus-tree distance
    (
        ("BIN_cross_diff_MK", "consensus_dist_BIN_MK"),
        ("BIN_cross_diff_GTR", "consensus_dist_BIN_GTR"),
        ("GTR_cross_diff_MK", "consensus_dist_GTR_MK"),
    ),
    # the two cross differences of the same reference model
    (
        ("BIN_cross_diff_MK", "BIN_cross_diff_GTR"),
        ("MK_cross_diff_GTR", "MK_cross_diff_BIN"),
        ("GTR_cross_diff_MK", "GTR_cross_diff_BIN"),
    ),
    # cross difference vs. the reference model's own llh_eval column
    (
        ("BIN_cross_diff_MK", "BIN_llh_eval"),
        ("BIN_cross_diff_GTR", "BIN_llh_eval"),
        ("MK_cross_diff_GTR", "MK_llh_eval"),
        ("MK_cross_diff_BIN", "MK_llh_eval"),
        ("GTR_cross_diff_MK", "GTR_llh_eval"),
        ("GTR_cross_diff_BIN", "GTR_llh_eval"),
    ),
)

for group_index, column_pairs in enumerate(correlation_groups):
    if group_index > 0:
        print("")
    for first_column, second_column in column_pairs:
        print_corr(first_column, second_column)

BIN_cross_diff_MK	MK_cross_diff_BIN	0.9719380553706726
BIN_cross_diff_GTR	GTR_cross_diff_BIN	0.8886347317992322
GTR_cross_diff_MK	MK_cross_diff_GTR	0.9241169403887765

consensus_dist_BIN_MK	MK_cross_diff_BIN	-0.031245441220729572
consensus_dist_BIN_GTR	GTR_cross_diff_BIN	-0.04349444976212199
consensus_dist_GTR_MK	MK_cross_diff_GTR	-0.06409682997417646

BIN_cross_diff_MK	consensus_dist_BIN_MK	0.0038268488246610663
BIN_cross_diff_GTR	consensus_dist_BIN_GTR	-0.03775893858869662
GTR_cross_diff_MK	consensus_dist_GTR_MK	-0.050407335292447064

BIN_cross_diff_MK	BIN_cross_diff_GTR	0.9211003190857674
MK_cross_diff_GTR	MK_cross_diff_BIN	0.3442114141370359
GTR_cross_diff_MK	GTR_cross_diff_BIN	0.72674558782105

BIN_cross_diff_MK	BIN_llh_eval	0.417099218295707
BIN_cross_diff_GTR	BIN_llh_eval	0.617820587323194
MK_cross_diff_GTR	MK_llh_eval	0.7786411828793995
MK_cross_diff_BIN	MK_llh_eval	0.4899541615020721
GTR_cross_diff_MK	GTR_llh_eval	0.7799423758776505
GTR_cross_diff_BIN	GTR_llh_eval	0.7780853126

In [82]:
# ENTROPY / AVG_COL_STATES
# Ratio of the GTR entropy to the BIN entropy (x, labelled "H*/H") against
# the avg_col_states column (y, labelled "n_avg").
entropy_ratio = df["GTR_entropy"] / df["BIN_entropy"]
axis_labels = {"x": "H*/H", "y": "n_avg"}
fig = px.scatter(
    x=entropy_ratio,
    y=df["avg_col_states"],
    log_x=False,
    log_y=False,
    labels=axis_labels,
)

# Disabled variant: relative deviation of the ratio from avg_col_states on x.
#fig = px.scatter(x=((df["GTR_entropy"] / df["BIN_entropy"]) - df["avg_col_states"]) / df["avg_col_states"], y=df["avg_col_states"],
#                 log_x=False, log_y=False)
fig.show()

In [18]:
# Pearson correlation (the default method) between avg_col_states and the
# GTR/BIN entropy ratio. The value is the cell output.
df["avg_col_states"].corr(
    df["GTR_entropy"] / df["BIN_entropy"],
    method="pearson",
)
# Rank-based alternatives:
#df["avg_col_states"].corr(df["GTR_entropy"] / df["BIN_entropy"], method='spearman')
#df["avg_col_states"].corr(df["GTR_entropy"] / df["BIN_entropy"], method='kendall')

0.9945543821999171

In [17]:
# AIC difference of BIN and of MK to GTR (x) against avg_col_states (y),
# both axes logarithmic.
for aic_column in ("BIN_AIC", "MK_AIC"):
    fig = px.scatter(
        x=df[aic_column] - df["GTR_AIC"],
        y=df["avg_col_states"],
        log_x=True,
        log_y=True,
    )
    fig.show()

In [None]:
# Negated BIN -> GTR cross difference (x) against avg_col_states (y), log-log.
scatter(
    df,
    "BIN_cross_diff_GTR",
    "avg_col_states",
    factor_x=-1,
    log_x=True,
    log_y=True,
)

In [94]:
def compare_alignment_property(df, col, title, eliminate_outlier=False):
    """Build a figure comparing an alignment property between the two representations.

    Two histograms are drawn: ``BIN_<col>`` (legend "binary") and
    ``GTR_<col>`` (legend "multi-value"), coloured like the BIN and GTR
    models of the module-level colour scheme ``cc``.

    Args:
        df: frame holding the ``BIN_<col>`` and ``GTR_<col>`` columns.
        col: property suffix.
        title: subplot title.
        eliminate_outlier: passed on to ``x_values``.

    Returns:
        The plotly figure.
    """
    fig = make_subplots(
        rows=1,
        cols=1,
        subplot_titles=[title]
    )

    for model, legend_name in (("BIN", "binary"), ("GTR", "multi-value")):
        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(
            go.Histogram(
                x=x_values(df, model + '_' + col, eliminate_outlier),
                name=legend_name,
                marker_color=cc.get_model_color(model),
                showlegend=True
                #, histnorm="percent"
            ),
            row=1,
            col=1
        )
    return fig


# Only one property is plotted per run. Alternatives:
#compare_alignment_property(df, "entropy", "Entropy in biological morphological alignments"
#                          ).update_layout(template="plotly_white", height=500)
#compare_alignment_property(df, "num_sites", "Number of sites", True
#                          ).update_layout(template="plotly_white", height=500)

#compare_alignment_property(df, "num_patterns", "Number of patterns", True
#                          ).update_layout(template="plotly_white", height=500)

(
    compare_alignment_property(
        df,
        "proportion_invariant",
        "Proportion invariant",
        eliminate_outlier=True,
    )
    .update_layout(template="plotly_white", height=500)
)




In [52]:
def ratio(df, col, name):
    """Plot ``avg_col_states`` against the BIN/GTR ratio of a property and print their correlation.

    The x axis shows ``df["avg_col_states"]`` (labelled "n_avg") and the
    y axis shows ``BIN_<col> / GTR_<col>`` (labelled "ratio of <name>").
    The original had the two axis labels swapped.

    Args:
        df: frame holding ``avg_col_states``, ``BIN_<col>`` and ``GTR_<col>``.
        col: property suffix.
        name: human-readable property name used in the y-axis label.
    """
    quotient = df["BIN_" + col] / df["GTR_" + col]
    fig = px.scatter(x=df["avg_col_states"], y=quotient, log_x=False, log_y=False, labels={
                     "x": "n_avg",
                     "y": "ratio of " + name})
    print(df["avg_col_states"].corr(quotient))
    fig.show()
    
def correlate_property(df, col, name):
    """Plot ``GTR_<col>`` (x, "multi-value") against ``BIN_<col>`` (y, "binary") and print their correlation.

    ``name`` is used as the figure title. The correlation is printed before
    the figure is shown.
    """
    multi_values = df["GTR_" + col]
    binary_values = df["BIN_" + col]
    axis_labels = {"x": "multi-value", "y": "binary"}
    fig = px.scatter(
        x=multi_values,
        y=binary_values,
        log_x=False,
        log_y=False,
        labels=axis_labels,
        title=name,
    )
    print(multi_values.corr(binary_values))
    fig.show()
    
# Only one property is analysed per run. Alternatives:
#ratio(df, "entropy", "entropy")
ratio(df, col="num_sites", name="number of sites")
#correlate_property(df, "proportion_invariant", "number of patterns")

# i / i_star: num_sites * proportion_invariant for the BIN / GTR alignment
# (presumably the number of invariant sites — verify against the data).
# o is their difference.
i = df["BIN_num_sites"].mul(df["BIN_proportion_invariant"])
i_star = df["GTR_num_sites"].mul(df["GTR_proportion_invariant"])
o = i.sub(i_star)
# BIN pattern count against the GTR pattern count shifted by o. The figure
# is built but not shown; the correlation print and show call are disabled.
fig = px.scatter(
    x=df["BIN_num_patterns"],
    y=df["GTR_num_patterns"].add(o),
    log_x=False,
    log_y=False,
    labels={"x": "p", "y": "p* + (i - i*)"},
)
#print(df["BIN_num_patterns"].corr(df["GTR_num_patterns"] + o))
#fig.show()


    

0.9979594604643786


In [22]:
# Histograms of the max_states ("n_max") and avg_col_states ("n_avg") columns.
fig = make_subplots(
    rows=1,
    cols=1,
    subplot_titles=["Number of values for characteristics"]
)

for _state_col, _state_label in (("max_states", "n_max"), ("avg_col_states", "n_avg")):
    # add_trace(row=, col=) replaces the deprecated append_trace.
    fig.add_trace(
        go.Histogram(
            x=df[_state_col],
            name=_state_label,
            showlegend=True
            #, histnorm="percent"
        ),
        row=1,
        col=1
    )
fig.update_layout(template="plotly_white", height=500)

In [None]:
# Exploratory scatter plots. Every plot of the original cell is produced in
# the original order; the repeated calls are driven by the tables below.

# Unordered model pairs as they appear in the *_dist_* column names, and the
# two models that belong to each pair.
tree_pairs = ("BIN_GTR", "BIN_MK", "GTR_MK")
pair_members = {
    "BIN_GTR": ("BIN", "GTR"),
    "BIN_MK": ("BIN", "MK"),
    "GTR_MK": ("GTR", "MK"),
}
# Ordered (reference model, other model) pairs for the *_cross_diff_* columns.
directed_pairs = (
    ("BIN", "GTR"),
    ("BIN", "MK"),
    ("GTR", "BIN"),
    ("GTR", "MK"),
    ("MK", "BIN"),
    ("MK", "GTR"),
)

# Consensus-tree distance vs. eval-tree distance of the same pair.
for pair in tree_pairs:
    scatter(df, "consensus_dist_" + pair, "eval_dist_" + pair)

# Consensus-tree distance vs. difficulty, negated llh_eval (log y) and
# AIC (log y) of each of the two models involved.
for column_suffix, scatter_options in (
    ("_difficult", {}),
    ("_llh_eval", {"factor_y": -1, "log_y": True}),
    ("_AIC", {"factor_y": 1, "log_y": True}),
):
    for pair in tree_pairs:
        for model in pair_members[pair]:
            scatter(df, "consensus_dist_" + pair, model + column_suffix, **scatter_options)

# Difficulty of the reference model vs. its negated cross difference (log y).
for model, other in directed_pairs:
    scatter(
        df,
        model + "_difficult",
        model + "_cross_diff_" + other,
        factor_x=1, factor_y=-1, log_x=False, log_y=True,
    )

# Tree distance of the pair vs. negated cross difference (log y), first for
# the consensus trees, then for the eval trees.
for dist_prefix in ("consensus_dist_", "eval_dist_"):
    for model, other in directed_pairs:
        # The distance columns name the pair in alphabetical model order.
        pair = "_".join(sorted((model, other)))
        scatter(
            df,
            dist_prefix + pair,
            model + "_cross_diff_" + other,
            factor_x=1, factor_y=-1, log_x=False, log_y=True,
        )

# Negated llh_eval of the reference model vs. negated cross difference (log-log).
for model, other in directed_pairs:
    scatter(
        df,
        model + "_llh_eval",
        model + "_cross_diff_" + other,
        factor_x=-1, factor_y=-1, log_x=True, log_y=True,
    )

# Pairwise AIC differences, keyed "<minuend>_<subtrahend>".
aic_gap = {
    "BIN_GTR": df["BIN_AIC"] - df["GTR_AIC"],
    "BIN_MK": df["BIN_AIC"] - df["MK_AIC"],
    "MK_GTR": df["MK_AIC"] - df["GTR_AIC"],
}

# AIC differences against each other (log-log).
for gap_x, gap_y in (("BIN_GTR", "BIN_MK"), ("BIN_MK", "MK_GTR"), ("MK_GTR", "BIN_GTR")):
    fig = px.scatter(x=aic_gap[gap_x], y=aic_gap[gap_y], log_x=True, log_y=True)
    fig.show()

# AIC difference vs. the AIC (log y) and the difficulty (linear y) of the two
# models involved; the model order per difference follows the original cell.
gap_models = (
    ("BIN_GTR", ("GTR", "BIN")),
    ("BIN_MK", ("MK", "BIN")),
    ("MK_GTR", ("MK", "GTR")),
)
for column_suffix, use_log_y in (("_AIC", True), ("_difficult", False)):
    for gap, members in gap_models:
        for model in members:
            fig = px.scatter(x=aic_gap[gap], y=df[model + column_suffix], log_x=True, log_y=use_log_y)
            fig.show()

# AIC difference vs. the negated cross differences of the pair (log-log).
for gap, cross_columns in (
    ("BIN_GTR", ("GTR_cross_diff_BIN", "BIN_cross_diff_GTR")),
    ("BIN_MK", ("MK_cross_diff_BIN", "BIN_cross_diff_MK")),
    ("MK_GTR", ("GTR_cross_diff_MK", "MK_cross_diff_GTR")),
):
    for cross_column in cross_columns:
        fig = px.scatter(x=aic_gap[gap], y=-df[cross_column], log_x=True, log_y=True)
        fig.show()

# AIC difference (log x) vs. consensus-tree distance of the pair.
for gap, pair in (("BIN_GTR", "BIN_GTR"), ("BIN_MK", "BIN_MK"), ("MK_GTR", "GTR_MK")):
    fig = px.scatter(x=aic_gap[gap], y=df["consensus_dist_" + pair], log_x=True, log_y=False)
    fig.show()

# Difficulty difference (model - other) vs. the difficulty of `model`.
for model, other in (
    ("GTR", "BIN"),
    ("BIN", "GTR"),
    ("MK", "BIN"),
    ("BIN", "MK"),
    ("GTR", "MK"),
    ("MK", "GTR"),
):
    fig = px.scatter(
        x=df[model + "_difficult"] - df[other + "_difficult"],
        y=df[model + "_difficult"],
        log_x=False,
        log_y=False,
    )
    fig.show()

# Difficulty difference vs. consensus-tree distance of the pair.
for model, other, pair in (
    ("GTR", "BIN", "BIN_GTR"),
    ("MK", "BIN", "BIN_MK"),
    ("GTR", "MK", "GTR_MK"),
):
    fig = px.scatter(
        x=df[model + "_difficult"] - df[other + "_difficult"],
        y=df["consensus_dist_" + pair],
        log_x=False,
        log_y=False,
    )
    fig.show()