In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1 import AxesGrid
import matplotlib.ticker as plticker
import seaborn as sns
import warnings
# Install a blanket "ignore" filter so no Python warnings are shown.
warnings.filterwarnings("ignore")

# Global Matplotlib defaults for every figure in this notebook, set in one go.
mpl.rcParams.update({
    "pdf.fonttype": 42,   # font type used when writing PDF output
    "ps.fonttype": 42,    # same setting for PostScript output
    "figure.dpi": 300,
    "axes.xmargin": 0,    # no automatic margin around the x data limits
    "axes.ymargin": 0,    # no automatic margin around the y data limits
    "font.family": "sans-serif",
    "font.sans-serif": ["Arial"],
})

In [None]:
# Read the validation table from a CSV file located in the working directory.
df_syn = pd.read_csv(filepath_or_buffer="MUBDsyn_ML_validation.csv")

In [None]:
# Order the rows by increasing "AVE bias"; every array extracted from the
# frame afterwards inherits this row order.
df_syn = df_syn.sort_values(by="AVE bias", ascending=True)

In [None]:
# Display the full table as the cell output (rows are ordered by "AVE bias"
# once the sorting cell has run).
df_syn

In [None]:
# Pearson correlation between every pair of numeric columns.
# Non-numeric columns (if the CSV has any) are dropped explicitly first:
# pandas >= 2.0 no longer discards them silently inside ``corr`` and raises
# instead, while older versions dropped them implicitly. Selecting the numeric
# columns up front gives the same matrix on every pandas version.
corr_mtx_syn = df_syn.select_dtypes(include="number").corr(method="pearson")

In [None]:
# For each bias metric, take its column of the correlation matrix and sort it
# in descending order (largest correlation first).
df_ave_corr_syn, df_aaad_corr_syn, df_ddda_corr_syn = (
    corr_mtx_syn[metric].sort_values(ascending=False)
    for metric in ("AVE bias", "AA-AD", "DD-DA")
)

In [None]:
# Pull the three bias columns out as NumPy arrays (in the frame's row order)
# and stack them row-wise: one heatmap row per bias metric.
AVE, AAAD, DDDA = (
    df_syn[column].to_numpy() for column in ("AVE bias", "AA-AD", "DD-DA")
)
data1 = np.stack([AVE, AAAD, DDDA], axis=0)

In [None]:
# Pull the four per-model score columns out as NumPy arrays (in the frame's
# row order) and stack them row-wise: one heatmap row per model.
KNN, LR, RF, SVM = (
    df_syn[column].to_numpy() for column in ("KNN", "LR", "RF", "SVM")
)
data2 = np.stack([KNN, LR, RF, SVM], axis=0)

In [None]:
# Main figure: heatmaps of the bias metrics (top-left) and of the per-model
# scores (bottom-left), next to a 2x2 grid of score-vs-AVE-bias regression
# panels, one per model. The result is written to "ML_MUBDsyn.pdf".

# Column labels are derived from the data so the number of tick labels always
# matches the number of heatmap columns; a fixed 1..10 range makes
# set_xticklabels fail whenever the table does not hold exactly ten rows.
targets = np.arange(1, data1.shape[1] + 1)
bias = ["AVE bias", "AA-AD", "DD-DA"]
ml = ["1NN", "LR", "RF", "SVM"]


def _add_top_colorbar(axis, image, label, ticks):
    """Attach a horizontal colorbar above ``axis`` and return it.

    ``ticks`` are the fixed tick positions; each one is labelled with its
    value formatted to two decimals. ``label`` is used as the colorbar title.
    """
    cax = make_axes_locatable(axis).new_vertical(size='5%', pad=0.7)
    axis.figure.add_axes(cax)
    cbar = axis.figure.colorbar(image, cax=cax, orientation="horizontal", ticks=ticks)
    cbar.ax.set_title(label, size="26")
    cbar.ax.set_xticklabels([f"{tick:.2f}" for tick in ticks], fontsize=26)
    return cbar


def _style_heatmap(axis, values, col_labels, row_labels):
    """Label the cells of a heatmap and separate them with white grid lines."""
    n_rows, n_cols = values.shape
    # Major ticks sit on the cell centres and carry the labels.
    axis.set_xticks(np.arange(n_cols))
    axis.set_yticks(np.arange(n_rows))
    axis.spines[:].set_visible(False)
    # Minor ticks sit on the cell borders; they only anchor the white grid.
    axis.set_xticks(np.arange(n_cols + 1) - .5, minor=True)
    axis.set_yticks(np.arange(n_rows + 1) - .5, minor=True)
    axis.set_xticklabels(col_labels, fontsize=26)
    axis.set_yticklabels(row_labels, fontsize=26)
    axis.grid(which="minor", color="w", linestyle='-', linewidth=1.5)
    axis.tick_params(which="minor", bottom=False, left=False, labelsize=26)
    axis.set_xlabel("Case", size="26")


def _annotate_heatmap(image, values, color_for=None):
    """Write every value (two decimals) centred in its heatmap cell.

    ``color_for`` maps a cell value to a text colour; when it is omitted no
    colour is passed and Matplotlib's default text colour applies.
    Returns the list of created Text objects in row-major order.
    """
    fmt = matplotlib.ticker.StrMethodFormatter("{x:.2f}")
    created = []
    for (row, col), value in np.ndenumerate(values):
        text_kw = dict(horizontalalignment="center",
                       verticalalignment="center",
                       fontsize=27)
        if color_for is not None:
            text_kw["color"] = color_for(value)
        created.append(image.axes.text(col, row, fmt(value, None), **text_kw))
    return created


def _plot_ave_regression(axis, mcc, model_name, rho, ylim, xlabel=None, ylabel=None):
    """Draw one score-vs-AVE-bias scatter with a fitted regression line.

    ``rho`` is the correlation value shown in the panel title; ``xlabel`` and
    ``ylabel`` are only set when given.
    """
    axis.grid(True)
    sns.regplot(x=AVE, y=mcc, ax=axis, color="red",
                scatter_kws={"color": "blue", "s": 108},
                ci=None, line_kws={"lw": 3, "color": "red"})
    if xlabel is not None:
        axis.set_xlabel(xlabel, size=26)
    if ylabel is not None:
        axis.set_ylabel(ylabel, size=26)
    axis.set_xticks([0.08, 0.09, 0.10, 0.11, 0.12, 0.13])
    axis.set_ylim(*ylim)
    axis.set_title(model_name + " (" + r"$\rho$" + f" ={rho:.2f})", fontsize=26)
    plt.setp(axis.get_xticklabels(), rotation=30)
    axis.tick_params(labelsize=26)


fig, ax = plt.subplots(2, 3, figsize=(28, 14), gridspec_kw={'width_ratios': [2, 1, 1]})

im1 = ax[0, 0].imshow(data1, cmap="coolwarm")
im2 = ax[1, 0].imshow(data2, cmap="BuGn")

cbar1 = _add_top_colorbar(ax[0, 0], im1, "Bias value",
                          [-0.09, -0.05, 0.00, 0.05, 0.10, 0.15, 0.19])
cbar2 = _add_top_colorbar(ax[1, 0], im2, "MCC",
                          [0.00, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.75])

# Bias heatmap: cells whose value lies above zero on the colour scale get
# black text, all others get "snow" text.
_style_heatmap(ax[0, 0], data1, targets, bias)
zero_level = im1.norm(0)
texts = _annotate_heatmap(
    im1, data1,
    color_for=lambda value: "black" if im1.norm(value) > zero_level else "snow",
)

# Score heatmap: annotations use the default text colour.
_style_heatmap(ax[1, 0], data2, targets, ml)
texts_2 = _annotate_heatmap(im2, data2)

regression_panels = [
    # (axes, scores, correlation key, title prefix, y-limits, x-label, y-label)
    (ax[0, 1], KNN, "KNN", "1-NN", (0.00, 0.12), None, "MCC"),
    (ax[0, 2], LR, "LR", "LR", (0.2, 0.7), None, None),
    (ax[1, 1], RF, "RF", "RF", (-0.05, 0.35), "AVE bias", "MCC"),
    (ax[1, 2], SVM, "SVM", "SVM", (0.20, 0.80), "AVE bias", None),
]
for panel_ax, mcc_values, corr_key, model_name, y_limits, x_label, y_label in regression_panels:
    _plot_ave_regression(panel_ax, mcc_values, model_name,
                         df_ave_corr_syn[corr_key], y_limits,
                         xlabel=x_label, ylabel=y_label)

fig.tight_layout()
fig.savefig("ML_MUBDsyn.pdf", transparent=True,  bbox_inches='tight')

In [None]:
############SI############
# Supplementary figure: per-model score against the AA-AD values (columns 0-1,
# maroon) and the DD-DA values (columns 2-3, teal), one scatter panel per
# model. The result is written to "ML_MUBDsyn_SI.pdf".


def _plot_mcc_scatter(axis, x, y, title, color, xticks, ylim, xlabel=None, ylabel=None):
    """Draw one scatter panel with fixed x-ticks, y-limits and a title.

    ``xlabel`` (placed slightly below its default position) and ``ylabel``
    are only set when given.
    """
    axis.grid(True)
    # Each panel is drawn by exactly one scatterplot call so that all eight
    # panels share identical marker styling.
    sns.scatterplot(x=x, y=y, ax=axis, color=color, s=108)
    if xlabel is not None:
        axis.set_xlabel(xlabel, size=26)
        axis.xaxis.set_label_coords(0.5, -0.2)
    if ylabel is not None:
        axis.set_ylabel(ylabel, size=26)
    axis.set_xticks(xticks)
    axis.set_ylim(*ylim)
    axis.set_title(title, fontsize=26)
    plt.setp(axis.get_xticklabels(), rotation=30)
    axis.tick_params(labelsize=26)


fig, ax = plt.subplots(2, 4, figsize=(28, 14))

si_models = [
    # (grid row, column offset, scores, correlation key, title prefix, y-limits)
    (0, 0, KNN, "KNN", "1-NN", (0.00, 0.12)),
    (0, 1, LR, "LR", "LR", (0.2, 0.7)),
    (1, 0, RF, "RF", "RF", (-0.05, 0.35)),
    (1, 1, SVM, "SVM", "SVM", (0.20, 0.80)),
]
si_metrics = [
    # (first grid column, x values, correlations, x-label, colour, x-ticks, label y-axis)
    (0, AAAD, df_aaad_corr_syn, "AA-AD", "maroon", [-0.11, -0.08, -0.05, -0.02], True),
    (2, DDDA, df_ddda_corr_syn, "DD-DA", "teal", [0.15, 0.18, 0.21], False),
]

for first_col, bias_values, bias_corr, bias_label, point_color, x_ticks, label_y in si_metrics:
    for grid_row, col_offset, mcc_values, corr_key, model_name, y_limits in si_models:
        _plot_mcc_scatter(
            ax[grid_row, first_col + col_offset],
            bias_values,
            mcc_values,
            model_name + " (" + r"$\rho$" + f" ={bias_corr[corr_key]:.2f})",
            point_color,
            x_ticks,
            y_limits,
            # Only the bottom row carries an x-label.
            xlabel=bias_label if grid_row == 1 else None,
            # Only the left-most column of the figure carries the y-label.
            ylabel="MCC" if (label_y and col_offset == 0) else None,
        )

fig.tight_layout()
fig.savefig("ML_MUBDsyn_SI.pdf", transparent=True, bbox_inches='tight')