In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Path (relative to the notebook's working directory) of the Excel workbook loaded below.
fp_s1 = "sup_table_s1.xlsx"

In [None]:
# Load the workbook and normalise its headers:
#   1. whatever the second column is called becomes "Task";
#   2. a column spelled "Fl" (lower-case L) becomes "FI".
# The renames are applied one after the other, in that order.
table_s1 = (
    pd.read_excel(fp_s1)
    .pipe(lambda frame: frame.rename(columns={frame.columns[1]: "Task"}))
    .rename(columns={"Fl": "FI"})
)
table_s1

In [None]:
# convert each of the columns Precision, Recall, FI, and MCC to mean and std separate columns
def split_mean_std(df, col):
    """Split a "mean ± std" string column into two numeric columns.

    Adds ``f"{col}_mean"`` and ``f"{col}_std"`` to ``df`` (the frame is
    modified in place) and returns the same frame so calls can be chained.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to split.
    col : str
        Name of a string column whose cells look like ``"0.81 ± 0.02"``.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with the two new numeric columns added.

    Raises
    ------
    ValueError
        If either part of a cell cannot be parsed as a number.
    """
    plus_minus = "\u00b1"  # the "±" sign
    stray_prefix = "\u00c2"  # "Â": appears before "±" when UTF-8 text was decoded as Latin-1
    # Accept the clean and the mis-decoded separator, and swallow surrounding
    # whitespace. The pattern is longer than one character, so pandas treats it
    # as a regular expression.
    separator = "\\s*" + stray_prefix + "?" + plus_minus + "\\s*"

    # n=1 caps the result at two parts; reindex guarantees both columns exist
    # even when no cell contains a separator (std is then NaN).
    parts = df[col].str.split(separator, n=1, expand=True).reindex(columns=[0, 1])

    df[f"{col}_mean"] = pd.to_numeric(parts[0])
    df[f"{col}_std"] = pd.to_numeric(parts[1])
    return df

# Expand each metric column into its numeric <metric>_mean / <metric>_std pair.
metric_columns = ["Precision", "Recall", "FI", "MCC"]
for metric_name in metric_columns:
    table_s1 = split_mean_std(table_s1, metric_name)

# The original string columns are redundant once they have been split.
table_s1.drop(columns=metric_columns, inplace=True)

table_s1

Data sets: validation (DevSet1014), test (TestSet300), and a new independent
test set (TestSetNew46) of proteins added to BioLiP after November 2019, which is
non-redundant both within itself and with respect to the other two sets.

### Performance on different test sets

In [None]:
# Precision per task: one line per value of the "Set" column, std shown as error bars.

fig = (
    px.line(
        data_frame=table_s1,
        x="Task",
        y="Precision_mean",
        error_y="Precision_std",
        color="Set",
        markers=True,
        title="Precision for Set and task",
    )
    .update_traces(marker={"size": 10})
    .update_layout(template="plotly_white", font=dict(family="Inter", color="black"))
)
fig.show()

In [None]:
# Recall per task: one line per value of the "Set" column, std shown as error bars.

fig = (
    px.line(
        data_frame=table_s1,
        x="Task",
        y="Recall_mean",
        error_y="Recall_std",
        color="Set",
        markers=True,
        title="Recall for Set and task",
    )
    .update_traces(marker={"size": 10})
    .update_layout(template="plotly_white", font=dict(family="Inter", color="black"))
)
fig.show()

In [None]:
# FI per task: one line per value of the "Set" column, std shown as error bars.

fig = (
    px.line(
        data_frame=table_s1,
        x="Task",
        y="FI_mean",
        error_y="FI_std",
        color="Set",
        markers=True,
        title="FI for Set and task",
    )
    .update_traces(marker={"size": 10})
    .update_layout(template="plotly_white", font=dict(family="Inter", color="black"))
)
fig.show()

In [None]:
# MCC per task: one line per value of the "Set" column, std shown as error bars.

fig = (
    px.line(
        data_frame=table_s1,
        x="Task",
        y="MCC_mean",
        error_y="MCC_std",
        color="Set",
        markers=True,
        title="MCC for Set and task",
    )
    .update_traces(marker={"size": 10})
    .update_layout(template="plotly_white", font=dict(family="Inter", color="black"))
)
fig.show()

### Performance for TestSet300

In [None]:
# Dual-axis figure for TestSet300: Precision, Recall and FI are drawn against
# the primary (left) y-axis, MCC against the secondary (right) one.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# One colour per primary-axis metric, paired by position.
colors = ["#9B1B30", "#E6CBA8", "#2A4D2D"]
metrics = ["Precision_mean", "Recall_mean", "FI_mean"]

# Only the TestSet300 rows are plotted; the filter does not depend on the metric.
df_filtered = table_s1[table_s1["Set"] == "TestSet300"]
marker_style = dict(size=10)

for metric, color in zip(metrics, colors):
    fig.add_trace(
        go.Scatter(
            x=df_filtered["Task"],
            y=df_filtered[metric],
            mode="lines+markers",
            name=metric.partition("_")[0],  # legend shows the part before "_"
            marker=marker_style,
            line={"color": color},
        ),
        secondary_y=False,
    )

# MCC uses the same rows but is attached to the secondary y-axis.
df_mcc = table_s1[table_s1["Set"] == "TestSet300"]
fig.add_trace(
    go.Scatter(
        x=df_mcc["Task"],
        y=df_mcc["MCC_mean"],
        mode="lines+markers",
        name="MCC",
        marker=marker_style,
        line={"color": "#3A5E9D"},
    ),
    secondary_y=True,
)

# Figure-level styling.
fig.update_layout(
    title="Performance for TestSet300",
    template="plotly_white",
    font=dict(family="Inter", color="black"),
    legend_title_text="Metrics",
)

# Axis titles; the MCC axis is pinned to [0, 1] and draws no grid lines.
fig.update_yaxes(title_text="Performance", secondary_y=False)
fig.update_yaxes(title_text="MCC", secondary_y=True, range=[0, 1], showgrid=False)

fig.show()

In [None]:
# TestSet300 performance with std error bars: Precision, Recall and FI on the
# primary y-axis, MCC on the secondary one.
#
# Start from a fresh figure. Without this, the traces below would be appended
# to whatever `fig` the previous cell left behind, duplicating every line and
# legend entry (or raising NameError if that cell was never run).
fig = make_subplots(specs=[[{"secondary_y": True}]])

metrics = ["Precision", "Recall", "FI"]
colors = ["#9B1B30", "#E6CBA8", "#2A4D2D"]

# Only the TestSet300 rows are plotted; the filter is the same for every metric,
# so it is computed once rather than inside the loop.
df_filtered = table_s1[table_s1["Set"] == "TestSet300"]

# Add traces for Precision, Recall, and FI (with error bars) on the primary y-axis
for metric, color in zip(metrics, colors):
    fig.add_trace(
        go.Scatter(
            x=df_filtered["Task"],
            y=df_filtered[f"{metric}_mean"],
            mode="lines+markers",
            name=metric,
            marker=dict(size=10),
            line=dict(color=color),
            error_y=dict(
                type="data",
                array=df_filtered[f"{metric}_std"],  # Standard deviation column
                color="grey",
                thickness=1,
            ),
        ),
        secondary_y=False,
    )

# Add MCC with error bars on the secondary y-axis
df_mcc = df_filtered
fig.add_trace(
    go.Scatter(
        x=df_mcc["Task"],
        y=df_mcc["MCC_mean"],
        mode="lines+markers",
        name="MCC",
        marker=dict(size=10),
        line=dict(color="#3A5E9D"),
        error_y=dict(
            type="data",
            array=df_mcc["MCC_std"],  # Standard deviation for MCC
            color="grey",
            thickness=1,
        ),
    ),
    secondary_y=True,
)

# Update layout
fig.update_layout(
    title="Performance for TestSet300",
    template="plotly_white",
    font={"family": "Inter", "color": "black"},
    legend_title_text="Metrics",
)

# Update y-axes; the MCC axis is pinned to [0, 1] and draws no grid lines.
fig.update_yaxes(title_text="Performance", secondary_y=False)
fig.update_yaxes(title_text="MCC", secondary_y=True, range=[0, 1], showgrid=False)

# Show the plot
fig.show()