In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# kNN alignment scores, transcribed by hand from the paper.
# Outer key: cell-compartment abbreviation; inner key: model the score belongs to.
# Insertion order matters downstream: it fixes the row/column order of the dataframe.
knn_alignment_scores = {
    "Cyt": {"ProstT5": 0.32, "ESM2": 0.34, "Ankh": 0.36, "ProtT5": 0.38},
    "End": {"ProstT5": 0.23, "ESM2": 0.18, "Ankh": 0.23, "ProtT5": 0.26},
    "Ext": {"ProstT5": 0.59, "ESM2": 0.64, "Ankh": 0.68, "ProtT5": 0.77},
    "Gol": {"ProstT5": 0.13, "ESM2": 0.12, "Ankh": 0.14, "ProtT5": 0.19},
    "Lys": {"ProstT5": 0.09, "ESM2": 0.07, "Ankh": 0.08, "ProtT5": 0.11},
    "Mem": {"ProstT5": 0.44, "ESM2": 0.38, "Ankh": 0.48, "ProtT5": 0.54},
    "Mit": {"ProstT5": 0.28, "ESM2": 0.32, "Ankh": 0.30, "ProtT5": 0.46},
    "Nuc": {"ProstT5": 0.60, "ESM2": 0.63, "Ankh": 0.65, "ProtT5": 0.66},
    "Per": {"ProstT5": 0.07, "ESM2": 0.06, "Ankh": 0.09, "ProtT5": 0.09},
    "Pla": {"ProstT5": 0.27, "ESM2": 0.49, "Ankh": 0.31, "ProtT5": 0.70},
}

In [None]:
# Size of each class, keyed by cell-compartment abbreviation.
# NOTE(review): presumably the number of samples per class in the dataset; the
# source of these counts is not stated in this notebook - confirm.
class_sizes = dict(
    Cyt=2180,
    End=689,
    Ext=1580,
    Gol=286,
    Lys=257,
    Mem=1067,
    Mit=1208,
    Nuc=3235,
    Per=124,
    Pla=605,
)

In [None]:
# Accuracy value per cell-compartment abbreviation.
# NOTE(review): presumably the per-class accuracy of a trained classifier; the
# source of these numbers is not stated in this notebook - confirm.
ml_accuracy = {
    "Mem": 0.81,
    "Cyt": 0.81,
    "End": 0.70,
    "Gol": 0.33,
    "Lys": 0.12,
    "Mit": 0.88,
    "Nuc": 0.90,
    "Per": 0.17,
    "Pla": 0.92,
    "Ext": 0.95,
}

In [None]:
# Build one table with a row per compartment: the outer keys of
# knn_alignment_scores become the index and the inner keys become columns.
# class_size and ml_accuracy are then looked up by index label, so the order
# of entries in those two dicts does not matter.
df = pd.DataFrame.from_dict(knn_alignment_scores, orient="index").assign(
    class_size=lambda frame: frame.index.map(class_sizes),
    ml_accuracy=lambda frame: frame.index.map(ml_accuracy),
)

# Last expression of the cell: renders the assembled table.
df

In [None]:
# Scatter plot of class size (x) against ML accuracy (y), one marker per row of df.
# Marker size follows class_size; colour and text label both come from the
# dataframe index, so every compartment gets its own legend entry.
fig = (
    px.scatter(
        df,
        x="class_size",
        y="ml_accuracy",
        size="class_size",
        color=df.index,
        text=df.index,
    )
    # Put the text label above its marker instead of on top of it.
    .update_traces(textposition="top center")
    .update_layout(
        title="Class size vs. ML accuracy",
        xaxis_title="Class size",
        yaxis_title="ML accuracy",
        legend_title="Cell compartment",
        template="plotly_white",
        font={"family": "Arial", "color": "black"},
        width=500,
        height=400,
    )
)

fig.show()

In [None]:
# Scatter plot of class size (x) against the ProtT5 column of df (y), one marker per row.
# Marker size follows class_size; colour and text label both come from the
# dataframe index, so every compartment gets its own legend entry.
fig = (
    px.scatter(
        df,
        x="class_size",
        y="ProtT5",
        size="class_size",
        color=df.index,
        text=df.index,
    )
    # Put the text label above its marker instead of on top of it.
    .update_traces(textposition="top center")
    .update_layout(
        title="Class size vs. ProtT5",
        xaxis_title="Class size",
        yaxis_title="ProtT5",
        legend_title="Cell compartment",
        template="plotly_white",
        font={"family": "Arial", "color": "black"},
        width=500,
        height=400,
    )
)
fig.show()

In [None]:
# Scatter plot of class size (x) against the ProstT5 column of df (y), one marker per row.
# Marker size follows class_size; colour and text label both come from the
# dataframe index, so every compartment gets its own legend entry.
fig = (
    px.scatter(
        df,
        x="class_size",
        y="ProstT5",
        size="class_size",
        color=df.index,
        text=df.index,
    )
    # Put the text label above its marker instead of on top of it.
    .update_traces(textposition="top center")
    .update_layout(
        title="Class size vs. ProstT5",
        xaxis_title="Class size",
        yaxis_title="ProstT5",
        legend_title="Cell compartment",
        template="plotly_white",
        font={"family": "Arial", "color": "black"},
        width=500,
        height=400,
    )
)
fig.show()

In [None]:
# Scatter plot of the ProtT5 column of df (x) against ML accuracy (y), one marker per row.
# Marker size follows class_size; colour and text label both come from the
# dataframe index, so every compartment gets its own legend entry.
fig = (
    px.scatter(
        df,
        x="ProtT5",
        y="ml_accuracy",
        size="class_size",
        color=df.index,
        text=df.index,
    )
    # Put the text label above its marker instead of on top of it.
    .update_traces(textposition="top center")
    .update_layout(
        title="ProtT5 vs. ML accuracy",
        xaxis_title="ProtT5",
        yaxis_title="ML accuracy",
        legend_title="Cell compartment",
        template="plotly_white",
        font={"family": "Arial", "color": "black"},
        width=500,
        height=400,
    )
)

fig.show()

In [None]:
# Scatter plot of the ProstT5 column of df (x) against ML accuracy (y), one marker per row.
# Marker size follows class_size; colour and text label both come from the
# dataframe index, so every compartment gets its own legend entry.
fig = (
    px.scatter(
        df,
        x="ProstT5",
        y="ml_accuracy",
        size="class_size",
        color=df.index,
        text=df.index,
    )
    # Put the text label above its marker instead of on top of it.
    .update_traces(textposition="top center")
    .update_layout(
        title="ProstT5 vs. ML accuracy",
        xaxis_title="ProstT5",
        yaxis_title="ML accuracy",
        legend_title="Cell compartment",
        template="plotly_white",
        font={"family": "Arial", "color": "black"},
        width=500,
        height=400,
    )
)

fig.show()