## Comparison of models

In [1]:
import plotly.express as px
import pandas as pd

# Load the model-comparison results from the "Cleaned" sheet. The code below
# expects a "Metrics" column holding the metric names; every other column is
# treated as one experiment group.
# NOTE(review): the workbook path is hard-coded to a home-directory location —
# adjust it (or move it into a config constant) when running elsewhere.
data = pd.read_excel(
    "~/Documents/NetworkMedicine/BioMedGPS/model-results.xlsx", sheet_name="Cleaned"
)

# Draw one bar chart per metric row.
# Walking the rows directly (instead of re-filtering `data` by metric name on
# every iteration) plots each row exactly once even when a metric name is
# repeated, and rows without a metric name are skipped rather than raising an
# IndexError on an empty selection.
for _, row in data.dropna(subset=["Metrics"]).iterrows():
    metric = row["Metrics"]

    # Pick the experiment-group values by dropping the "Metrics" label by name,
    # so the result does not depend on "Metrics" being the first column.
    group_values = row.drop("Metrics")

    # Long-form frame for Plotly: one record per experiment group.
    plot_data = pd.DataFrame(
        {
            "Experiment Group": group_values.index,
            "Value": group_values.values,
        }
    )

    # One bar per experiment group, titled with the metric name.
    fig = px.bar(plot_data, x="Experiment Group", y="Value", title=metric)
    fig.show()

In [2]:
import plotly.graph_objects as go

# Take the leading five rows, key them by metric name, and discard the "MR"
# row so that four metrics remain for plotting.
subset_data = data.head(5).set_index("Metrics").drop("MR")

# Build the figure with one Bar trace per experiment-group column; the metric
# names (the index) are the x categories.
fig = go.Figure(
    data=[
        go.Bar(x=subset_data.index, y=subset_data[column_name], name=column_name)
        for column_name in subset_data.columns
    ]
)

# Place the traces side by side and label the chart: centered title, axis
# titles, and a legend keyed by experiment group.
fig.update_layout(
    barmode="group",
    title=dict(
        text="Comparison of the First 4 Metrics Across Different Experiment Groups.<br>Formatted means that the entity ids are mapped and aligned to ontology database we integrated and built.<br>DRKG = Formatted + Unformatted.",
        y=0.9,
        x=0.5,
        xanchor="center",
        yanchor="top",
    ),
    xaxis={"title": "Metrics"},
    yaxis={"title": "Value"},
    legend_title_text="Experiment Group",
)

# Render the figure in the notebook.
fig.show()

In [4]:
import plotly.graph_objects as go

# Key every row of the results table by its metric name.
subset_data = data.set_index("Metrics")

# Drop the ranking metrics, leaving only the remaining metrics for this chart.
# A single drop() with a list replaces the five chained calls; like the chain,
# it raises KeyError if any of the labels is absent.
subset_data = subset_data.drop(["MRR", "MR", "Hits@1", "Hits@5", "Hits@10"])

# Transpose so experiment groups become the rows (x axis) and each remaining
# metric becomes a column (one bar series per metric).
subset_data = subset_data.T

# Create a figure
fig = go.Figure()

# Add one bar series per metric, with the experiment groups as x categories.
for metric in subset_data.columns:
    fig.add_trace(go.Bar(x=subset_data.index, y=subset_data[metric], name=metric))

# Update the layout for a grouped bar chart.
# The title names what is actually plotted here: every metric except the five
# ranking metrics dropped above (the earlier "First 4 Metrics" wording did not
# describe this chart).
fig.update_layout(
    barmode="group",
    title={
        "text": "Comparison of the Remaining Metrics (excluding MRR, MR, Hits@1, Hits@5, Hits@10) Across Different Experiment Groups.<br>Formatted means that the entity ids are mapped and aligned to ontology database we integrated and built.<br>DRKG = Formatted + Unformatted.",
        "y": 0.9,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    },
    xaxis=dict(title="Experiment Group"),
    yaxis=dict(title="Value"),
    legend_title_text="Metrics",
)

# Show the figure
fig.show()

## [Unfinished] Similarity between embeddings generated by LLM and KGE models

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns
import matplotlib.pyplot as plt

def _parse_embedding(text):
    """Convert a "|"-separated string of numbers into a 1-D float32 array."""
    return np.array(text.split("|"), dtype=np.float32)


def _load_embeddings(path):
    """Read a CSV and replace its "embedding" column with parsed float32 arrays.

    The "embedding" column is expected to hold "|"-separated numbers as text.
    Returns the loaded DataFrame with that column parsed in place.
    """
    frame = pd.read_csv(path)
    frame["embedding"] = frame["embedding"].apply(_parse_embedding)
    return frame


# Load both embedding tables through the same helper so the two inputs are
# parsed identically. Each file needs an "embedding" column (parsed above) and
# a "node" column (used below to label the heatmap).
df1 = _load_embeddings("embeddings1.csv")
df2 = _load_embeddings("embeddings2.csv")

# Stack the per-row vectors into 2-D matrices, one row per node.
# np.stack raises ValueError if the vectors within a file differ in length.
embeddings_df1 = np.stack(df1["embedding"].values)
embeddings_df2 = np.stack(df2["embedding"].values)

# Pairwise cosine similarity: rows follow df1, columns follow df2.
similarity_matrix = cosine_similarity(embeddings_df1, embeddings_df2)

# Label rows and columns with the node names so the heatmap axes are readable.
similarity_df = pd.DataFrame(similarity_matrix, index=df1["node"], columns=df2["node"])

# Plot the heatmap.
# NOTE(review): annot=True writes a number into every cell, which becomes
# unreadable for large node sets — consider disabling it in that case.
plt.figure(figsize=(10, 8))
sns.heatmap(similarity_df, annot=True, cmap="coolwarm")
plt.title("Heatmap of Cosine Similarity Between Nodes")
plt.show()