In [None]:
# Read the judge results CSV into `df`, the frame the later cells plot from.
import pandas as pd

df = pd.read_csv('out/judge_results.csv')
# Trailing expression: the notebook renders a preview of the first 5 rows.
df.head(5)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Score zones as [lower, upper] bounds; the violin cells shade these bands.
disagree_range = [-1.0, -0.5]
uncertain_range = [-0.5, 0.5]
agree_range = [0.5, 1.0]

# Matplotlib rc overrides for the FT-style theme.
ft_rc = {
    "axes.facecolor": "#f5f5f5",   # very light grey plot background
    "axes.edgecolor": "#d6d6d6",
    "axes.labelcolor": "#333333",
    "grid.color": "#d6d6d6",
    "xtick.color": "#333333",
    "ytick.color": "#333333",
    "font.family": "sans-serif",
    # Font fallback chain, tried in this order.
    "font.sans-serif": ["Helvetica", "Arial", "DejaVu Sans"],
}
sns.set_theme(style="whitegrid", rc=ft_rc)

# FT-style color palette, applied after the theme so it becomes the
# active color cycle.
ft_palette = ["#0072ce", "#f58220", "#6c6f70", "#c4d600", "#9b0056"]
sns.set_palette(ft_palette)

In [None]:
import plotly.express as px
import pandas as pd

# Keep a single row per distinct `fact` value (the first occurrence wins).
df_unique = df.drop_duplicates(subset=["fact"])

# Number of de-duplicated facts for each (type, name) pair, largest first.
facts_per_group = df_unique.groupby(["type", "name"]).size()
counts = facts_per_group.reset_index(name="count").sort_values(
    "count", ascending=False
)

# Human-readable labels for the columns shown on the chart.
bar_labels = {
    "name": "Entity Name",
    "count": "Unique Fact Count",
    "type": "Entity Type",
}

# Interactive grouped bar chart: one bar per name, colored by type.
fig = px.bar(
    counts,
    x="name",
    y="count",
    color="type",
    barmode="group",
    title="Unique Fact Counts by Entity Name and Type",
    labels=bar_labels,
)

# Styling: fixed canvas size plus a deep bottom margin for the rotated
# name tick labels.
bar_layout = dict(
    width=1000,
    height=600,
    template="plotly_white",
    xaxis_title="Entity Name",
    yaxis_title="Unique Fact Count",
    legend_title_text="Entity Type",
    margin=dict(l=60, r=40, t=80, b=150),
)
fig.update_layout(**bar_layout)
fig.update_xaxes(tickangle=45, tickfont=dict(size=9))

fig.show()



In [None]:
import plotly.graph_objects as go

# Kernel density estimate (KDE) of `score` for each entity type, shown as
# filled Plotly curves. seaborn is used only to compute each curve: it is
# drawn on a throwaway matplotlib figure, the line coordinates are read
# back, and the figure is closed without being displayed.
fig = go.Figure()

# groupby(sort=False) visits the types in first-appearance order and leaves
# out rows whose type is missing.
for type_name, type_rows in df.groupby("type", sort=False):
    scratch_fig, scratch_ax = plt.subplots()
    sns.kdeplot(type_rows["score"], bw_adjust=0.8, ax=scratch_ax)
    density_lines = scratch_ax.get_lines()
    plt.close(scratch_fig)  # close the scratch figure

    if not density_lines:
        # seaborn draws nothing when it cannot estimate a density (e.g. a
        # group with a single or constant score); indexing [0] would raise
        # IndexError, so skip the group instead.
        print(f"Skipping {type_name!r}: no density estimate could be drawn")
        continue

    xs, ys = density_lines[0].get_data()
    fig.add_trace(go.Scatter(x=xs, y=ys, mode="lines", name=type_name, fill="tozeroy"))

fig.update_layout(
    width=1000,
    height=600,
    title="KDE of Scores by Type",
    xaxis_title="Score",
    yaxis_title="Density",
    template="plotly_white"
)
fig.show()



In [None]:

# KDE of `score` for every node name, shown as filled Plotly curves.
# seaborn only computes each curve on a throwaway matplotlib figure; the
# line coordinates are read back and the figure is closed undisplayed.
df_nodes = df[df["type"] == "node"].copy()
fig = go.Figure()

# groupby(sort=False) visits names in first-appearance order and leaves out
# rows whose name is missing.
for node_name, node_rows in df_nodes.groupby("name", sort=False):
    scratch_fig, scratch_ax = plt.subplots()
    sns.kdeplot(node_rows["score"], bw_adjust=0.8, ax=scratch_ax)
    density_lines = scratch_ax.get_lines()
    plt.close(scratch_fig)  # close the scratch figure

    if not density_lines:
        # seaborn draws nothing when it cannot estimate a density (e.g. a
        # name with a single or constant score); indexing [0] would raise
        # IndexError, so skip the name instead.
        print(f"Skipping {node_name!r}: no density estimate could be drawn")
        continue

    xs, ys = density_lines[0].get_data()
    fig.add_trace(go.Scatter(x=xs, y=ys, mode="lines", name=node_name, fill="tozeroy"))

fig.update_layout(
    width=1000,
    height=600,
    title="KDE of Scores per Node Name",
    xaxis_title="Score",
    yaxis_title="Density",
    template="plotly_white",
    legend_title_text="Name"
)
fig.show()


In [None]:

# KDE of `score` for every relation name, shown as filled Plotly curves.
# seaborn only computes each curve on a throwaway matplotlib figure; the
# line coordinates are read back and the figure is closed undisplayed.
# NOTE: despite its name, `df_nodes` holds the *relation* rows in this cell;
# the variable name is kept unchanged.
df_nodes = df[df["type"] == "relation"].copy()
fig = go.Figure()

# groupby(sort=False) visits names in first-appearance order and leaves out
# rows whose name is missing.
for relation_name, relation_rows in df_nodes.groupby("name", sort=False):
    scratch_fig, scratch_ax = plt.subplots()
    sns.kdeplot(relation_rows["score"], bw_adjust=0.8, ax=scratch_ax)
    density_lines = scratch_ax.get_lines()
    plt.close(scratch_fig)  # close the scratch figure

    if not density_lines:
        # seaborn draws nothing when it cannot estimate a density (e.g. a
        # name with a single or constant score); indexing [0] would raise
        # IndexError, so skip the name instead.
        print(f"Skipping {relation_name!r}: no density estimate could be drawn")
        continue

    xs, ys = density_lines[0].get_data()
    fig.add_trace(go.Scatter(x=xs, y=ys, mode="lines", name=relation_name, fill="tozeroy"))

fig.update_layout(
    width=1000,
    height=600,
    title="KDE of Scores per Relation Name",
    xaxis_title="Score",
    yaxis_title="Density",
    template="plotly_white",
    legend_title_text="Name"
)
fig.show()

In [None]:
# Violin plot of the score distribution for each entity type, drawn over
# shaded horizontal bands for the agree / uncertain / disagree score zones.
fig = go.Figure()

# One violin per type. groupby(sort=False) keeps first-appearance order and
# leaves out rows whose type is missing.
for type_name, type_rows in df.groupby("type", sort=False):
    fig.add_trace(go.Violin(
        x=type_rows["type"],
        y=type_rows["score"],
        name=type_name,
        box_visible=True,
        meanline_visible=True,
        points=False
    ))

# (label, [y0, y1], color) for each score zone.
bands = [
    ("Agree", agree_range, "green"),
    ("Uncertain", uncertain_range, "orange"),
    ("Disagree", disagree_range, "red"),
]

for label, (y0, y1), color in bands:
    # Shaded band spanning the full plot width (x in paper coordinates).
    fig.add_shape(
        type="rect",
        xref="paper", x0=0, x1=1,
        y0=y0, y1=y1,
        fillcolor=color, opacity=0.1, layer="below", line_width=0
    )
    # Band label just outside the right plot edge. xref="paper" is required:
    # without it x is read on the categorical x axis, where a number is a
    # category position, so the label would sit on top of a violin.
    fig.add_annotation(
        xref="paper", x=1.01, xanchor="left",
        y=(y0 + y1) / 2,
        text=label, showarrow=False, font_color=color
    )

# Layout styling
fig.update_layout(
    width=1000,
    height=600,
    title="Score Variability by Type (with Uncertainty Bands)",
    xaxis_title="Entity Type",
    yaxis_title="Score",
    template="plotly_white",
    # Legend moved further right so it does not cover the band labels.
    legend=dict(title=dict(text="Type"), x=1.12, xanchor="left")
)

fig.show()



In [None]:
# Violin plot of the score distribution for every node name, drawn over
# shaded horizontal bands for the agree / uncertain / disagree score zones.

# Filter for nodes
df_node = df[df["type"] == "node"].copy()

fig = go.Figure()

# One violin per node name. groupby(sort=False) keeps first-appearance order
# and leaves out rows whose name is missing.
for node_name, node_rows in df_node.groupby("name", sort=False):
    fig.add_trace(go.Violin(
        x=[node_name] * len(node_rows),
        y=node_rows["score"],
        name=node_name,
        box_visible=True,
        meanline_visible=True,
        points=False,
    ))

# (label, [y0, y1], color) for each score zone.
bands = [
    ("Agree", agree_range, "green"),
    ("Uncertain", uncertain_range, "orange"),
    ("Disagree", disagree_range, "red"),
]

for label, (y0, y1), color in bands:
    # Shaded band spanning the full plot width (x in paper coordinates).
    fig.add_shape(
        type="rect",
        xref="paper", x0=0, x1=1,
        y0=y0, y1=y1,
        fillcolor=color,
        opacity=0.1,
        layer="below",
        line_width=0
    )
    # Band label just outside the right plot edge. xref="paper" is required:
    # without it x is read on the categorical x axis, where a number is a
    # category position, so the label would sit on top of a violin.
    fig.add_annotation(
        xref="paper", x=1.01, xanchor="left",
        y=(y0 + y1) / 2,
        text=label,
        showarrow=False,
        font_color=color
    )

# Update layout styling. The title names the node subset so this figure can
# be told apart from the relation version.
fig.update_layout(
    width=1000,
    height=600,
    title="Score Variability by Node Name (with Uncertainty Bands)",
    xaxis_title="Entity Name",
    yaxis_title="Score",
    template="plotly_white",
    # Legend moved further right so it does not cover the band labels.
    legend=dict(title=dict(text="Entity Name"), x=1.12, xanchor="left"),
    xaxis=dict(tickangle=45),
    margin=dict(l=60, r=100, t=80, b=150)
)

fig.show()


In [None]:
# Violin plot of the score distribution for every relation name, drawn over
# shaded horizontal bands for the agree / uncertain / disagree score zones.

# Filter for relations.
# NOTE: despite its name, `df_node` holds the *relation* rows in this cell;
# the variable name is kept unchanged.
df_node = df[df["type"] == "relation"].copy()

fig = go.Figure()

# One violin per relation name. groupby(sort=False) keeps first-appearance
# order and leaves out rows whose name is missing.
for relation_name, relation_rows in df_node.groupby("name", sort=False):
    fig.add_trace(go.Violin(
        x=[relation_name] * len(relation_rows),
        y=relation_rows["score"],
        name=relation_name,
        box_visible=True,
        meanline_visible=True,
        points=False,
    ))

# (label, [y0, y1], color) for each score zone.
bands = [
    ("Agree", agree_range, "green"),
    ("Uncertain", uncertain_range, "orange"),
    ("Disagree", disagree_range, "red"),
]

for label, (y0, y1), color in bands:
    # Shaded band spanning the full plot width (x in paper coordinates).
    fig.add_shape(
        type="rect",
        xref="paper", x0=0, x1=1,
        y0=y0, y1=y1,
        fillcolor=color,
        opacity=0.1,
        layer="below",
        line_width=0
    )
    # Band label just outside the right plot edge. xref="paper" is required:
    # without it x is read on the categorical x axis, where a number is a
    # category position, so the label would sit on top of a violin.
    fig.add_annotation(
        xref="paper", x=1.01, xanchor="left",
        y=(y0 + y1) / 2,
        text=label,
        showarrow=False,
        font_color=color
    )

# Update layout styling. The title names the relation subset so this figure
# can be told apart from the node version.
fig.update_layout(
    width=1000,
    height=600,
    title="Score Variability by Relation Name (with Uncertainty Bands)",
    xaxis_title="Entity Name",
    yaxis_title="Score",
    template="plotly_white",
    # Legend moved further right so it does not cover the band labels.
    legend=dict(title=dict(text="Entity Name"), x=1.12, xanchor="left"),
    xaxis=dict(tickangle=45),
    margin=dict(l=60, r=100, t=80, b=150)
)

fig.show()


In [None]:
import numpy as np
from plotly import express as px

# Acceptance curves: for each entity type, the share of rows whose score is
# at or above a threshold, traced as the threshold sweeps from 0 to 1.

# Define thresholds
thresholds = np.linspace(0.0, 1.0, 100)

# Split the scores by type once; the grouping does not depend on the
# threshold, so it is not repeated inside the threshold loop.
scores_by_type = {
    type_name: type_rows["score"] for type_name, type_rows in df.groupby("type")
}

# One record per (threshold, type); the ratio is the mean of the boolean
# mask `score >= threshold`.
records = [
    {"threshold": t, "type": type_name, "acceptance_ratio": (scores >= t).mean()}
    for t in thresholds
    for type_name, scores in scores_by_type.items()
]

curves = pd.DataFrame(records)

# Interactive Plotly line plot
fig = px.line(
    curves,
    x="threshold",
    y="acceptance_ratio",
    color="type",
    line_group="type",
    title="Acceptance Curves by Entity Type",
    labels={
        "threshold": "Score Threshold",
        "acceptance_ratio": "Acceptance Ratio",
        "type": "Entity Type"
    }
)

# Style adjustments; the y axis is pinned to [0, 1] because it shows a ratio.
fig.update_layout(
    width=1000,
    height=600,
    template="plotly_white",
    yaxis=dict(range=[0, 1], title="Acceptance Ratio"),
    xaxis=dict(title="Score Threshold"),
    legend_title_text="Entity Type",
    margin=dict(l=60, r=60, t=80, b=60),
)
fig.update_traces(line=dict(width=2))
fig.show()



In [None]:
# Acceptance curves for nodes: for each node name, the share of rows whose
# score is at or above a threshold, as the threshold sweeps from 0 to 1.

# Define thresholds
thresholds = np.linspace(0.0, 1.0, 100)

# Filter to nodes
df_node = df[df["type"] == "node"].copy()

# Split the scores by name once; the grouping does not depend on the
# threshold, so it is not repeated inside the threshold loop.
scores_by_name = {
    node_name: node_rows["score"] for node_name, node_rows in df_node.groupby("name")
}

# One record per (threshold, name); the ratio is the mean of the boolean
# mask `score >= threshold`.
records = [
    {"threshold": t, "name": node_name, "acceptance_ratio": (scores >= t).mean()}
    for t in thresholds
    for node_name, scores in scores_by_name.items()
]

curves = pd.DataFrame(records)

# Interactive line plot. The title names the node subset so this figure can
# be told apart from the relation version.
fig = px.line(
    curves,
    x="threshold",
    y="acceptance_ratio",
    color="name",
    line_group="name",
    title="Acceptance Curves by Node Name",
    labels={
        "threshold": "Score Threshold",
        "acceptance_ratio": "Acceptance Ratio",
        "name": "Entity Name"
    }
)

# Style adjustments; the y axis is pinned to [0, 1] because it shows a ratio.
fig.update_layout(
    width=1000,
    height=600,
    template="plotly_white",
    yaxis=dict(range=[0, 1], title="Acceptance Ratio"),
    xaxis=dict(title="Score Threshold"),
    legend_title_text="Entity Name",
    margin=dict(l=60, r=60, t=80, b=100),
)
fig.update_traces(line=dict(width=2))

fig.show()


In [None]:
# Acceptance curves for relations: for each relation name, the share of rows
# whose score is at or above a threshold, as the threshold sweeps 0 to 1.

# Define thresholds
thresholds = np.linspace(0.0, 1.0, 100)

# Filter to relations.
# NOTE: despite its name, `df_node` holds the *relation* rows in this cell;
# the variable name is kept unchanged.
df_node = df[df["type"] == "relation"].copy()

# Split the scores by name once; the grouping does not depend on the
# threshold, so it is not repeated inside the threshold loop.
scores_by_name = {
    relation_name: relation_rows["score"]
    for relation_name, relation_rows in df_node.groupby("name")
}

# One record per (threshold, name); the ratio is the mean of the boolean
# mask `score >= threshold`.
records = [
    {"threshold": t, "name": relation_name, "acceptance_ratio": (scores >= t).mean()}
    for t in thresholds
    for relation_name, scores in scores_by_name.items()
]

curves = pd.DataFrame(records)

# Interactive line plot. The title names the relation subset so this figure
# can be told apart from the node version.
fig = px.line(
    curves,
    x="threshold",
    y="acceptance_ratio",
    color="name",
    line_group="name",
    title="Acceptance Curves by Relation Name",
    labels={
        "threshold": "Score Threshold",
        "acceptance_ratio": "Acceptance Ratio",
        "name": "Entity Name"
    }
)

# Style adjustments; the y axis is pinned to [0, 1] because it shows a ratio.
fig.update_layout(
    width=1000,
    height=600,
    template="plotly_white",
    yaxis=dict(range=[0, 1], title="Acceptance Ratio"),
    xaxis=dict(title="Score Threshold"),
    legend_title_text="Entity Name",
    margin=dict(l=60, r=60, t=80, b=100),
)
fig.update_traces(line=dict(width=2))

fig.show()


In [None]:
from itertools import combinations

# Collect every unordered pair of distinct names that share an `evidence`
# value, once per evidence group. Names are sorted first, so each pair is
# always emitted in the same (a, b) order.
pairs = [
    name_pair
    for _, evidence_rows in df.groupby("evidence")
    for name_pair in combinations(sorted(set(evidence_rows["name"].dropna())), 2)
]

co = pd.DataFrame(pairs, columns=["a", "b"])

if not co.empty:
    # Symmetric counts: stack the pairs on their mirror image (a and b
    # swapped) so every pair is counted in both directions.
    mirrored = co.rename(columns={"a": "b", "b": "a"})
    counts = (
        pd.concat([co, mirrored])
        .value_counts(["a", "b"])
        .rename("w")
        .reset_index()
    )

    # Top-N names by total co-occurrence weight, summed over both the "a"
    # and the "b" side of the pair table.
    weight_as_a = counts.groupby("a")["w"].sum()
    weight_as_b = counts.groupby("b")["w"].sum()
    total_weight = weight_as_a.add(weight_as_b, fill_value=0)
    top_nodes = total_weight.sort_values(ascending=False).head(30).index  # adjust N as needed

    # Keep only pairs whose two members are both among the top names.
    both_in_top = counts["a"].isin(top_nodes) & counts["b"].isin(top_nodes)
    sub = counts[both_in_top]

    # Square matrix with rows and columns in the same alphabetical order;
    # pairs that never co-occur are filled with 0.
    order = sorted(top_nodes)
    mat = sub.pivot_table(index="a", columns="b", values="w", fill_value=0)
    mat = mat.reindex(index=order, columns=order, fill_value=0)

    # Plotly heatmap
    fig = px.imshow(
        mat.values,
        x=mat.columns,
        y=mat.index,
        color_continuous_scale="Blues",
        labels=dict(color="Co-occurrences"),
        aspect="auto",
        title="Entity Co-occurrence Heatmap (by Evidence)",
    )

    # Hover text showing the pair and its count.
    fig.update_traces(
        hovertemplate="Pair: %{y} × %{x}<br>Co-occurrences: %{z}<extra></extra>"
    )

    # Layout polish (size, axes, margins)
    heatmap_layout = dict(
        width=1000, height=650, template="plotly_white",
        xaxis_title="Name", yaxis_title="Name",
        margin=dict(l=80, r=40, t=80, b=120),
    )
    fig.update_layout(**heatmap_layout)
    fig.update_xaxes(tickangle=45)
    fig.update_yaxes(autorange="reversed")  # first row at the top, like a seaborn heatmap

    fig.show()
else:
    print("No co-occurrences found.")



In [None]:
# Work on a private copy of the node rows so the new column does not touch `df`.
dfn = df[df['type'] == 'node'].copy()

# Seven bin edges define six score bands; include_lowest=True puts a score
# of exactly -1.0 into the first band.
bins = [-1.0, -0.5, -0.25, 0.0, 0.25, 0.5, 1.0]
labels = ["Very Low", "Low", "Uncertain (−)", "Uncertain (+)", "Moderate High", "Very High"]
dfn["score_band"] = pd.cut(dfn["score"], bins=bins, labels=labels, include_lowest=True)

# Names ordered by ascending mean score.
mean_score_by_name = dfn.groupby("name")["score"].mean()
order = list(mean_score_by_name.sort_values().index)

# One color per band, running from deep red (lowest) to deep green (highest).
band_colors = dict(zip(
    labels,
    ["#d73027", "#fc8d59", "#fee08b", "#d9ef8b", "#66bd63", "#1a9850"],
))

# Columns shown (True / format string) or hidden (False) in the hover box.
hover_columns = {
    "name": True, "type": False,
    "score": ':.3f', "accepted": True,
    "fact": True, "evidence": False
}

# Strip plot: one point per row, score on x, name on y, colored by band.
fig = px.strip(
    dfn,
    x="score",
    y="name",
    color="score_band",
    category_orders={"name": order, "score_band": labels},  # lock legend order
    hover_data=hover_columns,
    stripmode="overlay",
    color_discrete_map=band_colors,
)

# Reference lines: dashed at zero, dotted at the +/-0.5 marks.
for x_pos, dash_style, line_color in [
    (0.0, "dash", "black"),
    (0.5, "dot", "gray"),
    (-0.5, "dot", "gray"),
]:
    fig.add_vline(x=x_pos, line_dash=dash_style, line_color=line_color)

# Styling
fig.update_traces(marker_size=6, opacity=0.75)
strip_layout = dict(
    title="Best and Worst Scored Node Categories (Colored by Score Band)",
    xaxis_title="Score (−1 → +1)",
    yaxis_title="Category (name)",
    xaxis=dict(range=[-1, 1], zeroline=False),
    height=650,
    legend_title_text="Score Band",
)
fig.update_layout(**strip_layout)

fig.show()

In [None]:
# Work on a private copy of the relation rows so the new column does not touch `df`.
dfn = df[df['type'] == 'relation'].copy()

# Seven bin edges define six score bands; include_lowest=True puts a score
# of exactly -1.0 into the first band.
bins = [-1.0, -0.5, -0.25, 0.0, 0.25, 0.5, 1.0]
labels = ["Very Low", "Low", "Uncertain (−)", "Uncertain (+)", "Moderate High", "Very High"]
dfn["score_band"] = pd.cut(dfn["score"], bins=bins, labels=labels, include_lowest=True)

# Names ordered by ascending mean score.
mean_score_by_name = dfn.groupby("name")["score"].mean()
order = list(mean_score_by_name.sort_values().index)

# One color per band, running from deep red (lowest) to deep green (highest).
band_colors = dict(zip(
    labels,
    ["#d73027", "#fc8d59", "#fee08b", "#d9ef8b", "#66bd63", "#1a9850"],
))

# Columns shown (True / format string) or hidden (False) in the hover box.
hover_columns = {
    "name": True, "type": False,
    "score": ':.3f', "accepted": True,
    "fact": True, "evidence": False
}

# Strip plot: one point per row, score on x, name on y, colored by band.
fig = px.strip(
    dfn,
    x="score",
    y="name",
    color="score_band",
    category_orders={"name": order, "score_band": labels},  # lock legend order
    hover_data=hover_columns,
    stripmode="overlay",
    color_discrete_map=band_colors,
)

# Reference lines: dashed at zero, dotted at the +/-0.5 marks.
for x_pos, dash_style, line_color in [
    (0.0, "dash", "black"),
    (0.5, "dot", "gray"),
    (-0.5, "dot", "gray"),
]:
    fig.add_vline(x=x_pos, line_dash=dash_style, line_color=line_color)

# Styling
fig.update_traces(marker_size=6, opacity=0.75)
strip_layout = dict(
    title="Best and Worst Scored Relation Categories (Colored by Score Band)",
    xaxis_title="Score (−1 → +1)",
    yaxis_title="Category (name)",
    xaxis=dict(range=[-1, 1], zeroline=False),
    height=650,
    legend_title_text="Score Band",
)
fig.update_layout(**strip_layout)

fig.show()