In [None]:
from sentence_transformers import SentenceTransformer
from semnet import SemanticNetwork
import networkx as nx
import matplotlib.pyplot as plt

# Name variants, grouped per person, that the semantic network should link.
# The flattened order below is the node order used by later cells.
_name_variants = (
    ("Tony Blair", "Anthony Blair", "Sir Tony Blair"),
    ("President Obama", "Barack Obama"),
    ("Donald J. Trump", "Donald Trump", "The Donald"),
    ("Joe Biden", "Joseph Biden"),
    ("Elon Musk",),
)
docs = [name for group in _name_variants for name in group]

# Load the sentence-embedding model and encode every entry of `docs`,
# showing a progress bar while encoding runs.
model_name = "BAAI/bge-base-en-v1.5"
embedding_model = SentenceTransformer(model_name)
embeddings = embedding_model.encode(
    docs,
    show_progress_bar=True,
)

In [None]:
import textwrap

# Draw the same documents as a graph at six similarity thresholds, one panel
# per threshold, so the effect of the threshold on the edges can be compared.
# The axes grid is kept in `axes`; the loop variable `ax` no longer overwrites it.
fig, axes = plt.subplots(2, 3, figsize=(24, 16))
fig.subplots_adjust(hspace=0.3, wspace=0.3)  # Add spacing between subplots

thresholds = [0, 0.1, 0.2, 0.3, 0.5, 0.7]

# Node labels wrapped to narrow lines. They do not depend on the threshold,
# so they are built once. Keys are positions in `docs`.
# NOTE(review): assumes graph nodes are identified by those positions - confirm against semnet.
labels = {
    i: "\n".join(textwrap.wrap(doc, width=10)) for i, doc in enumerate(docs)
}

# Node positions are computed from the first graph and reused for every panel,
# so each document sits in the same place in all subplots.
pos = None

for thresh, ax in zip(thresholds, axes.flatten()):
    semnet = SemanticNetwork(thresh=thresh)
    G = semnet.fit_transform(embeddings, labels=docs)

    # Circular layout, only computed once
    if pos is None:
        pos = nx.circular_layout(G, scale=0.7)  # Make circle smaller

    ax.set_title(f"Threshold: {thresh:.1f}", fontsize=24)

    nx.draw(
        G,
        with_labels=True,
        labels=labels,
        ax=ax,
        pos=pos,
        node_size=0,  # hide node markers; only the boxed labels are shown
        font_size=16,
        bbox=dict(facecolor="white", edgecolor="black", boxstyle="round,pad=0.5"),
        # Line width grows quadratically with the edge weight.
        width=[(edge["weight"] * 5) ** 2 for _, _, edge in G.edges(data=True)],
        edge_color="black",
    )
    ax.set_ylim(-1.05, 1.025)
    ax.set_xlim(-1.05, 1.025)

fig.tight_layout()
# Trailing semicolon keeps the notebook from echoing the Text(...) repr.
fig.suptitle(
    "Similarity threshold impact on document relationships",
    fontsize=34,
    fontweight="bold",
    y=1.05,
);