In [10]:
from semnet import SemanticNetwork
from sentence_transformers import SentenceTransformer
import networkx as nx

# Example corpus: each string becomes one node in the semantic network
docs = [
    "The cat sat on the mat",
    "A cat was sitting on a mat",
    "The dog ran in the park",
    "I love Python",
    "Python is a great programming language",
]

# Embed the corpus. Any embedding provider can be used here; this example
# encodes the documents with a sentence-transformers model.
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
embeddings = embedding_model.encode(docs)

# Configure the semantic network. Larger threshold values give sparser networks.
NETWORK_THRESH = 0.3
sem = SemanticNetwork(thresh=NETWORK_THRESH, verbose=True)

# Fit on the embeddings and get back the graph, using the raw documents as node labels
G = sem.fit_transform(embeddings, labels=docs)

# Headline statistics for the graph
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
component_count = nx.number_connected_components(G)
print(f"Nodes: {node_count}")
print(f"Edges: {edge_count}")
print(f"Connected components: {component_count}")

# Each connected component with more than one node is a group of similar documents;
# isolated documents are skipped.
for component in nx.connected_components(G):
    if len(component) <= 1:
        continue
    similar_docs = [G.nodes[member]["label"] for member in component]
    print(f"Similar documents: {similar_docs}")

# Compute a centrality measure for every document.
# Degree centrality is not very interesting for this small example,
# but it is shown here to demonstrate the workflow.
centrality = nx.degree_centrality(G)
for doc_id, score in centrality.items():
    print(f"Document: {G.nodes[doc_id]['label']}, Degree Centrality: {score:.4f}")

# Attach the scores to the graph as a "degree_centrality" node attribute
nx.set_node_attributes(G, centrality, "degree_centrality")

# Export the graph to pandas as a node table and an edge table
nodes_df, edges_df = sem.to_pandas(G)

Adding embeddings to index:   0%|          | 0/5 [00:00<?, ?it/s]

Finding similarities:   0%|          | 0/5 [00:00<?, ?it/s]

Nodes: 5
Edges: 2
Connected components: 3
Similar documents: ['The cat sat on the mat', 'A cat was sitting on a mat']
Similar documents: ['I love Python', 'Python is a great programming language']
Document: The cat sat on the mat, Degree Centrality: 0.2500
Document: A cat was sitting on a mat, Degree Centrality: 0.2500
Document: The dog ran in the park, Degree Centrality: 0.0000
Document: I love Python, Degree Centrality: 0.2500
Document: Python is a great programming language, Degree Centrality: 0.2500


In [8]:
# Display the node table returned by sem.to_pandas(G).
# NOTE(review): the saved output under this cell looks stale. Its labels and its
# betweenness_centrality column do not match the example cell above, and the
# execution counts are out of order. Restart the kernel and run all cells to refresh it.
nodes_df

Unnamed: 0,label,id,betweenness_centrality
0,The cat sat on the mat,0,0.0
1,A cat was sitting on a mat,1,0.0
2,The dog ran in the park,2,0.0
3,Python is great for ML,3,0.0
4,Machine learning with Python,4,0.0
