In [1]:
from stream_router import StreamRouter

# Import training updates and test prompts from the external file.
from ideation import TRAIN_UPDATES, TEST_PROMPTS

# Roster of agents the router chooses between (five entries).
agents = [
    "HR Agent",
    "Code Generation Agent",
    "Web Search Agent",
    "Customer Service Agent",
    "Database Agent",
]
# Alternative roster kept for experimentation:
# agents = ["Math Agent", "Coding Agent", "HR Agent", "General", "Deep Research", "Image Gen", "GPT 4o", "GPT o3-mini-high", "Chemistry RAG"]

# Build the router over that roster.
# NOTE(review): alpha=-0.0 compares equal to 0.0; the sign is kept exactly as written — confirm it is intentional.
router = StreamRouter(
    agents,
    alpha=-0.0,
    embedding_dim=8,
    learning_rate=0.1,
    min_samples=8,
    model_name="Alibaba-NLP/gte-large-en-v1.5",
)

# Feed every labelled (prompt, agent) pair to the router, one update per pair.
for train_prompt, target_agent in TRAIN_UPDATES:
    router.update(train_prompt, target_agent)
print(f"Total clusters formed after training update: {len(router.clusters)}")

# Debug: uncomment to print cluster details.
# router.debug_clusters()

# Run inference on each imported test prompt and report the chosen agent.
print("\nTest Inference Results:")
for i, prompt in enumerate(TEST_PROMPTS, 1):
    predicted_agent = router.inference(prompt)
    print(f"\nTest Prompt {i}: \"{prompt}\"")
    print(f"Predicted Agent: {predicted_agent}")


ModuleNotFoundError: No module named 'torch'

In [4]:
# Calculate average intra-cluster and cross-cluster cosine similarities
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Embed every training prompt and remember the agent it was labelled with.
# NOTE(review): `.numpy()` presumes the embedding is a CPU tensor detached from autograd — confirm.
all_embeddings = []
agent_labels = []
for prompt, agent_label in TRAIN_UPDATES:
    emb = router._compute_prompt_embedding(prompt)
    all_embeddings.append(emb.numpy())
    agent_labels.append(agent_label)
all_embeddings = np.array(all_embeddings)

# Group embeddings by their labelled agent instead of slicing fixed 10-row
# windows out of the first 50 rows: the fixed slicing is only correct when
# TRAIN_UPDATES holds exactly five contiguous runs of ten prompts. Grouping by
# label yields the same groups in that layout (dict keys keep first-seen
# order) and stays correct for any other size or ordering.
rows_by_agent = {}
for row, agent_label in enumerate(agent_labels):
    rows_by_agent.setdefault(agent_label, []).append(row)
clusters = [all_embeddings[rows] for rows in rows_by_agent.values()]

# Average intra-cluster similarity: mean of the per-group mean pairwise similarity.
intra_cluster_sims = []
for cluster in clusters:
    sims = cosine_similarity(cluster)
    # Strict upper triangle = each unordered pair once, self-similarity excluded.
    upper_tri = sims[np.triu_indices_from(sims, k=1)]
    # A single-prompt group has no pairs; skip it rather than average nothing.
    if len(upper_tri) > 0:
        intra_cluster_sims.append(np.mean(upper_tri))

# Report NaN explicitly when no group has a pair, instead of relying on
# np.mean([]) and its RuntimeWarning.
avg_intra_sim = np.mean(intra_cluster_sims) if intra_cluster_sims else float("nan")
print(f"Average intra-cluster cosine similarity: {avg_intra_sim:.3f}")

# Average cross-cluster similarity: for every unordered pair of groups, the
# mean similarity over all prompt pairs that straddle the two groups.
cross_cluster_sims = []
for i in range(len(clusters)):
    for j in range(i + 1, len(clusters)):
        sims = cosine_similarity(clusters[i], clusters[j])
        cross_cluster_sims.append(np.mean(sims))

# Fewer than two groups means there is no cross-group pair to average.
avg_cross_sim = np.mean(cross_cluster_sims) if cross_cluster_sims else float("nan")
print(f"Average cross-cluster cosine similarity: {avg_cross_sim:.3f}")


Average intra-cluster cosine similarity: 0.270
Average cross-cluster cosine similarity: 0.174


In [5]:
from sklearn.cluster import KMeans
import numpy as np

# Number of KMeans clusters. Defined once so the fit and the report loop
# below cannot drift apart (the value was previously repeated in both places).
N_KMEANS_CLUSTERS = 8

# Collect the training prompts and their embeddings in matching order.
# NOTE(review): `.numpy()` presumes the embedding is a CPU tensor detached from autograd — confirm.
train_prompts = [prompt for prompt, _ in TRAIN_UPDATES]
train_embeddings = np.array(
    [router._compute_prompt_embedding(prompt).numpy() for prompt in train_prompts]
)

# Fit KMeans and get one cluster label per training prompt.
# NOTE(review): this rebinds `clusters`, which the cosine-similarity cell uses
# for a list of embedding arrays — re-run that cell before reusing the name there.
kmeans = KMeans(n_clusters=N_KMEANS_CLUSTERS, random_state=42)
clusters = kmeans.fit_predict(train_embeddings)

# Print the prompts assigned to each cluster.
for cluster_id in range(N_KMEANS_CLUSTERS):
    print(f"\nCluster {cluster_id}:")
    cluster_prompts = [
        prompt for prompt, label in zip(train_prompts, clusters) if label == cluster_id
    ]
    for prompt in cluster_prompts:
        print(f"  - {prompt}")



Cluster 0:
  - Write a Dockerfile for a Node.js application with Express and PostgreSQL.
  - Create a Terraform script to provision an AWS EC2 instance and configure security groups.
  - Generate a Kubernetes deployment YAML file for a Flask web application.
  - Find documentation for the latest version of Kubernetes.

Cluster 1:
  - Create a SQL query to retrieve the top 10 highest-paying customers from our database.
  - Find the total number of employees working in the company and provide a breakdown by department.
  - List all employees who joined the company after January 1, 2023.
  - Get the email addresses of all team leads in the engineering department.
  - Show me the employee with the highest salary in the company.
  - Provide a list of employees along with their job titles and phone numbers.
  - Fetch all employees who report to [Manager Name].
  - Generate a list of employees whose work anniversary is this month.
  - Find employees who have been with the company for more th

In [2]:
# Number of clusters the router currently holds.
# NOTE(review): this cell's execution count (2) is lower than the cells shown
# above it (4, 5), so the displayed value may come from a different kernel
# state — confirm with Restart Kernel -> Run All.
len(router.clusters)

32