In [4]:
# Import required libraries
import random
from urllib.parse import quote

import numpy as np
import requests

# Configure the services
# Base URLs of the two local HTTP services this notebook talks to.
# Script service: queried at /scripts, /scripts/{id} and /chunks/{id}.
SCRIPT_SERVICE_URL = "http://localhost:8001"
# Embedding index service: queried via POST /search.
EMBEDDING_INDEX_URL = "http://localhost:8000"

In [65]:
# Function to fetch all scripts
def fetch_all_scripts(timeout=10):
    """
    Fetch metadata for all scripts from the script service.

    Args:
        timeout: Seconds to wait for the service before giving up. Passed
            straight to ``requests.get``; without it a stalled service
            would block the kernel indefinitely.

    Returns:
        The decoded JSON body of ``GET {SCRIPT_SERVICE_URL}/scripts``.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(f"{SCRIPT_SERVICE_URL}/scripts", timeout=timeout)
    response.raise_for_status()
    return response.json()

# Function to fetch a specific script by ID
def fetch_script_by_id(script_id, timeout=10):
    """
    Fetch the details of a script (including lines) by its ID.

    Args:
        script_id: Identifier of the script. It is percent-encoded before
            being placed in the URL path, so characters such as ``/`` or
            ``?`` cannot alter the request target.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The decoded JSON body of ``GET {SCRIPT_SERVICE_URL}/scripts/{id}``.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # safe="" also encodes "/", which quote() would otherwise leave intact.
    encoded_id = quote(str(script_id), safe="")
    response = requests.get(
        f"{SCRIPT_SERVICE_URL}/scripts/{encoded_id}", timeout=timeout
    )
    response.raise_for_status()
    return response.json()

# Function to fetch a specific chunk by ID
def fetch_chunk_by_id(chunk_id, timeout=10):
    """
    Fetch a single chunk from the script service by its ID.

    Args:
        chunk_id: Identifier of the chunk. It is percent-encoded before
            being placed in the URL path, so characters such as ``/`` or
            ``?`` cannot alter the request target.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The decoded JSON body of ``GET {SCRIPT_SERVICE_URL}/chunks/{id}``.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # safe="" also encodes "/", which quote() would otherwise leave intact.
    encoded_id = quote(str(chunk_id), safe="")
    response = requests.get(
        f"{SCRIPT_SERVICE_URL}/chunks/{encoded_id}", timeout=timeout
    )
    response.raise_for_status()
    return response.json()

# Function to search for similar lines in the FAISS index
def search_similar_lines(query_embedding, top_k=5, timeout=10):
    """
    Query the embedding index service to perform a similarity search.

    Args:
        query_embedding: The query vector. A plain list of numbers, or any
            object exposing ``.tolist()`` (e.g. a NumPy array), which is
            converted first because arrays are not JSON-serializable.
        top_k: Number of results requested from the service.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The decoded JSON body of ``POST {EMBEDDING_INDEX_URL}/search``.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Accept array-likes without forcing every caller to remember .tolist().
    if hasattr(query_embedding, "tolist"):
        query_embedding = query_embedding.tolist()

    response = requests.post(
        f"{EMBEDDING_INDEX_URL}/search",
        json={
            "query_embedding": query_embedding,
            "top_k": top_k,
        },
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

In [89]:
# Demo: sample a random chunk from a random script, then run a similarity
# search against the embedding index using a MOCK (random) query vector.
scripts = fetch_all_scripts()
print(f"Available scripts: {len(scripts)}")
if not scripts:
    # random.choice would otherwise fail with an opaque IndexError.
    raise RuntimeError("Script service returned no scripts; nothing to sample.")

# Select a random script
random_script = random.choice(scripts)
print(f"Selected script: {random_script}")

# Fetch full details for the selected script
script_details = fetch_script_by_id(random_script["id"])
print(f"Chunks: {len(script_details['chunks'])}")
if not script_details["chunks"]:
    raise RuntimeError(f"Script {random_script['id']!r} has no chunks to sample.")

# Pick a random chunk from the script
random_chunk = random.choice(script_details["chunks"])
print(f"Random random_chunk: {random_chunk['text']}")

# Mock embedding: a random vector that is NOT derived from random_chunk, so the
# hits below are unrelated to the sampled text. Replace with a real embedding
# of random_chunk['text'] from your embedding service if available.
EMBEDDING_DIM = 768  # must equal the vector size the index was built with — TODO confirm
random_embedding = np.random.rand(EMBEDDING_DIM).tolist()

# Perform similarity search
top_k = 5
search_results = search_similar_lines(random_embedding, top_k=top_k)
print(f"Top {top_k} similar lines:")
# NOTE(review): the value printed under "Similarity" is the service's
# 'distance' field; presumably smaller means closer — confirm the metric.
for rank, result in enumerate(search_results["results"], start=1):
    print(f"\n{rank}. ID: {result['id']}, Similarity: {result['distance']}")
    # Each hit ID refers to a chunk, so resolve it through the chunk endpoint.
    result_script = fetch_chunk_by_id(result["id"])
    print(result_script["text"])


Available scripts: 22
Selected script: {'id': 'zZuh8YUBeDY', 'name': 'Richard Sutton on Pursuing AGI Through Reinforcement Learning'}
Chunks: 1024
Random random_chunk: temporally symmetric in in the Alberta plan which means that there are no special phases where you like training
Top 5 similar lines:

1. ID: IGu7ivuy1Ag-23, Similarity: 249.147705078125
machine translation so when you want to translate text from one language to another so for example let's say you

2. ID: O_5e_WSNedE-949, Similarity: 249.16970825195312
project for example is to apply the same kind of innovation methodology to basic

3. ID: O_5e_WSNedE-2097, Similarity: 249.16970825195312
project for example is to apply the same kind of innovation methodology to basic

4. ID: sw8IE3MX1SY-652, Similarity: 249.27902221679688
algorithms we've been looking at which represent learning approaches that fit naturally with different

5. ID: sw8IE3MX1SY-1061, Similarity: 249.31964111328125
tree learning or something simple like su