In [None]:
%autoawait


IPython autoawait is `on`, and set to use `asyncio`


In [None]:
from astrapy import DataAPIClient
from astrapy.constants import VectorMetric
from astrapy.ids import UUID
from astrapy.info import CollectionDefinition
import json
import os
from typing import Any, Dict, Union, List
import traceback

from dotenv import load_dotenv


In [None]:
# Configuration constants, read from the process environment after loading
# any local .env file.
load_dotenv()

# Astra DB connection settings
ASTRA_DB_TOKEN = os.environ.get("ASTRA_DB_TOKEN")
ASTRA_DB_ENDPOINT = os.environ.get("ASTRA_DB_API_ENDPOINT")
ASTRA_DB_KEYSPACE = os.environ.get("ASTRA_DB_KEYSPACE", "langflow")
ASTRA_DB_COLLECTION = os.environ.get("ASTRA_DB_COLLECTION", "langflow_docs")

# Embedding settings (the dimension is parsed to an int)
VECTOR_DIMENSION = int(os.environ.get("VECTOR_DIMENSION", "768"))
EMBEDDING_KEY = os.environ.get("EMBEDDING_KEY", "embedding")

# OpenAI credentials
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


In [None]:
# Open a Data API client and bind a Database handle to the configured
# endpoint, token and keyspace.
my_client = DataAPIClient()
my_database = my_client.get_database(
    ASTRA_DB_ENDPOINT, token=ASTRA_DB_TOKEN, keyspace=ASTRA_DB_KEYSPACE
)

# Describe the demo vector collection: 3-dimensional vectors compared with
# cosine similarity.
dreams_definition_builder = CollectionDefinition.builder()
dreams_definition_builder = dreams_definition_builder.set_vector_dimension(3)
dreams_definition_builder = dreams_definition_builder.set_vector_metric(
    VectorMetric.COSINE
)
dreams_definition = dreams_definition_builder.build()

# Create the vector collection from that definition
my_collection = my_database.create_collection(
    "dreams_collection", definition=dreams_definition
)


In [None]:

# Sample documents for the demo collection. Only the first one carries an
# explicit _id, and the last one has no tags.
dream_documents = [
    {
        "_id": UUID("018e65c9-e33d-749b-9386-e848739582f0"),
        "summary": "Riding the waves",
        "tags": ["sport"],
        "$vector": [0, 0.2, 1],
    },
    {
        "summary": "Friendly aliens in town",
        "tags": ["scifi"],
        "$vector": [-0.3, 0, 0.8],
    },
    {
        "summary": "Meeting Beethoven at the dentist",
        "$vector": [0.2, 0.6, 0],
    },
]

# Populate the collection in a single call
my_collection.insert_many(dream_documents)


In [None]:

# Replace the summary on one document matching the "sport" tag filter
sport_filter = {"tags": "sport"}
summary_patch = {"$set": {"summary": "Surfers' paradise"}}
my_collection.update_one(sport_filter, summary_patch)


In [None]:

# Vector search: ask for the two documents closest to the query vector and
# have the similarity score included with each result.
query_vector = [0, 0.2, 0.4]
cursor = my_collection.find(
    {}, sort={"$vector": query_vector}, limit=2, include_similarity=True
)

# Show each hit as "<summary>: <similarity>"
for result in cursor:
    print(f"{result['summary']}: {result['$similarity']}")

# Expected output:
#   Surfers' paradise: 0.98238194
#   Friendly aliens in town: 0.91873914



In [None]:
# Open a client and a Database handle for the configured endpoint/keyspace
client = DataAPIClient()
database = client.get_database(
    ASTRA_DB_ENDPOINT, token=ASTRA_DB_TOKEN, keyspace=ASTRA_DB_KEYSPACE
)

# Grab handles to the three collections used by the rest of the notebook
docs, components, samples = (
    database.get_collection(collection_name)
    for collection_name in ("langflow_docs", "langflow_components", "sample_cde")
)



In [None]:
import sys
import os

# Dynamically add the project's ``src`` folder to the Python path.
#
# The recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'graphrag_agent'", which is what
# happens when ``<cwd>/src`` is not where the package lives (for example when
# the kernel is started from a sub-directory of the project). To be robust,
# walk up from the working directory until a ``src`` folder that actually
# contains the ``graphrag_agent`` package is found. If none is found, fall back
# to ``<cwd>/src`` exactly as before.
notebook_dir = os.getcwd()  # Current working directory of the notebook kernel
src_path = os.path.join(notebook_dir, "src")
_search_dir = notebook_dir
while True:
    _candidate = os.path.join(_search_dir, "src")
    if os.path.isdir(os.path.join(_candidate, "graphrag_agent")):
        src_path = _candidate
        break
    _parent = os.path.dirname(_search_dir)
    if _parent == _search_dir:  # reached the filesystem root; stop searching
        break
    _search_dir = _parent
if src_path not in sys.path:
    sys.path.append(src_path)

from graphrag_agent.tools.document_embedding import OpenAIEmbeddingGenerator

# Initialize the embedding generator with the OpenAI key from the config cell
embedding_generator = OpenAIEmbeddingGenerator(api_key=OPENAI_API_KEY)

# Generate an embedding for a sample text.
# NOTE(review): `_generate_embedding` is underscore-prefixed (private by
# convention); prefer a public method of OpenAIEmbeddingGenerator if one exists.
embedding = embedding_generator._generate_embedding("This is a test document")

# Print the first 5 elements of the embedding
print(embedding[:5])


ModuleNotFoundError: No module named 'graphrag_agent'

In [None]:
from itertools import combinations

# Map each collection label to the list of "summary" strings of its documents.
#
# The values are read straight from the collection handles (`docs`,
# `components`, `samples`) created in an earlier cell. This cell previously
# referenced `docs_content` / `components_content` / `samples_content`, which
# are only assigned in a later cell, so it raised NameError on a fresh
# kernel ("Restart & Run All").
all_collections = {
    label: [document.get("summary", "") for document in collection.find({})]
    for label, collection in (
        ("docs", docs),
        ("components", components),
        ("samples", samples),
    )
}


In [None]:
import openai

def get_embedding(text: str, model: str = "text-embedding-ada-002") -> List[float]:
    """
    Generate an embedding for the given text using OpenAI's API.

    Works with both generations of the ``openai`` package: the client-based
    interface introduced in openai>=1.0 (where ``openai.Embedding`` was
    removed) and the legacy module-level interface of openai<1.0.

    Args:
        text (str): The input text to generate the embedding for.
        model (str): The OpenAI model to use for generating the embedding.

    Returns:
        List[float]: The embedding vector for the input text.
    """
    if hasattr(openai, "OpenAI"):
        # openai>=1.0: requests go through an explicit client object and the
        # response is a typed object accessed by attribute.
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        response = client.embeddings.create(input=text, model=model)
        return response.data[0].embedding

    # Legacy openai<1.0: module-level API key and dict-style response.
    openai.api_key = OPENAI_API_KEY
    response = openai.Embedding.create(
        input=text,
        model=model
    )
    return response['data'][0]['embedding']


In [None]:
from itertools import combinations
import openai

# Function to retrieve content from a collection
def retrieve_content(collection, query_filter=None):
    """Return the "summary" field of every document matched in a collection.

    Args:
        collection: Object exposing ``find(filter)`` that yields dict-like
            documents.
        query_filter: Filter passed to ``find``; any falsy value is replaced
            by an empty filter ``{}``.

    Returns:
        list: One entry per matched document, with "" for documents that
        have no "summary" key.
    """
    if not query_filter:
        query_filter = {}
    summaries = []
    for document in collection.find(query_filter):
        summaries.append(document.get("summary", ""))
    return summaries

# Pull the summaries out of each collection (docs, then components, then samples)
docs_content, components_content, samples_content = (
    retrieve_content(source) for source in (docs, components, samples)
)

# Label each content list so combinations can refer to it by name
all_collections = dict(
    zip(
        ("docs", "components", "samples"),
        (docs_content, components_content, samples_content),
    )
)

# Every non-empty subset of collection names, from single names up to all of them
combinations_list = []
for r in range(1, len(all_collections) + 1):
    combinations_list += combinations(all_collections, r)

# Function to generate a prompt
def generate_prompt(selected_collections):
    """Build a prompt by joining the content of the selected collections.

    Args:
        selected_collections: Iterable of keys into the module-level
            ``all_collections`` mapping.

    Returns:
        str: All content strings of the selected collections, in selection
        order, separated by newlines.
    """
    return "\n".join(
        text
        for collection_name in selected_collections
        for text in all_collections[collection_name]
    )

# Function to get a response from OpenAI API
def get_openai_response(prompt, model="gpt-3.5-turbo-instruct", max_tokens=150):
    """Send a prompt to OpenAI's completions endpoint and return the text.

    Works with both generations of the ``openai`` package: the client-based
    interface of openai>=1.0 (where ``openai.Completion`` was removed) and the
    legacy module-level interface of openai<1.0. The default model is
    ``gpt-3.5-turbo-instruct`` because the previously hard-coded
    ``text-davinci-003`` has been retired by OpenAI.

    Args:
        prompt (str): The prompt text to complete.
        model (str): Completion model to use.
        max_tokens (int): Maximum number of tokens to generate.

    Returns:
        str: The first completion choice, stripped of surrounding whitespace.
    """
    if hasattr(openai, "OpenAI"):
        # openai>=1.0: requests go through an explicit client object.
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        response = client.completions.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
        )
    else:
        # Legacy openai<1.0: module-level API key and resource classes.
        openai.api_key = OPENAI_API_KEY
        response = openai.Completion.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
        )
    return response.choices[0].text.strip()

# Query OpenAI once per combination of collections, keyed by the combination
results = {}
for combination in combinations_list:
    prompt = generate_prompt(combination)
    print(f"Testing combination: {combination}")
    results[combination] = response = get_openai_response(prompt)

# Report every combination together with the response it produced
for combination in results:
    response = results[combination]
    print(f"Combination: {combination}\nResponse: {response}\n")
