In [2]:
import weaviate
from weaviate.classes.config import Property, DataType
from weaviate.connect import ConnectionParams
from weaviate.classes.init import AdditionalConfig, Timeout
import weaviate.classes.config as wc
import json
import os
from datetime import datetime
from dotenv import load_dotenv
from setup.client import get_weaviate_client


# Load settings from a local .env file (if any) into the process environment.
load_dotenv()

collection_name = "HarryPotter"

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
WCS_URL = os.getenv("WCS_URL")
WCS_API_KEY = os.getenv("WCS_API_KEY")
USE_WCS = os.getenv("USE_WCS")  # read here but not used by the connection code below

# Fail fast with a readable message: os.getenv returns None for an unset variable,
# and passing None on to the client only produces a less obvious error later.
_missing_settings = [
    name
    for name, value in (
        ("WCS_URL", WCS_URL),
        ("WCS_API_KEY", WCS_API_KEY),
        ("OPENAI_API_KEY", OPENAI_API_KEY),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

client = weaviate.connect_to_wcs(
    cluster_url=WCS_URL,
    auth_credentials=weaviate.auth.AuthApiKey(WCS_API_KEY),
    # The OpenAI key is sent as a request header; the commented-out schema below
    # configures OpenAI vectorizer/generative modules for this collection.
    headers={
        "X-OpenAI-Api-Key": OPENAI_API_KEY
    },
)

# Connectivity check: prints the readiness flag reported by the client.
print(client.is_ready())

# To rebuild the collection from scratch, uncomment the delete + create calls below.
# client.collections.delete(collection_name)


# collection = client.collections.create(
#     collection_name,
#     properties=[
#         Property(name="question", data_type=DataType.TEXT),
#         Property(name="new_property", data_type=DataType.TEXT),
#     ],
#     vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(),
#     generative_config=wc.Configure.Generative.openai()
# )

# Handle to the existing collection, used by the cells that follow.
collection = client.collections.get(collection_name)


True


            Please make sure to close the connection using `client.close()`.


In [4]:
# Echo COLLECTION_NAME through a shell command to check the value the shell sees.
!echo $COLLECTION_NAME


HarryPotter


  pid, fd = os.forkpty()


In [15]:
# Close the Weaviate client connection.
# NOTE(review): this cell sits above the cells that use `collection`, and its execution
# count (15) suggests it was run after them. In a top-to-bottom run it would close the
# client before those cells execute — confirm the intended position (likely last).
client.close()

### View the fields in the collection configuration

In [None]:
# List every property (field) currently present in the collection's configuration.
collection_config = collection.config.get()
# The loop variable is `prop`, not `property`: a top-level loop variable stays bound in
# the kernel namespace afterwards and would shadow the `property` builtin for later cells.
for prop in collection_config.properties:
    print(f"Field Name: '{prop.name}' with type {prop.data_type}")

# Uncomment to dump the complete configuration as JSON:
# print(json.dumps(collection_config.to_dict(), indent=2))

### Run a search

In [None]:
# Example query
import weaviate.classes as wvc
from weaviate.classes.query import Filter


query = "what are quotes harry says to the dursleies in chapter two"

# NOTE(review): `fetch_objects` is called without any search text, so `query` above is
# not used by this call — confirm whether a search method was intended here.
metadata_request = wvc.query.MetadataQuery(distance=True)
response = collection.query.fetch_objects(
    limit=10,
    return_metadata=metadata_request,
    # filters=Filter.by_property("fighters").equal("Anakin Skywalke"),
)

print(response)
# Print the properties of each returned object as indented JSON under a separator line.
for obj in response.objects:
    print("-----Result------", json.dumps(obj.properties, indent=2), sep="\n")

In [None]:
# Example query
import weaviate.classes as wvc
from weaviate.classes.query import Filter


query = "what are quotes harry says to the dursly"

# Hybrid query; ask for each hit's score and the explanation of that score.
score_metadata = wvc.query.MetadataQuery(score=True, explain_score=True)
response = collection.query.hybrid(
    query=query,
    limit=10,
    return_metadata=score_metadata,
    # filters=Filter.by_property("fighters").equal("Anakin Skywalke"),
)

print(response)
# Print where each hit comes from, its text, and its scoring details.
for hit in response.objects:
    props = hit.properties
    print("-----Result------")
    print(f"Chapter: {props['chapter_num']}")
    print(f"Chunk Index: {props['chunk_index']}")
    print(f"Content: {props['content']}")
    print(f"Score: {hit.metadata.score}")
    print(f"Explain: {hit.metadata.explain_score}")

In [None]:
import time
from datetime import datetime

def run_timed_query(collection, query, filters=None, limit=3):
    """
    Run a hybrid query against a collection and report how long it took.

    Errors raised by the query are not propagated: they are captured as text in
    the timing information so that a batch of timed queries can keep running.
    Relies on ``wvc`` (``weaviate.classes``), ``time`` and ``datetime`` being
    imported in the notebook namespace.

    Args:
        collection: Object exposing ``query.hybrid(**kwargs)`` (a Weaviate collection).
        query (str): Search text, passed to the call as ``query``.
        filters: Optional filter forwarded as ``filters``; left out of the call
            entirely when None (default: None).
        limit (int): Maximum number of results requested (default: 3).

    Returns:
        dict: ``{"timing": {...}, "response": ...}``. ``timing`` always contains
        ``query_text``, ``filters_used``, ``execution_time_ms``, ``timestamp`` and
        ``num_results``. When the query fails, ``timing`` additionally contains
        ``error`` (the exception message), ``num_results`` is 0 and ``response``
        is None.
    """
    query_config = {
        "query": query,
        "limit": limit,
        # Hybrid results are ranked by a fused score, so request it as well;
        # `distance` is still requested to stay compatible with existing callers.
        "return_metadata": wvc.query.MetadataQuery(score=True, distance=True),
    }

    # Only add filters if they're provided
    if filters is not None:
        query_config["filters"] = filters

    response = None
    error = None
    # Start the clock right before the call so only the query itself is measured.
    start_time = time.perf_counter()
    try:
        response = collection.query.hybrid(**query_config)
    except Exception as e:
        # Deliberate best-effort: record the failure instead of aborting the caller.
        error = str(e)
    execution_time = (time.perf_counter() - start_time) * 1000  # Convert to milliseconds

    timing_info = {
        "query_text": query,
        "filters_used": filters is not None,
        "execution_time_ms": round(execution_time, 2),
        "timestamp": datetime.now().isoformat(),
        # Present on both paths so callers can read it without checking for errors.
        "num_results": len(getattr(response, "objects", None) or []),
    }
    if error is not None:
        timing_info["error"] = error

    return {
        "timing": timing_info,
        "response": response
    }

# Example usage:
# NOTE(review): the query texts and the "fighters" filter below do not obviously match
# the "HarryPotter" collection this notebook connects to — confirm they are intended.
# Failed queries are reported here instead of crashing the cell.

# Without filters
result = run_timed_query(collection, "duels with force lightning")
print(f"Query without filters took {result['timing']['execution_time_ms']}ms")

# With filters
filters = Filter.by_property("fighters").equal("Anakin Skywalker")
result_with_filters = run_timed_query(collection, "duels with force lightning", filters=filters)
print(f"Query with filters took {result_with_filters['timing']['execution_time_ms']}ms")

# Compare multiple queries with and without filters
test_queries = [
    "duels in industrial settings",
    "fights between masters and apprentices"
]

# `test_query` (not `query`) so the loop does not overwrite the notebook-level `query`.
for test_query in test_queries:
    print(f"\nQuery: {test_query}")
    for label, active_filters in (("Without filters", None), ("With filters", filters)):
        timing = run_timed_query(collection, test_query, filters=active_filters)["timing"]
        print(f"{label} - Time: {timing['execution_time_ms']}ms")
        # .get(): run_timed_query reports failures in timing["error"] rather than
        # raising, and its failure-path timing dict may not include "num_results".
        print(f"Results found: {timing.get('num_results', 0)}")
        if "error" in timing:
            print(f"Query failed: {timing['error']}")


### Insert a second document with a field undefined in the collection configuration

```
answer
dynamic
```

In [None]:
# Build the object first, then insert it and keep the identifier that comes back.
new_object = {
    "question": "What is the meaning of life",
    # "answer": "Weaviate",  # properties can be omitted
    "new_property": "42",
}
uuid = collection.data.insert(new_object)

print(uuid)  # the return value is the object's UUID

### View the updated fields list in the collection configuration

In [None]:
# Re-read the collection configuration and list every property (field) it now contains.
collection_config = collection.config.get()
# The loop variable is `prop`, not `property`: a top-level loop variable stays bound in
# the kernel namespace afterwards and would shadow the `property` builtin for later cells.
for prop in collection_config.properties:
    print(f"Field Name: '{prop.name}' with type {prop.data_type}")

# Dump the complete configuration as indented JSON.
print(json.dumps(collection_config.to_dict(), indent=2))

In [None]:
# Example query
response = collection.query.hybrid(
    query="property", 
    limit=3
)

print(response)
# Process and print the response
for o in response.objects:
    print(json.dumps(o.properties, indent=2))