In [23]:
import weaviate
from weaviate.classes.config import Property, DataType
from weaviate.connect import ConnectionParams
from weaviate.classes.init import AdditionalConfig, Timeout
import weaviate.classes.config as wc
import json
import os
from datetime import datetime
from dotenv import load_dotenv
from setup.client import get_weaviate_client

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Settings read from the environment. Only the local connection below is
# active, so none of these values are consumed within this cell.
OPENAI_API_KEY, WCS_URL, WCS_API_KEY, USE_WCS = (
    os.getenv(variable)
    for variable in ("OPENAI_API_KEY", "WCS_URL", "WCS_API_KEY", "USE_WCS")
)

# Name of the collection every later cell works with.
collection_name = "HarryPotter"

# Disabled alternative: connect to a Weaviate Cloud cluster instead.
# client = weaviate.connect_to_wcs(
#     cluster_url=WCS_URL,
#     auth_credentials=weaviate.auth.AuthApiKey(WCS_API_KEY),
#     headers={"X-OpenAI-Api-Key": OPENAI_API_KEY},
# )
client = weaviate.connect_to_local()

# Print the readiness flag reported by the client.
is_ready = client.is_ready()
print(is_ready)

# Handle used by the query cells further down.
collection = client.collections.get(collection_name)

True


            Please make sure to close the connection using `client.close()`.


In [15]:
# Close the connection opened in the setup cell.
# NOTE(review): the cells below query through `collection`, which was obtained
# from this client, so they will presumably fail if this cell runs first.
# Run it only once you are finished with the notebook — confirm.
client.close()

In [24]:
# Aggregate over the whole collection, requesting only the object count.
aggregation = collection.aggregate.over_all(total_count=True)
total_documents = aggregation.total_count
print(f"Total Number of Documents ingested: {total_documents}")

Total Number of Documents ingested: 866


### View the fields in the collection configuration

In [None]:
# Fetch the collection configuration and list each declared property.
collection_config = collection.config.get()
# The loop variable is named `prop` so that the builtin `property` is not
# shadowed in the notebook's global namespace.
for prop in collection_config.properties:
    print(f"Field Name: '{prop.name}' with type {prop.data_type}")

# To inspect the full configuration, uncomment:
# print(json.dumps(collection_config.to_dict(), indent=2))

In [3]:
# Initial search text. The keyword, vector and hybrid search cells each
# reassign QUERY before running their own query.
QUERY = "what did harry do at the zoo"


# The bare string below is a scratch note, not executed logic. Because it is
# the last expression in the cell, the notebook echoes it as the cell output.
# NOTE(review): the duel IDs and "duels" query look like they come from a
# different dataset than the HarryPotter collection — confirm or remove.
"""
Query: "Family Battle"
duel_054
duel_052

Query: Find duels with unique environmental challenges affecting combat


"""




'\nQuery: "Family Battle"\nduel_054\nduel_052\n\nQuery: Find duels with unique environmental challenges affecting combat\n\n\n'

### Run a Keyword Search

In [21]:
# Example query
import weaviate.classes as wvc
from weaviate.classes.query import Filter
import time

# Search text for the BM25 (keyword) query.
QUERY = "where di harry play chess?"

# Time the round trip to Weaviate.
start_time = time.time()
response = collection.query.bm25(
    query=QUERY,
    limit=20,
    return_metadata=wvc.query.MetadataQuery(score=True, explain_score=True),
    # Optional arguments, left disabled:
    # query_properties=["source^2"],
    # return_properties=["source"],
    # filters=Filter.by_property("fighters").equal("Anakin Skywalke"),
)
elapsed_time = time.time() - start_time

print(f"Query: {QUERY}")
print(f"Query execution time: {elapsed_time:.4f} seconds")
print(type(response.objects))

# For every hit: its properties line by line, its score and score
# explanation, then the same properties again as pretty-printed JSON.
for hit in response.objects:
    for key, value in hit.properties.items():
        print(f"{key}: {value}")
    hit_metadata = hit.metadata
    print(f"Score: {hit_metadata.score}")
    print(f"Explain: {hit_metadata.explain_score}")
    print(json.dumps(hit.properties, indent=2))


Query: where di harry play chess?
Query execution time: 0.0130 seconds
<class 'list'>
chunk_id: 12_47
chapter_num: 12
chunk_index: 47
chunk_type: sentences
content: "This isn't safe -- she might have gone for Filch, I bet she heard us. Come on." And Ron pulled Harry out of the room. The snow still hadn't melted the next morning. "Want to play chess, Harry?" said Ron. "No." "Why don't we go down and visit Hagrid?"
chapter_num_text: twelve
chapter: None
chapter_title: THE MIRROR OF ERISED
Score: 2.0539097785949707
Explain: , BM25F_play_frequency:1, BM25F_play_propLength:43, BM25F_chess_frequency:1, BM25F_chess_propLength:43, BM25F_harry_frequency:2, BM25F_harry_propLength:43
{
  "chunk_id": "12_47",
  "chapter_num": 12,
  "chunk_index": 47,
  "chunk_type": "sentences",
  "content": "\"This isn't safe -- she might have gone for Filch, I bet she heard us. Come on.\" And Ron pulled Harry out of the room. The snow still hadn't melted the next morning. \"Want to play chess, Harry?\" said Ron.

### Run a Vector Search

In [25]:
# Example query
import weaviate.classes as wvc
from weaviate.classes.query import Filter


import time
# Natural-language question used for the vector (near_text) search.
QUERY = "where did harry play chess?"

# Time the round trip to Weaviate.
start_time = time.time()
response = collection.query.near_text(
    query=QUERY,
    limit=5,
    return_metadata=wvc.query.MetadataQuery(distance=True, certainty=True),
    # Optional thresholds and filters, left disabled:
    # certainty=0.9,  # closer to 1 is more similar
    # distance=0.2,   # closer to 0 is more similar
    # filters=Filter.by_property("fighters").equal("Anakin Skywalker"),
)
elapsed_time = time.time() - start_time

hits = response.objects
print(f"Query: {QUERY}")
print(f"Query execution time: {elapsed_time:.4f} seconds")
print(f"Length of Returned Objects: {len(hits)}")

# One section per hit: its properties, then the distance and certainty metadata.
for rank, hit in enumerate(hits, start=1):
    print(f"-----Result {rank} ------")
    for key, value in hit.properties.items():
        print(f"{key}: {value}")
    print(f"Distance: {hit.metadata.distance}")
    print(f"Certainty: {hit.metadata.certainty}")

Query: where did harry play chess?
Query execution time: 0.4718 seconds
Length of Returned Objects: 5
-----Result 1 ------
chunk_id: 17_43
chapter_num: 17
chunk_index: 43
chunk_type: sentences
content: Quirrell screamed and tried to throw Harry off -- the
pain in Harry's head was building -- he couldn't see -- he could only
hear Quirrell's terrible shrieks and Voldemort's yells of, "KILL HIM! KILL HIM!" and other voices, maybe in Harry's own head, crying, "Harry! Harry!" He felt Quirrell's arm wrenched from his grasp, knew all was lost, and
fell into blackness, down ... down... down...

Something gold was glinting just above him. The Snitch! He tried to
catch it, but his arms were too heavy. He blinked. It wasn't the Snitch at all. It was a pair of glasses. How
strange. He blinked again. The smiling face of Albus Dumbledore swam into view
above him. "Good afternoon, Harry," said Dumbledore. Harry stared at him.
chapter_num_text: seventeen
chapter_title: THE MAN WITH TWO FACES
chapter: 

### Run a Hybrid Search

In [26]:
# Example query
import weaviate.classes as wvc
from weaviate.classes.query import Filter

import time

# Search text for the hybrid (keyword + vector) query.
QUERY = "where did harry save hermoine from the monster"

# Time the round trip to Weaviate.
start_time = time.time()
response = collection.query.hybrid(
    query=QUERY,
    limit=3,
    alpha=0.6,  # closer to 0 is keyword heavy, closer to 1 is vector heavy
    return_metadata=wvc.query.MetadataQuery(distance=True, score=True, explain_score=True),
    # Optional arguments, left disabled:
    # max_vector_distance=0.5,
    # filters=Filter.by_property("fighters").equal("Anakin Skywalke"),
)
elapsed_time = time.time() - start_time

hits = response.objects
print(f"Query: {QUERY}")
print(f"Query execution time: {elapsed_time:.4f} seconds")
print(f"Length of Returned Objects: {len(hits)}")

# One section per hit: its properties, then the score and its explanation.
for rank, hit in enumerate(hits, start=1):
    print(f"-----Result {rank} ------")

    for key, value in hit.properties.items():
        print(f"{key}: {value}")
    print(f"Score: {hit.metadata.score}")
    print(f"Explain: {hit.metadata.explain_score}")

Query: where did harry save hermoine from the monster
Query execution time: 0.1516 seconds
Length of Returned Objects: 3
-----Result 1 ------
chunk_id: 17_33
chapter_num: 17
chunk_index: 33
chapter_num_text: seventeen
chunk_type: sentences
content: "No, no, no. I tried to kill you. Your friend Miss Granger accidentally
knocked me over as she rushed to set fire to Snape at that Quidditch
match. She broke my eye contact with you. Another few seconds and I'd
have got you off that broom. I'd have managed it before then if Snape
hadn't been muttering a countercurse, trying to save you." "Snape was trying to save me?" "Of course," said Quirrell coolly. "\Why do you think he wanted to
referee your next match? He was trying to make sure I didn't do it
again. Funny, really... he needn't have bothered. I couldn't do anything
with Dumbledore watching. All the other teachers thought Snape was
trying to stop Gryffindor from winning, he did make himself unpopular...
and what a waste of time, when af

## Use Weaviate's Generative Feedback Loop feature to help provide more context

In [45]:
from weaviate.classes.config import Configure, Property, DataType, VectorDistances, VectorFilterStrategy
# Start a Generative Feedback Loop that writes a generated text property
# ("duel_summary") onto the collection, using `instruction` as the prompt and
# the listed `view_properties` as the fields passed along with it.
# NOTE(review): the prompt and the view properties (source, fighters, outcome,
# location) describe a Star Wars duels dataset, while `collection` in this
# notebook is "HarryPotter" — confirm these are the intended fields.
instruction = """
Create a summary of the following lightsaber duels from within episodes and movies within the Star Wars univers.
Use the source to understand what series and episode for each and make sure to state that
following that give an overview of the scene and why the battle is happening
don't make it longer than five sentances  
"""
# The returned object is kept because the status cell reads its `workflow_id`.
overview_gfl = collection.gfl.create(
    property_name="duel_summary",
    data_type=DataType.TEXT,
    view_properties=["source", 'fighters', 'outcome', 'location'],
    instruction=instruction,
)

In [None]:
# Look up the workflow started by the GFL creation cell and print its status.
gfl_status_response = collection.gfl.status(overview_gfl.workflow_id)

print("GFL Status Response:")
print(f"Workflow ID: {gfl_status_response.workflow_id}")
print("Status:")

# (label, attribute name) pairs, read from the status object in this order.
status_fields = (
    ("Parent State", "parent_state"),
    ("Child State", "child_state"),
    ("Batch Count", "batch_count"),
    ("Total Items", "total_items"),
    ("Total Duration", "total_duration"),
    ("Start Time", "start_time"),
    ("End Time", "end_time"),
)
for label, attribute in status_fields:
    print(f"  {label}: {getattr(gfl_status_response.status, attribute)}")

In [None]:
import time
from datetime import datetime

def run_timed_query(collection, query, filters=None, limit=3):
    """
    Run a hybrid query and report how long it took.

    Args:
        collection: Object exposing `query.hybrid(**kwargs)`; in this notebook,
            a Weaviate collection.
        query (str): Search text passed to the hybrid query.
        filters: Optional filter forwarded as `filters=`. When None (default)
            the argument is left out of the call entirely.
        limit (int): Maximum number of results to request (default: 3).

    Returns:
        dict: `{"timing": {...}, "response": ...}`. `timing` always holds
        `query_text`, `filters_used`, `execution_time_ms`, `timestamp` and
        `num_results`. If the query raises, `timing` additionally holds
        `error` (the exception message), `num_results` is 0 and `response`
        is None; the exception itself is not re-raised.
    """
    start_time = time.perf_counter()
    query_config = {
        "query": query,
        "limit": limit,
        "return_metadata": wvc.query.MetadataQuery(distance=True)
    }

    # Only add filters if they're provided
    if filters is not None:
        query_config["filters"] = filters

    response = None
    num_results = 0
    error = None
    try:
        response = collection.query.hybrid(**query_config)
        num_results = len(response.objects) if hasattr(response, "objects") else 0
    except Exception as e:
        # Failures are reported through the returned dict so that timing
        # comparisons across several queries can keep going.
        response = None
        num_results = 0
        error = str(e)

    execution_time = (time.perf_counter() - start_time) * 1000  # Convert to milliseconds

    # One timing record for both outcomes, so callers can read every key
    # (notably `num_results`) without checking whether the query failed.
    timing_info = {
        "query_text": query,
        "filters_used": filters is not None,
        "execution_time_ms": round(execution_time, 2),
        "timestamp": datetime.now().isoformat(),
        "num_results": num_results,
    }
    if error is not None:
        timing_info["error"] = error

    return {
        "timing": timing_info,
        "response": response
    }

# Example usage against the HarryPotter collection.

# Without filters
result = run_timed_query(collection, "where did harry play chess?")
print(f"Query without filters took {result['timing']['execution_time_ms']}ms")

# With filters: restrict matches to one chapter. `chapter_num` is an integer
# property of the objects in this collection.
filters = Filter.by_property("chapter_num").equal(12)
result_with_filters = run_timed_query(collection, "where did harry play chess?", filters=filters)
print(f"Query with filters took {result_with_filters['timing']['execution_time_ms']}ms")

# Compare multiple queries with and without filters
test_queries = [
    "what did harry do at the zoo",
    "where did harry play chess?"
]

for query in test_queries:
    # Without filters
    result = run_timed_query(collection, query)
    print(f"\nQuery: {query}")
    print(f"Without filters - Time: {result['timing']['execution_time_ms']}ms")
    # `.get` keeps the comparison running even if a query failed and its
    # timing record carries an `error` entry instead of a result count.
    print(f"Results found: {result['timing'].get('num_results', 0)}")
    if "error" in result["timing"]:
        print(f"Error: {result['timing']['error']}")

    # With filters
    result_filtered = run_timed_query(collection, query, filters=filters)
    print(f"With filters - Time: {result_filtered['timing']['execution_time_ms']}ms")
    print(f"Results found: {result_filtered['timing'].get('num_results', 0)}")
    if "error" in result_filtered["timing"]:
        print(f"Error: {result_filtered['timing']['error']}")


### Insert a second document with a field undefined in the collection configuration

```
answer
dynamic
```

In [None]:
# Payload for the new object. Per the section heading, "new_property" is a
# field the collection configuration does not define yet.
# NOTE(review): each run of this cell inserts another object — confirm intended.
new_object = {
    "question": "What is the meaning of life",
    # "answer": "Weaviate",  # properties can be omitted
    "new_property": "42",
}
uuid = collection.data.insert(new_object)

print(uuid)  # the return value is the object's UUID

### View the updated fields list in the collection configuration

In [None]:
# Re-read the collection configuration and list each declared property.
collection_config = collection.config.get()
# The loop variable is named `prop` so that the builtin `property` is not
# shadowed in the notebook's global namespace.
for prop in collection_config.properties:
    print(f"Field Name: '{prop.name}' with type {prop.data_type}")

# Full configuration as indented JSON.
print(json.dumps(collection_config.to_dict(), indent=2))

In [None]:
# Hybrid search for the term "property", capped at three results.
response = collection.query.hybrid(query="property", limit=3)

# Print the raw response object first, then each hit's properties as JSON.
print(response)
for hit in response.objects:
    print(json.dumps(hit.properties, indent=2))