## Connect

In [None]:
# Import libraries
import os
from dotenv import load_dotenv
import json
import requests
import weaviate
from weaviate import EmbeddedOptions
from weaviate.classes.config import Configure, Property, DataType
from weaviate.classes.query import MetadataQuery, HybridVector, Move
import warnings

# Silence every Python warning for the rest of the session
warnings.filterwarnings('ignore')

# Load the environment variables (python-dotenv)
load_dotenv()

# Azure OpenAI settings; indexing os.environ raises KeyError if one is missing
aoai_key = os.environ['AZURE_OPENAI_API_KEY']
aoai_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']
aoai_deployment = os.environ['AZURE_OPENAI_DEPLOYMENT']
aoai_embedding = os.environ['AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT']
aoai_embedding_3 = os.environ['AZURE_OPENAI_EMBEDDINGS_3_DEPLOYMENT']

In [2]:
# NOTE(review): this repeats the `import warnings` / filterwarnings('ignore') already
# executed in the first cell; the cell appears redundant — confirm before removing.
import warnings
warnings.filterwarnings('ignore')

### Utility Functions

In [3]:
# JSON print beautifier
def json_print(data):
    """Pretty-print ``data`` as JSON with a two-space indent.

    Values the ``json`` module cannot encode natively (for example
    ``datetime`` or ``uuid.UUID`` instances) are rendered through ``str()``
    instead of raising ``TypeError``. Output for data that was already
    JSON-serializable is unchanged.

    Args:
        data: Object built from JSON-compatible containers and scalars,
            optionally holding values that have a usable ``str()`` form.

    Returns:
        None. The formatted JSON text is written to standard output.
    """
    print(json.dumps(data, indent=2, default=str))

### Create a Weaviate Embedded DB

In [None]:
# Headers forwarded to Weaviate so its Azure OpenAI modules can reach the service
azure_headers = {
    "X-OpenAI-BaseURL": aoai_endpoint,
    "X-Azure-Api-Key": aoai_key
}

# Probe port 8079 to find out whether a Weaviate instance is already running.
# When nothing is listening, requests raises (connection refused) instead of
# returning a status code, so that case must count as "not running" too;
# otherwise the embedded fallback below is never reached. The timeout keeps
# the cell from hanging on an unresponsive port.
try:
    response = requests.get("http://localhost:8079/v1/schema", timeout=5)
    instance_running = response.status_code == 200
except requests.exceptions.RequestException:
    instance_running = False

if instance_running:
    # Reuse the instance that answered the probe
    client = weaviate.connect_to_local(
        port=8079,
        grpc_port=50050,
        headers=azure_headers
    )
    print("Connected to existing instance")
else:
    # Nothing usable on the port: start an embedded instance
    client = weaviate.connect_to_embedded(
        version="1.26.1",  # e.g. version="1.26.5"
        headers=azure_headers,
    )
    print("Connected to new instance")

print(client.is_ready())


In [None]:
# Show Weaviate DB metadata
# Pretty-prints whatever client.get_meta() returns for the connected instance.
json_print(client.get_meta())

### Create Collection (EU Destinations)

In [27]:
# Remove any existing "eudestinations" collection so the create cell that follows
# starts from scratch. This is destructive: previously imported objects are deleted.
if client.collections.exists("eudestinations"):
    client.collections.delete("eudestinations")

In [None]:
# Named vector "title_vector", embedded through the Azure OpenAI embeddings deployment
title_vector_config = Configure.NamedVectors.text2vec_azure_openai(
    name="title_vector",
    resource_name="aoai-airlift-1",
    deployment_id=aoai_embedding,
    base_url=aoai_endpoint,
)

# Generative module pointing at the deployment named by AZURE_OPENAI_DEPLOYMENT.
# Optional tuning parameters that are deliberately not passed:
#   frequency_penalty=0, max_tokens=500, presence_penalty=0,
#   temperature=0.7, top_p=0.7
generative_module = Configure.Generative.azure_openai(
    resource_name="aoai-airlift-1",
    deployment_id=aoai_deployment,
    base_url=aoai_endpoint,
)

# No properties are declared here; only the vectorizer and generative modules.
# Additional parameters not shown
client.collections.create(
    "eudestinations",
    vectorizer_config=[title_vector_config],
    generative_config=generative_module,
)

### Read European Tourist Destinations

In [None]:
file_path = '../data/eu_destinations_n.json'

# Decode explicitly as UTF-8. Without an encoding argument open() falls back to
# the platform default (e.g. cp1252 on Windows), which can corrupt or reject
# non-ASCII characters in place and food names.
# Assumes the dataset file is UTF-8 encoded, as JSON interchange requires.
with open(file_path, "r", encoding="utf-8") as file:
    data = file.read()

# Parse the raw text into Python objects and preview the first record
ds = json.loads(data)
json_print(ds[0])

### Create Embeddings

In [30]:
collection = client.collections.get("eudestinations")

# Import every destination record; the context manager flushes pending
# objects when the block exits.
with collection.batch.dynamic() as batch:
    for d in ds:
        # Map the dataset's human-readable keys onto the collection's property names
        weaviate_obj = {
            "destination": d["Destination"],
            "region": d["Region"],
            "country": d["Country"],
            "category": d["Category"],
            "annualtourists": d["Approximate Annual Tourists"],
            "foods": d["Famous Foods"],
            "language": d["Language"],
            "besttimevisit": d["Best Time to Visit"],
            "costliving": d["Cost of Living"],
            "cultural": d["Cultural Significance"],
            "description": d["Description"]
        }

        # The model provider integration will automatically vectorize the object
        batch.add_object(
            properties=weaviate_obj,
            # vector=vector  # Optionally provide a pre-obtained vector
        )

# Batch imports collect per-object errors (e.g. a vectorizer call that was
# rejected) instead of raising, so surface them explicitly rather than
# continuing with a silently incomplete collection.
failed_objects = collection.batch.failed_objects
if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)}")
    print(f"First failed object: {failed_objects[0]}")

In [None]:
# NOTE(review): the two disabled lines below use a query-builder syntax that does not
# match the `client.collections` API used everywhere else in this notebook.
# The current-client equivalent is presumably
# `collection.aggregate.over_all(total_count=True)` — confirm before enabling.
# count = client.query.aggregate("eudestinations").with_meta_count().do()
# json_print(count)

### Query a specific Collection Dataset

In [31]:
# Handle to the "eudestinations" collection; the query cells that follow all use it.
collection = client.collections.get("eudestinations")

### Search Patterns

In [None]:
# Fetch a single stored object, with no search criteria, to inspect its properties
response = collection.query.fetch_objects(limit=1)

for item in response.objects:
    json_print(item.properties)

In [None]:
# Semantic search for "Picasso": top 5 matches with their distance and stored vectors
response = collection.query.near_text(
    query="Picasso",  # The model provider integration will automatically vectorize the query
    limit=5,
    include_vector=True,
    return_metadata=MetadataQuery(distance=True)
)

for obj in response.objects:
    print(obj.properties)
    print(obj.metadata.distance)
    # Summarize each returned vector (name, dimensionality, first values) instead
    # of dumping every component, which would bury the results in numbers.
    for vector_name, values in obj.vector.items():
        print(f"{vector_name}: {len(values)} dims, first 5 = {values[:5]}")

In [None]:
# Same "Picasso" query, this time constrained by distance=0.20 rather than an explicit limit
response = collection.query.near_text(
    query="Picasso",  # The model provider integration will automatically vectorize the query
    return_metadata=MetadataQuery(distance=True),
    distance=0.20,
)

for match in response.objects:
    print(match.properties, match.metadata.distance, sep="\n")

### Sparse (Keyword) Search

In [None]:
# Sparse
# BM25 keyword query for "Sea": three results, each shown with its score
response = collection.query.bm25(
    query="Sea",
    limit=3,
    return_metadata=MetadataQuery(score=True),
)

for hit in response.objects:
    json_print(hit.properties)
    print(hit.metadata.score)

### Hybrid Search (Dense and Sparse)

In [None]:
# Hybrid query for "sea": three results, requesting both the score and its explanation
response = collection.query.hybrid(
    query="sea",
    limit=3,
    return_metadata=MetadataQuery(score=True, explain_score=True),
)

for hit in response.objects:
    json_print(hit.properties)
    print(hit.metadata.score, hit.metadata.explain_score, sep="\n")

In [None]:
# alpha sets the balance between the two search components:
#   An alpha of 1 is a pure vector (dense) search.
#   An alpha of 0 is a pure keyword (sparse) search.
# 0.7 therefore leans towards the vector side.
response = collection.query.hybrid(query="sea", alpha=0.7, limit=10)

for hit in response.objects:
    json_print(hit.properties)

In [None]:
# Hybrid query whose vector component is customized: it searches near "Scenic view"
# while moving away from the "Volcano" concept.
# NOTE(review): confirm that the Weaviate server version in use supports
# max_vector_distance before relying on this cell.
response = collection.query.hybrid(
    query="sea",
    alpha=0.75,
    limit=10,
    max_vector_distance=0.4,  # Maximum threshold for the vector search component
    vector=HybridVector.near_text(
        query="Scenic view",
        move_away=Move(force=0.5, concepts=["Volcano"]),
    ),
)

for hit in response.objects:
    json_print(hit.properties)

### RAG Search with Azure OpenAI

In [22]:
# Prompt template for per-object generation. The {country} and {annualtourists}
# placeholders name properties stored on each imported object.
# ("itinerary" was previously misspelled in the text sent to the model.)
prompt = "Create an itinerary from the {country} ordering by {annualtourists}"

# Retrieve the two objects closest to "sea" and run the prompt via single_prompt
response = collection.generate.near_text(
    query="sea",
    limit=2,
    single_prompt=prompt
)

In [None]:
# Response-level generated text first, then every retrieved object with its own generated text
print(response.generated)

for item in response.objects:
    print(item.properties, item.generated, sep="\n")

In [27]:
# Per-object generation again, this time naming the vector to search explicitly
# and asking for a Spanish translation of each object's "cultural" property.
response = collection.generate.near_text(
    query="sea",
    limit=2,
    single_prompt="Translate this into Spanish: {cultural}",
    return_metadata=MetadataQuery(distance=True),
    target_vector="title_vector",  # Specify the target vector for named vector collections
)

In [None]:
# Show the response-level generated text, then each object's properties and generated text
print(response.generated)

for item in response.objects:
    print(item.properties, item.generated, sep="\n")

In [None]:
# Instruction passed as grouped_task for the five objects closest to "sea"
task = "Create a dish using the Foods"

response = collection.generate.near_text(query="sea", limit=5, grouped_task=task)

# print the generated response
print(response.generated)