# Using the RAG Pattern with Weaviate and Azure OpenAI

In [1]:
# Import libraries
import os
from dotenv import load_dotenv
import json
import requests
import weaviate
from weaviate import EmbeddedOptions
from weaviate.classes.config import Configure, Property, DataType
from weaviate.classes.query import MetadataQuery, HybridVector, Move
import warnings

# Suppress every Python warning for the rest of the session
warnings.filterwarnings("ignore")

# Load the environment variables (python-dotenv)
load_dotenv()

# Azure OpenAI settings; a missing variable raises KeyError on its own line
aoai_key = os.environ["AZURE_OPENAI_API_KEY"]
aoai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
aoai_deployment = os.environ["AZURE_OPENAI_DEPLOYMENT"]
aoai_embedding = os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"]
aoai_embedding_3 = os.environ["AZURE_OPENAI_EMBEDDINGS_3_DEPLOYMENT"]



## Utility Functions

In [2]:
# JSON print beautifier
def json_print(data):
    """Print ``data`` serialized as JSON with a two-space indent.

    ``data`` must be serializable by ``json.dumps``; nothing is returned.
    """
    formatted = json.dumps(data, indent=2)
    print(formatted)

# Print response beautifier
def word_wrap(string, n_chars=72):
    """Wrap ``string`` into lines of at most ``n_chars`` characters.

    Each line is cut at the last space found within the next ``n_chars``
    characters; that space is replaced by the newline. When the window holds
    no space at all (one very long word or URL), the line is hard-broken
    after ``n_chars`` characters without losing any character.

    Args:
        string: Text to wrap.
        n_chars: Maximum line width; must be at least 1.

    Returns:
        The wrapped text, lines joined with ``"\\n"``. Text shorter than
        ``n_chars`` is returned unchanged.

    Raises:
        ValueError: If ``n_chars`` is smaller than 1.
    """
    if n_chars < 1:
        # A non-positive width can never make progress through the text
        raise ValueError("n_chars must be at least 1")

    lines = []
    pos = 0
    total = len(string)

    # Iterative scan: long texts cannot exhaust the recursion limit
    while total - pos >= n_chars:
        cut = string.rfind(' ', pos, pos + n_chars)
        if cut == -1:
            # No space in the window: hard break, keep every character
            lines.append(string[pos:pos + n_chars])
            pos += n_chars
        else:
            # Break at the last space in the window and drop that space
            lines.append(string[pos:cut])
            pos = cut + 1

    lines.append(string[pos:])
    return '\n'.join(lines)

## Create or connect to a Weaviate Embedded DB

* https://weaviate.io/developers/weaviate/connections/connect-embedded
* https://weaviate.io/developers/weaviate/connections/connect-local

In [3]:
# Reuse a Weaviate instance that already answers on the local REST port;
# otherwise start a new embedded instance.

# Headers sent with either connection so Weaviate can call Azure OpenAI
aoai_headers = {
    "X-OpenAI-BaseURL": aoai_endpoint,
    "X-Azure-Api-Key": aoai_key,
}

try:
    # Bounded wait: a hung listener must not stall the notebook forever
    response = requests.get("http://localhost:8079/v1/schema", timeout=5)
    r_err = False
except requests.exceptions.RequestException:
    # Connection refused or timed out: nothing usable is serving on the port
    r_err = True

if r_err:
    client = weaviate.connect_to_embedded(
        version="1.26.1",  # e.g. version="1.26.5"
        headers=aoai_headers,
    )
    print("Connected to new instance")
elif response.status_code == 200:
    client = weaviate.connect_to_local(
        port=8079,
        grpc_port=50050,
        headers=aoai_headers,
    )
    print("Connected to existing instance")
else:
    # Something answered on the port but not with a healthy schema response;
    # fail here with a clear message instead of a NameError on `client` below
    raise RuntimeError(
        f"Unexpected HTTP {response.status_code} from http://localhost:8079/v1/schema; "
        "cannot decide whether to reuse or start a Weaviate instance"
    )

print(client.is_ready())

HTTPConnectionPool(host='localhost', port=8079): Max retries exceeded with url: /v1/schema (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x79f002621c40>: Failed to establish a new connection: [Errno 111] Connection refused'))


{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2024-10-21T22:15:27Z"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2024-10-21T22:15:27Z"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2024-10-21T22:15:27Z"}
{"level":"info","msg":"module offload-s3 is enabled","time":"2024-10-21T22:15:27Z"}
{"level":"info","msg":"open cluster service","servers":{"Embedded_at_8079":43395},"time":"2024-10-21T22:15:27Z"}
{"address":"10.0.4.2:43396","level":"info","msg":"starting cloud rpc server ...","time":"2024-10-21T22:15:27Z"}
{"level":"info","msg":"starting raft sub-system ...","time":"2024-10-21T22:15:27Z"}
{"address":"10.0.4.2:43395"

Connected to new instance
True


{"docker_image_tag":"unknown","level":"info","msg":"configured versions","server_version":"1.26.1","time":"2024-10-21T22:15:29Z"}
{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50050","time":"2024-10-21T22:15:29Z"}
{"address":"10.0.4.2:43395","level":"info","msg":"current Leader","time":"2024-10-21T22:15:29Z"}
{"action":"restapi_management","docker_image_tag":"unknown","level":"info","msg":"Serving weaviate at http://127.0.0.1:8079","time":"2024-10-21T22:15:29Z"}


{"action":"bootstrap","level":"info","msg":"node reporting ready, node has probably recovered cluster from raft config. Exiting bootstrap process","time":"2024-10-21T22:15:30Z"}
{"action":"hnsw_prefill_cache_async","level":"info","msg":"not waiting for vector cache prefill, running in background","time":"2024-10-21T22:15:30Z","wait_for_cache_prefill":false}
{"level":"info","msg":"Completed loading shard eudestinations_MCh5Qceg5mhL in 80.954326ms","time":"2024-10-21T22:15:30Z"}
{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"vectors_title_vector","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-10-21T22:15:30Z","took":3062746}
{"action":"telemetry_push","level":"info","msg":"telemetry started","payload":"\u0026{MachineID:e4b59739-f0d4-4545-94d7-0191baeb4a06 Type:INIT Version:1.26.1 NumObjects:0 OS:linux Arch:amd64 UsedModules:[generative-openai text2vec-openai]}","time":"2024-10-21T22:15:30Z"}


In [4]:
# Show Weaviate DB metadata: pretty-print whatever client.get_meta() returns
# (the recorded output lists the hostname and the enabled modules)
json_print(client.get_meta())

{
  "hostname": "http://127.0.0.1:8079",
  "modules": {
    "generative-openai": {
      "documentationHref": "https://platform.openai.com/docs/api-reference/completions",
      "name": "Generative Search - OpenAI"
    },
    "qna-openai": {
      "documentationHref": "https://platform.openai.com/docs/api-reference/completions",
      "name": "OpenAI Question & Answering Module"
    },
    "ref2vec-centroid": {},
    "reranker-cohere": {
      "documentationHref": "https://txt.cohere.com/rerank/",
      "name": "Reranker - Cohere"
    },
    "text2vec-cohere": {
      "documentationHref": "https://docs.cohere.ai/embedding-wiki/",
      "name": "Cohere Module"
    },
    "text2vec-huggingface": {
      "documentationHref": "https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task",
      "name": "Hugging Face Module"
    },
    "text2vec-openai": {
      "documentationHref": "https://platform.openai.com/docs/guides/embeddings/what-are-embeddings",
      "nam

## Create a new Collection (EU Destinations)

https://weaviate.io/developers/weaviate/manage-data/collections

In [5]:
# Drop the "eudestinations" collection if it already exists, presumably so the
# create call further down starts from an empty collection on every run.
# NOTE(review): this deletes all previously imported objects in that collection.
if client.collections.exists("eudestinations"):
    client.collections.delete("eudestinations")

* Using Azure OpenAI Embeddings: https://weaviate.io/developers/weaviate/model-providers/openai-azure/embeddings
* Using Azure OpenAI generative models: https://weaviate.io/developers/weaviate/model-providers/openai-azure/generative

In [6]:
# Create the collection with one named vector ("title_vector") produced by the
# Azure OpenAI embeddings deployment, and an Azure OpenAI generative module.
# The models are whatever the configured deployments point to (originally
# text-embedding-ada-002 and gpt-4-turbo-2024-04-09).

# Azure OpenAI resource name, defined once instead of being repeated below.
# Set AZURE_OPENAI_RESOURCE_NAME to override; the default is the original value.
aoai_resource_name = os.environ.get("AZURE_OPENAI_RESOURCE_NAME", "aoai-airlift-1")

client.collections.create(
    "eudestinations",
    vectorizer_config=[
        Configure.NamedVectors.text2vec_azure_openai(
            name="title_vector",
            resource_name=aoai_resource_name,
            deployment_id=aoai_embedding,
            base_url=aoai_endpoint
        )
    ],
    generative_config=Configure.Generative.azure_openai(
        resource_name=aoai_resource_name,
        deployment_id=aoai_deployment,
        base_url=aoai_endpoint
        # Optional generation settings:
        # frequency_penalty=0,
        # max_tokens=500,
        # presence_penalty=0,
        # temperature=0.7,
        # top_p=0.7
    )
)

<weaviate.collections.collection.sync.Collection at 0x79f0194762a0>

{"action":"hnsw_prefill_cache_async","level":"info","msg":"not waiting for vector cache prefill, running in background","time":"2024-10-21T22:17:09Z","wait_for_cache_prefill":false}
{"level":"info","msg":"Created shard eudestinations_vowCOdq2iZ8P in 1.126111ms","time":"2024-10-21T22:17:09Z"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"vectors_title_vector","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-10-21T22:17:09Z","took":37270}


## Read European Tourist Destinations

_Source: https://www.kaggle.com/datasets/faizadani/european-tour-destinations-dataset_

In [7]:
# Load the destinations dataset (a JSON array of records) and preview the first one
file_path = '../data/eu_destinations_n.json'

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale, which is not UTF-8 everywhere and can fail or garble text.
with open(file_path, "r", encoding="utf-8") as file:
    data = file.read()

ds = json.loads(data)
json_print(ds[0])

{
  "Destination": "Rome",
  "Region": "Lazio",
  "Country": "Italy",
  "Category": "City",
  "Approximate Annual Tourists": "14 million",
  "Famous Foods": "Pizza, Pasta, Gelato",
  "Language": "Italian",
  "Best Time to Visit": "Spring (April-May) or Fall (Sept-Oct)",
  "Cost of Living": "Medium-high",
  "Cultural Significance": "The capital city, known for its historical landmarks like the Colosseum, Vatican City, and Pantheon.",
  "Description": "A hub of ancient history and modern culture, with rich traditions, art, and landmarks."
}


## Loading Embeddings into the Weaviate DB

https://weaviate.io/developers/weaviate/manage-data/import

In [8]:
collection = client.collections.get("eudestinations")

# Weaviate property name -> key of the same field in the source JSON records
property_sources = {
    "destination": "Destination",
    "region": "Region",
    "country": "Country",
    "category": "Category",
    "annualtourists": "Approximate Annual Tourists",
    "foods": "Famous Foods",
    "language": "Language",
    "besttimevisit": "Best Time to Visit",
    "costliving": "Cost of Living",
    "cultural": "Cultural Significance",
    "description": "Description",
}

with collection.batch.dynamic() as batch:
    for d in ds:
        # A record missing one of the expected keys raises KeyError here
        weaviate_obj = {prop: d[key] for prop, key in property_sources.items()}

        # The model provider integration will automatically vectorize the object
        batch.add_object(
            properties=weaviate_obj,
            # vector=vector  # Optionally provide a pre-obtained vector
        )

# Batch import errors are collected rather than raised; surface them so a
# partially loaded collection does not go unnoticed.
failed_objects = collection.batch.failed_objects
if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)}")
    print(f"First failed object: {failed_objects[0]}")

## Query a specific Collection Dataset

In [9]:
# Get a handle to the "eudestinations" collection for the queries below
# (same call as in the import cell, repeated here)
collection = client.collections.get("eudestinations")

## Search Patterns

https://weaviate.io/developers/weaviate/search/basics

In [12]:
# Fetch a single DB object (objects come back in ascending UUID order)
# and pretty-print its stored properties
response = collection.query.fetch_objects(limit=1)

for obj in response.objects:
    json_print(obj.properties)

{
  "language": "Maltese, English",
  "description": null,
  "besttimevisit": "Spring (April-May) or Fall (Sep-Oct)",
  "category": "City",
  "destination": "Mdina",
  "costliving": "Medium-high",
  "region": "Central Region",
  "annualtourists": "1 million",
  "country": "Malta",
  "foods": "Pastizzi, Rabbit Stew, Imqaret",
  "cultural": "A medieval walled city known as the \"Silent City,\" offering a glimpse into Malta's past."
}


### RAG Search with Azure OpenAI

Retrieval Augmented Generation (RAG) is an architecture that augments the capabilities of a Large Language Model (LLM) like ChatGPT by adding an information retrieval system that provides grounding data. 

### Single Prompt Search
* Single prompt search **returns a generated response for each object** in the query results.
* Define object properties – using **{prop-name}** syntax – to interpolate retrieved content in the prompt.
* **Distance closer to 0:** _identical vectors_ / **Distance closer to 2:** _Opposing vectors_.
* https://weaviate.io/developers/weaviate/search/generative#single-prompt-search

In [25]:
# Single prompt search: every retrieved object gets its own generated answer,
# with the {destination}, {cultural} and {foods} placeholders filled from that
# object's properties.
# Adjacent string literals replace the backslash continuations, which embedded
# each following line's indentation inside the prompt text.
prompt = (
    "Create a historic 1 day trip itinerary "
    "visiting only 3 specific places from the mentioned {destination} "
    "considering the {cultural} values, "
    "and prepare a dish for dinner using the mentioned {foods}"
)

response = collection.generate.near_text(
    query="Italy ancient history",
    single_prompt=prompt,
    return_properties=['destination', 'country', 'foods', 'cultural', 'description'],
    limit=2,
    return_metadata=MetadataQuery(distance=True)
)

# The printed "Score" is the vector distance (closer to 0 means more similar)
for rank, o in enumerate(response.objects, start=1):
    print(f"Rank {rank} - Score: {o.metadata.distance}")
    json_print(o.properties)
    print(f"Azure OpenAI Response:\n------------------------ \n{word_wrap(o.generated)}\n------------------------\n")

In [39]:
# Single prompt search that asks the model to translate the retrieved object's
# fields into Spanish; the placeholders are filled per object.
# Adjacent string literals replace the backslash continuation, which embedded
# the following line's indentation inside the prompt text.
prompt = (
    "Translate to Spanish language the following values after the colon: "
    "{destination}. {country}. {foods}. {cultural}. {description}"
)

response = collection.generate.near_text(
    query="Gaudi",
    single_prompt=prompt,
    return_properties=['destination', 'country', 'foods', 'cultural', 'description'],
    limit=1,
    #target_vector="title_vector",  # Specify the target vector for named vector collections
    return_metadata=MetadataQuery(distance=True)
)

In [40]:
# Show each hit from the previous query: its rank, its vector distance
# (printed as "Score"), its stored properties and the text generated for it.
for rank, hit in enumerate(response.objects, start=1):
    wrapped_answer = word_wrap(hit.generated)
    print(f"Rank {rank} - Score: {hit.metadata.distance}")
    json_print(hit.properties)
    print(f"Azure OpenAI Response:\n------------------------ \n{wrapped_answer}\n------------------------\n")

Rank 1 - Score: 0.1673290729522705
{
  "destination": "Barcelona",
  "description": "A city of stunning architecture, art, and vibrant culture. Famous for La Sagrada Familia and Park G\u0081ell.",
  "foods": "Paella, Tapas, Gazpacho",
  "cultural": "Known for its Gaudi architecture, beautiful beaches, and vibrant nightlife.",
  "country": "Spain"
}
Azure OpenAI Response:
------------------------ 
Barcelona. España. Paella, Tapas, Gazpacho. Conocida por su
arquitectura de Gaudí, hermosas playas y vida nocturna vibrante. Una
ciudad de impresionante arquitectura, arte y cultura vibrante. Famosa
por La Sagrada Familia y el Parque Güell.
------------------------



### Grouped task search
* Grouped task search returns one response that includes all of the query results. 
* By default grouped task search uses all object properties in the prompt.
* https://weaviate.io/developers/weaviate/search/generative#grouped-task-search

In [None]:
# Grouped task: one generated answer built from all retrieved objects together
task = "Create a dish using the Foods"

response = collection.generate.near_text(query="sea", grouped_task=task, limit=5)

# The single combined answer is exposed on the response itself
grouped_answer = response.generated
print(grouped_answer)