#### Search

In [1]:
%pip install -q azure-search-documents python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [2]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient

import os
from dotenv import load_dotenv

load_dotenv("../.env")

# Azure Search service details
service_name = os.getenv("SERVICE_NAME")
admin_key = os.getenv("SEARCH_ADMIN_KEY")
index_name = os.getenv("SEARCH_INDEX_NAME")

# Fail fast when a setting is absent: os.getenv returns None for an unset
# variable, which would otherwise be formatted into the endpoint URL as the
# literal text "None" or be handed to the SDK constructors as None.
missing_settings = [
    env_name
    for env_name, value in (
        ("SERVICE_NAME", service_name),
        ("SEARCH_ADMIN_KEY", admin_key),
        ("SEARCH_INDEX_NAME", index_name),
    )
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_settings)
        + ". Set them in ../.env or in the process environment."
    )

# Initialize the search index client
endpoint = f"https://{service_name}.search.windows.net/"
credential = AzureKeyCredential(admin_key)
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)

# Only non-secret values are echoed; the admin key is deliberately not printed.
print(f"endpoint: {endpoint}")
print(f"index_name: {index_name}")

endpoint: https://hotelservice.search.windows.net/
index_name: hotel-reviews-index


In [3]:
from openai import OpenAI

# Shared OpenAI client used by embed_query below. No credentials are passed
# explicitly here — presumably the SDK reads them from the environment (e.g.
# values loaded from ../.env by load_dotenv); TODO confirm.
openai_client = OpenAI()

def embed_query(query: str) -> list:
    """Return the embedding vector for ``query``.

    Sends ``query`` to the embeddings endpoint of the module-level
    ``openai_client`` using the ``text-embedding-3-small`` model and returns
    the ``embedding`` attribute of the first item in the response's ``data``.
    """
    embedding_model = "text-embedding-3-small"
    reply = openai_client.embeddings.create(input=query, model=embedding_model)
    first_item = reply.data[0]
    return first_item.embedding

In [6]:
from azure.search.documents.models import VectorizedQuery
from azure.search.documents.models import QueryType, QueryCaptionType, QueryAnswerType

def full_text_search(query, top=5):
    """Run a keyword (full-text) query against the index.

    ``query`` is passed as ``search_text`` to the module-level
    ``search_client`` together with ``top``; the hits are materialized and
    returned as a list.
    """
    hits = search_client.search(search_text=query, top=top)
    return [hit for hit in hits]

def vector_search(query, top=3):
    """Perform a pure vector (embedding similarity) search.

    Args:
        query: Text to embed with ``embed_query`` and match against the
            ``embedding`` field.
        top: Number of nearest neighbours to request and the result limit
            forwarded to the search call. Defaults to 3, the value that was
            previously hard-coded, so existing ``vector_search(query)`` calls
            behave as before.

    Returns:
        list: The hits, each restricted to ``hotel_name``, ``review_text``,
        ``review_title`` and ``city``.
    """
    embedded_query = embed_query(query)
    vector_query = VectorizedQuery(
        vector=embedded_query, k_nearest_neighbors=top, fields="embedding"
    )

    results = search_client.search(
        search_text=None,  # no keyword component: vector similarity only
        vector_queries=[vector_query],
        select=["hotel_name", "review_text", "review_title", "city"],
        top=top,
    )
    return list(results)

def hybrid_search(query, top=5):
    """Run a combined keyword + vector query and return the hits as a list.

    ``query`` is sent as ``search_text`` and, after being embedded with
    ``embed_query``, as a vector query over the ``embedding`` field requesting
    3 nearest neighbours. ``top`` is forwarded to the search call, and each
    hit is restricted to the selected fields.
    """
    query_vector = embed_query(query)
    knn_query = VectorizedQuery(
        vector=query_vector,
        k_nearest_neighbors=3,
        fields="embedding",
    )
    wanted_fields = ["id", "review_text", "review_title", "hotel_name", "city"]
    hits = search_client.search(
        search_text=query,
        vector_queries=[knn_query],
        select=wanted_fields,
        top=top,
    )
    return [hit for hit in hits]

def semantic_search(query, top=5):
    """Run a keyword + vector query with the semantic query type enabled.

    ``query`` is sent as ``search_text`` and, embedded via ``embed_query``, as
    a vector query over the ``embedding`` field (3 nearest neighbours). The
    request uses ``QueryType.SEMANTIC`` with the ``ps-hotels-semantic-config``
    configuration and asks for extractive captions and answers.

    Unlike the other search helpers in this file, the object returned by
    ``search_client.search`` is handed back as-is rather than converted to a
    list.
    """
    query_vector = embed_query(query)
    knn_query = VectorizedQuery(
        vector=query_vector,
        k_nearest_neighbors=3,
        fields="embedding",
    )
    request = {
        "search_text": query,
        "vector_queries": [knn_query],
        "select": [
            "id",
            "review_text",
            "review_title",
            "hotel_name",
            "city",
            "hotel_state",
        ],
        "query_type": QueryType.SEMANTIC,
        "semantic_configuration_name": "ps-hotels-semantic-config",
        "query_caption": QueryCaptionType.EXTRACTIVE,
        "query_answer": QueryAnswerType.EXTRACTIVE,
        "top": top,
    }
    return search_client.search(**request)

def print_semantic_results(results):
    """Print each hit's hotel name, reranker score, title, review and caption.

    For every item in ``results`` the four labelled fields are printed one per
    line. If the item's ``@search.captions`` value is truthy, its first
    caption is printed as well: the ``highlights`` attribute when that is
    truthy, otherwise the ``text`` attribute. A blank separator is printed
    after every item.
    """
    labelled_fields = (
        ("Hotel Name", "hotel_name"),
        ("Reranker Score", "@search.reranker_score"),
        ("Review Title", "review_title"),
        ("Review", "review_text"),
    )
    for hit in results:
        for label, key in labelled_fields:
            print(f"{label}: {hit[key]}")

        caption_list = hit["@search.captions"]
        if caption_list:
            first_caption = caption_list[0]
            # Prefer the highlighted form of the caption when one is present.
            shown = (
                first_caption.highlights
                if first_caption.highlights
                else first_caption.text
            )
            print(f"Caption: {shown}")
        print("\n")

def print_results(results):
    """Print hotel name, city, score, review title and review text per hit.

    Each item in ``results`` is indexed by key (``hotel_name``, ``city``,
    ``@search.score``, ``review_title``, ``review_text``); the values are
    printed one labelled line at a time, followed by a blank separator.
    """
    labelled_fields = (
        ("Hotel Name", "hotel_name"),
        ("City", "city"),
        ("Score", "@search.score"),
        ("Review Title", "review_title"),
        ("Review", "review_text"),
    )
    for hit in results:
        for label, key in labelled_fields:
            print(f"{label}: {hit[key]}")
        print("\n")

In [9]:
# Keyword-only run: pass the query text to full_text_search and print each hit
# with its score via print_results.
query = "walkable"
results = full_text_search(query)

print_results(results)


Hotel Name: The Charlesmark Hotel
City: Boston
Score: 12.311747
Review Title: Convenient location in the city centre walkable to most...
Review: Bad: Nothing. Good: Convenient location in the city centre walkable to most attractions


Hotel Name: The Charlesmark Hotel
City: Boston
Score: 7.638342
Review Title: Excellent location in city centre
Review: Bad: Nothing. Good: Convenient location in the city centre walkable to most attractions


Hotel Name: dana hotel and spa
City: Chicago
Score: 6.6430044
Review Title: Great value, wonderfully walkable location, polite staff, restaurant beyond excellent.
Review: Bad: Worst bartender ever. Long Island Iced Tea missing a few ingredients, King Alphonse was actually a Kahlua and cream with ice. Ugh. In-room coffee machine didn't work, took two days to get fixed, and then made awful coffee. Morning coffee from the restaurant was undrinkable. Thankfully, Starbucks nearby. Good: Best restaurant for dinner of any hotel anywhere. Entrees superb, bre

In [10]:
# Same query text, this time through the embedding-based vector_search helper.
# Note that `query` and `results` are rebound here, replacing the values set
# by the previous cell.
query = "walkable"
results = vector_search(query)

print_results(results)

Hotel Name: The Charlesmark Hotel
City: Boston
Score: 0.5965724
Review Title: Convenient location in the city centre walkable to most...
Review: Bad: Nothing. Good: Convenient location in the city centre walkable to most attractions


Hotel Name: The Charlesmark Hotel
City: Boston
Score: 0.59231913
Review Title: Pleasantly surprised
Review: Bad: Closest parking was a 4-5 minute walk away for 28 overnight. Closer parking is available for 48 overnight. Good: Great neighborhood on Boylston and a block from Newbury St. Plenty of restaurants and bars within walking distance to choose from. It is one of the better values compared to other hotels in Boston. Half a block from the Copley T station if you want to get to another part of the city. 15- 20 minute walk to Fenway Park for the Sox game.


Hotel Name: The Westin Las Vegas Hotel & Spa
City: Las Vegas
Score: 0.59213525
Review Title: Clean, comfortable and easy to walk to the sights.
Review: Bad: Service was not all it could be especially 

In [11]:
# Same query text through hybrid_search (keyword + vector in one request).
# `query` and `results` stay bound at module level and are read by the
# summarization cell below (as USER_QUERY / SEARCH_RESULTS).
query = "walkable"
results = hybrid_search(query)

print_results(results)

Hotel Name: The Charlesmark Hotel
City: Boston
Score: 0.03333333507180214
Review Title: Convenient location in the city centre walkable to most...
Review: Bad: Nothing. Good: Convenient location in the city centre walkable to most attractions


Hotel Name: The Charlesmark Hotel
City: Boston
Score: 0.016393441706895828
Review Title: Excellent location in city centre
Review: Bad: Nothing. Good: Convenient location in the city centre walkable to most attractions


Hotel Name: The Charlesmark Hotel
City: Boston
Score: 0.016393441706895828
Review Title: Pleasantly surprised
Review: Bad: Closest parking was a 4-5 minute walk away for 28 overnight. Closer parking is available for 48 overnight. Good: Great neighborhood on Boylston and a block from Newbury St. Plenty of restaurants and bars within walking distance to choose from. It is one of the better values compared to other hotels in Boston. Half a block from the Copley T station if you want to get to another part of the city. 15- 20 minute

#### Summarize Using OpenAI

In [12]:
from openai import OpenAI
import json

client = OpenAI()

SYSTEM_MESSAGE = "You are a helpful assistant."

# `query` and `results` are whatever the most recently executed search cell
# left bound at module level.
USER_QUERY = query
SEARCH_RESULTS = results

# Hand the hits to the model as JSON instead of the Python repr of the list
# (single quotes, `None`), since the prompt asks for JSON back. default=str
# keeps the call from failing on any value json cannot encode natively.
SEARCH_RESULTS_JSON = json.dumps(
    SEARCH_RESULTS, indent=2, ensure_ascii=False, default=str
)

# The schema example uses double quotes so that it is itself valid JSON.
USER_MESSAGE = f"""
  You are provided a user query, and the search results based on user query. Your task is to summarize the results and put the best order for the results. 

USER_QUERY
```
{USER_QUERY}
```

SEARCH_RESULTS
```
{SEARCH_RESULTS_JSON}
```

Return a valid JSON with the following information

RESULT_SCHEMA
```
{{
  "summary": "",
  "results": []
}}
```
"""


# Keep the raw API object under its own name; `response` below ends up holding
# only the parsed dict.
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": USER_MESSAGE},
    ],
    response_format={"type": "json_object"},
)

response = json.loads(completion.choices[0].message.content)
print(json.dumps(response, indent=2))

{
  "summary": "The search results highlight several hotels known for their walkable locations in vibrant urban areas. The Charlesmark Hotel in Boston is frequently mentioned for its central location close to attractions, while the dana hotel and spa in Chicago offers a walkable experience with great dining options. The Westin Las Vegas Hotel & Spa is noted for its clean and comfortable environment, also situated conveniently for exploring the city. Overall, these hotels provide great accessibility to various city sights.",
  "results": [
    {
      "city": "Boston",
      "review_title": "Convenient location in the city centre walkable to most...",
      "id": "4f9b3801-fc14-4f71-9a69-4d3aa3857723",
      "hotel_name": "The Charlesmark Hotel",
      "review_text": "Bad: Nothing. Good: Convenient location in the city centre walkable to most attractions."
    },
    {
      "city": "Boston",
      "review_title": "Excellent location in city centre",
      "id": "34132c10-104b-4565-9faf