In [4]:
import json
import os

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    ComplexField,
    CorsOptions,
    SearchIndex,
    ScoringProfile,
    SearchFieldDataType,
    SimpleField,
    SearchableField
)

In [5]:
import yaml

# Read the service name and admin key from a local credentials file so that
# no secret is hardcoded in the notebook.
with open('../creds.dev.yml', encoding='utf-8') as credsfi:
    creds = yaml.safe_load(credsfi)

service_name = creds['service_name']
admin_key = creds['primary_admin']

index_name = "hotels-quickstart"

# Create the SDK clients; both authenticate with the same admin-key credential.
endpoint = "https://{}.search.windows.net/".format(service_name)
credential = AzureKeyCredential(admin_key)

# Index-management client (used below for delete_index / create_index).
# The index is named in each of those calls, so no `index_name` is passed here.
admin_client = SearchIndexClient(endpoint=endpoint, credential=credential)

# Document client bound to `index_name` (used below for uploads and queries).
search_client = SearchClient(endpoint=endpoint,
                             index_name=index_name,
                             credential=credential)

In [6]:
# Start from a clean slate: ask the service to delete the index by name.
try:
    result = admin_client.delete_index(index_name)
except Exception as ex:
    # Best effort: report whatever went wrong instead of stopping the notebook.
    print(ex)
else:
    print('Index', index_name, 'Deleted')

Index hotels-quickstart Deleted


In [7]:
# Index schema: a string key, searchable text fields (two with language
# analyzers), facet/filter/sort attributes, and a nested Address complex type.
name = index_name

# Pieces reused by several field definitions below.
string_type = SearchFieldDataType.String
facet_filter_sort = {"facetable": True, "filterable": True, "sortable": True}

# Sub-fields of the Address complex type.
address_fields = [
    SearchableField(name="StreetAddress", type=string_type),
    SearchableField(name="City", type=string_type, **facet_filter_sort),
    SearchableField(name="StateProvince", type=string_type, **facet_filter_sort),
    SearchableField(name="PostalCode", type=string_type, **facet_filter_sort),
    SearchableField(name="Country", type=string_type, **facet_filter_sort),
]

fields = [
    # HotelId is the document key.
    SimpleField(name="HotelId", type=string_type, key=True),
    SearchableField(name="HotelName", type=string_type, sortable=True),
    SearchableField(name="Description", type=string_type, analyzer_name="en.lucene"),
    SearchableField(name="Description_fr", type=string_type, analyzer_name="fr.lucene"),
    SearchableField(name="Category", type=string_type, **facet_filter_sort),

    # Tags is a collection of strings.
    SearchableField(name="Tags", collection=True, type=string_type,
                    facetable=True, filterable=True),

    SimpleField(name="ParkingIncluded", type=SearchFieldDataType.Boolean, **facet_filter_sort),
    SimpleField(name="LastRenovationDate", type=SearchFieldDataType.DateTimeOffset, **facet_filter_sort),
    SimpleField(name="Rating", type=SearchFieldDataType.Double, **facet_filter_sort),

    ComplexField(name="Address", fields=address_fields),
]

# CORS: any origin is allowed; max age is 60 seconds.
cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
scoring_profiles = []
# Suggester 'sg' (used by the autocomplete call later) draws on Tags and two Address sub-fields.
suggester = [{'name': 'sg', 'source_fields': ['Tags', 'Address/City', 'Address/Country']}]

In [8]:
# Bundle the schema pieces into an index definition and create it on the service.
index = SearchIndex(name=name,
                    fields=fields,
                    scoring_profiles=scoring_profiles,
                    suggesters=suggester,
                    cors_options=cors_options)

try:
    result = admin_client.create_index(index)
except Exception as ex:
    # Best effort: report the failure instead of stopping the notebook.
    print(ex)
else:
    print('Index', result.name, 'created')

Index hotels-quickstart created


In [9]:
# Load the sample documents. The file is JSON, so parse it with the JSON parser,
# and read it as UTF-8 explicitly (JSON's standard encoding) rather than relying
# on the platform default.
with open('documents.json', encoding='utf-8') as infi:
    documents = json.load(infi)

print(len(documents))

4


In [12]:
# Upload the documents to the index.
# It seems that `key`s are unique (in our case HotelId), so uploading a document
# whose key already exists re-uploads the same doc with no exception.
try:
    result = search_client.upload_documents(documents=documents)
    # `result` is indexed per document, so report success only if every entry succeeded.
    print("Upload of new {} document(s) succeeded: {}".format(
        len(documents), all(item.succeeded for item in result)))
except Exception as ex:
    # Not every exception type has a `.message` attribute; print the exception itself.
    print(ex)

Upload of new 4 document(s) succeeded: True


In [14]:
# Test the overwrite-by-key behaviour: change the name of one hotel and upload again.
# update: yep! It seems that `key`s are unique (in our case HotelId), so uploading
# a document whose key already exists re-uploads it with no exception.

name_suffix = ' IS CHANGED NOW!'
# Guard so that re-running this cell does not append the suffix a second time.
if not documents[0]['HotelName'].endswith(name_suffix):
    documents[0]['HotelName'] += name_suffix

try:
    result = search_client.upload_documents(documents=documents)
    # `result` is indexed per document, so report success only if every entry succeeded.
    print("Upload of new {} document(s) succeeded: {}".format(
        len(documents), all(item.succeeded for item in result)))
except Exception as ex:
    # Not every exception type has a `.message` attribute; print the exception itself.
    print(ex)

Upload of new 4 document(s) succeeded: True


In [15]:
# Empty search (search=*): there are no criteria, so every document is included.
# The list is unranked (search score = 1.0) and in arbitrary order.
all_hits = search_client.search(search_text="*", include_total_count=True)

print('Total Documents Matching Query:', all_hits.get_count())
for hit in all_hits:
    print("{}: {}".format(hit["HotelId"], hit["HotelName"]))

Total Documents Matching Query: 4
1: Secret Point Motel IS CHANGED NOW!
2: Twin Dome Motel
3: Triple Landscape Hotel
4: Sublime Cliff Hotel


In [18]:
# Whole-term search for "wifi".
# `select` restricts each hit to the listed fields, reducing data sent/received.
# NOTE: the search itself still covers all fields, even those not in `select`.
wifi_hits = search_client.search(search_text="wifi",
                                 select='HotelId,HotelName,Tags',
                                 include_total_count=True)

print('Total Documents Matching Query:', wifi_hits.get_count())
for hit in wifi_hits:
    print("{}: {}: {}".format(hit["HotelId"], hit["HotelName"], hit["Tags"]))

Total Documents Matching Query: 1
2: Twin Dome Motel: ['pool', 'free wifi', 'concierge']


In [19]:
# Filter and sort: keep hits with Rating > 4, ordered by Rating descending.
top_rated = search_client.search(search_text="hotels",
                                 filter='Rating gt 4',
                                 order_by='Rating desc',
                                 select='HotelId,HotelName,Rating')

for hit in top_rated:
    print("{}: {} - {} rating".format(hit["HotelId"], hit["HotelName"], hit["Rating"]))

3: Triple Landscape Hotel - 4.8 rating
4: Sublime Cliff Hotel - 4.6 rating


In [22]:
# Scoped matching: `search_fields` limits the match to HotelName only.
name_hits = search_client.search(search_text="sublime",
                                 search_fields=['HotelName'],
                                 select='HotelId,HotelName')

for hit in name_hits:
    print("{}: {}".format(hit["HotelId"], hit["HotelName"]))

4: Sublime Cliff Hotel


In [23]:
# Facets with counts, using a match-everything search again.
faceted = search_client.search(search_text="*", facets=["Category"])

# get_facets() is keyed by the facet field name; print each Category entry.
category_facets = faceted.get_facets()["Category"]
for bucket in category_facets:
    print("    {}".format(bucket))

    {'value': 'Boutique', 'count': 3}
    {'value': 'Resort and Spa', 'count': 1}


In [24]:
# Look up a single document by its key. HotelId was declared as the key field in
# the schema: SimpleField(name="HotelId", type=SearchFieldDataType.String, key=True)
hotel = search_client.get_document(key="3")

print("Details for hotel '3' are:")
print("Name: {}".format(hotel["HotelName"]))
print("Rating: {}".format(hotel["Rating"]))
print("Category: {}".format(hotel["Category"]))

Details for hotel '3' are:
Name: Triple Landscape Hotel
Rating: 4.8
Category: Resort and Spa


In [26]:
# Autocomplete, using the suggester named 'sg' that was created with the index:
# suggester = [{'name': 'sg', 'source_fields': ['Tags', 'Address/City', 'Address/Country']}]
# NOTE(review): `twoTerms` is the autocomplete mode; judging by the two-word
# completion in the output ("san antonio") it appears to allow completions of up
# to two terms -- TODO confirm against the Autocomplete API docs.
search_suggestion = 'sa'
completions = search_client.autocomplete(search_text=search_suggestion,
                                         suggester_name="sg",
                                         mode='twoTerms')

print("Autocomplete for:", search_suggestion)
for completion in completions:
    print(completion['text'])

Autocomplete for: sa
san antonio
sarasota


In [32]:
# Inspect what a search hit actually contains by dumping each one as JSON.
import json

raw_hits = search_client.search(search_text="*", include_total_count=True)

for doc in raw_hits:
    print(json.dumps(doc, sort_keys=True, indent=2))

{
  "@search.captions": null,
  "@search.highlights": null,
  "@search.reranker_score": null,
  "@search.score": 1.0,
  "Address": {
    "City": "New York",
    "Country": "USA",
    "PostalCode": "10022",
    "StateProvince": "NY",
    "StreetAddress": "677 5th Ave"
  },
  "Category": "Boutique",
  "Description": "The hotel is ideally located on the main commercial artery of the city in the heart of New York. A few minutes away is Time's Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.",
  "Description_fr": "L'h\u00f4tel est id\u00e9alement situ\u00e9 sur la principale art\u00e8re commerciale de la ville en plein c\u0153ur de New York. A quelques minutes se trouve la place du temps et le centre historique de la ville, ainsi que d'autres lieux d'int\u00e9r\u00eat qui font de New York l'une des villes les plus attractives et cosmopolites de l'Am\u00e9rique.",
  "HotelId": "1",
  "

In [18]:
# Same dump, but for an actual search term.
# NOTE: `change*` matches `changed` but `change` does not -- did we do any stemming/lemmatization?
# NOTE(review): HotelName was declared without an `analyzer_name` in the schema
# (unlike Description, which uses "en.lucene"); presumably that is why the bare
# term does not match -- TODO confirm.

import json

prefix_hits = search_client.search(search_text="change*", include_total_count=True)

for doc in prefix_hits:
    print(json.dumps(doc, sort_keys=True, indent=2))

{
  "@search.captions": null,
  "@search.highlights": null,
  "@search.reranker_score": null,
  "@search.score": 1.0,
  "Address": {
    "City": "New York",
    "Country": "USA",
    "PostalCode": "10022",
    "StateProvince": "NY",
    "StreetAddress": "677 5th Ave"
  },
  "Category": "Boutique",
  "Description": "The hotel is ideally located on the main commercial artery of the city in the heart of New York. A few minutes away is Time's Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.",
  "Description_fr": "L'h\u00f4tel est id\u00e9alement situ\u00e9 sur la principale art\u00e8re commerciale de la ville en plein c\u0153ur de New York. A quelques minutes se trouve la place du temps et le centre historique de la ville, ainsi que d'autres lieux d'int\u00e9r\u00eat qui font de New York l'une des villes les plus attractives et cosmopolites de l'Am\u00e9rique.",
  "HotelId": "1",
  "