In [19]:
from elasticsearch import Elasticsearch, helpers
from urllib.request import urlopen
import getpass
import json

In [20]:
ELASTCSEARCH_CERT_PATH = "/Users/liuxg/elastic/elasticsearch-8.10.0/config/certs/http_ca.crt"
 
client = Elasticsearch(  ['https://localhost:9200'],
    basic_auth = ('elastic', 'vXDWYtL*my3vnKY9zCfL'),
    ca_certs = ELASTCSEARCH_CERT_PATH,
    verify_certs = True)

# Confirm that the client has connected with this test

In [21]:
print(client.info())

{'name': 'liuxgm.local', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'lM5ZgEQNTkO8phtT5qZAHQ', 'version': {'number': '8.10.0', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'e338da74c79465dfdc204971e600342b0aa87b6b', 'build_date': '2023-09-07T08:16:21.960703010Z', 'build_snapshot': False, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [22]:
client.ingest.put_pipeline(
    id="elser-ingest-pipeline", 
    description="Ingest pipeline for ELSER",
    processors=[
    {
      "inference": {
        "model_id": ".elser_model_1",
        "target_field": "ml",
        "field_map": {
          "plot": "text_field"
        },
        "inference_config": {
          "text_expansion": {
            "results_field": "tokens"
          }
        }
      }
    }
  ]
)

ObjectApiResponse({'acknowledged': True})

In [24]:
INDEX_NAME = "elser-example-movies"

if(client.indices.exists(index=INDEX_NAME)):
    print("The index has already existed, going to remove it")
    client.options(ignore_status=404).indices.delete(index=INDEX_NAME)

 
client.indices.create(
  index=INDEX_NAME,
  settings={
      "index": {
          "number_of_shards": 1,
          "number_of_replicas": 1,
          "default_pipeline": "elser-ingest-pipeline"
      }
  },
  mappings={
    "properties": {
      "plot": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "ml.tokens": {
        "type": "rank_features"
      },
    }
  }
)

The index has already existed, going to remove it


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'elser-example-movies'})

In [26]:
# Load data into a JSON object
with open('data.json') as f:
   data_json = json.load(f)

print(data_json)

# Prepare the documents to be indexed
documents = []
for doc in data_json:
    documents.append({
        "_index": INDEX_NAME,
        "_source": doc,
    })

# Use helpers.bulk to index
helpers.bulk(client, documents)

print("Done indexing documents into `search-movies` index!")

[{'title': 'Pulp Fiction', 'runtime': '154', 'plot': 'The lives of two mob hitmen, a boxer, a gangster and his wife, and a pair of diner bandits intertwine in four tales of violence and redemption.', 'keyScene': "John Travolta is forced to inject adrenaline directly into Uma Thurman's heart after she overdoses on heroin.", 'genre': 'Crime, Drama', 'released': '1994'}, {'title': 'The Dark Knight', 'runtime': '152', 'plot': 'When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.', 'keyScene': "Batman angrily responds 'I’m Batman' when asked who he is by Falcone.", 'genre': 'Action, Crime, Drama, Thriller', 'released': '2008'}, {'title': 'Fight Club', 'runtime': '139', 'plot': 'An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.', 'keyScene': 'Brad Pitt explains the rules of 

In [27]:
response = client.search(
    index='elser-example-movies', 
    size=3,
    query={
        "text_expansion": {
            "ml.tokens": {
                "model_id":".elser_model_1",
                "model_text":"child toy"
            }
        }
    }
)

for hit in response['hits']['hits']:
    doc_id = hit['_id']
    score = hit['_score']
    title = hit['_source']['title']
    plot = hit['_source']['plot']
    print(f"Score: {score}\nTitle: {title}\nPlot: {plot}\n")

Score: 0.6608096
Title: Fight Club
Plot: An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.

Score: 0.3346656
Title: The Usual Suspects
Plot: A sole survivor tells of the twisty events leading up to a horrific gun battle on a boat, which began when five criminals met at a seemingly random police lineup.

Score: 0.21901892
Title: The Shawshank Redemption
Plot: Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.

