# Setup

These should already be installed in your notebook environment.
If they are not, uncomment the cell below and run it.

In [None]:
# Optional: install the client libraries used by this notebook (uncomment to run).
# `%pip` installs into the running kernel's environment; the extra is quoted so
# the shell does not try to glob the square brackets.
#%pip install -qU elasticsearch
#%pip install -qU "eland[pytorch]"

Import the required Python libraries

In [None]:
import os
from elasticsearch import Elasticsearch, helpers, exceptions
from urllib.request import urlopen
from getpass import getpass
import json
import time

Create an Elasticsearch Python client

In [None]:
# Connection settings default to the workshop cluster. Export ES_URL, ES_USER
# and ES_PASSWORD before starting the kernel to target a different cluster
# without editing (or committing) credentials in the notebook.
# NOTE: the Eland import cell further down passes the same URL and credentials
# as literals, so update that cell as well if you override these values.
ES_URL = os.environ.get("ES_URL", "http://kubernetes-vm:9200")
ES_USER = os.environ.get("ES_USER", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD", "changeme")

# Client shared by every cell below.
es = Elasticsearch(
    hosts=[ES_URL],
    basic_auth=(ES_USER, ES_PASSWORD),
)

# Upload Hugging Face model with Eland

Use Eland's `eland_import_hub_model` command to upload the model to Elasticsearch.

For this example we've chosen the [`cross-encoder/ms-marco-MiniLM-L-6-v2`](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-6-v2) text similarity model.

In [None]:
MODEL_ID = "cross-encoder/ms-marco-MiniLM-L-6-v2"

!eland_import_hub_model \
  --url "http://kubernetes-vm:9200" \
  -u "elastic" \
  -p "changeme" \
  # Complete the missing argument
  # Complete the missing argument

# Create Inference Endpoint
We need to create an inference endpoint that queries can use for reranking.

In [None]:
# Endpoint definition: serve the cross-encoder uploaded with Eland through the
# built-in `elasticsearch` inference service.
model_config = {
    "service": "elasticsearch",
    "service_settings": {
        # Id the uploaded model has inside Elasticsearch.
        "model_id": "cross-encoder__ms-marco-minilm-l-6-v2",
        # One allocation with one thread keeps the workshop footprint small.
        "num_allocations": 1,
        "num_threads": 1,
    },
    "task_settings": {
        # Include the document text alongside each relevance score.
        "return_documents": True
    },
}

inference_id = "semantic-reranking" # DO NOT change this id

# Register the endpoint as a `rerank` task so retrievers can reference it by id.
create_endpoint = es.inference.put(
    inference_id=inference_id,
    task_type="rerank",
    body=model_config
)

# Show the endpoint definition echoed back by Elasticsearch.
create_endpoint.body

## Verify



Verify the endpoint was created



You should see JSON output with information about the semantic endpoint

In [None]:
# Look the endpoint up by id and display what Elasticsearch has stored for it.
check_endpoint = es.inference.get(inference_id=inference_id)

check_endpoint.body

Verify the model was successfully deployed

The cell below should return 'started'




In [None]:
# Id of the uploaded model inside Elasticsearch.
ES_MODEL_ID = "cross-encoder__ms-marco-minilm-l-6-v2"

# Fetch runtime statistics for that trained model.
model_info = es.ml.get_trained_models_stats(model_id=ES_MODEL_ID)

# Routing state reported by the first node of the first stats entry.
(
    model_info.body["trained_model_stats"][0]
    ["deployment_stats"]["nodes"][0]
    ["routing_state"]["routing_state"]
)

# Query with Reranking

Complete the following query.

This will use retrievers to:
1. Perform a semantic query against the chunked ELSER embeddings
2. Return the top 2 inner hit chunks
3. Perform a reranking using our previously created reranking API and model

In [None]:
USER_QUESTION = "Where can I get good pizza?"

# Two-stage retrieval:
#   1. the inner `standard` retriever runs a sparse-vector (ELSER) query against
#      the nested semantic chunks and returns the top 2 matching chunks per hit;
#   2. the outer `text_similarity_reranker` re-scores those hits with the rerank
#      inference endpoint, using the user question as the comparison text.
response = es.search(
    index="restaurant_reviews",
    body={
      "retriever": {
        "text_similarity_reranker": {
          "retriever": {
            "standard": {
              "query": {
                "nested": {
                  "path": "semantic_body.inference.chunks",
                  "query": {
                    "sparse_vector": {
                      "inference_id": "my-elser-endpoint",
                      "field": "semantic_body.inference.chunks.embeddings",
                      "query": USER_QUESTION
                    }
                  },
                  "inner_hits": {
                    "size": 2,
                    "name": "restaurant_reviews.semantic_body",
                    "_source": [
                      "semantic_body.inference.chunks.text"
                    ]
                  }
                }
              }
            }
          },
          # NOTE(review): assumes the review text is stored in the `Review`
          # field - confirm against the index mapping.
          "field": "Review",
          # Rerank endpoint created earlier in this notebook.
          "inference_id": inference_id,
          "inference_text": USER_QUESTION,
          # Number of first-stage hits handed to the reranker.
          "rank_window_size": 10
        }
      }
    }
)

response.raw

Print out the formatted response

In [None]:
# One summary line per hit, in the order the hits were returned.
for hit in response.raw['hits']['hits']:
    doc = hit['_source']
    print(f"Restaurant {doc['Restaurant']} - Rating: {doc['Rating']} - Reviewer: {doc['Reviewer']}")