# Relevant Search

## Loading TMDB.json into local Elasticsearch

In [1]:
import requests  # HTTP lib
import json  # json parsing

# Start a local single-node Elasticsearch with security disabled before running the notebook:
# docker run --name elasticsearch --net elastic -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -t docker.elastic.co/elasticsearch/elasticsearch:8.6.1
# Base URL of the Elasticsearch node; note the trailing slash, the index name is appended directly.
host = "http://localhost:9200/"
# Name of the index every cell below reads from and writes to.
index = "tmdb"
# URL of the index itself; endpoint suffixes such as "/_search" are appended to this.
indexBaseUrl = host + index
# Sent with every request because all request bodies in this notebook are JSON text.
headers = {"Content-Type": "application/json"}

In [None]:
def toJsonPrettyPrint(response):
    """Print a JSON document re-serialized with two-space indentation.

    `response` is raw JSON text (anything `json.loads` accepts), not a
    `requests` response object. Nothing is returned; the formatted text goes
    to stdout.
    """
    parsed = json.loads(response)
    pretty = json.dumps(parsed, indent=2)
    print(pretty)

In [None]:
def extract():
    """Load and parse ``tmdb.json`` from the current working directory.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: if the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # `with` guarantees the handle is closed; the previous `if f:` guard was
    # always true because open() raises instead of returning a falsy value.
    # JSON interchange files are UTF-8, so do not depend on the platform's
    # default encoding.
    with open('tmdb.json', encoding='utf-8') as f:
        return json.load(f)


def reindex(analysisSettings=None, mappingSettings=None, movieDict=None):
    """Drop and recreate the index, then bulk-load every movie into it.

    Args:
        analysisSettings: value placed under ``settings.index.analysis`` in
            the index-creation body; ``None`` or empty means no custom analysis.
        mappingSettings: value placed under ``mappings`` in the index-creation
            body; omitted from the body when ``None`` or empty.
        movieDict: mapping whose values are movie documents; each document
            must have an ``"id"`` key, which becomes the document ``_id``.

    Returns:
        The ``requests`` response of the ``_bulk`` call.
    """
    settings = {
        "settings": {
            "number_of_shards": 1,
            "index": {
                "analysis": analysisSettings or {},
            }
        }
    }

    if mappingSettings:
        settings['mappings'] = mappingSettings

    # Serialize only after the mappings have been attached. Previously the
    # JSON was produced first, so custom mappings were never sent.
    settingsJson = json.dumps(settings)
    print(settingsJson)

    # Delete-then-create gives a clean index on every run; the delete result
    # is not checked, so a missing index on the first run is not an error here.
    requests.delete(host + index)
    requests.put(host + index, data=settingsJson, headers=headers)

    # The bulk body is newline-delimited JSON: an action line followed by the
    # document line, each terminated by "\n" (including the last one).
    bulkLines = []
    for movie in (movieDict or {}).values():
        addCmd = {"index": {"_id": movie["id"]}}
        bulkLines.append(json.dumps(addCmd) + "\n")
        bulkLines.append(json.dumps(movie) + "\n")
    bulkMovies = "".join(bulkLines)

    response = requests.post(indexBaseUrl + "/_bulk",
                             data=bulkMovies, headers=headers)
    return response


# Load the movie dump from tmdb.json and index it with the default
# (empty) analysis settings and no explicit mappings.
movieDict = extract()
reindex(movieDict=movieDict)


## The search function

In [None]:
def search(query: dict):
    """Run a search request against the index and print the ranked hits.

    Args:
        query: the search request body as a dict; it is serialized to JSON
            and sent to the index's ``_search`` endpoint.

    Raises:
        requests.HTTPError: if Elasticsearch answers with an error status.

    Prints a header followed by one tab-separated line per hit: 1-based rank,
    score rounded to two decimals, and the ``title`` of the hit's source.
    """
    url = indexBaseUrl + "/_search"
    response = requests.get(url, data=json.dumps(query), headers=headers)
    # Fail with the HTTP error itself rather than an opaque KeyError on
    # 'hits' when the query is rejected or the index is missing.
    response.raise_for_status()

    searchHits = json.loads(response.text)['hits']

    print("Num\tRelevanceScore\tMovie Title")
    for rank, hit in enumerate(searchHits['hits'], start=1):
        print("%s\t%s\t\t%s" %
              (rank, round(hit['_score'], 2), hit['_source']['title']))


In [None]:
# Free-text query as a user would type it.
userSearch = 'basketball with cartoon aliens'
# multi_match searches the same text across several fields; "title^10" uses
# Elasticsearch's field-boost syntax to weight title matches 10x over overview.
query = {
    "query": {
        "multi_match": {
            "query": userSearch,
            "fields": ["title^10", "overview"]
        }
    }
}
search(query)


### Explain Query

In [None]:
def explain(query: dict):
    """Pretty-print the response of the index's ``_validate/query?explain`` endpoint.

    Args:
        query: the search request body as a dict; it is serialized to JSON
            and sent as the request body.

    The response body is printed as indented JSON whatever its content, so an
    error payload is shown rather than raised.
    """
    url = indexBaseUrl + "/_validate/query?explain"
    response = requests.get(url, data=json.dumps(query), headers=headers)
    toJsonPrettyPrint(response.text)

In [None]:
# Explain whatever request body `query` currently holds.
explain(query)

### Analyze

In [None]:
# NOTE: this rebinds `query`; it is now an _analyze request body, not a search body.
# Ask the index how the "standard" analyzer tokenizes the sample text.
query = {
    "analyzer": "standard",
    "text": "Fire with Fire"
}
response = requests.get(indexBaseUrl + "/_analyze", data=json.dumps(query), headers=headers)
# Print the raw response body as indented JSON.
toJsonPrettyPrint(response.text)