# Search with Elasticsearch

[Video](https://www.youtube.com/watch?v=1lgbR5wMvsI&list=PL3MmuxUbc_hIB4fSqLy_0AfTjVLpgjV3R&index=8&ab_channel=DataTalksClub%E2%AC%9B)

Start Elasticsearch by running the following command in a terminal:

```
docker run -it \
    --rm \
    --name elasticsearch \
    -m 4GB \
    -p 9200:9200 \
    -p 9300:9300 \
    -e "discovery.type=single-node" \
    -e "xpack.security.enabled=false" \
    docker.elastic.co/elasticsearch/elasticsearch:8.4.3
```

In [None]:
import json
from elasticsearch import Elasticsearch
from tqdm.auto import tqdm

def elastic_search(query, es_client, index_name, course="data-engineering-zoomcamp", n=5):
    """
    Search an Elasticsearch index for documents of one course matching a query.

    Args:
        query (str): Search text.
        es_client: Elasticsearch client instance; only its ``search`` method
            is called.
        index_name (str): Name of the index to search.
        course (str): Value the ``course`` field of a document must equal for
            it to be returned (default is "data-engineering-zoomcamp").
        n (int): Number of top results to request (default is 5).

    Returns:
        list: The ``_source`` of every hit, in the order the response lists
        them; empty when there are no hits.
    """

    search_query = {
        "size": n,
        "query": {
            "bool": {
                # Relevance part: match the text against both fields, with
                # matches in "question" boosted 4x relative to "text".
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^4", "text"],
                        "type": "best_fields"
                    }
                },
                # Restriction part: keep only documents of the requested course.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    # execute query
    response = es_client.search(index=index_name, body=search_query)

    # Callers only need the stored documents, not the hit metadata.
    return [hit['_source'] for hit in response['hits']['hits']]

In [None]:
# load documents

# Read as UTF-8 explicitly so parsing does not depend on the platform's
# default text encoding.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

# Flatten the per-course groups into a single list, tagging every document
# with the course it came from (the document dicts are updated in place).
documents = []
for course_dict in docs_raw:
    for doc in course_dict['documents']:
        doc['course'] = course_dict['course']
        documents.append(doc)

In [None]:
# create an index in ElasticSearch (and fill it the first time only)
index_name = "course-questions"
es_client = Elasticsearch('http://localhost:9200')

index_settings = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            # full-text fields
            **{field: {"type": "text"} for field in ("text", "section", "question")},
            # "course" is mapped as a keyword rather than as text
            "course": {"type": "keyword"},
        }
    },
}

# Documents are added only when the index is created here; if the index
# already exists, nothing is created or indexed.
if not es_client.indices.exists(index=index_name):
    es_client.indices.create(index=index_name, body=index_settings)

    # add documents to index, one request per document
    for doc in tqdm(documents):
        es_client.index(index=index_name, document=doc)

In [None]:
# perform a search: top 3 hits from the machine-learning-zoomcamp course
query = "How do copy a file to a Docker container?"
elastic_search(
    query=query,
    es_client=es_client,
    index_name=index_name,
    course="machine-learning-zoomcamp",
    n=3,
)