# Elasticsearch

- fuzzy en match query
- suggesties en aanvullingen



In [93]:
import json
from typing import Optional
from elasticsearch import Elasticsearch


In [98]:
# read the connection settings from config.json
with open("config.json") as fp:
    config = json.load(fp)

# build the client; every configuration key becomes a keyword argument
es = Elasticsearch(**config)


Kies hier een naam voor jouw index.

In [92]:
# name of the Elasticsearch index used by the helper functions
INDEX = "products"


Hulpfuncties voor het uitvoeren van handelingen met betrekking tot de index.

In [120]:
def drop_index():
    """Delete the index named by INDEX when it exists; otherwise do nothing."""
    if not es.indices.exists(index=INDEX):
        return

    # responses with status 400 or 404 are ignored for the delete request
    client = es.options(ignore_status=[400, 404])
    client.indices.delete(index=INDEX)


def create_index(
    mappings: dict | None = None, settings: dict | None = None
) -> None:
    """Create the index, replacing an existing one.

    An existing index with the same name is dropped before creation, so its
    documents are lost.

    Args:
        mappings: Optional field mappings for the new index, for example
            ``{"properties": {...}}``.
        settings: Optional index settings for the new index, for example
            ``{"analysis": {...}}``. Supplying them here allows custom
            analyzers to be defined in the same request as the mappings that
            refer to them.
    """
    drop_index()
    es.indices.create(index=INDEX, mappings=mappings, settings=settings)


def get_mapping() -> Optional[dict]:
    """Return the raw mapping response of the index, or None without an index

    See also: https://www.elastic.co/guide/en/elasticsearch/reference/7.17/indices-get-mapping.html
    """
    if not es.indices.exists(index=INDEX):
        return None

    response = es.indices.get_mapping(index=INDEX)
    return response.raw


def put_mapping(body: dict) -> None:
    """Update the index mapping with the "properties" entry of body

    Nothing is sent when the index does not exist.

    See also: https://www.elastic.co/guide/en/elasticsearch/reference/7.17/indices-put-mapping.html
    """
    if not es.indices.exists(index=INDEX):
        return

    properties = body.get("properties")
    es.indices.put_mapping(index=INDEX, properties=properties)

def put_settings(body: dict) -> None:
    """Update the index settings with the "settings" entry of body

    Nothing is sent when the index does not exist.

    See also: https://www.elastic.co/guide/en/elasticsearch/reference/7.17/indices-update-settings.html
    """
    if not es.indices.exists(index=INDEX):
        return

    # NOTE(review): presumably static settings such as "analysis" are rejected
    # while the index is open - confirm before relying on this for analyzers
    index_settings = body.get("settings")
    es.indices.put_settings(index=INDEX, settings=index_settings)

def index_docs(document: dict | list[dict]) -> None:
    """Index one document or several documents, then refresh the index"""
    # a lone document is wrapped so both input forms share the same loop
    entries = [document] if isinstance(document, dict) else document

    for entry in entries:
        es.index(index=INDEX, document=entry)

    # a single refresh once every document has been sent
    es.indices.refresh(index=INDEX)


def get_docs() -> list:
    """Return the ``_source`` of each hit of a match_all search

    An empty list is returned when the search response is falsy.
    """
    result = es.search(index=INDEX, query={"match_all": {}})
    if not result:
        return []

    # NOTE(review): no size is passed to the search, so presumably only the
    # server's default number of hits is returned - confirm if all are needed
    return [hit["_source"] for hit in result["hits"]["hits"]]


def query(body: dict):
    """Search the index with the "query" entry of body and return the response"""
    search_query = body.get("query")
    return es.search(index=INDEX, query=search_query)


def products(path: str = "elastic_export.json"):
    """Yield product documents read from a JSON export

    The file must contain a JSON list of entries. Each entry is read through
    its "_id" and "fields" keys; the field values are iterated, so they are
    assumed to be lists of strings (TODO confirm against the export format).

    Every yielded document holds:
      - "_id": copied from the entry,
      - the text fields (merchant, keywords, name, brand, color, material)
        that have at least one non-empty value, with empty values removed,
      - "combined": the distinct values of name, brand and color joined with
        spaces, in order of first appearance,
      - "name": reduced to its first non-empty value; the key is absent when
        the entry has no non-empty name.

    Args:
        path: Location of the JSON export file.
    """
    # fields to use
    text_fields = ("merchant", "keywords", "name", "brand", "color", "material")
    # fields to join
    combine_fields = ("name", "brand", "color")

    # load everything first so the file is closed before the first yield
    with open(path, encoding="utf-8") as fp:
        entries = json.load(fp)

    for entry in entries:
        # remove empty values; a field without any value left is dropped
        fields = {}
        for key, values in entry["fields"].items():
            kept = [x for x in values if x]
            if kept:
                fields[key] = kept

        # prepare document
        value = {"_id": entry["_id"]}
        value.update({k: v for k, v in fields.items() if k in text_fields})

        # combined values field
        combined = []
        for key, values in value.items():
            if key in combine_fields:
                combined.extend(values)
        # dict.fromkeys removes duplicates while keeping a stable order
        value["combined"] = " ".join(dict.fromkeys(combined))

        # name as a single field; entries without a name keep no "name" key
        if "name" in value:
            value["name"] = value["name"][0]

        yield value


def ngram(w, n):
    """Return every contiguous part of length n of w, from left to right

    Args:
        w: The word to break up; any sliceable sequence with a length works.
        n: Length of each part; must be at least 1.

    Returns:
        A list of ``len(w) - n + 1`` parts, or an empty list when w is shorter
        than n.

    Raises:
        ValueError: If n is smaller than 1.
    """
    if n < 1:
        # parts of zero or negative length are not meaningful
        raise ValueError("n must be at least 1")

    # a loop instead of recursion keeps long inputs clear of the recursion limit
    return [w[start:start + n] for start in range(len(w) - n + 1)]


Maak een index aan en voeg documenten toe.

In [99]:
# start from a newly created index
create_index()

# two sample products to search in
docs = [
    {"name": "Apple iPhone 11"},
    {"name": "Apple iPhone 5/5S/SE Screenprotector"},
]

index_docs(docs)


Controleer of de documenten zijn toegevoegd.

In [100]:
# list the documents returned by a match_all search on the index
get_docs()


[{'name': 'Apple iPhone 11'}, {'name': 'Apple iPhone 5/5S/SE Screenprotector'}]

Wat is de type mapping die elasticsearch heeft aangemaakt?

## Fuzzy query

*Returns documents that contain terms similar to the search term, as measured by a Levenshtein edit distance.*

[https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html)

```python
{
    "query": {
        "fuzzy": {
            "name": {
                "value": "ipone",
                "fuzziness": "AUTO"
            }
        }
    }
}
```

Een fuzzy query uitvoeren.

In [112]:
# fuzzy query on the "name" field for the misspelled term "ipone"
body = {
    "query": {
        "fuzzy": {
            "name": {
                "value": "ipone",
                "fuzziness": "AUTO",
            }
        }
    }
}

query(body)


ObjectApiResponse({'took': 5, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 0.16888736, 'hits': [{'_index': 'products', '_id': 'vas7-IAB0VA9dqYQYDrP', '_score': 0.16888736, '_source': {'name': 'Apple iPhone 11'}}, {'_index': 'products', '_id': 'vqs7-IAB0VA9dqYQYTpJ', '_score': 0.1283544, '_source': {'name': 'Apple iPhone 5/5S/SE Screenprotector'}}]}})

## Match query

[https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html)

*Returns documents that match a provided text, number, date or boolean value. The provided text is analyzed before matching. The match query is the standard query for performing a full-text search, including options for fuzzy matching.*

```python
{
    "query": {
        "match": {
            "name": {
                "query": "this is a test"
            }
        }
    }
}
```

Match query met fuzziness

```python
{
    "query": {
        "match": {
            "name": {
                "query": "this is a testt",
                "fuzziness": "AUTO"
            }
        }
    }
}
```

Zie ook [How to Handle Typos in Elasticsearch Using Fuzzy Query](https://towardsdatascience.com/how-to-handle-typos-in-elasticsearch-using-fuzzy-query-8d3843a8cff3).

Voer een match query uit, met fuzziness.


In [113]:
# TODO: replace this placeholder with a match query that uses fuzziness;
# as written, {...} is a set holding Ellipsis and not a query body
body = {...}


## Autocomplete


-   [Implementing auto-complete functionality in Elasticsearch - Part I: Prefix queries](https://www.learningstuffwithankit.dev/implementing-auto-complete-functionality-in-elasticsearch-part-i-prefix-queries)

-   [Implementing auto-complete functionality in Elasticsearch - Part II: n-grams](https://www.learningstuffwithankit.dev/implementing-auto-complete-functionality-in-elasticsearch-part-ii-n-grams)


In [116]:
# index settings defining the "namegrams" analyzer: the whole input is kept as
# one token (keyword tokenizer) and then split into n-grams of 3 to 8 characters
settings = {
    "settings": {
        "index": {
            # max_gram - min_gram is 5, which exceeds the default limit of 1
            "max_ngram_diff": 5
        },
        "analysis": {
            "analyzer": {
                "namegrams": {
                    "type": "custom",
                    "tokenizer": "keyword",
                    "filter": [
                        "ngrams_filter"
                    ]
                }
            },
            "filter": {
                "ngrams_filter": {
                    "type": "ngram",
                    "min_gram": 3,
                    "max_gram": 8
                }
            }
        }
    }
}

# mapping that analyzes the "name" field with the "namegrams" analyzer
mappings = {
    "properties": {
        "name": {
            # full-text fields use "text"; the former "string" type is rejected
            "type": "text",
            "analyzer": "namegrams"
        }
    }
}


In [123]:
#put_settings(settings)
#put_mapping(mappings)