## Documentation

To read more about synonyms, check out the docs [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/synonyms-apis.html).

![synonyms_api_docs](../images/synonyms_api_docs.png)

## Connect to Elasticsearch

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Address of the Elasticsearch node this notebook talks to.
HOST = "http://localhost:9200"

# Create the client and query the cluster info; the success message is
# printed only after `es.info()` has returned.
es = Elasticsearch(hosts=HOST)
client_info = es.info()
print("Connected to Elasticsearch!")
pprint(client_info.body)

Connected tp Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'iugjHCt8SwCWRVd35xnJ0A',
 'name': '5013781c82bc',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-02-05T22:10:57.067596412Z',
             'build_flavor': 'default',
             'build_hash': '747663ddda3421467150de0e4301e8d4bc636b0c',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.12.0',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.17.2'}}


## Setting up Synonyms

In [2]:
from pprint import pprint


# Name of the demo index; it is dropped and recreated below.
index_name = "my_synonym_index"

# Synonym rules. Comma-separated terms form a group of equivalents; the
# `=>` rule maps every term on the left to the term on the right.
synonym_rules = [
    "car, automobile, vehicle",
    "tv, television",
    "smartphone, mobile, cell phone",
    "jupyter, jupyter notebook, jupyterlab",
    "jupiter, mars, earth, venus, mercury, saturn, uranus, neptune => planet",
]

# Index definition: a custom analyzer (standard tokenizer -> lowercase ->
# synonym filter) attached to the `description` text field.
settings = {
    "settings": {
        "analysis": {
            "filter": {
                "synonym_filter": {"type": "synonym", "synonyms": synonym_rules},
            },
            "analyzer": {
                "synonym_analyzer": {
                    "tokenizer": "standard",
                    "filter": ["lowercase", "synonym_filter"],
                },
            },
        },
        "index": {"number_of_shards": 1, "number_of_replicas": 0},
    },
    "mappings": {
        "properties": {
            "description": {"type": "text", "analyzer": "synonym_analyzer"},
        },
    },
}

# Remove any previous copy of the index (a missing index is not an error),
# then create it from the definition above.
es.indices.delete(index=index_name, ignore_unavailable=True)
response = es.indices.create(index=index_name, body=settings)
pprint(response.body)

{'acknowledged': True, 'index': 'my_synonym_index', 'shards_acknowledged': True}


## Indexing documents

In [4]:
import json

from tqdm import tqdm

# Load the sample documents. The context manager closes the file handle
# as soon as the JSON has been parsed.
with open("../data/synonyms.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)

# The bulk API expects an action line followed by the document itself,
# so every document contributes two consecutive entries.
operations = []
for document in tqdm(dummy_data, total=len(dummy_data)):
    operations.append({"index": {"_index": index_name}})
    operations.append(document)

response = es.bulk(operations=operations)
pprint(response.body)

100%|██████████| 5/5 [00:00<00:00, 42711.85it/s]

{'errors': False,
 'items': [{'index': {'_id': '8vphPJUBq90DZqGOiGbF',
                      '_index': 'my_synonym_index',
                      '_primary_term': 1,
                      '_seq_no': 0,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 1},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '8_phPJUBq90DZqGOiGbF',
                      '_index': 'my_synonym_index',
                      '_primary_term': 1,
                      '_seq_no': 1,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 1},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '9PphPJUBq90DZqGOiGbF',
                      '_index': 'my_synonym_index',
                      '_primary_term': 1,
                      '_seq_no': 2,
                      '_shards': {'failed




## Searching with Synonyms

Now, let’s search for terms that should match synonyms. For example, we’ll search for "vehicle" (which should match "car" or "automobile").

In [5]:
# Match query for "vehicle" on the `description` field.
query = {"query": {"match": {"description": "vehicle"}}}
response = es.search(index=index_name, body=query)

# Print only the stored document of each hit.
print("Search Results:")
for hit in response["hits"]["hits"]:
    print(hit["_source"])

Search Results:
{'description': 'I love my car and television.'}


In [6]:
# Match query for "planet" on the `description` field.
query = {"query": {"match": {"description": "planet"}}}
response = es.search(index=index_name, body=query)

# Print only the stored document of each hit.
print("Search Results:")
for hit in response["hits"]["hits"]:
    print(hit["_source"])

Search Results:
{'description': 'I want to go to Mars.'}
{'description': 'I want to go to Venus.'}


## Expanding Synonyms for Search-Time Only

If you want to apply synonyms only at search time (and not while indexing), give the field a plain `analyzer` for indexing and a separate `search_analyzer` that includes the synonym filter, like this.

In [7]:
# Index definition with two analyzers: documents are indexed with a plain
# lowercase analyzer, while queries go through an analyzer that also
# applies the synonym filter.
# NOTE(review): Elasticsearch documents `synonym_graph` as the filter meant
# for search-time analyzers; `synonym` is kept here to preserve behavior.
settings = {
    "settings": {
        "analysis": {
            "filter": {
                "synonym_filter": {
                    "type": "synonym",
                    "synonyms": [
                        "car, automobile, vehicle",
                        "tv, television"
                    ]
                }
            },
            "analyzer": {
                "index_analyzer": {
                    "tokenizer": "standard",
                    "filter": ["lowercase"]
                },
                "search_analyzer": {
                    "tokenizer": "standard",
                    "filter": ["lowercase", "synonym_filter"]
                }
            }
        },
        "index": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }
    },
    "mappings": {
        "properties": {
            "description": {
                "type": "text",
                # `analyzer` is used when indexing, `search_analyzer` when querying.
                "analyzer": "index_analyzer",
                "search_analyzer": "search_analyzer"
            }
        }
    }
}

# Drop the index before recreating it. `ignore_unavailable=True` keeps this
# cell re-runnable even when the index does not exist yet.
es.indices.delete(index=index_name, ignore_unavailable=True)
response = es.indices.create(index=index_name, body=settings)
pprint(response.body)

{'acknowledged': True, 'index': 'my_synonym_index', 'shards_acknowledged': True}


In [8]:
import json

from tqdm import tqdm

# Reload the sample documents for the recreated index. The context manager
# closes the file handle as soon as the JSON has been parsed.
with open("../data/synonyms.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)

# The bulk API expects an action line followed by the document itself,
# so every document contributes two consecutive entries.
operations = []
for document in tqdm(dummy_data, total=len(dummy_data)):
    operations.append({"index": {"_index": index_name}})
    operations.append(document)

response = es.bulk(operations=operations)
pprint(response.body)

100%|██████████| 5/5 [00:00<00:00, 33182.78it/s]

{'errors': False,
 'items': [{'index': {'_id': '9_pnPJUBq90DZqGOV2au',
                      '_index': 'my_synonym_index',
                      '_primary_term': 1,
                      '_seq_no': 0,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 1},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '-PpnPJUBq90DZqGOV2au',
                      '_index': 'my_synonym_index',
                      '_primary_term': 1,
                      '_seq_no': 1,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 1},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '-fpnPJUBq90DZqGOV2au',
                      '_index': 'my_synonym_index',
                      '_primary_term': 1,
                      '_seq_no': 2,
                      '_shards': {'failed




In [9]:
# Match query for "vehicle" on the `description` field.
query = {"query": {"match": {"description": "vehicle"}}}
response = es.search(index=index_name, body=query)

# Print only the stored document of each hit.
print("Search Results:")
for hit in response["hits"]["hits"]:
    print(hit["_source"])

Search Results:
{'description': 'I love my car and television.'}
