In [1]:
from elasticsearch import Elasticsearch

In [64]:
username = 'elastic'
password = '1234'
host = 'http://localhost:9200'  # e.g., 'localhost' or 'https://your-elasticsearch-domain.com'

# Create an Elasticsearch client instance
es = Elasticsearch(
    hosts=host,
    basic_auth=(username, password),
    verify_certs = False
)

# Check if the connection was successful
if es.ping():
    print('#'*30)
    print("Connected to Elasticsearch")
    print('#'*30)
else:
    print("Connection failed")

# es.info()

##############################
Connected to Elasticsearch
##############################


In [90]:
es.indices.get_alias(index='*')

ObjectApiResponse({'my_index1': {'aliases': {}}, 'saleem_testing1': {'aliases': {}}})

In [401]:
# Index name
index_name = 'my_index1'

try:
    es.indices.delete(index='my_index1')
except:
    print('new')

# Mapping and settings
mapping = {
    'dynamic': 'true',
    'properties': {
        'data': {
            'type': 'nested',
            'properties': {
                'label': {'type': 'keyword'},
                'value': {
                    'type': 'keyword',
                    'fields': {
                        'phonetic': {
                            'type': 'text',
                            'analyzer': 'my_phonetic_analyzer'
                        },
                        'synonym': {
                            'type': 'text',
                            'analyzer': 'my_synonym_analyzer'
                        }
                    }
                }
            }
        },
        'source': {'type': 'keyword'},
        'country': {
            'type': 'text',
            'fields': {
                'keyword': {
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        }
    }
}

settings = {
    'index': {
        'number_of_shards': 1,
        'number_of_replicas': 0,
        'analysis': {
            'analyzer': {
                'my_phonetic_analyzer': {
                    'tokenizer': 'standard',
                    'filter': ['lowercase', 'my_phonetic_filter']
                },
                'my_synonym_analyzer': {
                    'tokenizer': 'standard',
                    'filter': ['lowercase', 'my_synonym_filter']
                },
                'my_keyword_analyzer': {
                    'tokenizer': 'keyword',
                    'filter': ['lowercase']
                }
            },
            'filter': {
                'my_phonetic_filter': {
                    'type': 'phonetic',
                    'encoder': 'metaphone',
                    'replace': False
                },
                'my_synonym_filter': {
                    'type': 'synonym',
                    'synonyms': ['saleem => salim']
                }
            }
        }
    }
}

# Create index with mapping and settings
es.indices.create(index=index_name, body={'settings': settings, 'mappings': mapping})

# Example documents
documents = [
    [
        {'label': 'name', 'value': 'saleem'},
        {'label': 'country', 'value': 'pakistan'},
        {'label': 'date of birth', 'value': '16-02-1995', 'source': 'https://www.wikipedia.org/'},
        {'label': 'date of birth', 'value': '1997'},
        {'label': 'category', 'value': 'student'}
    ],
    [
        {'label': 'name', 'value': 'salim'},
        {'label': 'country', 'value': 'uae'},
        {'label': 'date of birth', 'value': '1991'},
        {'label': 'category', 'value': 'student'},
        {'label': 'category', 'value': '{id:300000111222}'}
    ],
    [
        {'label': 'name', 'value': 'saleem'},
        {'label': 'country', 'value': 'US'},
        {'label': 'date of birth', 'value': '16-02-1991', 'source': 'https://www.wikipedia.org/'},
        {'label': 'date of birth', 'value': '1996'},
        {'label': 'category', 'value': 'engineer'}
    ],
    [
        {'label': 'name', 'value': 'john'},
        {'label': 'age', 'value': '30'},
        {'label': 'occupation', 'value': 'engineer'}
    ]
]

# Index the documents
for doc in documents:
    body = {'data': [], 'source': ''}
    for item in doc:
        data_item = {
            'label': item['label'],
            'value': item['value']
        }
        body['data'].append(data_item)
        if 'source' in item:
            body['source'] = item['source']
        if item['label'] == 'country':
            body['country'] = item['value']
    es.index(index=index_name, body=body)


def search_records(name, keyword=None, country=None):
    query = {
        'query': {
            'bool': {
                'must': [
                    {
                        'nested': {
                            'path': 'data',
                            'query': {
                                'bool': {
                                    'must': [
                                        {
                                            'match': {
                                                'data.label': {
                                                    'query': 'name',
                                                    'analyzer': 'my_synonym_analyzer'
                                                }
                                            }
                                        },
                                        {
                                            'match': {
                                                'data.value.synonym': {
                                                    'query': name,
                                                    'fuzziness': 'AUTO'
                                                }
                                            }
                                        }
                                    ]
                                }
                            }
                        }
                    }
                ]
            }
        }
    }

    if keyword:
        query['query']['bool']['must'].append({
            'nested': {
                'path': 'data',
                'query': {
                    'wildcard': {
                        'data.value': {
                            'value': '*' + keyword + '*',
                            'case_insensitive': True
                        }
                    }
                }
            }
        })

    if country:
        query['query']['bool']['must'].append({
            'match': {
                'country': {
                    'query': country,
                    'fuzziness': 'AUTO',
                    'analyzer': 'my_keyword_analyzer'
                }
            }
        })
            

    search_results = es.search(index=index_name, body=query)
    for hit in search_results['hits']['hits']:
        print(hit['_source'])




  es.indices.create(index=index_name, body={'settings': settings, 'mappings': mapping})
  es.index(index=index_name, body=body)


In [403]:
search_records(name='saleem',keyword='',country='')

{'data': [{'label': 'name', 'value': 'saleem'}, {'label': 'country', 'value': 'pakistan'}, {'label': 'date of birth', 'value': '16-02-1995'}, {'label': 'date of birth', 'value': '1997'}, {'label': 'category', 'value': 'student'}], 'source': 'https://www.wikipedia.org/', 'country': 'pakistan'}
{'data': [{'label': 'name', 'value': 'salim'}, {'label': 'country', 'value': 'uae'}, {'label': 'date of birth', 'value': '1991'}, {'label': 'category', 'value': 'student'}, {'label': 'category', 'value': '{id:300000111222}'}], 'source': '', 'country': 'uae'}
{'data': [{'label': 'name', 'value': 'saleem'}, {'label': 'country', 'value': 'US'}, {'label': 'date of birth', 'value': '16-02-1991'}, {'label': 'date of birth', 'value': '1996'}, {'label': 'category', 'value': 'engineer'}], 'source': 'https://www.wikipedia.org/', 'country': 'US'}


  search_results = es.search(index=index_name, body=query)


In [404]:
search_records(name='saleem',keyword='student',country='')

{'data': [{'label': 'name', 'value': 'saleem'}, {'label': 'country', 'value': 'pakistan'}, {'label': 'date of birth', 'value': '16-02-1995'}, {'label': 'date of birth', 'value': '1997'}, {'label': 'category', 'value': 'student'}], 'source': 'https://www.wikipedia.org/', 'country': 'pakistan'}
{'data': [{'label': 'name', 'value': 'salim'}, {'label': 'country', 'value': 'uae'}, {'label': 'date of birth', 'value': '1991'}, {'label': 'category', 'value': 'student'}, {'label': 'category', 'value': '{id:300000111222}'}], 'source': '', 'country': 'uae'}


  search_results = es.search(index=index_name, body=query)


In [407]:
search_records(name='saleem',keyword='student',country='pakistan')

{'data': [{'label': 'name', 'value': 'saleem'}, {'label': 'country', 'value': 'pakistan'}, {'label': 'date of birth', 'value': '16-02-1995'}, {'label': 'date of birth', 'value': '1997'}, {'label': 'category', 'value': 'student'}], 'source': 'https://www.wikipedia.org/', 'country': 'pakistan'}


  search_results = es.search(index=index_name, body=query)
