In [340]:
from elasticsearch import Elasticsearch

In [341]:
username = 'elastic'
password = '1234'
host = 'http://localhost:9200'  # e.g., 'localhost' or 'https://your-elasticsearch-domain.com'

# Create an Elasticsearch client instance
es = Elasticsearch(
    hosts=host,
    basic_auth=(username, password),
    verify_certs = False
)

# Check if the connection was successful
if es.ping():
    print('#'*30)
    print("Connected to Elasticsearch")
    print('#'*30)
else:
    print("Connection failed")

# es.info()

##############################
Connected to Elasticsearch
##############################


In [291]:
es.indices.get_alias(index='*')

ObjectApiResponse({'face_recognition': {'aliases': {}}, 'my_index1': {'aliases': {}}, 'saleem_testing1': {'aliases': {}}})

In [330]:
def convert_to_string(item):
    if isinstance(item, dict):
        for key, value in item.items():
            item[key] = convert_to_string(value)
    elif isinstance(item, list):
        for i in range(len(item)):
            item[i] = convert_to_string(item[i])
    else:
        try:
            # Attempt to convert to string
            item = str(item)+' '
        except:
            # Handle any exceptions during conversion
            item = None  # Or you can choose a different default value

    return item

def correct_values(item):
    if isinstance(item, dict):
        for key, value in item.items():
            if key == 'birth_encorporation_date':
                if value == '[]':
                    item[key] = None  # Or remove the key-value pair if needed
            item[key] = correct_values(value)
    elif isinstance(item, list):
        for i in range(len(item)):
            item[i] = correct_values(item[i])

    return item

documents = [
    {
        'name': 'xyz',
        'birth_encorporation_date': '1950',
        'data': {
            'summary': {
                'name': 'wxyz',
                'place_of_birth': 'khanpur',
                'dob': '1996-01-01',
                'age': 27,
                'range': [20, 27],
                'salary': [10000, 200000],
            }
        }
    },
    {
        'name': 'xyz',
        'birth_encorporation_date': '[]',
        'data': {
            'summary': {
                'name': 'abcd',
                'nationality': 'pakistan',
                'range': '[20-27]',
                'birth_year': 1996,
            }
        }
    },
    {
        'name': 'uvs',
        'birth_encorporation_date': '[]',
        'data': {
            'summary': {
                'name': 'uvs',
                'nationality': 'pakistan',
                'date_of_birth': '1996-01-15',
                'siblings': {'brother': {'name': 'abc'}},
            }
        }
    }
]

# Correct and clean the values
for idx, doc in enumerate(documents):
    documents[idx] = correct_values(doc)

# Convert non-dict and non-list values to string
for idx, doc in enumerate(documents):
    documents[idx] = convert_to_string(doc)

display(documents)


[{'name': 'xyz ',
  'birth_encorporation_date': '1950 ',
  'data': {'summary': {'name': 'wxyz ',
    'place_of_birth': 'khanpur ',
    'dob': '1996-01-01 ',
    'age': '27 ',
    'range': ['20 ', '27 '],
    'salary': ['10000 ', '200000 ']}}},
 {'name': 'xyz ',
  'birth_encorporation_date': '[] ',
  'data': {'summary': {'name': 'abcd ',
    'nationality': 'pakistan ',
    'range': '[20-27] ',
    'birth_year': '1996 '}}},
 {'name': 'uvs ',
  'birth_encorporation_date': '[] ',
  'data': {'summary': {'name': 'uvs ',
    'nationality': 'pakistan ',
    'date_of_birth': '1996-01-15 ',
    'siblings': {'brother': {'name': 'abc '}}}}}]

In [331]:

# Index name
index_name = 'my_index1'

# Delete index if it exists
try:
    es.indices.delete(index=index_name)
except:
    print('Index does not exist or could not be deleted')

# Mapping and settings
mapping = {
    'dynamic': True,  # Allow dynamic mapping for varying fields
    'properties': {
        'name': {'type': 'text'},
        'birth_encorporation_date': {'type': 'text'},
        'data': {
           'type': 'object'
            }
        }
}

settings = {
    'number_of_shards': 1,
    'number_of_replicas': 0
}

# Create index with mapping and settings
es.indices.create(index=index_name, body={'settings': settings, 'mappings': mapping})

# Index documents
for doc in documents:
    es.index(index=index_name, body=doc)



  es.indices.create(index=index_name, body={'settings': settings, 'mappings': mapping})
  es.index(index=index_name, body=doc)


In [335]:
q={
    "query": {
        "match_all": {}
    },
}
result = es.search(index=index_name, body=q)
print(es.count(index=index_name, body=q))
for i in result['hits']['hits']:
    display(i)
    

{'count': 3, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}


  result = es.search(index=index_name, body=q)
  print(es.count(index=index_name, body=q))


{'_index': 'my_index1',
 '_id': 'iVXYMooBLCGmj3FMU664',
 '_score': 1.0,
 '_source': {'name': 'xyz ',
  'birth_encorporation_date': '1950 ',
  'data': {'summary': {'name': 'wxyz ',
    'place_of_birth': 'khanpur ',
    'dob': '1996-01-01 ',
    'age': '27 ',
    'range': ['20 ', '27 '],
    'salary': ['10000 ', '200000 ']}}}}

{'_index': 'my_index1',
 '_id': 'ilXYMooBLCGmj3FMVq5S',
 '_score': 1.0,
 '_source': {'name': 'xyz ',
  'birth_encorporation_date': '[] ',
  'data': {'summary': {'name': 'abcd ',
    'nationality': 'pakistan ',
    'range': '[20-27] ',
    'birth_year': '1996 '}}}}

{'_index': 'my_index1',
 '_id': 'i1XYMooBLCGmj3FMWa7d',
 '_score': 1.0,
 '_source': {'name': 'uvs ',
  'birth_encorporation_date': '[] ',
  'data': {'summary': {'name': 'uvs ',
    'nationality': 'pakistan ',
    'date_of_birth': '1996-01-15 ',
    'siblings': {'brother': {'name': 'abc '}}}}}}

In [336]:
# Sample search query for nested field
def search_nested_records(key='', value=''):
    query = {
        'query': {
            
                'wildcard': {
                    key: f"*{value}*"
                }
                }
    }
    result = es.search(index=index_name, body=query)
    return result

# Example search for nested field
search_result = search_nested_records(key='data.summary.dob.keyword', value='1996')
for i in search_result['hits']['hits']:
    display(i)
    

  result = es.search(index=index_name, body=query)


{'_index': 'my_index1',
 '_id': 'iVXYMooBLCGmj3FMU664',
 '_score': 1.0,
 '_source': {'name': 'xyz ',
  'birth_encorporation_date': '1950 ',
  'data': {'summary': {'name': 'wxyz ',
    'place_of_birth': 'khanpur ',
    'dob': '1996-01-01 ',
    'age': '27 ',
    'range': ['20 ', '27 '],
    'salary': ['10000 ', '200000 ']}}}}

In [364]:
def search_nested_records(name='', birth_encorporation_date=''):
    query = {
        "query": {
            "bool": {
                "must": [],
                "should": []
            }
        }
    }

    if name:
        query["query"]["bool"]["must"].append({"match": {"name": name}})

    if birth_encorporation_date:
        query["query"]["bool"]["should"].append(
            {
              'wildcard': {
                    'data.summary.birth_year': f"*{birth_encorporation_date}*"
                }  
            }
            )
        query["query"]["bool"]["should"].append(
            {
              'wildcard': {
                    'data.summary.dob.keyword': f"*{birth_encorporation_date}*"
                }  
            }
            )
    result = es.search(index=index_name, body=query)
    return result


# Example search for nested field
search_result = search_nested_records(name='xyz', birth_encorporation_date='1996')
for i in search_result['hits']['hits']:
    display(i)
    

  result = es.search(index=index_name, body=query)


{'_index': 'my_index1',
 '_id': 'iVXYMooBLCGmj3FMU664',
 '_score': 1.4700036,
 '_source': {'name': 'xyz ',
  'birth_encorporation_date': '1950 ',
  'data': {'summary': {'name': 'wxyz ',
    'place_of_birth': 'khanpur ',
    'dob': '1996-01-01 ',
    'age': '27 ',
    'range': ['20 ', '27 '],
    'salary': ['10000 ', '200000 ']}}}}

{'_index': 'my_index1',
 '_id': 'ilXYMooBLCGmj3FMVq5S',
 '_score': 1.4700036,
 '_source': {'name': 'xyz ',
  'birth_encorporation_date': '[] ',
  'data': {'summary': {'name': 'abcd ',
    'nationality': 'pakistan ',
    'range': '[20-27] ',
    'birth_year': '1996 '}}}}