# Elastic Search

A sample of indexing and searching our generated json files

In [1]:
from elasticsearch import Elasticsearch
import json

In [2]:
es = Elasticsearch("http://localhost:9200")

In [4]:
def load_and_index(file_path, index_name):
    with open(file_path, "r") as f:
        docs = json.load(f)
    for doc in docs:
        es.index(index=index_name, document=doc)
    print(f"✅ Indexed {len(docs)} docs into '{index_name}'")

In [5]:
load_and_index("simulated_issues.json", "issues")
load_and_index("grafana_metrics.json", "metrics")
load_and_index("confluence_docs.json", "docs")

  es.index(index=index_name, document=doc)


✅ Indexed 10 docs into 'issues'
✅ Indexed 96 docs into 'metrics'
✅ Indexed 8 docs into 'docs'


In [6]:
# search
res = es.search(index="issues", query={"match": {"severity": "error"}})
print(res["hits"]["hits"][0])

{'_index': 'issues', '_type': '_doc', '_id': '3vMMF5YBOnTznT22o8lu', '_score': 1.1727202, '_source': {'id': 'ISSUE-003', 'timestamp': '2025-04-07T07:14:37.713423', 'severity': 'error', 'component': 'payment-gateway', 'description': 'Body not compare run deal town maintain he maintain me.', 'logs': ['CPU usage spike to 26.08%.', 'Memory usage at 8056MB exceeded threshold.', 'CPU usage spike to 79.45%.', 'CPU usage spike to 42.37%.', 'Error in database: Southern raise fear agree player.'], 'diagnostics': {'cpu_usage': 89.46, 'memory_mb': 5588}, 'status': 'open', 'assigned_to': 'njohnson', 'tags': ['performance', 'API'], 'related_issues': ['ISSUE-012', 'ISSUE-008'], 'resolved_notes': 'Forget general team of stand member analysis religious many television.', 'last_updated': '2025-04-07T07:14:37.713423', 'priority': 'low', 'environment': {'os': 'macOS', 'version': '8.4.19', 'region': 'ap-southeast-1'}}}


  res = es.search(index="issues", query={"match": {"severity": "error"}})


In [7]:
# verify population
print("Issues:", es.count(index="issues")["count"])
print("Metrics:", es.count(index="metrics")["count"])
print("Docs:", es.count(index="docs")["count"])

Issues: 20
Metrics: 96
Docs: 8


  print("Issues:", es.count(index="issues")["count"])
  print("Metrics:", es.count(index="metrics")["count"])
  print("Docs:", es.count(index="docs")["count"])


In [8]:
# test queries
res = es.search(index="issues", query={"match": {"tags": "timeout"}})
for r in res["hits"]["hits"]:
    print(r["_source"]["id"], r["_source"]["tags"])

ISSUE-002 ['db', 'timeout']
ISSUE-004 ['timeout', 'API']
ISSUE-005 ['performance', 'timeout']
ISSUE-007 ['timeout', 'login']
ISSUE-008 ['timeout', 'login']
ISSUE-002 ['db', 'timeout']
ISSUE-004 ['timeout', 'API']
ISSUE-005 ['performance', 'timeout']
ISSUE-007 ['timeout', 'login']
ISSUE-008 ['timeout', 'login']


  res = es.search(index="issues", query={"match": {"tags": "timeout"}})


In [9]:
res = es.search(index="metrics", query={
    "bool": {
        "must": [
            {"match": {"component": "frontend"}},
            {"range": {"cpu_usage": {"gt": 80}}}
        ]
    }
})
for r in res["hits"]["hits"]:
    print(r["_source"]["cpu_usage"], r["_source"]["timestamp"])

88.63 2025-04-07T06:52:43.506447
80.08 2025-04-07T06:57:43.506447
88.01 2025-04-07T07:27:43.506447
88.3 2025-04-07T07:42:43.506447


  res = es.search(index="metrics", query={


In [10]:
res = es.search(index="docs", query={"match": {"tags": "monitoring"}})
for r in res["hits"]["hits"]:
    print(r["_source"]["title"], r["_source"]["tags"])

ElasticSearch indexing FAQ ['frontend', 'monitoring', 'infrastructure']
Frontend error handling guide ['infrastructure', 'monitoring', 'frontend']
Scaling strategies for microservices ['monitoring', 'infrastructure', 'faq']


  res = es.search(index="docs", query={"match": {"tags": "monitoring"}})
