# Elasticsearch

More information about the elasticsearch Python package can be found at [elastic.co](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/getting-started-python.html) and in the [documentation](https://elasticsearch-py.readthedocs.io/en/latest/).

In [None]:
%%bash -e
# Upgrade pip itself first, then install the packages imported by the next cell
# (elasticsearch, numpy, pandas). The -q flag reduces pip's console output.
python3 -m pip -q install --upgrade pip
python3 -m pip -q install elasticsearch numpy pandas

In [None]:
# Python imports
from datetime import datetime
from elasticsearch import Elasticsearch
from IPython.display import HTML, display
import numpy as np
import os
import pandas as pd

In [None]:
# Connect to elastic and create variables
# The credentials are passed separately instead of being concatenated into the
# URL: a password containing characters such as "@", ":", "/" or "#" would
# otherwise produce a malformed URL and a failed or misdirected connection.
es = Elasticsearch(
    "http://elasticsearch:9200",
    basic_auth=("elastic", os.environ['ELASTIC_PASSWORD']),
)

# Accumulators filled by the search cell, one entry per hit:
results = []        # [filename, sha256, content] rows for the dataframe
sha_list = []       # sha256 of each hit (used as document id by the viewer)
filename_list = []  # filename of each hit as stored in the index

In [None]:
# Text to look for; the search cell matches it against "attachment.content".
search_term: str = "secret"

In [None]:
# Retrieve results from elastic
# Start from empty accumulators so that re-running this cell (for example after
# changing search_term) does not append to the hits of a previous run.
results = []
sha_list = []
filename_list = []

# size is the maximum number of hits to return; pass it as an integer.
response = es.search(
    index="leakdata-index-000001",
    size=10000,
    query={"match": {"attachment.content": search_term}},
)

# NOTE(review): the printed value is the total reported by the server; the
# number of hits actually returned below is capped by `size` — confirm whether
# the two can differ for this index.
print("Received %d hits for search term '%s'." % (response['hits']['total']['value'], search_term))
for hit in response['hits']['hits']:
    source = hit["_source"]
    # The dataframe row shows the filename without the extraction directory,
    # while filename_list keeps the filename exactly as stored in the index.
    results.append([
        source['filename'].replace('extracted/files/', ''),
        source['sha256'],
        source['attachment']['content'],
    ])
    sha_list.append(source['sha256'])
    filename_list.append(source['filename'])

In [None]:
# Build a table of the hits and turn each sha256 into a link to the file server.
def _sha_link(x):
    """Return an HTML anchor that points at the document with hash *x*."""
    return f'<a href="http://127.0.0.1:8081/{x}">{x}</a>'


df = pd.DataFrame(results, columns=['filename', 'sha256', 'content'])
df['sha256'] = df['sha256'].map(_sha_link)
# Uncomment to render the table with clickable links:
#HTML(df.to_html(escape=False))

In [None]:
# Use pure javascript because of problem with Jupyter labs
#
# Renders a small document viewer: Previous/Next buttons step through the
# search hits and load each one into an iframe served from 127.0.0.1:8081.
# Filenames and ids come from indexed data and are treated as untrusted, so
# they are JSON-encoded for the script and HTML-escaped for the markup.
import html
import json


def _js_literal(value):
    """Serialize *value* as a JavaScript literal that is safe inside <script>."""
    # json.dumps produces valid JavaScript (unlike Python's list repr, which
    # emits e.g. None); escaping "<" keeps a value such as "</script>" from
    # terminating the surrounding script element.
    return json.dumps(value).replace("<", "\\u003c")


doc_ids = sha_list
current_index = 0

script_template = """
<script type="text/Javascript">
    var currentIndex = {current_index};
    var docIDs = {doc_ids};
    var nameIDs = {filename_list};
    function updateIFrame(newIndex) {{
        if (newIndex >= 0 && newIndex < docIDs.length) {{
            currentIndex = newIndex;
            var baseURL = "http://127.0.0.1:8081/convert/";
            var iframe = document.getElementById("doc-iframe");
            iframe.src = baseURL + docIDs[currentIndex];
            document.getElementById("current-doc-id").textContent = "Current Document ID: " + docIDs[currentIndex];
            document.getElementById("current-name-id").textContent = "Current Document Name: " + nameIDs[currentIndex];

        }}
    }}
</script>
"""

viewer_template = """
{javascript}
<button onclick="updateIFrame(currentIndex - 1)">Previous</button>
<button onclick="updateIFrame(currentIndex + 1)">Next</button>
<br>
<span id="current-doc-id">Current Document ID: {initial_doc}</span><br>
<span id="current-name-id">Current Document Name: {initial_doc_name}</span>
<iframe id="doc-iframe" src="http://127.0.0.1:8081/convert/{initial_doc}" width="1150" height="850"></iframe>
"""

if doc_ids:
    javascript = script_template.format(
        current_index=current_index,
        doc_ids=_js_literal(doc_ids),
        filename_list=_js_literal(filename_list),
    )
    html_code = viewer_template.format(
        javascript=javascript,
        # Escaped because these values land in element text and in an attribute.
        initial_doc=html.escape(str(doc_ids[current_index])),
        initial_doc_name=html.escape(str(filename_list[current_index])),
    )
else:
    # No hits: indexing doc_ids[current_index] would raise IndexError, so show
    # a short notice instead of the viewer.
    javascript = ""
    html_code = "<p>No documents to display.</p>"

display(HTML(html_code))