In [None]:
!pip install elasticsearch

Note: you may need to restart the kernel to use updated packages.


In [7]:
from elasticsearch import Elasticsearch
import json

# Connect to Elasticsearch
es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])

# Specify the index name
index_name = "emails"

# Check if the index exists, create it if not
if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name)
    print(f"Index '{index_name}' created.")

# Load the emails from the JSON file (or use the `emails` list from the previous step)
with open('emails.json', 'r', encoding='utf-8') as f:
    emails = json.load(f)

# Index each email into Elasticsearch
for i, email in enumerate(emails):
    response = es.index(index=index_name, id=i+1, document=email)
    print(f"Document indexed with ID: {i+1}")


ConnectionError: Connection error caused by: ConnectionError(Connection error caused by: ProtocolError(('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))))

In [None]:
# Search for all emails
response = es.search(index=index_name, body={"query": {"match_all": {}}})
for hit in response['hits']['hits']:
    print(hit['_source'])


{'from': 'Antoine Richard <a.richard@musee-calvet.org>', 'to': 'Marie Durand <marie.durand@archives-vaucluse.fr>', 'subject': '=?utf-8?q?M=C3=A9diation_num=C3=A9rique?= - Nouveau projet', 'date': 'Fri, 01 Dec 2023 00:00:00 -0000', 'body': 'Bonjour,\n\nDans le cadre de la pr=C3=A9paration de l\'exposition "R=C3=A9volution et Empi=\nre en Vaucluse" pr=C3=A9vue pour February 2024, pouvez-vous me confirmer la l=\niste des documents que vous souhaitez pr=C3=A9senter?\n\nNous devons finaliser la sc=C3=A9nographie avec le graphiste d\'ici deux semai=\nnes.\n'}
{'from': 'Antoine Richard <a.richard@musee-calvet.org>', 'to': 'Marie Durand <marie.durand@archives-vaucluse.fr>', 'subject': 'Budget =?utf-8?q?pr=C3=A9visionnel_2024_-_Pr=C3=A9paration?=', 'date': 'Fri, 27 Jan 2023 00:00:00 -0000', 'body': "Bonjour Marie,\n\nSuite =C3=A0 notre =C3=A9change, pourriez-vous me transmettre votre contribut=\nion au rapport d'activit=C3=A9 2022 concernant les projets men=C3=A9s par vot=\nre service?\n\nJ'aur

  response = es.search(index=index_name, body={"query": {"match_all": {}}})


In [None]:
mapping = {
    "properties": {
        "from": {"type": "keyword"},
        "to": {"type": "keyword"},
        "subject": {"type": "text"},
        "date": {"type": "date"},
        "body": {"type": "text"}
    }
}

# Create index with mapping
es.indices.create(index=index_name, body={"mappings": mapping})


BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [emails/WK9JMTipTxG8si95J_8Ghw] already exists')

In [None]:
pip install ipywidgets


Note: you may need to restart the kernel to use updated packages.


In [None]:
from elasticsearch import Elasticsearch
import ipywidgets as widgets
from IPython.display import display

# Connect to Elasticsearch
es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])

# Function to search Elasticsearch based on the query entered
def search_elasticsearch(query):
    # Define the search query
    search_query = {
        "query": {
            "match": {
                "body": query  # Search the 'body' field of the emails
            }
        }
    }

    # Execute the search
    search_response = es.search(index="emails", body=search_query)

    # Display the search results
    print(f"Search results for '{query}':")
    for hit in search_response['hits']['hits']:
        print(f"Subject: {hit['_source']['subject']}")
        print(f"From: {hit['_source']['from']}")
        print(f"Date: {hit['_source']['date']}")
        print(f"To: {hit['_source']['to']}")
        print(f"Body: {hit['_source']['body'][:200]}...")  # Show first 200 characters of body
        print("-" * 80)


# Function to search Elasticsearch based on the query entered
def search_elasticsearch_multi(query):
    # Define the search query with multi_match to search across multiple fields
    search_query = {
        "query": {
            "multi_match": {
                "query": query,  # The search term entered
                "fields": ["subject", "from", "body", "to", "date"],  # Fields to search across
                "fuzziness": "AUTO"  # Automatically calculate fuzziness based on the length of the word
            }
        }
    }

    # Execute the search
    search_response = es.search(index="emails", body=search_query)

    # Display the search results
    print(f"Search results for '{query}':")
    for hit in search_response['hits']['hits']:
        print(f"Subject: {hit['_source']['subject']}")
        print(f"From: {hit['_source']['from']}")
        print(f"Date: {hit['_source']['date']}")
        print(f"To: {hit['_source']['to']}")
        print(f"Body: {hit['_source']['body'][:200]}...")  # Show first 200 characters of body
        print("-" * 80)


In [None]:
# Create the text widget for input
search_text = widgets.Text(
    description='Search Query:',
    placeholder='Enter search term...',
    layout=widgets.Layout(width='50%')
)

In [None]:
search_text

Text(value='', description='Search Query:', layout=Layout(width='50%'), placeholder='Enter search term...')

In [None]:


# Set up the interactivity
widgets.interactive(search_elasticsearch_multi, query=search_text)

interactive(children=(Text(value='lucie', description='Search Query:', layout=Layout(width='50%'), placeholder…

In [None]:


# Set up the interactivity
widgets.interactive(search_elasticsearch, query=search_text)

interactive(children=(Text(value='lucie', description='Search Query:', layout=Layout(width='50%'), placeholder…

In [None]:
search_button = widgets.Button(description="Search")
output = widgets.Output()

def on_button_click(b):
    with output:
        search_elasticsearch(search_text.value)

search_button.on_click(on_button_click)
display(search_text, search_button, output)


Text(value='', description='Search Query:', layout=Layout(width='50%'), placeholder='Enter search term...')

Button(description='Search', style=ButtonStyle())

Output()