### Setup Elasticsearch cluster

In [None]:
import os
import time
# Route IPython's `!cmd` shell escapes through os.system.
# NOTE(review): presumably needed so that backgrounded commands (`... &`) return
# immediately instead of blocking the cell - confirm before removing.
get_ipython().system = os.system

In [None]:
# download Elasticsearch binaries into downloads folder
# <YOUR PASSWORD> is your sudo password
!mkdir ../downloads
!wget -q https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz -P ../downloads
!wget -q https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz.sha512 -P ../downloads
!tar -xzf ../downloads/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz -C ../downloads
!echo "<YOUR PASSWORD>" | sudo chown -R daemon:daemon ../downloads/elasticsearch-7.9.2/
!shasum -a 512 -c ../downloads/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz.sha512

In [None]:
# Start the Elasticsearch server as a background process (trailing `&`).
# sudo flags: -H use the target user's HOME, -S read the password from stdin,
# -u daemon run the server as the `daemon` user that owns the install tree.
!echo "<YOUR PASSWORD>" | sudo -HSu daemon ../downloads/elasticsearch-7.9.2/bin/elasticsearch &

In [None]:
# Check the daemon process status: list all processes and keep the lines that
# mention elasticsearch (the grep process itself will also show up).
!!ps -ef | grep elasticsearch

In [None]:
# HTTP endpoint of the local Elasticsearch node
port = 9200
host = 'http://localhost:' + str(port)

In [None]:
# Check the cluster started up correctly: wait 30 seconds for the server to
# come up, then request the root endpoint ({host} is the URL defined above).
time.sleep(30)
!!curl -s {host}

## Collect browser history

In [None]:
from os import path
import os
import glob
import platform
import shutil
import tempfile
import sqlite3
import re
import json

#### Base aggregator
Each browser gets its own subclass of the base class, because the browsers' internal database schemas differ.

In [None]:
class Aggregator:
    """Collect browsing history from the browsers' local SQLite databases.

    Used as a context manager: entering looks up each supported browser's
    history database and copies it into the temp directory (so the copy can be
    read while the original is locked by the browser); leaving deletes those
    copies again.

    Browser-specific subclasses provide `self.conn` / `self.cursor`,
    `_visit_type` and `get_history_as_json`.
    """

    def __init__(self):
        # merged history rows (list of dicts), filled by `merge`
        self.browsing_history = None
        # browser -> {'orig': [matching db paths], 'tmp': temp copy or None}
        self.db_files = {}

    def __enter__(self):
        """Locate the history databases and create temp copies of them.

        Returns:
            self, with `db_files` populated for every supported browser. A
            browser that is not installed or has no history database keeps
            `'orig': []` and `'tmp': None`.
        """
        # scan browsers internal history dbs
        curr_os = platform.system()
        home_dir = path.expanduser('~')
        oss_bin_paths = {
            'Linux'  : ('/', 'usr', 'bin'),
            'Darwin' : ('/', 'Applications'),
            'Windows': ('C:/', 'Program Files')
        }
        # /usr/bin: symlinked from /usr/lib
        install_dirs = {
            os_name : path.join(*path_comps)
            for os_name, path_comps in oss_bin_paths.items()
        }
        browsers_data_stores = {
            'Chrome' : ('.config', 'google-chrome', 'Default', 'History'),
            'Firefox' : ('.mozilla', 'firefox', '*.default*', 'places.sqlite')
        }
        browsers = {
            browser : path.join(home_dir, *path_comps)
            for browser, path_comps in browsers_data_stores.items()
        }
        # store both locked and lock-free db copies
        self.db_files = {
            browser : {'orig': [], 'tmp': None}
            for browser in browsers
        }

        # unknown platforms have no install dir -> nothing is considered installed
        install_dir = install_dirs.get(curr_os)
        for browser, db_pattern in browsers.items():
            # glob.glob returns a (possibly empty) list, never None
            installed = install_dir is not None and bool(
                glob.glob(f'{install_dir}/*{browser.lower()}*')
            )
            if not installed:
                continue
            matches = glob.glob(db_pattern)
            if not matches:
                continue
            # several profiles may match (e.g. Firefox `*.default*`);
            # copy the most recently modified database
            newest = max(matches, key = path.getmtime)
            self.db_files[browser]['orig'] = matches
            self.db_files[browser]['tmp'] = self._tmp_copy(newest)

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Delete the temp copies; exceptions from the `with` body propagate."""
        # delete tmp copies on exit
        for files in self.db_files.values():
            tmp_file = files.get('tmp')
            if not tmp_file:
                # nothing was copied for this browser
                continue
            try:
                os.remove(tmp_file)
            except FileNotFoundError:
                # already gone - nothing left to clean up
                pass

    def _tmp_copy(self, original_file):
        """Copy `original_file` into the temp directory and return the copy's path."""
        tmp = tempfile.gettempdir()
        filename = path.basename(original_file)
        tmp_file = path.join(tmp, filename)
        shutil.copy2(original_file, tmp_file)
        return tmp_file

    @classmethod
    def _regexp(cls, regex, field_val):
        """Base REGEXP implementation registered with SQLite.

        Returns False for NULL values instead of raising inside the callback.
        """
        if field_val is None:
            return False
        return bool(re.search(regex, field_val))

    def _extract(self, row):
        """Return the first element of a row / cursor description entry."""
        return row[0]

    def _get_history_tables(self, conn, cursor):
        """Return the names of tables whose name contains `history` or `visit`."""
        conn.create_function("REGEXP", 2, Aggregator._regexp)
        regex = '.*(history|visit).*'
        # single quotes: 'table' is a string literal, not an identifier
        query = cursor.execute("""
            SELECT name FROM sqlite_master
            WHERE type = 'table' AND name REGEXP ?
        """, [regex])
        return [self._extract(row) for row in query.fetchall()]

    def _get_fields(self, table):
        """Look up the column names of `table` (requires `self.cursor`)."""
        # identifiers cannot be bound as parameters, so quote the name instead
        quoted = '"' + table.replace('"', '""') + '"'
        # LIMIT 0: only the cursor description is needed, not the rows
        query = self.cursor.execute(f'SELECT * FROM {quoted} LIMIT 0')
        return [self._extract(column) for column in query.description]

    def _to_dict(self, row, fields):
        """Convert a row to a dict by mapping column names to values.

        `fields` is a cursor description. A `visit_type` value is translated by
        the subclass' `_visit_type` and reduced to the lower-cased part after
        the last underscore.
        """
        dict_ = {}
        for column, val in zip(fields, row):
            field = self._extract(column)
            if field == 'visit_type':
                val = self._visit_type(val)
                val = val.split('_')[-1].lower()
            dict_[field] = val
        return dict_

    def _to_json(self, cursor):
        """Return all rows of an executed cursor as a list of dicts."""
        fields = cursor.description
        return [self._to_dict(row, fields) for row in cursor.fetchall()]

    def merge(self, history):
        """Append `history` (list of documents) to the collected history."""
        if not self.browsing_history:
            self.browsing_history = history
        else:
            self.browsing_history += history

    def get_history_as_json(self):
        """Return the browser history as a list of json documents (subclasses)."""
        raise NotImplementedError

    def save(self):
        """Write the collected history to ../dataset/browsing_history.json.

        Does nothing when no history has been collected.
        """
        if self.browsing_history:
            dataset_dir = '../dataset'
            # create the target folder on first use instead of failing
            os.makedirs(dataset_dir, exist_ok = True)
            file_path = os.path.join(dataset_dir, 'browsing_history.json')
            with open(file_path, 'w') as file:
                json.dump(self.browsing_history, file, indent = 2)

In [None]:
class AggregatorFirefox(Aggregator):
    """Reads browsing history from a Firefox `places.sqlite` database."""

    # written representation of the visit types; the stored value is the
    # 1-based position in this tuple
    _VISIT_TYPES = (
        'TRANSITION_LINK',
        'TRANSITION_TYPED',
        'TRANSITION_BOOKMARK',
        'TRANSITION_EMBED',
        'TRANSITION_REDIRECT_PERMANENT',
        'TRANSITION_REDIRECT_TEMPORARY',
        'TRANSITION_DOWNLOAD',
        'TRANSITION_FRAMED_LINK',
        'TRANSITION_RELOAD'
    )

    def __init__(self, db_file):
        """Open `db_file` read-only.

        Raises:
            sqlite3.OperationalError: unable to open database file.
        """
        # initialise the attributes defined by the base class
        super().__init__()
        self.conn = sqlite3.connect(f'file:{db_file}?mode=ro', uri = True)
        self.cursor = self.conn.cursor()
        sqlite3.enable_callback_tracebacks(True)

    def _visit_type(self, enum):
        """Map a visit type to its written rep for ease of querying.

        Values outside 1..9 (including NULL) map to 'TRANSITION_UNKNOWN'
        instead of wrapping around to the last entry or raising.
        """
        if isinstance(enum, int) and 1 <= enum <= len(self._VISIT_TYPES):
            return self._VISIT_TYPES[enum - 1]
        return 'TRANSITION_UNKNOWN'

    def get_history_as_json(self):
        """Return the browser history as a list of json documents.

        One document per row of `moz_historyvisits`, joined with its
        `moz_places` entry; timestamps are stored in microseconds and are
        converted with `DATETIME(..., 'unixepoch')`.
        """
        # '%d' is single-quoted so SQLite always reads it as a string literal
        self.cursor.execute("""
            SELECT moz_historyvisits.id,
                   moz_places.url,
                   moz_places.title,
                   moz_places.visit_count,
                   (
                       SELECT printf('%d', total(use_count))
                       FROM moz_inputhistory
                       WHERE moz_inputhistory.place_id = moz_places.id
                   ) typed_count,
                   DATETIME(moz_places.last_visit_date/1000000,'unixepoch') as last_visit_date,
                   DATETIME(moz_historyvisits.visit_date/1000000, 'unixepoch') as visit_date,
                   moz_historyvisits.from_visit,
                   moz_historyvisits.visit_type,
                   'Firefox' as browser
            FROM moz_places, moz_historyvisits
            WHERE moz_historyvisits.place_id = moz_places.id;
        """)
        rows_as_json = self._to_json(self.cursor)
        return rows_as_json

In [None]:
class AggregatorChrome(Aggregator):
    """Reads browsing history from a Chrome `History` database."""

    # only the low byte of `visits.transition` holds the core transition type
    _CORE_MASK = 0xFF
    # written representation of the core transition types, indexed by value
    _VISIT_TYPES = (
        'LINK',
        'TYPED',
        'AUTO_BOOKMARK',
        'AUTO_SUBFRAME',
        'MANUAL_SUBFRAME',
        'GENERATED',
        'START_PAGE',
        'FORM_SUBMIT',
        'RELOAD',
        'KEYWORD',
        'KEYWORD_GENERATED'
    )

    def __init__(self, db_file):
        """Open `db_file` read-only.

        Raises:
            sqlite3.OperationalError: unable to open database file.
        """
        # initialise the attributes defined by the base class
        super().__init__()
        self.conn = sqlite3.connect(f'file:{db_file}?mode=ro', uri = True)
        self.cursor = self.conn.cursor()
        sqlite3.enable_callback_tracebacks(True)

    def _visit_type(self, enum):
        """Map a transition value to its written rep for ease of querying.

        The qualifier bits above the core mask are discarded. Core values that
        are not in the table (and NULL) map to 'UNKNOWN' instead of raising.
        """
        if not isinstance(enum, int):
            return 'UNKNOWN'
        core = enum & self._CORE_MASK
        if core < len(self._VISIT_TYPES):
            return self._VISIT_TYPES[core]
        return 'UNKNOWN'

    def get_history_as_json(self):
        """Return the browser history as a list of json documents.

        One document per row of `visits`, joined with its `urls` entry.
        Timestamps are microseconds counted from 1601-01-01 and are shifted to
        the unix epoch before formatting.
        """
        # NOTE(review): these timestamps are converted with 'localtime', while
        # AggregatorFirefox formats its timestamps without it - confirm which
        # time zone the merged dataset is meant to use.
        self.cursor.execute("""
            SELECT urls.id,
                   urls.url, 
                   urls.title, 
                   urls.visit_count, 
                   urls.typed_count, 
                   DATETIME(urls.last_visit_time / 1000000 + (strftime('%s', '1601-01-01')), 'unixepoch', 'localtime') as last_visit_date, 
                   DATETIME(visit_time / 1000000 + (strftime('%s', '1601-01-01')), 'unixepoch', 'localtime') as visit_date, 
                   visits.from_visit, 
                   visits.transition as visit_type,
                   'Chrome' as browser
            FROM urls, visits
            WHERE urls.id = visits.url
        """)
        rows_as_json = self._to_json(self.cursor)
        return rows_as_json

#### Extract browsers' history 

In [None]:
# Pair every browser with the subclass that understands its schema explicitly,
# rather than relying on the ordering of two separate collections.
browser_aggregators = {
    'Chrome': AggregatorChrome,
    'Firefox': AggregatorFirefox,
}

with Aggregator() as agg:
    db_files = agg.db_files
    for browser, AggregatorBrowser in browser_aggregators.items():
        db_file = db_files[browser]['tmp']
        print(f'\n{browser}: ', db_file, end="\n\n")
        if not db_file:
            # no temp copy means no history database was found for this browser
            print('no history database found')
            continue
        try:
            agg_browser = AggregatorBrowser(db_file)
        except sqlite3.OperationalError as e:
            # the database could not be opened; carry on with the next browser
            print(str(e))
            continue
        try:
            tables = agg_browser._get_history_tables(agg_browser.conn, agg_browser.cursor)

            # show the schema of every history-related table
            for table in tables:
                fields = agg_browser._get_fields(table)
                print(table, fields, sep = "\n", end = "\n\n")

            json_docs = agg_browser.get_history_as_json()
            agg.merge(json_docs)
        except sqlite3.OperationalError as e:
            print(str(e))
        finally:
            # release the handle before the temp copy is deleted on exit
            agg_browser.conn.close()

    print(json.dumps(agg.browsing_history, indent = 2))

    agg.save()

## Build a search engine
Make sure to install `elasticsearch==7.9.1` (if not installed)

In [None]:
# install the Python client pinned to 7.9.1 (the server downloaded above is 7.9.2)
!pip install elasticsearch==7.9.1

In [None]:
import elasticsearch as es
import elasticsearch.helpers as helpers
from uuid import uuid4
from datetime import datetime as dt
import json
from os import path

In [None]:
class SearchEngine:
    """Small wrapper around an Elasticsearch client for the browsing history.

    On construction the documents saved in ../dataset/browsing_history.json
    are loaded and the cluster is pinged.
    """

    def __init__(self, host):
        """Connect to `host`.

        Raises:
            Exception: when the cluster does not answer the ping.
            OSError: when the browsing history file cannot be read.
        """
        self.client = es.Elasticsearch(host)
        self.docs = self.read_browser_history()
        ping = self.client.ping()
        if not ping:
            raise Exception('Error: could not connect to cluster')
        print('Ok: cluster is up')

    def read_browser_history(self):
        """Load and return the documents from ../dataset/browsing_history.json."""
        docs_path = path.join('..', 'dataset', 'browsing_history.json')
        with open(docs_path, 'r') as f:
            return json.load(f)

    def cluster_info(self):
        """Print the cluster info as indented JSON."""
        print(json.dumps(self.client.info(), indent = 2))

    def create_index(self, index_name, _doc, sim_module):
        """Create `index_name` and bulk index the loaded documents into it.

        Args:
            index_name: name of the index to create.
            _doc: value passed through to the bulk actions.
            sim_module: similarity definition placed under `settings.similarity`.

        Raises:
            ValueError: when there are no documents to derive mappings from.
            Exception: when the index already exists.
        """
        if not self.docs:
            raise ValueError('Error: no documents to derive mappings from.')
        # get document sample
        sample_doc = self.docs[0]
        # extract mappings from document sample
        mappings = self._extract_mappings(sample_doc)
        request_body = {
            'settings': {
                'number_of_shards': 1,
                'number_of_replicas': 1,
                'similarity' : sim_module
            }
        }
        request_body.update(mappings)
        if self.client.indices.exists(index_name):
            raise Exception(f'Error: index {index_name} exists.')
        # NOTE(review): the mappings are sent under the key 'mapping' (see
        # _extract_mappings) while the Elasticsearch create-index API documents
        # 'mappings'. With `ignore = 400` a rejected request passes silently and
        # the bulk call below would auto-create the index with dynamic mappings.
        # Confirm, and fix together with the date format of the documents.
        self.client.indices.create(index_name, body = request_body, ignore = 400)
        # bulk index docs
        self._do_index(self.docs, index_name, _doc)
        print(f'OK: index {index_name} created.')

    def _do_index(self, docs, _index, _doc):
        """Bulk index `docs` into `_index`, using the list position as `_id`.

        Errors are printed rather than raised.
        """
        def bulk(docs, _index, _doc):
            # one action per document
            for i, doc in enumerate(docs):
                yield {
                    "_index": _index,
                    "_doc": _doc,
                    "_id": i,
                    "_source": dict(doc)
                }

        try:
            succ, fail = helpers.bulk(self.client, bulk(docs, _index, _doc))
            print(f'Ok: success: {succ}; fail: {fail}')
        except Exception as e:
            print(str(e))

    def update_ranking_model(self, index_name, sim_module):
        """Apply `sim_module` to `index_name` and re-index under a new name.

        The similarity settings are put on the (closed) index, the index is
        re-indexed into `<similarity name>_<base name>`, the old index is
        deleted and an alias with the base name is pointed at the new index.
        """
        # name of the (first) similarity defined in the module
        name = next(iter(sim_module))
        settings = {
            'settings' : {
                'index' : {
                    'similarity' : sim_module
                }
            }
        }

        # the index is closed while its similarity settings are changed
        self.client.indices.close(index = index_name)
        self.client.indices.put_settings(index = index_name, body = settings)
        self.client.indices.open(index = index_name)

        model_type = sim_module[name]['type'].lower()
        base_index_name = index_name.split('_')[-1]
        new_index_name = f'{name}_{base_index_name}'

        # NOTE(review): the new index is created implicitly by the reindex call;
        # the Elasticsearch docs state that reindex does not copy settings from
        # the source, so the similarity set above may not carry over - confirm.
        if self._re_index(index_name, new_index_name):
            # delete old index
            self.client.indices.delete(index = index_name)
            if self._update_alias(new_index_name, base_index_name):
                print(f"Index {index_name} updated with ranking model {model_type}")
        else:
            print(f"Failed to update {index_name} with ranking model {model_type}")

    def _re_index(self, index_name, new_index_name):
        """Reindex the old index with a new name; True if any doc was processed."""
        response = self.client.reindex({
            'source' : {
                'index' : index_name
            },
            'dest' : {
                'index' : new_index_name
            }
        })
        return response['total'] > 0

    def _update_alias(self, index_name, alias):
        """Create `alias` for `index_name`; True if the request was acknowledged.

        The alias carries the old index name so we can keep using it,
        e.g., history -> dfr_history.
        """
        response = self.client.indices.update_aliases(body = {
            'actions' : [{
                'add'  : {
                    'index' : index_name,
                    'alias' : alias
                }
            }]
        })
        return bool(response['acknowledged'])

    def index_info(self, index_name = None):
        """Print the cat-indices listing for `index_name`, or for all indices."""
        if index_name:
            listing = self.client.cat.indices(format = 'json', index = index_name)
        else:
            listing = self.client.cat.indices(format = 'json')
        print(json.dumps(listing, indent = 2))

    def _convert_to_date(self, field):
        """Return `field` parsed as '%Y-%m-%d %H:%M:%S', or `field` unchanged.

        Non-strings (TypeError) and strings in another format (ValueError) are
        returned as they are.
        """
        try:
            return dt.strptime(field, '%Y-%m-%d %H:%M:%S')
        except (TypeError, ValueError):
            return field

    def _extract_mappings(self, sample):
        """Derive field types from a sample document.

        Values whose Python type has no entry in the type table (e.g. `None`,
        floats, booleans) are left out of the properties instead of raising.
        """
        types = {
            'int'      : 'integer',
            'str'      : 'text',
            'datetime' : 'date'
        }
        properties = {}
        for property_, raw_val in sample.items():
            # date-like strings become datetime objects so they map to 'date'
            property_val = self._convert_to_date(raw_val)
            es_type = types.get(type(property_val).__name__)
            if es_type is not None:
                properties[property_] = {'type' : es_type}
        return {
            'mapping' : {
                '_source' : {
                    'enabled' : 'true'
                },
                'properties' : properties
            }
        }

    def query(self, index, body = None):
        """Run a search on `index` with explanations enabled.

        Without a `body`, the first 10000 documents of a match_all are returned.
        """
        if body is None:
            # built per call so that no default dict is shared between calls
            body = {"size": 10000, "query": {"match_all": {}}}
        return self.client.search(body = body, index = index, explain = True)

    def get_hits(self, results, *fields, explain = True, fmt = 'json'):
        """Reduce a search response to the requested `fields` plus the score.

        Args:
            results: response returned by `query`.
            *fields: `_source` fields to include for each hit.
            explain: include the hit's explanation (json format only).
            fmt: 'json' returns a dict keyed by document id; 'ascii' returns a
                printable string.

        Raises:
            ValueError: for an unrecognised `fmt`.
        """
        if fmt == 'json':
            return {
                'count' : results['hits']['total']['value'],
                'hits' : {
                    hit['_id'] : {
                        **({ field : hit['_source'][field] for field in fields}),
                        'score': hit['_score'],
                        **({ 'explanation' : hit['_explanation'] } if explain else {})
                    }
                    for hit in results['hits']['hits']
                }
            }
        elif fmt == 'ascii':
            fmt_hits = []
            for hit in results['hits']['hits']:
                lines = [f"id: {hit['_id']}\n"]
                lines.extend(f"{field}: {hit['_source'][field]}\n" for field in fields)
                lines.append(f"score: {hit['_score']}\n")
                fmt_hits.append(''.join(lines))
            return '\n'.join(fmt_hits)
        else:
            raise ValueError(f'Error: unrecognised format {fmt}')
    
    

In [None]:
# Create the search engine; a failure (e.g. cluster not reachable) is reported
# on stdout instead of raising.
try:
    se = SearchEngine(host)
except Exception as err:
    print(str(err))

In [None]:
# se.cluster_info()

In [None]:
# you can manually delete an index or all - * - if you feel you messed somewhere
# WARNING: the '*' pattern below targets every index, not only the ones created
# by this notebook - replace it with a specific index name to delete just one.
se.client.indices.delete(index = '*')

## Ranking models
_BM25 similarity (default)_

Note: we're required to explain how ranking works for each of the models used, i.e. how it reflects in the documents returned.


### BM25 (Best Match Okapi)
This is the default ranking model used by Elasticsearch

In [None]:
# custom similarity named `sim_bm25`, built on the BM25 model
sim_bm25 = {
    'sim_bm25': dict(type = 'BM25', b = '0.75', k1 = 1.2)
}

In [None]:
# create index
index_name = 'history'
_doc = 'browser_history'
try:
    se.create_index(index_name, _doc, sim_bm25)
except Exception as e:
    # e.g. the index already exists; report it instead of hiding the reason
    print(str(e))

In [None]:
# se.index_info()

In [None]:
# fetch every record using the engine's default request body, then pretty-print it
q = se.query(index = index_name)
print(json.dumps(q, indent = 2))

In [None]:
# term query: documents whose `title` contains the term `sqlite`
query = dict(query = dict(term = dict(title = 'sqlite')))

res = se.query(index_name, body = query)
hits = se.get_hits(
    res,
    'url', 'title', 'visit_date', 'last_visit_date',
    explain = False,
    fmt = 'ascii'
)
# to print as json instead, drop `fmt` and use `json.dumps(hits, indent = 2)`
print(hits)

In [None]:
# A `bool` query combines clauses into boolean expressions:
# `should` behaves like an OR clause whereas `must` behaves like an AND clause.
# The clauses may target different fields.

# search for any of the keywords in the title
keywords = 'sqlite documentation history'
title_terms = {'terms': {'title': keywords.split(' ')}}
query = {'query': {'bool': {'should': [title_terms]}}}

res = se.query(index_name, body = query)
hits = se.get_hits(res, 'url', 'title', 'visit_date', 'last_visit_date', explain = False)
print(json.dumps(hits, indent = 2))

In [None]:
# search for documents whose `last_visit_date` is on or after March 29 and that
# match either `history` in the `title` or `google` in the `url`
# NOTE(review): a `match` query analyses its input as text, so /google/ is not
# treated as a regex here; use a `regexp` query if pattern matching is intended.
query = {
    'query' : {
        'bool' : {
            'must' : [
                {
                    'range' : {
                        'last_visit_date' : {
                            # yyyy-MM-dd (the month comes before the day)
                            'gte' : '2023-03-29'
                        }
                    }
                }
            ],
            # one dict per clause: two 'match' keys in the same dict would
            # silently overwrite each other
            'should' : [
                {
                    'match' : {
                        'title' : 'history'
                    }
                },
                {
                    'match' : {
                        'url' :  '/google/'
                    }
                }
            ],
            # next to `must`, `should` clauses are optional unless required here
            'minimum_should_match' : 1
        }
    }
}
res = se.query(index_name, body = query)
hits = se.get_hits(res, *['url', 'title', 'visit_date', 'last_visit_date'], explain = False)
print(json.dumps(hits, indent = 2))

In [None]:
# documents last visited on or before 2023-03-18
# (add "gte": "2023-01-01 00:00:00" to the range to set a lower bound as well)
query = {
    'query': {
        'range': {
            'last_visit_date' : {
                "lte": "2023-03-18 00:00:00",
                # must describe the value above: the time part uses colons
                "format": "yyyy-MM-dd HH:mm:ss"
            }
        }
    }
}
res = se.query(index_name, body = query)
hits = se.get_hits(res, *['url', 'title', 'visit_date', 'last_visit_date'], explain = False)
print(json.dumps(hits, indent = 2))

### DFR (Divergence from Randomness)
This model takes into account statistical properties of the collection, e.g. frequency and distribution of terms within the collection, length of documents, etc.  

In [None]:
# custom similarity named `sim_dfr`, built on the DFR model
dfr_options = {
    'type': 'DFR',
    'basic_model': 'g',
    'after_effect': 'l',
    'normalization': 'h2',
    'normalization.h2.c': '2.0',
}
sim_dfr = {'sim_dfr': dfr_options}
# switch the index over to the DFR ranking model
se.update_ranking_model(index_name, sim_dfr)

In [None]:
# print the listing of all indices (no index name given)
se.index_info()

### BM25F
This can be achieved using a `multi_match` query, which allows us to assign different weights to each field.

In [None]:
# custom similarity named `sim_bm25f`, built on the BM25 model
sim_bm25f = {
    "sim_bm25f": {
        'type' : 'BM25',
        'b' : '0.75',
        'k1' : 1.2
    }
}
# we have to update using the actual index name not the alias
# this is a known issue in Elasticsearch
# you can check the index name with `se.index_info()` in the cell above
# after the DFR step `index_name` is only an alias; the concrete index carries
# the `sim_dfr_` prefix, and the delete step does not accept an alias
se.update_ranking_model(f'sim_dfr_{index_name}', sim_bm25f)

In [None]:
# print the listing of all indices (no index name given)
se.index_info()

In [None]:
# attach a boost to every field using the `field^weight` notation
field_weights = {
    'title': 1,
    'url': 2,
    'browser': 3,
    'last_visit_date': 4,
}
fields = ['{}^{}'.format(name, boost) for name, boost in field_weights.items()]
fields

In [None]:
# `combined_fields` was introduced in v7.13 and truly implements BM25F.
# As we are running an older version (as provided by the lecturer),
# we have to either use `multi_match` or bump the version (not sure we can).
# see: https://opensourceconnections.com/blog/2021/06/30/better-term-centric-scoring-in-elasticsearch-with-bm25f-and-the-combined_fields-query/

multi_match_clause = {
    'query': 'sqlite',
    'fields': fields,
    'type': 'cross_fields',
}
query = {'query': {'multi_match': multi_match_clause}}

q1 = se.query(index_name, body = query)
hits = se.get_hits(q1, 'url', 'title', 'visit_date', 'last_visit_date', explain = False)
print(json.dumps(hits, indent = 2))

### Evaluation
The test suite contains 3-tuples of the form (query id, document id, score). 
These are used to calculate the average effectiveness of the search engine by calculating
* Recall and precision
* F-score
* Fall-out

In [None]:
class Evaluator:
    """Effectiveness measures for one query.

    Args:
        relevance: iterable of (query id, document id, score) judgements; a
            score > 0 marks the document as relevant, anything else as
            non-relevant.
        res: search response; the ids under `res['hits']['hits']` are the
            retrieved documents.

    Every measure returns -1 when it is undefined (division by zero).
    """

    def __init__(self, relevance, res):
        # materialise first so that a one-shot iterable can be read twice
        judgements = list(relevance)
        self.relevant_docs = [str(doc_id) for _, doc_id, score in judgements if score > 0]
        self.non_relevant_docs = [str(doc_id) for _, doc_id, score in judgements if score <= 0]
        self.retrieved_docs = [hit['_id'] for hit in res['hits']['hits']]
        # relevant documents that were retrieved
        self.docs_intersection = set(self.relevant_docs).intersection(set(self.retrieved_docs))

    def recall(self):
        """Fraction of the relevant documents that were retrieved."""
        try:
            return len(self.docs_intersection) / len(self.relevant_docs)
        except ZeroDivisionError:
            return -1

    def precision(self):
        """Fraction of the retrieved documents that are relevant."""
        try:
            return len(self.docs_intersection) / len(self.retrieved_docs)
        except ZeroDivisionError:
            return -1

    def f_score(self):
        """Harmonic mean of precision and recall."""
        r = self.recall()
        p = self.precision()
        # an undefined component (-1) must not be fed into the formula
        if r < 0 or p < 0 or p + r == 0:
            return -1
        return 2 * (p * r) / (p + r)

    def fall_out(self):
        """Fraction of the non-relevant documents that were retrieved."""
        false_positives = set(self.retrieved_docs).intersection(self.non_relevant_docs)
        try:
            return len(false_positives) / len(self.non_relevant_docs)
        except ZeroDivisionError:
            return -1

In [None]:
# queries:
# 0. search for documents that have `sqlite ` in  title
# 1. search for documents that have either history on the `title` or 'database' somewhere in the URL
# 2. search for links from Chrome

# relevance scores per query (outer index = query id), listed in document-id order
relevance_scores = [
    [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,2,2,2,1
    ,2,0,0,0,0,0,0,0,2,2,2,2,2,2,2,0,0,0,0,2,2,0,0,0,0,0,0,0,0,2
    ,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0],
    [0,0,0,0,2,2,1,1,2,2,1,2,2,1,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0
    ,0,0,1,1,0,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,2
    ,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,2,2,0,2,2,2,2,0,2,2,0,2,2,0,2,0,2,0,2,2,2,2,0,2,0,2,0,2,2
    ,0,0,2,2,2,0,0,2,0,2,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    ,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
]

# 3-tuples (query id, document id, score) for the 77 documents of each query
q_rj = [
    [(query_id, doc_id, score) for doc_id, score in zip(range(77), scores)]
    for query_id, scores in enumerate(relevance_scores)
]

# q_rj

In [None]:
# request bodies for the test queries above, in query-id order
query_bodies = [
    # 0: `sqlite` in the title
    {'query': {'term': {'title': 'sqlite'}}},
    # 1: `history` in the title or `/database/` in the url
    {
        'query': {
            'bool': {
                'should': [
                    {'match': {'title': 'history'}},
                    {'match': {'url': '/database/'}},
                ]
            }
        }
    },
    # 2: visits of type `link` recorded by Chrome
    {
        'query': {
            'bool': {
                'must': [
                    {'match': {'visit_type': 'link'}},
                    {'match': {'browser': 'chrome'}},
                ]
            }
        }
    },
]

# query hits for the queries above
qs = [se.query(index_name, body = request) for request in query_bodies]

for i, (rel, res) in enumerate(zip(q_rj, qs)):
    # run the evaluator on the test queries
    evaluator = Evaluator(rel, res)
    report = [
        f'q#{i}',
        f'recall: {evaluator.recall():.3f}',
        f'precision: {evaluator.precision():.3f}',
        f'f-score: {evaluator.f_score():.3f}',
        f'fall-out: {evaluator.fall_out():.3f}',
    ]
    print(*report, sep = "\n", end = "\n\n")

### User Interface

In [1]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display, clear_output

In [2]:
def _single_field_query(kind):
    """Return a `(field, query)` builder for a `kind` clause on one field."""
    def build(field, query):
        return {'query': {kind: {field: query}}}
    return build


def _range_query(field, query):
    """Request body for documents whose `field` is less than or equal to `query`."""
    return {'query': {'range': {field: {'lte': query}}}}


def _multi_match_query(field, query):
    """Request body for a cross-fields search; `field` is the list of fields."""
    return {'query': {'multi_match': {'query': query, 'type': 'cross_fields', 'fields': field}}}


# query type name -> function building the request body from (field, query)
query_types = {
    'match' : _single_field_query('match'),
    'term' : _single_field_query('term'),
    'range' : _range_query,
    'multi_match' : _multi_match_query,
}

In [4]:
class SearchInterface:
    """Minimal widget front-end for querying the index.

    Shows a query type drop-down, a field box, a weights box and a search box;
    every change of the search box runs the query and prints the hits.
    """

    def __init__(self):
        self.search_area = widgets.Text(
            value = '',
            placeholder = 'Enter search...',
            description = 'Search:',
        )

        self.query_type_dd = widgets.Dropdown(
            options = ['match', 'term', 'range', 'multi_match'],
            value = 'match',
            description = 'Query type:'
        )

        # free-text field name(s), e.g. url, title or last_visit_date;
        # a Text widget has no `options` trait, so none is passed
        self.field_input = widgets.Text(
            value = '',
            placeholder = 'Enter field...',
            description = 'Field:',
        )

        self.weight_input = widgets.Text(
            value = '',
            placeholder = 'Enter weights...',
            description = 'Weights:',
        )

        self.output = widgets.Output()

    def search(self, change):
        """Run the query described by the widgets and print the hits.

        Args:
            change: change notification passed by the widget (not used).

        For `multi_match`, the field box holds space-separated field names and
        the weights box the matching space-separated boosts; a field without a
        boost is searched unweighted.
        """
        query_type = self.query_type_dd.value
        field = self.field_input.value.strip()
        weight = self.weight_input.value.strip()
        query = self.search_area.value.strip()

        query_f = query_types[query_type]
        with self.output:
            clear_output()
            if query:
                args = [field, query]
                if query_type == 'multi_match':
                    names = field.split()
                    boosts = weight.split()
                    # `name^boost` where a boost was given, plain `name` otherwise
                    args[0] = [
                        f'{name}^{boosts[pos]}' if pos < len(boosts) else name
                        for pos, name in enumerate(names)
                    ]
                res = se.query(index_name, body = query_f(*args))
                hits = se.get_hits(res, *['url', 'title', 'visit_date', 'last_visit_date'], explain = False, fmt = 'ascii')
                print(hits)

    def display(self):
        """Hook the search box up to `search` and render all widgets."""
        self.search_area.observe(self.search, names='value')
        # render widgets
        display(self.query_type_dd, self.field_input, self.weight_input, self.search_area, self.output)

            
# build the widgets and render them below this cell
SearchInterface().display()

Dropdown(description='Query type:', options=('match', 'term', 'range', 'multi_match'), value='match')

Text(value='', description='Field:', placeholder='Enter field...')

Text(value='', description='Weights:', placeholder='Enter weights...')

Text(value='', description='Search:', placeholder='Enter search...')

Output()