### **02_search.ipynb**
### **Similarity search and multilingual semantic-similarity search**

* ##### 01 - Install packages
* ##### 02 - Import packages
* ##### 03 - Download Multilingual Universal Sentence Encoder model
* ##### 04 - Create Elasticsearch client
* ##### 05 - Similarity search
* ##### 06 - Multilingual semantic-similarity search

### 01 - Install packages

In [None]:
import sys

In [None]:
!{ sys.executable } -m pip install --upgrade elasticsearch tensorflow tensorflow-hub tensorflow-text urllib3

### 02 - Import packages

In [None]:
import tensorflow_text

from elasticsearch  import Elasticsearch
from tensorflow_hub import load

In [None]:
from urllib3 import disable_warnings
# Silence urllib3 warnings for the rest of the session.
# NOTE(review): presumably meant to hide the insecure-request warnings caused by
# `verify_certs = False` in the client cell - confirm. Called without arguments,
# this hides other urllib3 warnings as well.
disable_warnings()

### 03 - Download Multilingual Universal Sentence Encoder model

In [None]:
# Load the multilingual-large Universal Sentence Encoder (version 2) with tensorflow_hub's `load`.
# NOTE(review): the otherwise unused `import tensorflow_text` is presumably required so the
# ops used by this model are registered before loading - confirm before removing it.
model = load('https://www.kaggle.com/models/google/universal-sentence-encoder/frameworks/TensorFlow2/variations/multilingual-large/versions/2')

In [None]:
# Smoke test: embed one sentence and display the first row of the output as a NumPy array.
model('Hello World, ML Elasticsearch!')[0].numpy()

### 04 - Create Elasticsearch client

In [None]:
# Connection settings used to build the Elasticsearch client.
# Replace the <...> placeholders with real values before running.
es_host     = '<elasticsearch_host>'
es_username = '<elasticsearch_username>'
es_password = '<elasticsearch_password>'

In [None]:
# Elasticsearch client authenticated with HTTP basic auth.
# NOTE(review): `verify_certs = False` turns off TLS certificate verification.
# Acceptable for a local demo with a self-signed certificate; for anything else,
# enable verification and supply the CA certificate instead.
es = Elasticsearch(
    hosts        = es_host,
    basic_auth   = (es_username, es_password),
    verify_certs = False
)

In [None]:
# Connectivity check: request basic cluster information through the client.
es.info()

### 05 - Similarity search

In [None]:
# Name of the index that `search` queries.
bbc_news_index = 'bbc_news'

In [None]:
# The same query phrase in English, Spanish and Portuguese, used to compare
# the two search approaches below.
text_english    = 'european economic growth'
text_spanish    = 'crecimiento económico europeo'
text_portuguese = 'crescimento econômico europeu'

In [None]:
def search(query):
    """Run `query` against the BBC news index and print the best hit.

    Asks the module-level `es` client for a single hit (size = 1) from
    `bbc_news_index`. Prints the score, label and text of that hit, or
    'no results found...' when the response contains no hits.

    Args:
        query: Elasticsearch query DSL body (a dict) passed as `query`.

    Returns:
        None; the outcome is only printed.
    """

    response = es.search(index = bbc_news_index, query = query, size = 1)
    hits     = response['hits']['hits']

    # Guard clause: nothing matched, so there is no hit to describe.
    if not hits:

        print('no results found...')
        return

    top_hit = hits[0]

    print(f"score : { top_hit['_score'] }")

    document = top_hit['_source']

    print(f"label : { document['metadata']['label_text'] }")
    print(f"text  : { document['text'] }")

In [None]:
def similarity_search(text):
    """Print the best hit of a `match` query for `text` on the `text` field.

    Args:
        text: Query phrase handed to the `match` query.
    """

    search({ 'match' : { 'text' : text } })

In [None]:
# `match` query with the English phrase.
similarity_search(text_english)

In [None]:
# `match` query with the Spanish phrase.
# NOTE(review): presumably finds no or poor hits if the indexed text is English - confirm.
similarity_search(text_spanish)

In [None]:
# `match` query with the Portuguese phrase.
# NOTE(review): presumably finds no or poor hits if the indexed text is English - confirm.
similarity_search(text_portuguese)

### 06 - Multilingual semantic-similarity search

In [None]:
def multilingual_semantic_similarity_search(text):
    """Print the document whose stored vector is closest to the embedding of `text`.

    Embeds `text` with the module-level `model`, then runs a `script_score`
    query over every document (`match_all`). Each document is scored by the
    cosine similarity between the query embedding and its `vector` field.

    Args:
        text: Query phrase to embed.
    """

    # First row of the model output, as a NumPy array.
    embedding = model(text)[0].numpy()

    # Cosine similarity lies in [-1, 1]; the + 1.0 shifts it to [0, 2] because
    # Elasticsearch does not accept negative scores from a script.
    scoring_script = {
        'source' : "cosineSimilarity(params.vector, 'vector') + 1.0",
        'params' : { 'vector' : embedding }
    }

    search({
        'script_score' : {
            'query'  : { 'match_all' : {} },
            'script' : scoring_script
        }
    })

In [None]:
# Embedding-based search with the English phrase.
multilingual_semantic_similarity_search(text_english)

In [None]:
# Spanish phrase: ranked by embedding similarity rather than by shared terms.
multilingual_semantic_similarity_search(text_spanish)

In [None]:
# Portuguese phrase: ranked by embedding similarity rather than by shared terms.
multilingual_semantic_similarity_search(text_portuguese)