In [None]:
# LASER
!python -m laserembeddings download-models

# SENTENCE-BERT
!pip install transformers # https://github.com/huggingface/transformers
!pip install -U sentence-transformers # https://github.com/UKPLab/sentence-transformers

# UNIVERSAL SENTENCE ENCODER
!pip install tensorflow
!pip install tensorflow_hub

# FASTTEXT
!pip install fasttext

In [61]:
from utils import tfidf_vectorizer, fasttext_vectorizer, laser_embeddings, bert_embeddings, use_embeddings, cosine_similarity

from arrays import same, different, synonyms, homonyms, sports_search, politics_search, food_search, science_search, finance_search, search_sentences, categories

## 1. Choose which model to use

Available models:
* tfidf_vectorizer
* fasttext_vectorizer
* laser_embeddings
* bert_embeddings
* use_embeddings

In [49]:
# Choose the model to use. Every later `vectorize(...)` call goes through
# whichever function is bound here, so swap the right-hand side to compare models:
# tfidf_vectorizer, fasttext_vectorizer, laser_embeddings, bert_embeddings, use_embeddings
vectorize = use_embeddings

## 2. Create vectors

In [50]:
# Embed every sentence collection with the selected model. The inputs are
# processed in the order listed and each result is bound to the name at the
# same position on the left-hand side.
(
    same_vectors,
    different_vectors,
    synonym_vectors,
    homonym_vectors,
    sports_vector,
    politics_vector,
    food_vector,
    science_vector,
    finance_vector,
    ss_vectors,
) = [
    vectorize(sentences)
    for sentences in (
        same,
        different,
        synonyms,
        homonyms,
        sports_search,
        politics_search,
        food_search,
        science_search,
        finance_search,
        search_sentences,
    )
]

ease define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for  more details.


In [66]:
# Cosine similarity between entries 8 and 9 of the synonym vectors.
left_vec, right_vec = synonym_vectors[8], synonym_vectors[9]
sim = cosine_similarity(left_vec, right_vec)
print(sim)

0.47814980149269104


## 3. Semantic Search

In [52]:
# Choose your search term
search_term = finance_vector

# Similarity of every search-sentence vector to the search term,
# rounded to two decimals and kept in the same order as `ss_vectors`.
scores = [
    round(cosine_similarity(candidate, search_term), 2)
    for candidate in ss_vectors
]

In [53]:
# Pair each score with its sentence and category; rows are [score, sentence, category].
results = [
    [value, text, label]
    for value, text, label in zip(scores, search_sentences, categories)
]

# Highest score first. Rows are compared as whole lists, so rows with equal
# scores fall back to comparing the sentence text (also in descending order).
results.sort(reverse=True)

# Print score, category, sentence for each row, followed by a blank line.
for value, text, label in results:
    print(value, label, text, end="\n\n")

0.36 FINANCE Dow futures up 200 points in overnight trading after the index briefly erases 2020 losses

0.25 FINANCE Wednesday’s gains put the S&P 500 up more than 58% since hitting an intraday low on March 23.

0.09 FOOD Is there anything better than a fresh batch of soft chocolate chip cookies?

0.09 POLITICS Democrats are willing to resume negotiations once Republicans start to take this process seriously.

0.07 POLITICS This is election is a choice between President Trump’s strong stance with law and order and Joe Biden’s acquiescence to the anti-police left and siding with rioters.

-0.02 SPORTS The Vikings defense is already one of the best in the NFL and won’t ask much of Gladney.

-0.03 FOOD Made with fresh peaches, sugar, and a topping that bakes like slightly underbaked cookie dough, with crunchy sugar broiled on top.

-0.03 SPORTS And just like we saw in both games against FC Dallas, Nashville was sharp in the defensive third and in midfield.

-0.04 SCIENCE Scientists from 1