In [None]:
# LASER
!python -m laserembeddings download-models

# SENTENCE-BERT
!pip install transformers # https://github.com/huggingface/transformers
!pip install -U sentence-transformers # https://github.com/UKPLab/sentence-transformers

# UNIVERSAL SENTENCE ENCODER
!pip install tensorflow
!pip install tensorflow_hub

# FASTTEXT
!pip install fasttext

In [1]:
from models import FastText, LaserModel, SentenceBert, Tfidf, Use

from utils import cosine_similarity

from arrays import same, different, synonyms, homonyms, sports_search, politics_search, food_search, science_search, finance_search, search_sentences, categories

## 1. Choose which model to use

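Available models (instantiate the class shown, as imported from `models` above):
* tfidf_vectorizer — `Tfidf()`
* fasttext_vectorizer — `FastText()`
* laser_embeddings — `LaserModel()`
* bert_embeddings — `SentenceBert()`
* use_embeddings — `Use()`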

In [2]:
# Choose the model to use by instantiating one of the wrapper classes imported
# from `models` at the top of this notebook:
# Tfidf, FastText, LaserModel, SentenceBert, Use
# The cells below call model.get_sentence_vec(...) on whichever one is chosen.
# NOTE(review): presumably every wrapper exposes get_sentence_vec — confirm in models.py.
model = LaserModel()

## 2. Create vectors

In [3]:
# Embed every sentence collection with the selected model.
# Each name in the left-hand tuple receives model.get_sentence_vec(...) of the
# input at the same position in the right-hand tuple; calls run in that order.
(
    same_vectors,
    different_vectors,
    synonym_vectors,
    homonym_vectors,
    sports_vector,
    politics_vector,
    food_vector,
    science_vector,
    finance_vector,
    ss_vectors,
) = map(
    model.get_sentence_vec,
    (
        same,
        different,
        synonyms,
        homonyms,
        sports_search,
        politics_search,
        food_search,
        science_search,
        finance_search,
        search_sentences,
    ),
)

In [4]:
# Sanity check: cosine similarity between the first two synonym vectors.
synonym_pair = (synonym_vectors[0], synonym_vectors[1])
sim = cosine_similarity(*synonym_pair)
print(sim)

0.7871929407119751


## 3. Semantic Search

In [5]:
# Choose your search term
search_term = science_vector

# Cosine similarity of each search-sentence vector against the chosen search
# term, rounded to two decimal places, in the same order as ss_vectors.
scores = [
    round(cosine_similarity(candidate_vec, search_term), 2)
    for candidate_vec in ss_vectors
]

In [6]:
# Pair each score with its sentence and category as [score, sentence, category]
# and order the rows from highest to lowest. Whole rows are compared, so rows
# with equal scores are ordered by sentence (then category), also descending.
results = sorted(
    map(list, zip(scores, search_sentences, categories)),
    reverse=True,
)

# Print "score category sentence" for each row, followed by a blank line.
for row_score, row_sentence, row_category in results:
    print(row_score, row_category, row_sentence, end="\n\n")

0.62 POLITICS This is election is a choice between President Trump’s strong stance with law and order and Joe Biden’s acquiescence to the anti-police left and siding with rioters.

0.61 SCIENCE Scientists from 17 UK research centres are attempting to answer questions such as how long immunity lasts and why disease severity varies so much.

0.59 FOOD Made with fresh peaches, sugar, and a topping that bakes like slightly underbaked cookie dough, with crunchy sugar broiled on top.

0.59 SPORTS And just like we saw in both games against FC Dallas, Nashville was sharp in the defensive third and in midfield.

0.56 FINANCE Dow futures up 200 points in overnight trading after the index briefly erases 2020 losses

0.56 SCIENCE Decoding goals and movement plans is hard when you don't understand the neural code in which those things are communicated.

0.53 SPORTS The Vikings defense is already one of the best in the NFL and won’t ask much of Gladney.

0.53 POLITICS Democrats are willing to resume