In [None]:
# LASER
!python -m laserembeddings download-models

# SENTENCE-BERT
!pip install transformers # https://github.com/huggingface/transformers
!pip install -U sentence-transformers # https://github.com/UKPLab/sentence-transformers

# UNIVERSAL SENTENCE ENCODER
!pip install tensorflow
!pip install tensorflow_hub

# FASTTEXT
!pip install fasttext

In [8]:
from utils import tfidf_vectorizer, fasttext_vectorizer, laser_embeddings, bert_embeddings, use_embeddings, cosine_similarity

from arrays import same, different, synonyms, homonyms, sports_search, politics_search, food_search, science_search, finance_search, search_sentences, categories

## 1. Choose which model to use

Available models:
* tfidf_vectorizer
* fasttext_vectorizer
* laser_embeddings
* bert_embeddings
* use_embeddings

In [9]:
# Choose the model to use. `vectorize` is the callable that the following
# cells apply to every sentence set and search input.
# Options (all imported from utils above):
# tfidf_vectorizer, fasttext_vectorizer, laser_embeddings, bert_embeddings, use_embeddings
vectorize = tfidf_vectorizer

## 2. Create vectors

In [10]:
# Embed the four comparison sets with the selected model.
same_vectors, different_vectors, synonym_vectors, homonym_vectors = (
    vectorize(texts) for texts in (same, different, synonyms, homonyms)
)

# Embed the per-topic search inputs.
(
    sports_vector,
    politics_vector,
    food_vector,
    science_vector,
    finance_vector,
) = (
    vectorize(query)
    for query in (
        sports_search,
        politics_search,
        food_search,
        science_search,
        finance_search,
    )
)

# Embed the candidate sentences that a search vector is scored against.
ss_vectors = vectorize(search_sentences)

In [11]:
# Quick check: similarity between the first two vectors of the synonyms set.
first_synonym, second_synonym = synonym_vectors[0], synonym_vectors[1]
sim = cosine_similarity(first_synonym, second_synonym)
print(sim)

0.2045517424964144


## 3. Semantic Search

In [12]:
# Choose your search term
search_term = science_vector

# One similarity score (rounded to two decimals) per candidate sentence
# vector, in the same order as ss_vectors.
scores = [
    round(cosine_similarity(sentence_vector, search_term), 2)
    for sentence_vector in ss_vectors
]

In [13]:
# Pair every score with its sentence and category, then order the rows from
# highest to lowest (rows compare element by element, score first).
results = sorted(
    ([score, sentence, category]
     for score, sentence, category in zip(scores, search_sentences, categories)),
    reverse=True,
)

# Print score, category and sentence for each row, followed by a blank line.
for score, sentence, category in results:
    print(score, category, sentence, end="\n\n")

0.19 SCIENCE Scientists from 17 UK research centres are attempting to answer questions such as how long immunity lasts and why disease severity varies so much.

0.11 SCIENCE Decoding goals and movement plans is hard when you don't understand the neural code in which those things are communicated.

0.09 POLITICS This is election is a choice between President Trump’s strong stance with law and order and Joe Biden’s acquiescence to the anti-police left and siding with rioters.

0.08 SPORTS And just like we saw in both games against FC Dallas, Nashville was sharp in the defensive third and in midfield.

0.06 SPORTS The Vikings defense is already one of the best in the NFL and won’t ask much of Gladney.

0.03 FOOD Made with fresh peaches, sugar, and a topping that bakes like slightly underbaked cookie dough, with crunchy sugar broiled on top.

0.02 FOOD Is there anything better than a fresh batch of soft chocolate chip cookies?

0.01 FINANCE Wednesday’s gains put the S&P 500 up more than 58