In [None]:
# LASER
!python -m laserembeddings download-models

# SENTENCE-BERT
!pip install transformers # https://github.com/huggingface/transformers
!pip install -U sentence-transformers # https://github.com/UKPLab/sentence-transformers

# UNIVERSAL SENTENCE ENCODER
!pip install tensorflow
!pip install tensorflow_hub

# FASTTEXT
!pip install fasttext

In [1]:
from util import tfidf_vectorizer, fasttext_vectorizer, laser_embeddings, bert_embeddings, use_embeddings, cosine_similarity

In [2]:
# Choose the model to use:
# tfidf_vectorizer, fasttext_vectorizer, laser_embeddings, bert_embeddings, use_embeddings
# `vectorize` is the callable applied to the sentence lists further down;
# rebind it to one of the names above to switch models.
vectorize = bert_embeddings

In [35]:
##### Same meaning vs. shared vocabulary
# `same`: four sentences that express the same meaning with different structure.
same = [
    "She angered me with her inappropriate comments, rumor-spreading, and disrespectfulness at the formal dinner table.",
    "She made me angry when she was rude at dinner.",
    "Her impoliteness, gossiping, and general lack of respect at dinner infuriated me.",
    "I was mad when she started spreading rumors, making inappropriate comments, and disrespecting other guests at our dinner.",
]

# `different`: four sentences that reuse words from the ones above but mean
# something else.
different = [
    "The company requires a formal dress code during work hours",
    "President Donald Trump called Joe Biden's running mate Kamala Harris 'angry' and 'mad'.",
    "The influenza is spreading from table surfaces in the restaurants.",
    "The Coffee Test is one of the tests for human-level Artificial General Intelligence.",
]


##### Synonyms/Paraphrases (different wording, same meaning)
# Consecutive entries form a pair: items 0/1, 2/3, 4/5, ...
synonyms = [
    "The need for software developers has gone up by 50% in 5 years",
    "The demand for programmers has doubled during the last five years",

    "Personal computers entered the market in 1977",
    "PCs came into shops in the late seventies",

    "Symptoms of influenza include fever and nasal congestion.",
    "A stuffy nose and elevated temperature are signs you may have the flu.",

    "He has tons of stuff to throw away.",
    "He needs to get rid of a lot of junk.",

    "Her life spanned years of incredible change for women as they gained more rights than ever before.",
    "She lived through the exciting era of women's liberation.",
]


##### Homonyms (same spelling/words, but different meaning)
# Consecutive entries form a pair that shares a word used in two senses.
# NOTE(review): "I want to read a book a room" looks like a typo; it is kept
# verbatim so the experiment data stays unchanged -- confirm the intended text.
homonyms = [
    "She lies on the couch",
    "She lies to the coach",

    "Train muscles twice a week",
    "Train departs twice a week",

    "I want to book a room",
    "I want to read a book a room",

    "These plants are huge",
    "Tesla plants are huge",

    "I saw a man",
    "A man has a saw",
]


##### Semantic search

# Query sentences, one per topic (each wrapped in a single-element list):
sports_search = [
    "Tarasenko has been one of the NHL's leading scorers during his nine-year career, with 214 goals in 507 games.",
]
politics_search = [
    "A political system is a framework which defines acceptable political methods within a society.",
]
food_search = [
    "This meal has summer dinner written all over it.",
]
science_search = [
    "Science is based on research, which is commonly conducted in academic and research institutions as well as in government agencies and companies",
]
finance_search = [
    "Stocks mixed after Powell's inflation plan",
]

# Candidate sentences the queries are matched against (two per topic):
search_sentences = [
    # sports
    "The Vikings defense is already one of the best in the NFL and won’t ask much of Gladney.",
    "And just like we saw in both games against FC Dallas, Nashville was sharp in the defensive third and in midfield.",
    # politics
    "This is election is a choice between President Trump’s strong stance with law and order and Joe Biden’s acquiescence to the anti-police left and siding with rioters.",
    "Democrats are willing to resume negotiations once Republicans start to take this process seriously.",
    # food
    "Made with fresh peaches, sugar, and a topping that bakes like slightly underbaked cookie dough, with crunchy sugar broiled on top.",
    "Is there anything better than a fresh batch of soft chocolate chip cookies?",
    # science
    "Scientists from 17 UK research centres are attempting to answer questions such as how long immunity lasts and why disease severity varies so much.",
    "Decoding goals and movement plans is hard when you don't understand the neural code in which those things are communicated.",
    # finance
    "Dow futures up 200 points in overnight trading after the index briefly erases 2020 losses",
    "Wednesday’s gains put the S&P 500 up more than 58% since hitting an intraday low on March 23.",
]

# Topic label for each entry of `search_sentences`, in the same order.
categories = [
    "SPORTS", "SPORTS",
    "POLITICS", "POLITICS",
    "FOOD", "FOOD",
    "SCIENCE", "SCIENCE",
    "FINANCE", "FINANCE",
]

In [26]:
# Run the selected model over every sentence list, in the same order as before:
# comparison sets first, then the single-sentence queries, then the candidates.
same_vectors, different_vectors, synonym_vectors, homonym_vectors = map(
    vectorize, (same, different, synonyms, homonyms)
)
sports_vector, politics_vector, food_vector, science_vector, finance_vector = map(
    vectorize,
    (sports_search, politics_search, food_search, science_search, finance_search),
)
ss_vectors = vectorize(search_sentences)

In [27]:
# Cosine similarity between the first vector of each set (presumably the
# embeddings of same[0] and different[0] -- confirm vectorize keeps input order).
sim = cosine_similarity(same_vectors[0], different_vectors[0])
print(sim)

0.222197487950325


In [30]:
# Semantic search
# Pick the query vector to rank the candidate sentences against.
search_term = sports_vector

# One cosine similarity per candidate vector, rounded to two decimals.
scores = [
    round(cosine_similarity(candidate, search_term), 2)
    for candidate in ss_vectors
]

In [39]:
# Build [score, sentence, category] rows and rank them from highest to lowest.
# Rows compare as lists, so equal scores are ordered by sentence text.
results = sorted(
    map(list, zip(scores, search_sentences, categories)),
    reverse=True,
)

# Print each hit as "score category sentence", followed by a blank line.
for score, sentence, category in results:
    print(score, category, sentence)
    print()

0.35 FINANCE Wednesday’s gains put the S&P 500 up more than 58% since hitting an intraday low on March 23.

0.32 FINANCE Dow futures up 200 points in overnight trading after the index briefly erases 2020 losses

0.24 SPORTS The Vikings defense is already one of the best in the NFL and won’t ask much of Gladney.

0.14 SCIENCE Scientists from 17 UK research centres are attempting to answer questions such as how long immunity lasts and why disease severity varies so much.

0.14 POLITICS Democrats are willing to resume negotiations once Republicans start to take this process seriously.

0.13 SCIENCE Decoding goals and movement plans is hard when you don't understand the neural code in which those things are communicated.

0.1 FOOD Is there anything better than a fresh batch of soft chocolate chip cookies?

0.07 SPORTS And just like we saw in both games against FC Dallas, Nashville was sharp in the defensive third and in midfield.

0.04 FOOD Made with fresh peaches, sugar, and a topping tha