In [1]:
!pip install -U sentence-transformers



In [2]:
from sentence_transformers import SentenceTransformer
# Two toy sentences used to check that the encoder runs end to end.
sentences = [
    "This is an example sentence",
    "Each sentence is converted",
]

# Load the pretrained checkpoint by name; `model` is reused by every later cell.
model_name = 'sentence-transformers/distiluse-base-multilingual-cased-v1'
model = SentenceTransformer(model_name)

# One embedding vector per input sentence.
embeddings = model.encode(sentences)

In [3]:
from numpy import dot
from numpy.linalg import norm
from random import randrange

In [4]:
def cos_sim(A, B):
    """Return the cosine similarity of two vectors.

    Computes ``dot(A, B) / (norm(A) * norm(B))``.

    Args:
        A: First vector (any input accepted by ``numpy.dot`` and
            ``numpy.linalg.norm``).
        B: Second vector, with the same length as ``A``.

    Returns:
        The cosine of the angle between ``A`` and ``B``. When either input
        has zero norm the angle is undefined; ``0.0`` is returned in that
        case instead of dividing by zero (which would produce ``nan`` and a
        RuntimeWarning).
    """
    denom = norm(A) * norm(B)
    if denom == 0:
        # At least one vector has zero length: avoid the 0/0 division.
        return 0.0
    return dot(A, B) / denom

# Original Korean sentences, given here in English; the strings encoded below
# are their translations:
# 1. I'd like to have a glass of soju while watching the cool waves on a night by the blue sea.
# 2. Under the hot sun in the muggy summer, the taste of gukbap (rice stirred into hot broth) is art.
# 3. Today it is mostly windy and cloudy, so it feels cool, as if rain will come to cool off the ground heat.
sea_sentence = "I'd like to have a glass of soju with the cool waves on the night of the blue sea."
filler_sentence = "Each sentence is converted"
soup_sentence = "The taste of rice soup rolled with hot soup in hot summer is art."
weather_sentence = "It's mostly windy and cloudy today, so it feels like it's going to rain to cool down the geothermal heat."

sentences = [sea_sentence, filler_sentence, soup_sentence, weather_sentence]
embeddings = model.encode(sentences)
embeddings


import numpy as np

# Each entry: printed label, then the indices of the two sentences compared.
# The labels state the expected outcome, not a computed threshold.
labelled_pairs = (
    ('Low similarity:', 0, 1),
    ('Low similarity:', 1, 2),
    ('High similarity:', 2, 0),
)
for label, left, right in labelled_pairs:
    print(label, cos_sim(embeddings[left], embeddings[right]))

Low similarity: -0.048153147
Low similarity: 0.018100405
High similarity: 0.19365905


In [5]:
# Single-word inputs: encode two scent-related terms.
sentences = ["Jasmin", "Floral"]

embeddings = model.encode(sentences)
# Last expression of the cell: display the raw embedding matrix.
embeddings

array([[ 0.03877727,  0.00177483, -0.02318494, ..., -0.03856641,
         0.01684213, -0.06344511],
       [ 0.07865988, -0.01627447, -0.09001143, ...,  0.02253387,
         0.04392593, -0.02609395]], dtype=float32)

In [6]:
# "Jasmin" vs "Floral" is the related pair, so the expected outcome is a high
# score. The label now matches the jasmine/floral comparison in the
# three-word cell, which prints it as 'High similarity:'.
print('High similarity:', cos_sim(embeddings[0], embeddings[1]))

Low similarity: 0.5378745


In [7]:
# Three single-word probes; the printed labels state the expected outcome
# for each pair.
sentences = ["jasmine", "floral", "ocean"]
embeddings = model.encode(sentences)

# One embedding row per word, in input order.
jasmine_vec, floral_vec, ocean_vec = embeddings

print('High similarity:', cos_sim(jasmine_vec, floral_vec))
print('Low similarity:', cos_sim(floral_vec, ocean_vec))
print('Low similarity:', cos_sim(ocean_vec, jasmine_vec))

High similarity: 0.52600086
Low similarity: 0.39563367
Low similarity: 0.33476874


In [8]:
import requests
from os import path
import pandas as pd

# Source: local cache path of the dataset CSV.
data_path = './dataset_210626_215600.csv'

# Download the file only when it is not cached yet; later runs reuse the copy.
if not path.exists(data_path):
    url = 'https://kyuuuw-nlp-dataset.s3.ap-northeast-2.amazonaws.com/fragrantica/dataset_210626_215600.csv'
    # The timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, allow_redirects=True, timeout=60)
    # Fail loudly on an HTTP error instead of caching an error page as the CSV
    # (which every later run would then silently reuse).
    r.raise_for_status()
    # Write the raw bytes inside a context manager: the handle is closed
    # deterministically, and there is no decode/re-encode round trip or
    # platform newline translation that could alter the file contents.
    with open(data_path, 'wb') as f:
        f.write(r.content)

data = pd.read_csv(data_path)



In [9]:
SIZE = 10

# Query sentence: every sampled review is scored against this description.
crit_str = 'A strong sweet and mysterious scent. Smells luxurious.'
crit_vec = model.encode(crit_str)

print('Criteria sentence is')
print(crit_str + '\n')

# Score a random subset of 1000 reviews against the criteria sentence.
# NOTE(review): the sample is unseeded, so a different subset is scored on
# every run; pass `random_state=` to `sample` if reproducibility matters.
sampled_reviews = data.sample(1000)['review']

# Encode the whole subset in one call (one vector per review) instead of one
# model call per row. The loop variable is no longer named `str`, which used
# to shadow the builtin for every later cell.
review_vecs = model.encode(sampled_reviews.tolist())

# Assign by index alignment: sampled rows get their score, all other rows get
# NaN. The column is rebuilt from scratch, so re-running the cell never mixes
# in stale scores from a previous run or a previous criteria sentence.
data['similarity'] = pd.Series(
    [cos_sim(crit_vec, review_vec) for review_vec in review_vecs],
    index=sampled_reviews.index,
)

data

Criteria sentence is
A strong sweet and mysterious scent. Smells luxurious.



Unnamed: 0.1,Unnamed: 0,gender,name,accords,review,tokenized,only_english,longer_than_2_A,stopwords_removed,lemmatizated,label,similarity
0,0,female,Alien Mugler for women,"['white floral', 'amber', 'woody']","Got a sample of this today, and my 9 year old ...","['got', 'a', 'sample', 'of', 'this', 'today', ...","['got', 'a', 'sample', 'of', 'this', 'today', ...","['got', 'sample', 'this', 'today', 'and', 'yea...","['got', 'sample', 'today', 'year', 'old', 'dau...","['get', 'sample', 'today', 'year', 'old', 'dau...",2.0,
1,1,female,Alien Mugler for women,"['white floral', 'amber', 'woody']",First impressions. Test on blotter.\r\nI have ...,"['first', 'impressions.', 'test', 'on', 'blott...","['first', 'impressions', 'test', 'on', 'blotte...","['first', 'impressions', 'test', 'blotter', 'h...","['first', 'impressions', 'test', 'blotter', 'h...","['first', 'impression', 'test', 'blotter', 'he...",2.0,
2,2,female,Alien Mugler for women,"['white floral', 'amber', 'woody']",This perfume reminds me of my best friend. Act...,"['this', 'perfume', 'reminds', 'me', 'of', 'my...","['this', 'perfume', 'reminds', 'me', 'of', 'my...","['this', 'perfume', 'reminds', 'best', 'friend...","['perfume', 'reminds', 'best', 'friend', 'actu...","['perfume', 'reminds', 'best', 'friend', 'actu...",2.0,
3,3,female,Alien Mugler for women,"['white floral', 'amber', 'woody']",Imagine tripping over your own feet and fallin...,"['imagine', 'tripping', 'over', 'your', 'own',...","['imagine', 'tripping', 'over', 'your', 'own',...","['imagine', 'tripping', 'over', 'your', 'own',...","['imagine', 'tripping', 'feet', 'falling', 'fa...","['imagine', 'trip', 'foot', 'fall', 'face', 'f...",2.0,
4,4,female,Alien Mugler for women,"['white floral', 'amber', 'woody']",Gorgeous Gorgeous Blend ..\r\nLove the scent.....,"['gorgeous', 'gorgeous', 'blend', '..', 'love'...","['gorgeous', 'gorgeous', 'blend', '', 'love', ...","['gorgeous', 'gorgeous', 'blend', 'love', 'the...","['gorgeous', 'gorgeous', 'blend', 'love', 'sce...","['gorgeous', 'gorgeous', 'blend', 'love', 'sce...",2.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
74774,75178,male,Jazz Club Maison Martin Margiela for men,"['tobacco', 'rum', 'sweet', 'vanilla', 'woody'...",I've been wanting to try this on for a while a...,"['i', ""'ve"", 'been', 'wanting', 'to', 'try', '...","['i', ""'ve"", 'been', 'wanting', 'to', 'try', '...","[""'ve"", 'been', 'wanting', 'try', 'this', 'for...","[""'ve"", 'wanting', 'try', 'since', 'finally', ...","[""'ve"", 'want', 'try', 'since', 'finally', 'ge...",1.0,
74775,75179,male,Jazz Club Maison Martin Margiela for men,"['tobacco', 'rum', 'sweet', 'vanilla', 'woody'...",Goes on sweet. Sweet and boozy with too much d...,"['goes', 'on', 'sweet.', 'sweet', 'and', 'booz...","['goes', 'on', 'sweet', 'sweet', 'and', 'boozy...","['goes', 'sweet', 'sweet', 'and', 'boozy', 'wi...","['goes', 'sweet', 'sweet', 'boozy', 'much', 'd...","['sweet', 'sweet', 'boozy', 'much', 'drink', '...",1.0,
74776,75180,male,Jazz Club Maison Martin Margiela for men,"['tobacco', 'rum', 'sweet', 'vanilla', 'woody'...",Yesterday I walked in to a store looking for a...,"['yesterday', 'i', 'walked', 'in', 'to', 'a', ...","['yesterday', 'i', 'walked', 'in', 'to', 'a', ...","['yesterday', 'walked', 'store', 'looking', 'f...","['yesterday', 'walked', 'store', 'looking', 'f...","['yesterday', 'walk', 'store', 'look', 'fragra...",1.0,
74777,75181,male,Jazz Club Maison Martin Margiela for men,"['tobacco', 'rum', 'sweet', 'vanilla', 'woody'...",Tried this today and now looking at notes I c...,"['tried', 'this', 'today', 'and', 'now', 'look...","['tried', 'this', 'today', 'and', 'now', 'look...","['tried', 'this', 'today', 'and', 'now', 'look...","['tried', 'today', 'looking', 'notes', 'unders...","['try', 'today', 'look', 'note', 'understand',...",1.0,


In [10]:
# Keep only the rows that have no missing value in any column; rows that were
# never given a `similarity` score are dropped by this as well.
test_data = data.dropna(axis=0, how='any')
test_data

Unnamed: 0.1,Unnamed: 0,gender,name,accords,review,tokenized,only_english,longer_than_2_A,stopwords_removed,lemmatizated,label,similarity
9,9,female,Alien Mugler for women,"['white floral', 'amber', 'woody']","I did vote for ""love"", but with love, I meant ...","['i', 'did', 'vote', 'for', '``', 'love', ""''""...","['i', 'did', 'vote', 'for', '``', 'love', ""''""...","['did', 'vote', 'for', 'love', 'but', 'with', ...","['vote', 'love', 'love', 'meant', 'older', 've...","['vote', 'love', 'love', 'meant', 'old', 'vers...",2.0,0.310645
50,50,female,Alien Mugler for women,"['white floral', 'amber', 'woody']","Sexy and very confident, a little dirty, not v...","['sexy', 'and', 'very', 'confident', ',', 'a',...","['sexy', 'and', 'very', 'confident', '', 'a', ...","['sexy', 'and', 'very', 'confident', 'little',...","['sexy', 'confident', 'little', 'dirty', 'refi...","['sexy', 'confident', 'little', 'dirty', 'refi...",2.0,0.472235
56,56,female,Alien Mugler for women,"['white floral', 'amber', 'woody']","A purple witch in a wild planet, who's trudgin...","['a', 'purple', 'witch', 'in', 'a', 'wild', 'p...","['a', 'purple', 'witch', 'in', 'a', 'wild', 'p...","['purple', 'witch', 'wild', 'planet', 'who', '...","['purple', 'witch', 'wild', 'planet', 'trudgin...","['purple', 'witch', 'wild', 'planet', 'trudge'...",2.0,0.173919
121,121,female,Alien Mugler for women,"['white floral', 'amber', 'woody']",A friend of mine had many old perfumes that sh...,"['a', 'friend', 'of', 'mine', 'had', 'many', '...","['a', 'friend', 'of', 'mine', 'had', 'many', '...","['friend', 'mine', 'had', 'many', 'old', 'perf...","['friend', 'mine', 'many', 'old', 'wanted', 'g...","['friend', 'mine', 'many', 'old', 'want', 'get...",2.0,0.307428
155,155,female,Alien Mugler for women,"['white floral', 'amber', 'woody']",Update: Took 0.5 mL out of this bottle into a ...,"['update', ':', 'took', '0.5', 'ml', 'out', 'o...","['update', '', 'took', '', 'ml', 'out', 'of', ...","['update', 'took', 'out', 'this', 'bottle', 'i...","['update', 'took', 'bottle', 'fresh', 'decante...","['update', 'take', 'bottle', 'fresh', 'decante...",2.0,0.049068
...,...,...,...,...,...,...,...,...,...,...,...,...
74549,74951,male,Jazz Club Maison Martin Margiela for men,"['tobacco', 'rum', 'sweet', 'vanilla', 'woody'...",This smells like a toned down and more natural...,"['this', 'smells', 'like', 'a', 'toned', 'down...","['this', 'smells', 'like', 'a', 'toned', 'down...","['this', 'smells', 'like', 'toned', 'down', 'a...","['smells', 'like', 'toned', 'natural', 'versio...","['smell', 'like', 'toned', 'natural', 'version...",1.0,0.439577
74557,74959,male,Jazz Club Maison Martin Margiela for men,"['tobacco', 'rum', 'sweet', 'vanilla', 'woody'...",Shocked!!!!!! I'm really shocked!! This fragra...,"['shocked', '!', '!', '!', '!', '!', '!', 'i',...","['shocked', '', '', '', '', '', '', 'i', ""'m"",...","['shocked', 'really', 'shocked', 'this', 'frag...","['shocked', 'really', 'shocked', 'fragrance', ...","['shock', 'really', 'shock', 'fragrance', 'get...",1.0,0.397381
74637,75040,male,Jazz Club Maison Martin Margiela for men,"['tobacco', 'rum', 'sweet', 'vanilla', 'woody'...","I Love this Fragrance, but in my humble opinio...","['i', 'love', 'this', 'fragrance', ',', 'but',...","['i', 'love', 'this', 'fragrance', '', 'but', ...","['love', 'this', 'fragrance', 'but', 'humble',...","['love', 'fragrance', 'humble', 'opinion', 're...","['love', 'fragrance', 'humble', 'opinion', 're...",1.0,0.291171
74684,75088,male,Jazz Club Maison Martin Margiela for men,"['tobacco', 'rum', 'sweet', 'vanilla', 'woody'...",At last I found it so I could smell it. Oh my ...,"['at', 'last', 'i', 'found', 'it', 'so', 'i', ...","['at', 'last', 'i', 'found', 'it', 'so', 'i', ...","['last', 'found', 'could', 'smell', 'what', 'd...","['last', 'found', 'could', 'smell', 'disappoin...","['last', 'found', 'could', 'smell', 'disappoin...",1.0,0.420118


In [11]:
print('Criteria')
print(crit_str + '\n------------------\n')

# Rank the scored reviews from most to least similar to the criteria sentence.
# The frame is deliberately not bound to the name `sorted`: that would shadow
# the builtin and break any later `sorted(...)` call in the same kernel.
ranked = test_data.sort_values(by=['similarity'], ascending=False)

# Print the ten best matches: the review text followed by the row's `name`.
for i, row in ranked.iloc[0:10].iterrows():
    print(row['review'] + '\n' + row['name'] + '\n---------------------\n')

Criteria
A strong sweet and mysterious scent. Smells luxurious.
------------------

Smells like a juicy couture flanker. Sweet and creamy. Fades fast.
Cloud Ariana Grande for women
---------------------

It's a beautiful fragrance, but it was just a bit too sweet for me. I am a woman, and can tell you this is a unisex fragrance.
Jazz Club Maison Martin Margiela for men
---------------------

one of my beloved perfumes.
always makes me fresh. one of the best cool fragrances to me.
Versace Pour Homme Versace for men
---------------------

A perfect and absolutely oriental perfum for those who love deep and long lasting smells.
Chergui Serge Lutens for women and men
---------------------

A wonderful classy and very refined fragrance that combines lovely scent, amazing projection, great sillage and unbelievable performance overall... A masterpiece 10/10.
Dior Homme Intense 2011 Christian Dior for men
---------------------

Uno de los mejores aromas.
Terre d'Hermes Hermès for men
--------

In [12]:
# Encode the same string twice, then compare the raw dot product with the
# norm-normalised cosine similarity of the two (identical) embeddings.
probe = 'qwer'
a = model.encode(probe)
b = model.encode(probe)

raw_dot = np.dot(a, b)
cosine = cos_sim(a, b)
print(raw_dot)
print(cosine)

0.9074562
1.0
