In [9]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

In [4]:
# Load the 'all-MiniLM-L6-v2' sentence-embedding model once; the helper
# functions and demo cells further down all encode text through this object.
model = SentenceTransformer('all-MiniLM-L6-v2')

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [46]:
from typing import Union, Optional, TypedDict, List
class eventType(TypedDict):
    """Dictionary schema describing a single event record."""
    # Integer date, or None when absent. The sample data uses values such as
    # 1685476378 -- presumably Unix epoch seconds; TODO confirm.
    date: Optional[int]
    # Free-text organizer name; compared as text by get_similarity.
    organizer: str
    # Postal code; looked up through the pgeocode 'us' table in get_location.
    zip_code: str
    # Identifier used as the key of the score mapping built by get_scores.
    uuid: str
    # Free-text description; carries the largest weight in get_similarity.
    description: str
    # Free-text title; compared as text by get_similarity.
    title: str
    # 1-based index into the category list inside get_similarity
    # (1 = "Concert", 2 = "Play", 3 = "Movie Screening", 4 = 'Sports game', 5 = "Party").
    type: int
# Convenience alias for a list of event records.
eventList = List[eventType]

from collections import defaultdict

import pgeocode
# Shared pgeocode lookup object for US postal codes; get_location queries it
# to turn a zip code into place/county text.
nomi = pgeocode.Nominatim('us')
def get_location(zip_code: str) -> str:
    """Describe a postal code as ``"<place name>, <county name>"``.

    The lookup goes through the module-level ``nomi`` object. pgeocode does
    not raise for a postal code it does not know; it reports the missing
    fields as NaN (a float). Concatenating that with a string would fail with
    an opaque ``TypeError``, so only fields that are real text are kept.

    Args:
        zip_code: Postal code to look up. It is converted with ``str()``
            before the query, so integer codes are accepted too.

    Returns:
        The textual fields joined by ``", "``. When the lookup yields no
        textual field at all, the postal code itself (as a string) is
        returned so callers still receive comparable text.
    """
    record = nomi.query_postal_code(str(zip_code))
    candidates = (record['place_name'], record['county_name'])
    # Missing values come back as non-string placeholders (NaN); drop them.
    parts = [value for value in candidates if isinstance(value, str)]
    if not parts:
        return str(zip_code)
    return ', '.join(parts)

def compare(item1: str, item2: str):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are encoded with the module-level ``model`` and the single
    entry of the resulting similarity matrix is returned.
    """
    first_embedding = model.encode(item1, convert_to_tensor=True)
    second_embedding = model.encode(item2, convert_to_tensor=True)
    similarity_matrix = util.pytorch_cos_sim(first_embedding, second_embedding)
    # One text on each side, so the only score sits at row 0, column 0.
    return similarity_matrix[0][0]
def get_similarity(event1: eventType, event2: eventType):
    """Return a weighted similarity score between two events.

    Each field named in ``weights`` is turned into text for both events,
    compared with ``compare`` and the result is multiplied by the field's
    weight before being added to the running total:

    * ``type`` is a 1-based index that is mapped to its category name.
    * ``zip_code`` is resolved to a location description via ``get_location``.
    * every other field is compared as the raw stored text.

    Args:
        event1: First event record.
        event2: Second event record.

    Returns:
        The weighted sum of the per-field similarities, in whatever numeric
        type ``compare`` produces.
    """
    weights = {"title": 1,
               "organizer": 1,
               "zip_code": 1,
               "description": 2,
               "type": 1}

    event_types = ["Concert", "Play", "Movie Screening", 'Sports game', "Party"]

    similarity = 0
    for item, weight in weights.items():
        if item == "type":
            # Stored types are 1-based, the list is 0-based.
            # NOTE(review): a type of 0 would wrap around to the last entry.
            text1 = event_types[event1["type"] - 1]
            text2 = event_types[event2["type"] - 1]
        elif item == "zip_code":
            # Compare place/county names rather than raw digits. The lookup is
            # done once per event inside get_location; no extra query here.
            text1 = get_location(event1[item])
            text2 = get_location(event2[item])
        else:
            text1 = event1[item]
            text2 = event2[item]
        similarity += compare(text1, text2) * weight
    return similarity

def get_scores(history: eventList, available: eventList):
    """Score each available event against the events already attended.

    For every pairing of an available event with a history event, the
    similarity from ``get_similarity`` is added to the total stored under the
    available event's ``"uuid"``.

    Args:
        history: Events that were attended in the past.
        available: Candidate events to be scored.

    Returns:
        A ``defaultdict`` mapping uuid to the accumulated similarity; keys
        that were never scored read as 0.
    """
    totals = defaultdict(lambda: 0)
    for candidate in available:
        for attended in history:
            totals[candidate["uuid"]] += get_similarity(candidate, attended)
    return totals

In [48]:
# Sample attendance history used to drive the recommender.
attended_events = [
    {
        "date": 1685476378,
        "organizer": "Weed Wade",
        "zip_code": "90024",
        "uuid": "1234",
        "description": "The best concert ever",
        "title": "Westwood Concert",
        "type": 1,
    },
    {
        "date": 1685821978,
        "organizer": "Joe Bruin",
        "zip_code": "90066",
        "uuid": "3000",
        "description": "A massive party to celebrate the end of the midterm season.",
        "title": "UCLA Midterm Party",
        "type": 5,
    },
]

# Sample candidate events to be ranked against the attendance history.
available_events = [
    {
        "date": 1685562778,
        "organizer": "Sally Smith",
        "zip_code": "90210",
        "uuid": "5678",
        "description": "A fun-filled day of games and activities",
        "title": "Beverly Hills Family Fun Day",
        "type": 5,
    },
    {
        "date": 1685649178,
        "organizer": "John Doe",
        "zip_code": "90046",
        "uuid": "9101",
        "description": "A night of laughter and entertainment",
        "title": "Hollywood Comedy Night",
        "type": 3,
    },
    {
        "date": 1685735578,
        "organizer": "Jane Johnson",
        "zip_code": "90028",
        "uuid": "1121",
        "description": "A gathering of local artists and their work",
        "title": "Downtown Art Walk",
        "type": 2,
    },
    {
        "date": 1685821978,
        "organizer": "Mike Miller",
        "zip_code": "90066",
        "uuid": "3141",
        "description": "A charity event to raise funds for a local cause",
        "title": "Mar Vista Charity Gala",
        "type": 5,
    },
    {
        "date": 1685821978,
        "organizer": "Honduras Hernandez",
        "zip_code": "90066",
        "uuid": "5002",
        "description": "A music festival to ring in the new year.",
        "title": "New Year's Jazz Festival",
        "type": 1,
    },
]

# Score every available event against the attendance history; each value is
# the sum of that event's weighted similarities to all attended events.
get_scores(attended_events, available_events)

defaultdict(<function __main__.get_scores.<locals>.<lambda>()>,
            {'5678': tensor(4.8891),
             '9101': tensor(5.1819),
             '1121': tensor(4.4514),
             '3141': tensor(5.5860),
             '5002': tensor(6.5974)})

In [13]:
sentence1 = "Party with us!"
sentence2 = "Come drink beer with me"
# Encode each sentence on its own into an embedding tensor.
embedding1 = model.encode(sentence1, convert_to_tensor=True)
embedding2 = model.encode(sentence2, convert_to_tensor=True)
# Cosine similarity of the two embeddings; for one sentence on each side the
# result prints as a 1x1 tensor.
cosine_scores = util.pytorch_cos_sim(embedding1, embedding2)
print(cosine_scores)

tensor([[0.4335]])


In [8]:
# NOTE(review): this cell's execution count (8) is lower than that of the cell
# above (13) and its saved output differs from the value printed there, so the
# saved output is presumably stale -- re-run in order to confirm.
cosine_scores

tensor([[0.7577]])

In [None]:
# Two batches of sentences; every sentence of the first batch is scored
# against every sentence of the second.
sentences1 = [
    "I like Python because I can build AI applications",
    "The cat sits on the ground",
]
sentences2 = [
    "I like Python because I can do data analytics",
    "The cat walks on the sidewalk",
]

# Encode each batch in a single call.
embedding1 = model.encode(sentences1, convert_to_tensor=True)
embedding2 = model.encode(sentences2, convert_to_tensor=True)

# cosine_scores[i][j] pairs sentences1[i] with sentences2[j].
cosine_scores = util.pytorch_cos_sim(embedding1, embedding2)

for i, first in enumerate(sentences1):
    for j, second in enumerate(sentences2):
        print("Sentence 1:", first)
        print("Sentence 2:", second)
        print("Similarity Score:", cosine_scores[i][j].item())
        print()