# NodeMapper tutorial

`NodeMapper` returns node ids based on the similarity of text embeddings.

Start by importing `NodeMapper`

In [None]:
# from hugger import *
from hugger.mapper import NodeMapper

Demo data for the tutorial

In [4]:
# An example dataframe
import pandas as pd

# Each tuple pairs a node id with its text; zip() transposes the pairs
# into the two parallel columns that the dataframe is built from.
ids, texts = (
    list(column)
    for column in zip(
        ("id1", "happy"),
        ("id2", "doughnut"),
        ("id3", "green"),
        ("id4", "sad"),
        ("id5", "foundation"),
    )
)
# Two string columns: "id" holds the node ids, "text" holds the words
df = pd.DataFrame(data={"id": ids, "text": texts})

Initializing `NodeMapper` will:
- load the given Hugging Face model
- generate embeddings for the text column
- create a dictionary mapping node ids to text embeddings

In [5]:
# Build the mapper from the demo dataframe; per the log printed below, this
# loads the tokenizer and model and embeds all 5 rows up front
mapper = NodeMapper(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    df=df,
    id_col="id",
    text_col="text",
)

Loading tokenizer for model: sentence-transformers/all-MiniLM-L6-v2
Loading model: sentence-transformers/all-MiniLM-L6-v2
Generating embeddings for 5 nodes ...


Like `HuggingMapper`, `NodeMapper` can also simply generate embeddings for a given text or list of texts

In [6]:
# Embed a single string; with this model the result has shape (1, 384),
# as the printed output below shows
embedding = mapper.embed_text("Good morning")
print(embedding.shape)

# Embed a list of strings in one call; the result has one row per input
# string (shape (3, 384) for the three texts here)
embeddings = mapper.embed_text(["Hello world", "Good evening", "Lunch time!"])
print(embeddings.shape)

torch.Size([1, 384])
torch.Size([3, 384])


But the main purpose of `NodeMapper` is to find similar texts and their corresponding ids

In [10]:
# Retrieve the nodes whose text is similar to the query, keeping those that
# score above `threshold`. The output below is keyed by node id, carries each
# node's text and score, and is listed from highest to lowest score.
mapper.get_similar("concrete", threshold=0)  # all five demo scores are positive, so 0 keeps every node

{'id5': {'text': 'foundation', 'score': 0.5212345123291016},
 'id2': {'text': 'doughnut', 'score': 0.31617769598960876},
 'id3': {'text': 'green', 'score': 0.30093249678611755},
 'id4': {'text': 'sad', 'score': 0.1695406585931778},
 'id1': {'text': 'happy', 'score': 0.1568318009376526}}

In [31]:
# Best single match per query; a match is returned as an (id, details) tuple
# and only candidates scoring above the 0.5 threshold qualify
for query in ("joyful", "concrete", "donut"):
    print(mapper.get_match(query, threshold=0.5), "\n")

('id1', {'text': 'happy', 'score': 0.6841421127319336}) 

('id5', {'text': 'foundation', 'score': 0.5212345123291016}) 

('id2', {'text': 'doughnut', 'score': 0.7036362886428833}) 

