In [None]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install faiss-gpu

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from transformers import AutoTokenizer, AutoModel

# Hub identifier of the checkpoint; the tokenizer and the encoder below are
# both loaded from this same name.
model_ckpt = "sentence-transformers/distilbert-base-nli-stsb-mean-tokens"
# Tokenizer used by get_embeddings() to turn raw text into model inputs.
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
# Encoder whose last hidden state is pooled into embeddings by cls_pooling().
model = AutoModel.from_pretrained(model_ckpt)

In [None]:
def cls_pooling(model_output):
    """Return the hidden state at sequence position 0 for every batch item.

    Args:
        model_output: Object exposing a ``last_hidden_state`` attribute that
            supports ``[:, 0]`` indexing.

    Returns:
        ``last_hidden_state`` sliced to position 0 along its second axis.

    NOTE(review): the checkpoint name used in this notebook ends in
    "mean-tokens", which suggests mean pooling; confirm that first-token
    pooling matches how the stored embeddings were produced.
    """
    token_states = model_output.last_hidden_state
    return token_states[:, 0]

In [None]:
import pickle

# Load the pickled dataset of passages and their embeddings.
# The file is opened in a `with` block so the handle is closed deterministically.
# SECURITY: pickle.load can execute arbitrary code; only load this file if it
# comes from a trusted source.
with open('Distillbert_PassageLevel_pkl', 'rb') as pkl_file:
    embeddings_dataset = pickle.load(pkl_file)
embeddings_dataset

Dataset({
    features: ['Title', 'Subheading', 'Paragraph', 'URL', 'text', 'embeddings'],
    num_rows: 457
})

In [None]:
def get_embeddings(text_list):
    """Embed a batch of texts with the notebook's tokenizer and model.

    Args:
        text_list: Text(s) passed straight to ``tokenizer``; the visible
            caller passes a list of strings.

    Returns:
        The result of ``cls_pooling`` applied to the model output: a torch
        tensor located on the model's device and detached from autograd.
    """
    # Local import keeps this cell self-contained; the notebook does not
    # import torch at the top level.
    import torch

    encoded_input = tokenizer(
        text_list, padding=True, truncation=True, return_tensors="pt"
    )
    # Send the inputs to wherever the model lives so the call also works when
    # the model has been moved to a GPU.
    encoded_input = {k: v.to(model.device) for k, v in encoded_input.items()}
    # Inference only: skip building the autograd graph to save memory/time.
    with torch.no_grad():
        model_output = model(**encoded_input)
    return cls_pooling(model_output)

In [None]:
# Candidate queries; only the last entry is embedded and used for the search.
question = [
    "Therapy for ADHD",
    "Not able to focus",
    "Child not sleeping properly",
    "bed wetting",
]
# Embed the final query as a one-element batch and convert it to a NumPy array.
question_embedding = get_embeddings(question[-1:]).detach().cpu().numpy()
#question_embedding

In [None]:
# Query the "embeddings" index for the 5 entries closest to the question vector.
scores, samples = embeddings_dataset.get_nearest_examples("embeddings", question_embedding, k=5)

In [None]:
import pandas as pd

# Collect the retrieved examples in a DataFrame, attach their scores, and order
# the rows from highest to lowest score.
# NOTE(review): if the index returns L2 distances, a lower score means a closer
# match and this ordering lists the weakest hit first; confirm the metric.
samples_df = (
    pd.DataFrame.from_dict(samples)
    .assign(scores=scores)
    .sort_values("scores", ascending=False)
)

In [None]:
# Print one report per retrieved passage: title, score, paragraph text and URL,
# followed by a 50-character rule and a blank line.
for _, row in samples_df.iterrows():
    print(
        f"TITLE: {row.Title}",
        f"SCORE: {row.scores}",
        f"COMMENT: {row.Paragraph}",
        f"URL: {row.URL}",
        "=" * 50,
        "",
        sep="\n",
    )

TITLE: 3-Day Potty Training Method: How It Works and How to Prepare- Parenting
SCORE: 344.7822265625
COMMENT: You’ll need a child-size potty to make things comfortable for your child. This best-selling potty chair looks like an adult-sized toilet, so they should quickly catch on to what it’s meant for. Fun details like a moving handle that makes flushing noises add to the “big kid” feeling your kid will get from using this.     Baby Wipes
URL: https://www.parenting.com/toddler/potty-training/how-3-day-potty-training-method-works

TITLE: Toilet Training Your Toddler: Strategy
SCORE: 341.1186218261719
COMMENT: Pregnancy & Childbirth  Newborn & Infants  Toddler & Pre-school  School Age  Teens & Young Adults
URL: https://childrensmd.org/browse-by-topic/behavior-development/toilet-training-your-toddler-strategy-not-tactics

TITLE: 3-Day Potty Training Method: How It Works and How to Prepare- Parenting
SCORE: 337.9621887207031
COMMENT: In general, parents who do the three-day potty-training 