In [35]:
import json
import os
import torch
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import faiss
import random
import numpy as np
from transformers import AutoTokenizer, AutoModel


In [36]:
# Load the dataset
with open('intents.json', 'r') as file:
    data = json.load(file)
# Take only first 10 intents for testing

# Display the dataset structure
print(json.dumps(data, indent=2))  # Pretty print for better readability


{
  "intents": [
    {
      "tag": "Cuts",
      "patterns": [
        "What to do if Cuts?",
        "How to cure Cuts?",
        "Which medicine to apply for Cuts?",
        "what to apply on cuts?",
        "Cuts"
      ],
      "responses": [
        "Wash the cut properly to prevent infection and stop the bleeding by applying pressure for 1-2minutes until bleeding stops. Apply Petroleum Jelly to make sure that the wound is moist for quick healing. Finally cover the cut with a sterile bandage. Pain relievers such as acetaminophen can be applied."
      ],
      "context_set": ""
    },
    {
      "tag": "Abrasions",
      "patterns": [
        "how do you treat abrasions?",
        "Do Abrasions cause scars?",
        "Abrasions",
        "what to do if abrasions?",
        "Which medicine to apply for abrasions?",
        "How to cure abrasions?"
      ],
      "responses": [
        "Begin with washed hands.Gently clean the area with cool to lukewarm water and mild soap. Remove

In [37]:
# Load pre-trained BERT model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Function to get BERT embeddings for a list of sentences
def get_bert_embeddings(texts):
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1)  # Mean pooling over all tokens
    return embeddings.cpu().numpy()  # Convert to numpy for FAISS compatibility


In [38]:
# Prepare the data for FAISS (embedding the patterns and storing their corresponding tags)
patterns = []
tags = []
responses = []  # Take only the first 10 responses for testing

# Extract patterns, tags, and responses from the dataset
for intent in data['intents']:
    patterns.extend(intent['patterns'])
    tags.extend([intent['tag']] * len(intent['patterns']))
    responses.extend([random.choice(intent['responses'])] * len(intent['patterns']))  # Take a random response for simplicity

# Generate embeddings for the patterns
pattern_embeddings = get_bert_embeddings(patterns)

# Display the shape of the embeddings to confirm
print("Embeddings shape:", pattern_embeddings.shape)


Embeddings shape: (188, 768)


In [39]:
# Initialize FAISS index
embedding_dim = 768  # Dimensionality of the embeddings (from BERT)
index = faiss.IndexFlatL2(embedding_dim)  # L2 distance for similarity

# Add embeddings to the FAISS index
index.add(pattern_embeddings)

# Confirm the number of items in the index
print(f"Number of items in FAISS index: {index.ntotal}")


Number of items in FAISS index: 188


In [42]:
def search_faiss(query, k=3):
    try:
        # Generate embedding for the query
        query_embedding = get_bert_embeddings([query])
        print(f"Query embedding shape: {query_embedding.shape}")  # Debugging line

        # Ensure the embedding is in the correct format (1 x embedding_dim)
        query_embedding = query_embedding.astype(np.float32)  # Convert to float32
        query_embedding = query_embedding.reshape(1, -1)  # Ensure the shape is correct
        print(f"Query embedding reshaped: {query_embedding.shape}")  # Debugging line

        # Search the FAISS index for the top k most similar patterns
        D, I = index.search(query_embedding, k)
        print(f"Search results - Distances: {D}, Indices: {I}")  # Debugging line
        
        # Validate the indices and prepare results
        results = []
        for i in range(k):
            idx = I[0][i]
            if idx >= 0 and idx < len(tags):  # Bounds checking
                results.append({
                    "tag": tags[idx],
                    "response": responses[idx],
                    "distance": float(D[0][i])  # Convert to native Python float
                })
        
        return results
    except Exception as e:
        print(f"Error during search: {str(e)}")
        return []

# Test the function with debugging
query = "How to treat cuts?"
results = search_faiss(query)

if results:
    for result in results:
        print(f"Tag: {result['tag']}")
        print(f"Response: {result['response']}")
        print(f"Similarity: {result['distance']}\n")
else:
    print("No results found")


Query embedding shape: (1, 768)
Query embedding reshaped: (1, 768)
Search results - Distances: [[22.138325 22.677105 23.590006]], Indices: [[10  1 55]]
Tag: Abrasions
Response: Begin with washed hands.Gently clean the area with cool to lukewarm water and mild soap. Remove dirt or other particles from the wound using sterilized tweezers.For a mild scrape thatâ€™s not bleeding, leave the wound uncovered.If the wound is bleeding, use a clean cloth or bandage, and apply gentle pressure to the area to stop any bleeding.Cover a wound that bled with a thin layer of topical antibiotic ointment, like Bacitracin, or a sterile moisture barrier ointment, like Aquaphor. Cover it with a clean bandage or gauze. Gently clean the wound and change the ointment and bandage once per day.Watch the area for signs of infection, like pain or redness and swelling. See your doctor if you suspect infection.
Similarity: 22.138324737548828

Tag: Cuts
Response: Wash the cut properly to prevent infection and stop th

In [43]:
# Example query
query = "How to treat cuts?"

# Run the search in the FAISS index
results = search_faiss(query)

# Display the results
if results:
    for result in results:
        print(f"Tag: {result['tag']}")
        print(f"Response: {result['response']}")
        print(f"Similarity: {result['distance']}\n")
else:
    print("No results found")


Query embedding shape: (1, 768)
Query embedding reshaped: (1, 768)
Search results - Distances: [[22.138325 22.677105 23.590006]], Indices: [[10  1 55]]
Tag: Abrasions
Response: Begin with washed hands.Gently clean the area with cool to lukewarm water and mild soap. Remove dirt or other particles from the wound using sterilized tweezers.For a mild scrape thatâ€™s not bleeding, leave the wound uncovered.If the wound is bleeding, use a clean cloth or bandage, and apply gentle pressure to the area to stop any bleeding.Cover a wound that bled with a thin layer of topical antibiotic ointment, like Bacitracin, or a sterile moisture barrier ointment, like Aquaphor. Cover it with a clean bandage or gauze. Gently clean the wound and change the ointment and bandage once per day.Watch the area for signs of infection, like pain or redness and swelling. See your doctor if you suspect infection.
Similarity: 22.138324737548828

Tag: Cuts
Response: Wash the cut properly to prevent infection and stop th