In [1]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed knowledge base for the FAQ chatbot.
# Every entry is a (question, answer) tuple; the question text is what gets
# embedded and searched, the answer text is what gets returned to the user.
faq_data = [
    (
        "What are symptoms of heart attack?",
        "Common symptoms include chest pain, shortness of breath, nausea, and sweating.",
    ),
    (
        "How to reduce high blood pressure?",
        "Lifestyle changes such as reducing salt intake, exercising regularly, and medication can help.",
    ),
    (
        "What causes diabetes?",
        "Diabetes occurs when the body cannot produce enough insulin or use it effectively.",
    ),
    (
        "What is normal body temperature?",
        "Normal body temperature is around 98.6°F or 37°C.",
    ),
    (
        "How to treat fever at home?",
        "Rest, hydration, and paracetamol can help manage mild fever.",
    ),
    (
        "Symptoms of dehydration?",
        "Dry mouth, fatigue, dizziness, and dark urine are common symptoms.",
    ),
    (
        "What is anemia?",
        "Anemia is a condition where the body lacks enough healthy red blood cells.",
    ),
    (
        "How to prevent COVID-19?",
        "Vaccination, hand hygiene, and masks reduce infection risk.",
    ),
    (
        "What are asthma triggers?",
        "Dust, pollen, smoke, pollution, and cold air can trigger asthma.",
    ),
    (
        "When to visit doctor for chest pain?",
        "Seek medical care immediately if chest pain is severe or accompanied by breathlessness.",
    ),
    (
        "How to manage stress?",
        "Regular exercise, meditation, and adequate sleep help manage stress.",
    ),
    (
        "What are symptoms of flu?",
        "Fever, body aches, sore throat, and fatigue are common flu symptoms.",
    ),
    (
        "What causes migraine?",
        "Migraines may be triggered by stress, hormonal changes, or certain foods.",
    ),
    (
        "How to improve immunity?",
        "Balanced diet, exercise, sleep, and vaccination improve immunity.",
    ),
    (
        "What is hypertension?",
        "Hypertension is a condition where blood pressure remains consistently high.",
    ),
]

# One row per FAQ pair; the column order mirrors the tuple order above.
df_faq = pd.DataFrame(data=faq_data, columns=["question", "answer"])

print("Knowledge base created successfully.")
df_faq.head()

Knowledge base created successfully.


Unnamed: 0,question,answer
0,What are symptoms of heart attack?,"Common symptoms include chest pain, shortness ..."
1,How to reduce high blood pressure?,Lifestyle changes such as reducing salt intake...
2,What causes diabetes?,Diabetes occurs when the body cannot produce e...
3,What is normal body temperature?,Normal body temperature is around 98.6°F or 37°C.
4,How to treat fever at home?,"Rest, hydration, and paracetamol can help mana..."


In [3]:
# Load the sentence-embedding model that encodes both the FAQ questions
# and incoming user queries.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")
print("Embedding model loaded successfully.")

Loading weights: 100%|██████████| 103/103 [00:00<00:00, 306.62it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Embedding model loaded successfully.


In [4]:
# Embed every FAQ question; the result is a NumPy array with one row per question.
faq_questions = df_faq["question"].tolist()
question_embeddings = model.encode(faq_questions, convert_to_numpy=True)

print("embdeddings shape:", question_embeddings.shape)

embdeddings shape: (15, 384)


In [5]:
# Size a flat L2 FAISS index to the embedding width (the last axis of the
# 2-D embedding matrix), then load all question vectors into it.
dimension = question_embeddings.shape[-1]
index = faiss.IndexFlatL2(dimension)
index.add(question_embeddings)
print("FAISS index created and embeddings added.")

FAISS index created and embeddings added.


In [6]:
def chatbot_response(query, top_k=1):
    """Answer a user query with the closest FAQ entry in the FAISS index.

    The query is embedded with the module-level ``model``, the module-level
    ``index`` is searched for the ``top_k`` nearest question vectors, and the
    matching rows are read from ``df_faq`` by position.

    Args:
        query: Free-text user question.
        top_k: Number of nearest neighbours to request from the index.

    Returns:
        dict with the keys ``query``, ``matched_question``, ``answer`` and
        ``distance_score`` (as reported by the index; lower means closer)
        describing the best match. If the index returns no usable hit,
        ``matched_question`` and ``answer`` are ``None`` and
        ``distance_score`` is ``inf``. When ``top_k`` is greater than 1 an
        extra ``alternatives`` key holds the remaining hits, best first, each
        as a dict with ``matched_question``, ``answer`` and ``distance_score``.
    """
    query_embedding = model.encode([query], convert_to_numpy=True)
    distances, indices = index.search(query_embedding, top_k)

    matches = []
    for distance, position in zip(distances[0], indices[0]):
        # FAISS pads the result with label -1 when it cannot supply top_k
        # neighbours; df_faq.iloc[-1] would silently return the LAST row,
        # so padded slots must be skipped rather than looked up.
        if position < 0:
            continue
        faq_row = df_faq.iloc[int(position)]
        matches.append({
            "matched_question": faq_row["question"],
            "answer": faq_row["answer"],
            "distance_score": float(distance),
        })

    if matches:
        best = matches[0]
    else:
        best = {
            "matched_question": None,
            "answer": None,
            "distance_score": float("inf"),
        }

    response = {"query": query, **best}
    # Only expose the runner-up hits when the caller asked for more than one,
    # so the default call returns exactly the same four keys as before.
    if top_k > 1:
        response["alternatives"] = matches[1:]
    return response

In [7]:
# Smoke-test the retriever with a free-form symptom description.
sample_reply = chatbot_response("i have chest pain and shortness of breath")
print(sample_reply)

{'query': 'i have chest pain and shortness of breath', 'matched_question': 'When to visit doctor for chest pain?', 'answer': 'Seek medical care immediately if chest pain is severe or accompanied by breathlessness.', 'distance_score': 0.7715373039245605}


In [8]:
# Paraphrased and verbatim variants of FAQ questions, used to eyeball
# which entry each one retrieves and at what distance.
test_queries = [
    "What are the symptoms of a heart attack?",
    "How can I lower my blood pressure?",
    "What causes diabetes?",
]

for sample_query in test_queries:
    reply = chatbot_response(sample_query)
    print(reply)

{'query': 'What are the symptoms of a heart attack?', 'matched_question': 'What are symptoms of heart attack?', 'answer': 'Common symptoms include chest pain, shortness of breath, nausea, and sweating.', 'distance_score': 0.017610449343919754}
{'query': 'How can I lower my blood pressure?', 'matched_question': 'How to reduce high blood pressure?', 'answer': 'Lifestyle changes such as reducing salt intake, exercising regularly, and medication can help.', 'distance_score': 0.13661938905715942}
{'query': 'What causes diabetes?', 'matched_question': 'What causes diabetes?', 'answer': 'Diabetes occurs when the body cannot produce enough insulin or use it effectively.', 'distance_score': 1.9833149066204175e-13}


In [10]:
import os

# Persist the FAISS index and the FAQ table so they can be reloaded later.
index_path = "../models/chatbot_faiss.index"
knowledge_base_path = "../data/processed/chatbot_knowledge_base.csv"

# Neither faiss.write_index nor DataFrame.to_csv creates missing parent
# directories, so make sure BOTH output folders exist before writing
# (previously only ../models was created and the CSV write could fail).
for artifact_path in (index_path, knowledge_base_path):
    os.makedirs(os.path.dirname(artifact_path), exist_ok=True)

faiss.write_index(index, index_path)
df_faq.to_csv(knowledge_base_path, index=False)

print("Chatbot index and knowledge base saved successfully.")

Chatbot index and knowledge base saved successfully.
