In [None]:
# Start from a clean slate: remove any LangChain packages already present, then
# install the LangChain stack at explicitly pinned versions.
!pip uninstall -y langchain langchain-core langchain-community langchain-text-splitters
!pip install langchain==0.3.21 langchain-core==0.3.20 langchain-community==0.3.20 langchain-text-splitters==0.3.7
# Model / embedding / ranking dependencies (installed unpinned).
!pip install transformers torch sentence-transformers rank-bm25 beautifulsoup4 
# PubMed clients (biopython, pymed), vector search (faiss-cpu) and tokenization (nltk).
!pip install biopython faiss-cpu nltk pymed

In [None]:
# NOTE(review): this cell uninstalls the LangChain packages again and reinstalls
# langchain / langchain-community at the newest available version, which discards
# the version pins applied by the first install cell. Confirm which of the two
# cells is meant to win and remove the other so a fresh run is reproducible.
!pip uninstall -y langchain langchain-core langchain-community langchain-text-splitters
!pip install --upgrade langchain langchain-community


In [None]:
# Quiet (-q) install of the remaining dependencies. faiss-cpu, nltk and pymed are
# already requested by the first install cell; spacy, scikit-learn and
# huggingface_hub are only requested here.
!pip install -q faiss-cpu nltk pymed spacy scikit-learn huggingface_hub

In [None]:
import os
import xml.etree.ElementTree as ET
from typing import List

import faiss
import nltk
import numpy as np
import requests
import spacy
from Bio import Entrez
from huggingface_hub import login
from langchain.embeddings import HuggingFaceEmbeddings
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from pymed import PubMed
from rank_bm25 import BM25Okapi
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

In [None]:

# NLTK data used by word_tokenize and the stop-word list.
nltk.download('punkt')
# Newer NLTK releases (3.9+) make word_tokenize load "punkt_tab" rather than
# the pickled "punkt" resource, so fetch both to work on either version.
nltk.download('punkt_tab')
nltk.download('stopwords')

# Install the spaCy English model into the interpreter that runs this kernel.
# Shelling out to a bare "python" can hit a different environment and hides a
# failed download; the check also keeps re-runs of this cell cheap.
if not spacy.util.is_package("en_core_web_sm"):
    spacy.cli.download("en_core_web_sm")


In [None]:
# Contact address passed to both PubMed clients. It is defined once so the two
# clients cannot drift apart, and can be overridden through the NCBI_EMAIL
# environment variable instead of editing the notebook.
NCBI_EMAIL = os.getenv("NCBI_EMAIL", "khushi00452@gmail.com")

Entrez.email = NCBI_EMAIL
pubmed = PubMed(tool="HealthAI", email=NCBI_EMAIL)


In [None]:
# Lowercase substrings that mark a query as mental-health related. route_query
# tests each one with `in` against the lowercased query, so stem-like entries
# such as "depress" or "anxiet" also match their inflected forms.
MENTAL_KEYWORDS = {
    # feelings and states
    "depress",
    "anxiet",
    "stress",
    "lonely",
    "overwhelm",
    "panic",
    "burnout",
    "dread",
    "hopeless",
    "empty",
    # conditions
    "bipolar",
    "ptsd",
    "ocd",
    "paranoia",
    "insomnia",
    "eating disorder",
    "self harm",
    "suicid",
    # care and professions
    "therapy",
    "counsel",
    "psycholog",
    "psychiatr",
    # colloquial phrases
    "not okay",
    "can't cope",
    "breaking down",
    "mental health",
}


In [None]:
# Lowercase substrings that send a query down the crisis path in route_query.
CRISIS_KEYWORDS = {
    "suicid",
    "kill myself",
    "end my life",
    "want to die",
    "self harm",
    "cutting",
}

# Fixed reply returned for crisis queries. It is spelled out as adjacent
# literals so every line break and the trailing space after "available." are
# explicit in the source.
# NOTE(review): the US 988 line appears to have been renamed the
# "988 Suicide & Crisis Lifeline" - confirm and update the wording if so.
CRISIS_RESPONSE = (
    "\n"
    "I'm really sorry you're feeling this way. You're not alone, and help is available. \n"
    "Please contact a crisis hotline immediately:\n"
    "- National Suicide Prevention Lifeline: 988 (US)\n"
    "- Crisis Text Line: Text HOME to 741741 (US)\n"
    "- International Association for Suicide Prevention: https://www.iasp.info/resources/Crisis_Centres/\n"
    "Your life matters, and there are people who want to support you.\n"
)


In [None]:
# Shared sentence-embedding model: build_faiss_index embeds the article texts
# with it and retrieve_relevant_docs embeds the query with it.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# spaCy pipeline; extract_keywords reads the part-of-speech tag of each token.
nlp = spacy.load("en_core_web_sm")
# Porter stemmer applied by preprocess_text to every token it keeps.
stemmer = PorterStemmer()
# English stop words, held as a set for the membership tests in both helpers.
stop_words = set(stopwords.words("english"))

def preprocess_text(text: str) -> str:
    """Normalise free text into a space-separated string of stems.

    The text is lowercased and tokenised with ``word_tokenize``. Tokens that
    are not purely alphanumeric, or that appear in ``stop_words``, are dropped;
    every remaining token is reduced with ``stemmer``.
    """
    stems = []
    for token in word_tokenize(text.lower()):
        # Skip punctuation / mixed tokens and stop words.
        if not token.isalnum() or token in stop_words:
            continue
        stems.append(stemmer.stem(token))
    return " ".join(stems)

def extract_keywords(text: str) -> List[str]:
    """Return the nouns and adjectives of ``text`` that are not stop words.

    Tokens keep their original casing and order; the stop-word comparison is
    done on the lowercased token text.
    """
    keywords: List[str] = []
    for token in nlp(text):
        is_noun_or_adjective = token.pos_ == "NOUN" or token.pos_ == "ADJ"
        if is_noun_or_adjective and token.text.lower() not in stop_words:
            keywords.append(token.text)
    return keywords


In [None]:
def fetch_pubmed_articles(query, max_results=5):
    """Fetch PubMed articles matching ``query`` as text snippets for retrieval.

    Runs an E-utilities ``esearch`` to get the matching PubMed IDs, then an
    ``efetch`` for those records so that each returned string carries the
    article's title and abstract instead of only its ID (an ID alone gives the
    embedding model and the LLM nothing to work with).

    Args:
        query: Search term sent to PubMed.
        max_results: Maximum number of articles to request.

    Returns:
        A list of strings, one per article, in search-result order. Each string
        starts with ``"PubMed Article ID: <id>"``; when a title and/or abstract
        could be retrieved they follow on separate lines. Returns ``[]`` when
        the search fails or matches nothing. If only the record lookup fails,
        the ID-only strings are returned.
    """
    eutils_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
    timeout_seconds = 10  # never let a stalled connection hang the notebook

    try:
        search = requests.get(
            f"{eutils_url}/esearch.fcgi",
            params={"db": "pubmed", "term": query, "retmode": "json", "retmax": max_results},
            timeout=timeout_seconds,
        )
        if search.status_code != 200:
            return []
        result_ids = search.json().get("esearchresult", {}).get("idlist", [])
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not JSON: treat like a failed search.
        return []

    if not result_ids:
        return []

    id_only = [f"PubMed Article ID: {article_id}" for article_id in result_ids]

    try:
        fetch = requests.get(
            f"{eutils_url}/efetch.fcgi",
            params={"db": "pubmed", "id": ",".join(result_ids), "retmode": "xml"},
            timeout=timeout_seconds,
        )
        if fetch.status_code != 200:
            return id_only
        # Parse the raw bytes so the XML parser handles the declared encoding.
        root = ET.fromstring(fetch.content)
    except (requests.RequestException, ET.ParseError):
        return id_only

    # Map PMID -> (title, abstract) for every record that came back.
    summaries = {}
    for record in root.iter("PubmedArticle"):
        pmid = record.findtext("./MedlineCitation/PMID")
        title_node = record.find(".//ArticleTitle")
        # itertext() keeps text nested inside inline markup such as <i>...</i>.
        title = "".join(title_node.itertext()).strip() if title_node is not None else ""
        abstract = " ".join(
            "".join(part.itertext()).strip() for part in record.iter("AbstractText")
        ).strip()
        summaries[pmid] = (title, abstract)

    articles = []
    for article_id, header in zip(result_ids, id_only):
        title, abstract = summaries.get(article_id, ("", ""))
        body = "\n".join(piece for piece in (title, abstract) if piece)
        articles.append(f"{header}\n{body}" if body else header)
    return articles

In [None]:
# Module-level retrieval state, rebound by build_faiss_index on every call.
# None means nothing is indexed; retrieve_relevant_docs checks for that.
faiss_index = None
# Article texts in the same order as the vectors added to faiss_index, so a
# search hit's row number is used directly as an index into this list.
article_texts = []

from langchain.embeddings import HuggingFaceEmbeddings

def build_faiss_index(articles):
    """Embed ``articles`` and rebuild the module-level FAISS index from them.

    Rebinds the globals ``faiss_index`` and ``article_texts``. When
    ``articles`` is empty (or otherwise falsy) the index is cleared instead.
    """
    global faiss_index, article_texts

    if articles:
        # One float32 row per article; the row width sets the index dimension.
        vectors = np.array(embedding_model.embed_documents(articles), dtype=np.float32)
        faiss_index = faiss.IndexFlatL2(vectors.shape[1])
        faiss_index.add(vectors)
        article_texts = articles
    else:
        faiss_index = None
        article_texts = []

def retrieve_relevant_docs(query, k=1):
    """Return the indexed article texts closest to ``query``.

    Uses the module-level ``faiss_index`` / ``article_texts`` populated by
    ``build_faiss_index``.

    Args:
        query: Text to embed and search for.
        k: Number of nearest articles to return. Defaults to 1, the previously
            hard-coded value; it is clamped to the number of indexed articles.

    Returns:
        Up to ``k`` article texts ordered as returned by the index search, or
        ``["No relevant information found."]`` when nothing is indexed or no
        valid hit comes back.
    """
    no_match = ["No relevant information found."]
    if faiss_index is None or not article_texts:
        return no_match

    # Ask for at least one neighbour and never more than there are vectors.
    top_k = max(1, min(k, len(article_texts)))

    query_embedding = np.array(embedding_model.embed_query(query), dtype=np.float32).reshape(1, -1)
    _, result_ids = faiss_index.search(query_embedding, k=top_k)

    # FAISS pads missing hits with -1; without the lower bound that value would
    # silently select the last article via negative indexing.
    docs = [article_texts[i] for i in result_ids[0] if 0 <= i < len(article_texts)]
    return docs or no_match


In [None]:
def route_query(query: str) -> str:
    """Classify a query as ``"crisis"``, ``"mental"`` or ``"medical"``.

    The lowercased query is checked for any substring from ``CRISIS_KEYWORDS``
    first, then from ``MENTAL_KEYWORDS``; anything else is ``"medical"``.
    """
    lowered = query.lower()

    # Order matters: a crisis match wins over a mental-health match.
    routes = (("crisis", CRISIS_KEYWORDS), ("mental", MENTAL_KEYWORDS))
    for label, keywords in routes:
        for keyword in keywords:
            if keyword in lowered:
                return label
    return "medical"


In [None]:

# Authenticate with the Hugging Face Hub using the HUGGINGFACE_TOKEN
# environment variable (os.getenv yields None when it is not set).
login(token=os.getenv("HUGGINGFACE_TOKEN"))

# Tokenizer and causal LM are loaded from the same checkpoint name;
# generate_response uses both.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")


In [None]:
def generate_response(query: str) -> str:
    """Generate a response to the user's health query.

    Crisis queries (as classified by ``route_query``) get the fixed
    ``CRISIS_RESPONSE`` and never reach the model. For every other query the
    function fetches PubMed results, indexes them, retrieves the closest
    text, and asks the language model to answer using that text as context.

    Args:
        query: The user's question.

    Returns:
        ``CRISIS_RESPONSE`` for crisis queries; otherwise only the text the
        model generated, with the prompt itself removed.
    """
    if route_query(query) == "crisis":
        return CRISIS_RESPONSE

    # Request just the three articles that are used, rather than five and slicing.
    relevant_articles = fetch_pubmed_articles(query, max_results=3)
    build_faiss_index(relevant_articles)
    retrieved_docs = retrieve_relevant_docs(query)

    context = "\n".join(retrieved_docs) if retrieved_docs else "No relevant medical information found."

    prompt = f"""You are a healthcare assistant. Provide accurate, compassionate medical information based on the context below.

User Query: {query}

Medical Context:
{context}

Response:"""

    # Keep the input tensors on the same device as the model weights.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=200)

    # A causal LM returns prompt tokens followed by the continuation; decode
    # only the continuation so the caller does not get the prompt echoed back.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


In [None]:


# Demo: a mental-health query. "hopeless" is in MENTAL_KEYWORDS and no crisis
# keyword is present, so this goes through retrieval and the language model.
queries = ["I feel really hopeless and anxious."]

for query in queries:
    response = generate_response(query)
    print(f"**Query:** {query}\n**Response:** {response}\n{'-'*50}")
    


In [None]:

# Demo: a general medical query. The list and loop matter here: a trailing
# comma would turn `queries` into a tuple, and calling generate_response(query)
# without the loop would reuse whatever `query` an earlier cell left behind.
queries = ["What are the symptoms of pneumonia?"]

for query in queries:
    response = generate_response(query)
    print(f"**Query:** {query}\n**Response:** {response}\n{'-'*50}")
    


In [None]:
# Demo: a crisis query. "end my life" is in CRISIS_KEYWORDS, so generate_response
# returns CRISIS_RESPONSE directly without calling PubMed or the model.
queries = ["I want to end my life."]

for query in queries:
    response = generate_response(query)
    print(f"**Query:** {query}\n**Response:** {response}\n{'-'*50}")
    
