In [3]:
!pip install transformers
!pip install pinecone-client
!pip install requests


Collecting pinecone-client
  Downloading pinecone_client-5.0.1-py3-none-any.whl.metadata (19 kB)
Collecting pinecone-plugin-inference<2.0.0,>=1.0.3 (from pinecone-client)
  Downloading pinecone_plugin_inference-1.1.0-py3-none-any.whl.metadata (2.2 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone-client)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Downloading pinecone_client-5.0.1-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.8/244.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_inference-1.1.0-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.4/85.4 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Installing collected packages: pinecone-plugin-interface, pinecone-plugin-inference, pinecone-client
Successfully installed pinecone-client-5.0.

In [12]:
# Imports
from transformers import AutoTokenizer, AutoModel , pipeline
from pinecone import Pinecone
import torch
import requests


In [5]:
# Pinecone connection details
api_key = "00dfadae-35e0-4fcd-92b7-f88e21899500"  # Replace with your Pinecone API key
index_name = "patient-symptoms"
cloud = "aws"
region = "us-east-1"

# Connect to Pinecone
pinecone = Pinecone(api_key=api_key)
index = pinecone.Index(index_name)

# Check if the index is loaded
print(f"Pinecone index '{index_name}' loaded successfully.")


Pinecone index 'patient-symptoms' loaded successfully.


In [6]:
# Load the same embedding model used to create the index
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
embedding_model = AutoModel.from_pretrained(embedding_model_name).to('cuda' if torch.cuda.is_available() else 'cpu')

# Function to generate embeddings for queries
def generate_embeddings(text):
    inputs = embedding_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to('cuda' if torch.cuda.is_available() else 'cpu')
    embeddings = embedding_model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings.detach().cpu().numpy()

# Test embedding generation for compatibility with Pinecone
test_query = "What symptoms does the patient have?"
test_embedding = generate_embeddings(test_query)
assert test_embedding.shape[1] == 384, "Embedding dimension mismatch with Pinecone index."
print(f"Embedding generated successfully with correct dimension: {test_embedding.shape}")


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Embedding generated successfully with correct dimension: (1, 384)


In [42]:
def query_pinecone(index, query, top_k=5):
    query_embedding = generate_embeddings(query)
    results = index.query(
        vector=query_embedding.flatten().tolist(),
        top_k=top_k,
        include_metadata=True
    )
    return results

sample_query = "Who is experiencing fever?"
query_results = query_pinecone(index, sample_query)
formatted_context = format_retrieved_context(query_results)
print(f"Retrieved Context: {formatted_context}")
print(query_results)

Retrieved Context: patient_name: Sarah Johnson, symptom: Fever. patient_name: Grace Cooper, symptom: Fever. patient_name: Emily Davis, symptom: Fever. patient_name: rupankar, symptom: 1. Fever. patient_name: Michael Brown, symptom: Vomiting.
{'matches': [{'id': '4e402be733ed99b80be67b7ecf3045f5',
              'metadata': {'patient_name': 'Sarah Johnson', 'symptom': 'Fever'},
              'score': 0.878107369,
              'values': []},
             {'id': '4516ba83f194af97ac8e77fbf67b6c65',
              'metadata': {'patient_name': 'Grace Cooper', 'symptom': 'Fever'},
              'score': 0.878107369,
              'values': []},
             {'id': '99ca02b8b738cfbb89813ad542791543',
              'metadata': {'patient_name': 'Emily Davis', 'symptom': 'Fever'},
              'score': 0.878107369,
              'values': []},
             {'id': '0c9a08caed1f00819224bcb3aae45e7a',
              'metadata': {'patient_name': 'rupankar', 'symptom': '1. Fever'},
              'score

In [43]:
# Load a more capable QA model
qa_model = pipeline(
    "question-answering",
    model="deepset/roberta-base-squad2",
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

# Test the QA model with a simple prompt
qa_prompt = {"question": "What is the capital of India?", "context": "India's capital is New Delhi."}
qa_response = qa_model(qa_prompt)
print(f"QA response: {qa_response['answer']}")


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]



QA response: New Delhi


In [55]:
def format_retrieved_context(results):
    """
    Format the retrieved context to group relevant data into a concise, structured string.
    """
    context = ""
    for match in results['matches']:
        patient_name = match['metadata'].get('patient_name', 'Unknown')
        symptom = match['metadata'].get('symptom', 'Unknown')
        context += f"Patient: {patient_name}, Symptom: {symptom}. "
    return context


In [49]:
def answer_query_with_rag(index, question):
    # Retrieve context from Pinecone
    query_results = query_pinecone(index, question)
    context = format_retrieved_contextt(query_results)

    # Split the context into individual entries for detailed QA
    entries = context.split(". ")  # Each entry contains "Patient: ... Symptom: ..."
    answers = []

    for entry in entries:
        if entry.strip():  # Skip empty entries
            qa_prompt = {"question": question, "context": entry}
            qa_response = qa_model(qa_prompt)
            answer = qa_response.get('answer', None)
            if answer and answer not in answers:  # Avoid duplicates
                answers.append(answer)

    # Return aggregated answers as a formatted string
    return ", ".join(answers)


In [61]:
# Query the RAG pipeline with a multi-match question
user_query = "Who are experiencing fever?"
response = answer_query_with_rag(index, user_query)
print(f"Final Answer: {response}")


Final Answer: Sarah Johnson, Emily Davis, Grace Cooper, rupankar, Fever, Patient: Laura Moore
