# Step 1: Prepare sample documents

In [2]:
# Sample patient reports used as the retrieval corpus.
# Each document is a dict with a short "title" and the free-text "text" that gets embedded.
patient_245 = {
    "title": "Patient Report - ID 245",
    "text": "Patient ID 245 had a blood test on June 18 2025. Hemoglobin levels reached 13.2 g/DL, within normal range. Cholesterol levels were slightly elevated at 210 mg/DL. No signs of infection or inflammation were detected. Overall, the patient is in good health.",
}

patient_246 = {
    "title": "Patient Report - ID 246",
    # Year and spelling kept clean so the embedded text matches date/cholesterol queries.
    "text": "Patient ID 246 underwent a blood test on Aug 25 2025. Cholesterol levels dropped to 195 mg/DL after medication.",
}

patient_312 = {
    "title": "Patient Report - ID 312",
    "text": "Patient ID 312 had a blood test on July 10 2025. Hemoglobin levels were low at 11.5 g/DL, indicating mild anemia.",
}

patient_411 = {
    "title": "Patient Report - ID 411",
    "text": "Patient ID 411 had a follow-up on Aug 20, 2025.",
}

patient_512 = {
    "title": "Patient Report - ID 512",
    "text": "Patient ID 512 had a X-ray on Sep 5, 2025. The scan showed a minor fracture in the left wrist.",
}

patient_587 = {
    "title": "Patient Report - ID 587",
    "text": "Patient ID 587 had a follow-up on Sep 10, 2025. The patient is recovering well from the surgery.",
}



# Step 2: Test the import and API Key

In [3]:
# !pip install google-generativeai

**How to get Google API Key**

- Open [Google AI Studio](https://aistudio.google.com/app/apikey)
- Navigate to `Dashboard > API Keys` 
- Either generate a new key or use an existing one, if available.
- Make sure you are logged in to your Google account.

In [4]:
import os

import google.generativeai as genai
import numpy as np
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Load key/value pairs from a local .env file into the process environment
# so the API key lookup in the next cell can find it.
load_dotenv()

True

In [6]:
# Pull GEMINI_API_KEY from the environment and stop early when it is unset or blank.
api_key = os.environ.get("GEMINI_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("GEMINI_API_KEY environment variable is missing or empty. Please set it before running this notebook.")

In [7]:
# Reuse the GEMINI_API_KEY value validated in the previous cell so the check
# and the SDK configuration agree on a single environment variable.
genai.configure(api_key=api_key)

In [8]:
# Enumerate every model the configured API key can access and print its metadata.
available_models = genai.list_models()
for model in available_models:
    print(model)

Model(name='models/embedding-gecko-001',
      base_model_id='',
      version='001',
      display_name='Embedding Gecko',
      description='Obtain a distributed representation of a text.',
      input_token_limit=1024,
      output_token_limit=1,
      supported_generation_methods=['embedText', 'countTextTokens'],
      temperature=None,
      max_temperature=None,
      top_p=None,
      top_k=None)
Model(name='models/gemini-1.5-pro-latest',
      base_model_id='',
      version='001',
      display_name='Gemini 1.5 Pro Latest',
      description=('Alias that points to the most recent production (non-experimental) release '
                   'of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 '
                   'million tokens.'),
      input_token_limit=2000000,
      output_token_limit=8192,
      supported_generation_methods=['generateContent', 'countTokens'],
      temperature=1.0,
      max_temperature=2.0,
      top_p=0.95,
      top_k=40)
Model(name='m

Of the models listed above, we will use `models/text-embedding-004` to create the embeddings.

# Step 3: Create an embedding for a patient's data

In [None]:
# Embed one patient report and display the raw API response.
genai.embed_content(
    content=patient_245["text"],
    model="models/text-embedding-004",
    task_type="retrieval_document",
)

In [None]:
# Keep the embedding response for patient 245 so later cells can inspect it.
patient_embed = genai.embed_content(
    content=patient_245["text"],
    model="models/text-embedding-004",
    task_type="retrieval_document",
)

In [None]:
# Show the full response; the vector itself is stored under the "embedding" key.
print(patient_embed)

In [None]:
len(patient_embed["embedding"])  # embedding dimensionality; NOTE(review): text-embedding-004 is documented as 768-dimensional, not 1024 — confirm against this cell's output

In [9]:
def embed_text(text, task_type="retrieval_document"):
    """Embed a piece of text with the ``models/text-embedding-004`` model.

    Args:
        text: The content to embed.
        task_type: Task hint forwarded to ``genai.embed_content``. Defaults to
            ``"retrieval_document"`` (the value used for the corpus); pass
            ``"retrieval_query"`` when embedding a user query.

    Returns:
        The value stored under the ``"embedding"`` key of the API response.
    """
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=text,
        task_type=task_type,
    )
    return response["embedding"]

In [13]:
import pandas as pd

# Gather the sample reports in corpus order and load them into a DataFrame
# with one row per report and the columns "title" and "text".
doc = [patient_245, patient_246, patient_312, patient_411, patient_512, patient_587]
df = pd.DataFrame(data=doc)

df

Unnamed: 0,title,text
0,Patient Report - ID 245,Patient ID 245 had a blood test on June 18 202...
1,Patient Report - ID 246,Patient ID 246 underwent a blood test on Aug 2...
2,Patient Report - ID 312,Patient ID 312 had a blood test on July 10 202...
3,Patient Report - ID 411,"Patient ID 411 had a follow-up on Aug 20, 2025."
4,Patient Report - ID 512,"Patient ID 512 had a X-ray on Sep 5, 2025. The..."
5,Patient Report - ID 587,"Patient ID 587 had a follow-up on Sep 10, 2025..."


In [None]:
# Embed every report's text (one embed_text call per row) and store the vectors alongside it.
df["embedding"] = df["text"].map(embed_text)

In [15]:
df

Unnamed: 0,title,text
0,Patient Report - ID 245,Patient ID 245 had a blood test on June 18 202...
1,Patient Report - ID 246,Patient ID 246 underwent a blood test on Aug 2...
2,Patient Report - ID 312,Patient ID 312 had a blood test on July 10 202...
3,Patient Report - ID 411,"Patient ID 411 had a follow-up on Aug 20, 2025."
4,Patient Report - ID 512,"Patient ID 512 had a X-ray on Sep 5, 2025. The..."
5,Patient Report - ID 587,"Patient ID 587 had a follow-up on Sep 10, 2025..."


In [None]:
def query_similarity_score(query, vector):
    """Score a document vector against a text query.

    Args:
        query: Text to embed via ``embed_text``.
        vector: Document embedding to compare with the query embedding.

    Returns:
        The dot product of the two vectors as a Python ``float``. This is the
        raw dot product; no normalisation is applied.

    Note:
        ``query`` is embedded on every call, so scoring many vectors for the
        same query repeats the embedding request each time.
    """
    query_embed = embed_text(query)
    return float(np.dot(query_embed, vector))


In [17]:
# User query that will be embedded and matched against the patient reports.
query = "can you show me the recent lab results for patient id 245?"

In [None]:
# Score every report against the query; the vectors live in the "embedding" column.
df["embedding"].apply(lambda vector: query_similarity_score(query, vector))


In [None]:
# Rank the reports by similarity to the query (best match first) without touching `df`.
similarity_scores = df["embedding"].apply(lambda vector: query_similarity_score(query, vector))
df_new = df.assign(similarity=similarity_scores).sort_values(by="similarity", ascending=False)
df_new.head(3)

In [19]:
def most_similar_doc(query):
    """Find the report in ``df`` whose embedding best matches ``query``.

    Args:
        query: Text to embed and compare against the ``"embedding"`` column of ``df``.

    Returns:
        A ``(title, text)`` tuple taken from the highest-scoring row.

    Raises:
        IndexError: If ``df`` has no rows.
    """
    # Embed the query once up front; scoring row by row through
    # query_similarity_score would repeat the embedding request for every document.
    query_embed = embed_text(query)
    scores = df["embedding"].apply(lambda vector: float(np.dot(query_embed, vector)))

    # assign() returns a new frame, so the shared `df` is left unmodified.
    ranked = df.assign(Similarity=scores).sort_values(by="Similarity", ascending=False)
    best = ranked.iloc[0]
    return best["title"], best["text"]

In [21]:
def RAG(query):
    """Answer ``query`` using the best-matching patient report as context.

    The most similar report is retrieved with ``most_similar_doc``, inserted
    into a prompt, and sent to the ``models/gemini-2.0-flash`` model.

    Args:
        query: The user's question.

    Returns:
        The model's answer text followed by a line naming the source document.
    """
    top_id, top_text = most_similar_doc(query)

    llm = genai.GenerativeModel("models/gemini-2.0-flash")
    prompt = (f"Answer this query {query} based on the following context: {top_text}")

    res = llm.generate_content(prompt)

    # top_id already holds the full report title (e.g. "Patient Report - ID 245"),
    # so it is used as-is rather than prefixed again.
    source_title = top_id

    return f"{res.text}\n\nSource Document is: {source_title}"

In [None]:
# Run the full retrieve-then-generate pipeline for the sample query and print the answer with its source.
print(RAG(query))