In [2]:
import os
import base64
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
import json

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")
os.environ["LANGSMITH_TRACING"]="true"
os.environ["LANGSMITH_ENDPOINT"]=os.getenv("LANGSMITH_ENDPOINT")
os.environ["LANGSMITH_PROJECT"]=os.getenv("LANGSMITH_PROJECT")

client = OpenAI()

In [3]:
# Function to encode the image
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

image_path = "img/example.jpg"

base64_image = encode_image(image_path)

In [4]:
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4.1", temperature=0)

In [5]:
from typing import List, Optional, Dict
from pydantic import BaseModel
from langchain.schema import BaseMessage, HumanMessage, AIMessage

class State(BaseModel):
    conversation: Optional[list[BaseMessage]] = []
    user_input: str
    encoded_image: Optional[str] = None
    image_details: Optional[str] = None
    probability: Optional[Dict[str, float]] = None
    disease_details: Optional[Dict[str, Dict[str, str]]] = None

In [6]:
initial_state = State(
    conversation=[],
    user_input="Can you tell me What is this?",
    encoded_image=base64_image,
    image_details="",
    probability={},
)


In [7]:
ANALYSE_IMAGE_PROMPT = """
Act as an AI dermatology analysis assistant. You are provided with an image of a skin condition.
Your task is to analyze the image and provide a structured report covering the following points. 
**Do not provide medical advice or diagnosis.** Your analysis is purely based on visual information for informational purposes.

### **User Description / Input**
"{user_input}"

Please structure your response using the following headings:

1.  **Image Quality Assessment:**
    * Evaluate the suitability of the image for analysis (e.g., clarity, lighting, focus, resolution, obstructions). 
    * Is the image quality sufficient for a meaningful preliminary visual analysis? (Yes/No/Partially)
    * Provide a brief justification for your assessment.

2.  **Possible Body Part Identification:**
    * Based on visual cues (skin texture, hair patterns, contours), suggest the most likely body part shown in the image. If uncertain, state it.

3.  **Image Segmentation and Region Description:**
    * Identify and describe the distinct visual regions in the image (e.g., apparently normal skin, the primary area of interest/lesion, surrounding skin, background elements).

4.  **Analysis of Potential Area(s) of Concern:**
    * Focus on the area(s) that appear anomalous or are infected with skin diseases.
    * Describe their visual characteristics in detail:
        * **Color(s):** (e.g., red, brown, black, white, yellow, flesh-toned, variations)
        * **Morphology/Shape:** (e.g., round, oval, irregular, linear)
        * **Borders:** (e.g., well-defined, ill-defined, raised, smooth)
        * **Texture:** (e.g., smooth, rough, scaly, crusted, ulcerated, bumpy, waxy)
        * **Size:** (Provide an estimated size relative to the image frame if possible, or describe as small/large)
        * **Distribution/Pattern:** (e.g., single lesion, multiple lesions, clustered, scattered, symmetrical)
        * **Signs of Inflammation:** (e.g., redness, swelling, warmth - infer warmth cautiously based on redness/swelling)

5.  **Initial Visual Hypothesis (Non-Diagnostic):**
    * Based *strictly* on the visual patterns and characteristics observed above, provide a preliminary hypothesis about the *nature* of the findings (e.g., suggests inflammation, pigmentation anomaly, possible infection, characteristics consistent with a benign growth, features requiring further investigation).
    * **Crucially, DO NOT name specific diseases or provide a diagnosis.** Frame this section carefully as observations requiring professional medical evaluation.

**Mandatory Disclaimer:** Reiterate clearly at the end of your response that this analysis is based solely on visual data, is not a substitute for professional medical advice, diagnosis, or treatment, and the user must consult a qualified healthcare provider (like a dermatologist) for any health concerns.
"""

In [8]:
def analyse_image(state: State) -> State:
    completion = client.chat.completions.create(
        model="gpt-4.1",
        messages=[
            {
                "role": "user",
                "content": [
                    { "type": "text", "text": ANALYSE_IMAGE_PROMPT.format(user_input=state.user_input)},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{state.encoded_image}",
                        },
                    },
                ],
            }
        ],
        # max_tokens=1000
    )
    state.image_details = completion.choices[0].message.content
    return state

In [9]:
initial_state = analyse_image(initial_state)

In [10]:
PROBABILITY_ESTIMATION_PROMPT = """
You are a dermatology reasoning agent. Your goal is to return the **five most likely dermatological conditions** based on the visual analysis, hypothesis, and conversation.

Use the following inputs:

**Image Details:**
{image_details}

**Initial Visual Hypothesis:**
{initial_hypothesis}

**Conversation History:**
{conversation_history}

---

### Task Instructions:

1. Identify 5 possible skin diseases that best match the visual and contextual data.
2. Assign a probability (from 0.0 to 1.0) to each disease, ensuring the total adds up to 1.
3. Justify your top choices internally and return **only** the dictionary as the final structured output.

**Output Format:**

Return a dictionary in this format:

```json
{{
  "Psoriasis": 0.40,
  "Eczema": 0.25,
  "Tinea Corporis": 0.15,
  "Lichen Planus": 0.10,
  "Contact Dermatitis": 0.10
}}
"""

In [11]:
def generate_hypothesis(state: State):
    
    prompt_template = PromptTemplate(
        template=PROBABILITY_ESTIMATION_PROMPT,
        input_variables=["image_details", "initial_hypothesis", "conversation_history"]
    )

    formatted_prompt = prompt_template.format(
        image_details=state.image_details or "Not available.",
        initial_hypothesis=state.probability or {},
        conversation_history=state.conversation or "No conversation history."
    )

    output = llm.invoke([HumanMessage(content=formatted_prompt)])

    cleaned_json = output.content.strip().strip("`").replace("json", "").strip()
    state.probability = json.loads(cleaned_json)
    return state

In [12]:
initial_state = generate_hypothesis(initial_state)

In [13]:
initial_state.probability

{'Vitiligo': 0.6,
 'Pityriasis Alba': 0.15,
 'Post-Inflammatory Hypopigmentation': 0.1,
 'Idiopathic Guttate Hypomelanosis': 0.08,
 'Tinea Versicolor': 0.07}

In [32]:
DISEASE_INFO_QUERY = """\
You are a highly structured medical research assistant.

Extract and summarize only the **causes and symptoms** of "{disease_name}".
Sources: Wikipedia first, then Arxiv.

Strict format:
Summary:
<one‑paragraph — causes & symptoms only>

References:
- Source 1: <Name> - <URL>
- Source 2: <Name> - <URL>
"""

In [38]:
from typing import Dict, List, Optional
import json

from pydantic import BaseModel
from langgraph.graph import Graph, END
from langgraph.prebuilt import ToolNode
from langchain.schema import BaseMessage
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.agents import Tool, initialize_agent, AgentExecutor
from langchain.agents.agent_types import AgentType
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper

In [39]:
vectorstore = Chroma(
    persist_directory="./chroma_diseases",
    embedding_function=OpenAIEmbeddings()
)

wiki_tool = Tool(
    name="Wikipedia",
    func=WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()).run,
    description="Search disease info from Wikipedia."
)
arxiv_tool = Tool(
    name="Arxiv",
    func=ArxivQueryRun(api_wrapper=ArxivAPIWrapper(
        top_k_results=2,
        doc_content_chars_max=500
    )).run,
    description="Search scholarly info from Arxiv."
)

# Wrap them for LangGraph
wiki_node = ToolNode.from_langchain_tool(wiki_tool)
arxiv_node = ToolNode.from_langchain_tool(arxiv_tool)

AttributeError: type object 'ToolNode' has no attribute 'from_langchain_tool'

In [17]:
@node
def ProcessDiseases(state: State, wiki: wiki_node, arxiv: arxiv_node) -> State:
    details: Dict[str, Dict[str, object]] = {}
    for disease, _ in state.probability.items():
        # 1️⃣ Try Chroma
        try:
            hits = vectorstore.similarity_search(disease, k=1)
        except Exception:
            hits = []
        if hits and disease.lower() in hits[0].page_content.lower():
            # existing entry
            meta = hits[0].metadata or {}
            refs = meta.get("references", "[]")
            try:
                ref_list = json.loads(refs)
            except Exception:
                ref_list = [refs] if isinstance(refs, str) else []
            details[disease] = {
                "summary": hits[0].page_content,
                "references": ref_list
            }
            continue

        # 2️⃣ Not found → invoke agent
        agent = AgentExecutor.from_agent_and_tools(
            agent=initialize_agent(
                [wiki_tool, arxiv_tool],
                llm,
                agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                verbose=False
            ).agent,
            tools=[wiki_tool, arxiv_tool],
            verbose=False,
            handle_parsing_errors=True,
            max_iterations=2,
            max_execution_time=120
        )
        query = DISEASE_INFO_QUERY.format(disease_name=disease)
        try:
            out = agent.invoke({"input": query}).get("output", "")
        except Exception as e:
            out = f"Error: {e}"

        # 3️⃣ Parse output
        lines = out.splitlines()
        ref_lines = [l for l in lines if l.lower().startswith("- source")]
        summary = "\n".join([l for l in lines if l not in ref_lines]).strip()

        # 4️⃣ Store back to Chroma
        meta = {
            "disease": disease,
            "source": "wikipedia+arxiv",
            "references": json.dumps(ref_lines)
        }
        try:
            vectorstore.add_documents([
                Document(page_content=summary, metadata={k: str(v) for k, v in meta.items()})
            ])
        except Exception:
            pass

        details[disease] = {"summary": summary, "references": ref_lines}

    state.disease_details = details
    return state

# --- 4. Build & Run Graph ------------------------------------------------------
graph = Graph(name="disease_info_one_node")
graph.include_tool_node(wiki_node)
graph.include_tool_node(arxiv_node)
graph.add_flow(ProcessDiseases)

def fetch_all_disease_details(state: State) -> State:
    return graph.run(state=state)

In [30]:
initial_state = fetch_disease_details_from_state(initial_state)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to search Wikipedia for information on the causes and symptoms of vitiligo.
Action: Wikipedia
Action Input: Vitiligo[0m
Observation: [36;1m[1;3mPage: Vitiligo
Summary: Vitiligo (, vi-ti-leye-goh) is a chronic autoimmune disorder that causes patches of skin to lose pigment or color. The cause of vitiligo is unknown, but it may be related to immune system changes, genetic factors, stress, or sun exposure. Treatment options include topical medications, light therapy, surgery and cosmetics. The condition can show up on any skin type as a light peachy color and can appear on any place on the body in all sizes. The spots on the skin known as vitiligo are also able to “change” as spots lose and regain pigment; they will stay in relatively the same areas but can move over time and some big patches can move through the years but never disappear overnight.

Page: Uncle Ruckus
Summary: Uncle Ruckus (born July 4, 1939)

In [31]:
initial_state.disease_details

{'Vitiligo': {'summary': 'Agent stopped due to iteration limit or time limit.',
  'references': []},
 'Pityriasis Alba': {'summary': 'Agent stopped due to iteration limit or time limit.',
  'references': []},
 'Post-Inflammatory Hypopigmentation': {'summary': 'Agent stopped due to iteration limit or time limit.',
  'references': []},
 'Idiopathic Guttate Hypomelanosis': {'summary': 'Agent stopped due to iteration limit or time limit.',
  'references': []},
 'Tinea Versicolor': {'summary': 'Agent stopped due to iteration limit or time limit.',
  'references': []}}