In [None]:
import json
import re
from typing import Any, Dict, List, Union
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import SystemMessage, HumanMessage,AIMessage

## EXAMPLE CALLS USING NATIVE WEBSEARCH TOOLS FOR OPENAI AND ANTHROPIC
1. OpenAI: basic web search call
2. OpenAI: web search call with a JSON-schema response
3. Anthropic: basic web search call
4. Anthropic: web search call with a JSON-schema response

In [None]:
#### 1. OPEN AI WEBSEARCH CALL
# Builds a GPT-5 chat model with a web-search tool attached, asks it to
# summarize recent OpenAI news, and prints the returned message object.
gpt5 = ChatOpenAI(model="gpt-5",
                  use_responses_api=True,  # send requests through the Responses API
                  reasoning={"effort": "low"},  # request low reasoning effort
                  model_kwargs={
                      # extra request fields handed through as-is via model_kwargs
                      "tools": [{"type": "web_search_preview"}],  # web-search tool spec
                      "text":{"verbosity": "low"},  # ask for low-verbosity text output
                      "max_output_tokens":1024,  # output-token limit sent with the request
                      },
                  )

# Conversation: a system role instruction plus the user's request.
messages = [
    SystemMessage(content="You are a news summarizer."),
    HumanMessage(content="Summarize the most recent article you can find on OpenAI news.")
]

# Single call; the model decides when to use the attached tool.
oai_response = gpt5.invoke(messages)

# Prints the whole response object, not just its text.
print(oai_response)

In [None]:
#### 2. OPEN AI WEBSEARCH WITH JSON SCHEMA
# Author's note: structured responses do not work together with web search, so
# the JSON shape is requested in the prompt instead; the author estimates this
# is 90%+ accurate for gpt-4o and above.
# The alternative is a LangGraph/LangChain workflow that splits the task into
# multiple calls.


# NOTE(review): unlike the GPT-5 cell, use_responses_api is not set here;
# presumably langchain_openai picks the right API when it sees the built-in
# tool -- confirm against the installed version.
gpt4o = ChatOpenAI(model="gpt-4o",  # or gpt-4.1
                   temperature=0,  # task dependent; deterministic output suits computational/JSON tasks
                   max_tokens=1024,
                   model_kwargs={
                   "tools": [{"type": "web_search_preview"}],  # web-search tool spec
                   "tool_choice": "auto",  # let the model decide whether to call the tool
                       },
                   )

# JSON Schema (kept as a string so it can be pasted into the prompt) describing
# the expected reply: a "summary" string and a "sources" list of strings.
schema = """
{
  "type": "object",
  "properties": {
    "summary": { "type": "string" },
    "sources": { "type": "array", "items": { "type": "string" } }
  },
  "required": ["summary", "sources"],
  "additionalProperties": false
}
"""

# The schema text is interpolated directly into the system prompt.
system_message = f" You are a news summarizer. Return a JSON object that strictly follows this JSON schema: {schema}"

messages = [
    SystemMessage(content=system_message),
    HumanMessage(content="Summarize the most recent article you can find on OpenAI news.")
]

oai_response = gpt4o.invoke(messages)

# Prints the whole response object; the JSON (if any) is inside its content.
print(oai_response)

In [None]:
#### 3. ANTHROPIC WEBSEARCH CALL
# Author's observation: Anthropic web search seems less polished -- the reply
# appears to contain the raw tool results along with commentary.
# The documentation says the encrypted strings help Claude track references.

# Step 1: a model with the web search tool attached performs the search.
claude = ChatAnthropic(model="claude-opus-4-1-20250805",  # alternatives: "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219"
                       temperature=0,
                       max_tokens=1024,
                       # Pass the server tool directly; no client implementation needed
                       model_kwargs={
                           "tools": [{
                               "type": "web_search_20250305",
                               "name": "web_search",
                               "max_uses": 3,  # limit on tool uses sent with the request
                               }]
                           },
                       )
messages = [
    SystemMessage(content="You are a news summarizer."),
    HumanMessage(content="Find the most recent  news article you can addressing news from OpenAI.")
]

# Open question from the author: Claude does not seem to integrate across search
# results, so the first reply is fed back in as context for a second call.
claude_response1 = claude.invoke(messages)
messages.append(claude_response1)

# Step 2: ask for the summary using a second model object that has no tools.
# Note that this rebinds the name `claude`.
messages.append(HumanMessage(content="Summarize the most recent article on OpenAI news that was returned from your previous web search."))
claude = ChatAnthropic(model="claude-sonnet-4-20250514",  # alternatives: "claude-3-7-sonnet-20250219", "claude-opus-4-1-20250805"
                       temperature=0.5,
                       max_tokens=1024
                      )

claude_response2 = claude.invoke(messages)
print(claude_response2)

In [None]:
# Claude's first (web search) response is very noisy when printed raw.
# Open question: can this be fixed some other way than a second call?
# Relies on `claude_response1` having been assigned by an earlier cell.
print(claude_response1)

In [None]:
#### 4. ANTHROPIC WEBSEARCH WITH JSON SCHEMA
# Author's note: structured responses do not work together with web search, so
# the JSON shape is requested in the prompt instead (accuracy unknown).
# The alternative is a LangGraph/LangChain workflow that splits the task into
# multiple calls.

# Step 1: a model with the web search tool attached performs the search.
claude = ChatAnthropic(model="claude-sonnet-4-20250514",  # alternatives: "claude-3-7-sonnet-20250219", "claude-opus-4-1-20250805"
                       temperature=0,
                       max_tokens=1024,
                       # Pass the server tool directly; no client implementation needed
                       model_kwargs={
                           "tools": [{
                               "type": "web_search_20250305",
                               "name": "web_search",
                               "max_uses": 3,
                               }]
                           },
                       )

# Plain string: there is nothing to interpolate, so no f-prefix is needed.
system_message = " You are a news summarizer. "

messages = [
    SystemMessage(content=system_message),
    HumanMessage(content="Summarize the most recent article you can find on OpenAI news.")
]

claude_response1 = claude.invoke(messages)

# JSON Schema (kept as a string so it can be pasted into the prompt) describing
# the expected reply: a "summary" string and a "sources" list of strings.
schema = """
{
  "type": "object",
  "properties": {
    "summary": { "type": "string" },
    "sources": { "type": "array", "items": { "type": "string" } }
  },
  "required": ["summary", "sources"],
  "additionalProperties": false
}
"""

# Open question from the author: Claude does not seem to integrate across search
# results, so the first reply is fed back in as context for a second call.
messages.append(claude_response1)

# Adjacent string literals are used instead of a backslash continuation inside
# the string, which would embed the source indentation (a long run of spaces)
# in the prompt text.
messages.append(HumanMessage(content=(
    "Summarize the most recent article on OpenAI news that was returned from your previous web search. "
    f"Return a JSON object that strictly follows this JSON schema: {schema}"
)))

# Step 2: a second model object without tools produces the JSON answer.
# Note that this rebinds the name `claude`.
claude = ChatAnthropic(model="claude-sonnet-4-20250514",  # alternatives: "claude-3-7-sonnet-20250219", "claude-opus-4-1-20250805"
                       temperature=0,  # needs to be 0
                       max_tokens=1024
                      )

claude_response2 = claude.invoke(messages)
print(claude_response2)


In [None]:

_JSON_FENCE_RE = re.compile(
    r"```(?:json)?\s*([\s\S]*?)\s*```", re.IGNORECASE
)

def _extract_fenced_json(text: str) -> List[Dict[str, Any]]:
    blocks = []
    for m in _JSON_FENCE_RE.finditer(text):
        payload = m.group(1).strip()
        try:
            blocks.append(json.loads(payload))
        except json.JSONDecodeError:
            # very last-resort: try single->double quotes if it looks like JSON-ish
            try:
                blocks.append(json.loads(payload.replace("'", '"')))
            except Exception:
                pass
    return blocks

def _extract_inline_json(text: str) -> List[Dict[str, Any]]:
    """
    Lightweight balanced-brace extractor: scans text, captures { ... } spans,
    and attempts json.loads on each. This is conservative (skips when invalid).
    """
    out = []
    stack = []
    start_idx = None
    for i, ch in enumerate(text):
        if ch == '{':
            if not stack:  # starting a new candidate
                start_idx = i
            stack.append('{')
        elif ch == '}':
            if stack:
                stack.pop()
                if not stack and start_idx is not None:
                    candidate = text[start_idx:i+1]
                    # ignore obvious markdown artifacts
                    if "```" in candidate:
                        continue
                    try:
                        out.append(json.loads(candidate))
                    except json.JSONDecodeError:
                        # last-resort fix for single quotes
                        try:
                            out.append(json.loads(candidate.replace("'", '"')))
                        except Exception:
                            pass
                    start_idx = None
    return out

def _gather_text_parts(content: Union[str, List[Dict[str, Any]]]) -> str:
    if isinstance(content, str):
        return content
    # content is a list of parts: [{'type': 'text', 'text': '...'}, ...]
    parts = []
    for p in content:
        if isinstance(p, dict) and p.get("type") == "text":
            parts.append(p.get("text", ""))
    return "\n".join(parts).strip()

def parse_ai_message_to_json_blocks(msg: AIMessage) -> Dict[str, Any]:
    """
    Pull JSON payloads, citations and tool outputs out of a model reply.

    Returns a dict:
      {
        "json_blocks": [ {...}, {...} ],
        "citations": [ {"title":..., "url":...}, ... ],
        "tool_outputs": [ {...}, ... ],
        "raw_text": "<combined text content>"
      }

    "json_blocks" holds the fenced results first, followed by inline results
    that are not duplicates of an earlier entry. "citations" only covers
    annotations whose type is "url_citation". "tool_outputs" is copied from
    msg.additional_kwargs["tool_outputs"] when that entry exists.

    example code:
        oai_res = oai.invoke(messages)
        parsed = parse_ai_message_to_json_blocks(oai_res)  # result is your AIMessage
        print("Found JSON blocks:", len(parsed["json_blocks"]))
        for i, block in enumerate(parsed["json_blocks"], 1):
            print(f"\n# Block {i}\n", json.dumps(block, indent=2))

        print("\nCitations:", parsed["citations"])
        print("\nTool outputs:", parsed["tool_outputs"])
    """
    combined_text = _gather_text_parts(msg.content)

    # Fenced blocks come first; inline objects are added only when new.
    # Duplicates are detected by comparing canonical (sorted-key) dumps.
    blocks = _extract_fenced_json(combined_text)
    fingerprints = {json.dumps(block, sort_keys=True) for block in blocks}
    for candidate in _extract_inline_json(combined_text):
        fingerprint = json.dumps(candidate, sort_keys=True)
        if fingerprint in fingerprints:
            continue
        fingerprints.add(fingerprint)
        blocks.append(candidate)

    # Citations live in the "annotations" of dict parts, so only list-shaped
    # content can carry them.
    citations: List[Dict[str, str]] = []
    content_parts = msg.content if isinstance(msg.content, list) else []
    for part in content_parts:
        if not isinstance(part, dict):
            continue
        for annotation in part.get("annotations", []):
            if annotation.get("type") == "url_citation":
                citations.append({
                    "title": annotation.get("title", ""),
                    "url": annotation.get("url", "")
                })

    # Tool outputs, when present, are stored under additional_kwargs.
    extra_kwargs = getattr(msg, "additional_kwargs", {}) or {}
    tool_outputs = list(extra_kwargs.get("tool_outputs", []) or [])

    return {
        "json_blocks": blocks,
        "citations": citations,
        "tool_outputs": tool_outputs,
        "raw_text": combined_text,
    }
    

In [None]:
def extract_urls(text: str) -> List[str]:
    """
    Quick URL scrape that works on both vendors' text output.

    Finds http/https URLs and removes characters that belong to the surrounding
    prose or markup rather than to the link: closing quotes, angle brackets and
    backticks end the match, trailing sentence punctuation and closing
    brackets/braces are stripped, and a closing parenthesis is dropped when it
    has no matching opening parenthesis inside the URL (e.g. a markdown link
    target), so URLs with balanced parentheses are kept intact.

    Args:
        text: Any text, e.g. a model reply in prose, markdown or JSON.

    Returns:
        The URLs in order of appearance (duplicates kept); empty if none.
    """
    trailing = ".,;:!?]}"
    urls: List[str] = []
    for raw in re.findall(r"https?://[^\s<>\"'`]+", text):
        url = raw.rstrip(trailing)
        # Drop closing parentheses that were opened outside the URL.
        while url.endswith(")") and url.count(")") > url.count("("):
            url = url[:-1].rstrip(trailing)
        # Skip matches that were nothing but a scheme plus punctuation.
        if not url.endswith("://"):
            urls.append(url)
    return urls

def extract_text_from_ai_message(msg: AIMessage) -> str:
    """
    Return the text of a model reply as one string.

    Handles both content shapes: a plain string (OpenAI) is returned unchanged;
    a list of content blocks (Anthropic) is reduced to the "text" values of the
    dict blocks whose "type" is "text", joined with newlines and stripped.

    example code:
        msg = claude.invoke(messages)
        text = extract_text_from_ai_message(msg)
    """
    content = msg.content
    if not isinstance(content, str):
        text_parts = [
            block.get("text", "")
            for block in content
            if isinstance(block, dict) and block.get("type") == "text"
        ]
        return "\n".join(text_parts).strip()
    return content

In [None]:
# Ask Claude, with the web search tool attached, for a JSON-only answer and
# print just the text parts of the reply.
claude = ChatAnthropic(
    model="claude-3-7-sonnet-20250219",  # or "claude-sonnet-4-20250514", "claude-opus-4-1-20250805", etc.
    temperature=0,
    # Pass the server tool directly; no client implementation needed
    model_kwargs={
        "tools": [{
            "type": "web_search_20250305",
            "name": "web_search",
            "max_uses": 3,
        }]
    },
)
messages = [
    SystemMessage(content="Return ONLY JSON: { answer: string, sources: string[] }"),
    HumanMessage(content="What is the most recent major AI policy update? Include links."),
]
# Store the reply in `msg` only. A chained assignment that also targets
# `AIMessage` would rebind the imported class name to this response object.
msg = claude.invoke(messages)
text = extract_text_from_ai_message(msg)
print(text)