# Simple California Landscape Metrics (CLM) Dataset Search Agent with Logfire

This notebook demonstrates a basic agent that can:
1. Search for datasets based on topics
2. Answer questions about dataset metadata
3. Maintain conversation history

### Step 0: Set Up Logfire

Run this in your terminal:
```bash
logfire auth
```

This opens a browser to authenticate and saves your credentials. You only need to do this once.


### Step 1: Install Required Packages

In [None]:
!pip -q install python-dotenv ipywidgets pydantic-ai fastmcp openai nest-asyncio logfire opentelemetry-instrumentation-openai opentelemetry-instrumentation-httpx

### Step 2: Import and Setup

In [None]:
import asyncio
import os
import nest_asyncio
from pydantic_ai import Agent, RunContext
from fastmcp import Client
from dataclasses import dataclass
from typing import Optional
import ipywidgets as widgets
from IPython.display import display, clear_output
from datetime import datetime
import logfire


import logfire
from opentelemetry.instrumentation.openai import OpenAIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor

# Configure Logfire for this notebook; every span/log below is tagged with this service name.
# NOTE(review): the status messages printed below claim "cloud-only" logging with no local
# logs. Only `service_name` is passed here, so that claim depends on Logfire's defaults
# (e.g. console output) - confirm against the Logfire configuration docs.
logfire.configure(
    service_name="ndp-clm-agent",
)

# Instrument OpenAI and HTTPX after logfire configuration
# (order matters: the instrumentors are enabled only once Logfire has been configured).
OpenAIInstrumentor().instrument()
HTTPXClientInstrumentor().instrument()

print("‚úì Everything initialized with cloud-only logging")
print("‚úì OpenAI API calls will be logged to Logfire cloud")
print("‚úì HTTP requests (MCP) will be logged to Logfire cloud")
print("‚úì No local logs created")

# Patch asyncio so nested event-loop use is allowed. Presumably required because the chat
# interface later calls `run_until_complete` from a widget callback while the notebook's
# own event loop is already running.
nest_asyncio.apply()

print("‚úì Everything initialized and connected to Logfire cloud")

### Step 3: API Keys and MCP

In [None]:
from dotenv import load_dotenv

# Load variables from a local .env file (if present) so the API keys below can be read
# from the process environment.
load_dotenv()

# Choose your model: "openai" or "nrp"
MODEL = "openai"  # Change to "nrp" to use Qwen3

# Check API keys based on model choice.
# This only prints a warning when the key is missing; it does not stop execution.
# Any MODEL value other than "openai" or "nrp" skips the check entirely.
if MODEL == "openai":
    openai_key = os.getenv('OPENAI_API_KEY')
    if not openai_key:
        print("‚ö†Ô∏è Warning: OPENAI_API_KEY not set!")
    else:
        print("‚úì OpenAI API key found - Using GPT-4o-mini")
elif MODEL == "nrp":
    nrp_key = os.getenv('NRP_API_KEY')
    if not nrp_key:
        print("‚ö†Ô∏è Warning: NRP_API_KEY not set!")
    else:
        print("‚úì NRP API key found - Using Qwen3")

# Initialize MCP client
# A single module-level client; the `search_datasets` tool opens it with `async with`
# around each call.
mcp_client = Client("https://wenokn.fastmcp.app/mcp")

@dataclass
class AgentContext:
    """Dependency object passed to the agent (as ``deps_type``) and to its tools."""

    # Best match stored by the `search_datasets` tool; stays None until a search succeeds.
    current_dataset: Optional[dict] = None

print("‚úì MCP client ready")

### Step 4: Create Agent with Logging

In [None]:
def get_model_config(model_name: str = "openai"):
    """Point the OpenAI-compatible client at the chosen backend and return its model id.

    The selection is made by mutating process environment variables
    (``OPENAI_BASE_URL`` / ``OPENAI_API_KEY``), so it affects every OpenAI client
    created afterwards in this process.

    Args:
        model_name: ``"nrp"`` selects the NRP endpoint (Qwen3); any other value
            selects the default OpenAI endpoint (GPT-4o-mini).

    Returns:
        The model string to pass to ``Agent``: ``'openai:qwen3'`` or
        ``'openai:gpt-4o-mini'``.
    """
    # Environment slot used to remember the real OpenAI key while the NRP key
    # occupies OPENAI_API_KEY. An env var (rather than a Python global) is used so
    # the stash survives re-running this cell, which redefines the function.
    backup_var = '_OPENAI_API_KEY_ORIGINAL'

    if model_name == "nrp":
        # Stash the OpenAI key only once: on repeated "nrp" calls OPENAI_API_KEY
        # already holds the NRP key and must not overwrite the stash.
        if backup_var not in os.environ:
            os.environ[backup_var] = os.environ.get('OPENAI_API_KEY', '')
        os.environ['OPENAI_BASE_URL'] = 'https://ellm.nrp-nautilus.io/v1'
        os.environ['OPENAI_API_KEY'] = os.getenv('NRP_API_KEY', '')
        return 'openai:qwen3'

    # Default backend: drop any base-URL override so the client uses OpenAI itself.
    os.environ.pop('OPENAI_BASE_URL', None)
    if backup_var in os.environ:
        # Coming back from "nrp": restore the key that was replaced, otherwise the
        # NRP key would be sent to OpenAI.
        os.environ['OPENAI_API_KEY'] = os.environ.pop(backup_var)
    else:
        # Ensure the variable exists (empty string when it was never set).
        os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', '')
    return 'openai:gpt-4o-mini'

# Module-level agent shared by the rest of the notebook.
# `get_model_config(MODEL)` is evaluated once, here, so changing MODEL afterwards
# requires re-running this cell. Tools receive an `AgentContext` as their dependencies.
agent = Agent(
    model=get_model_config(MODEL),
    deps_type=AgentContext,
    system_prompt="""You are a helpful assistant that helps users find and learn about California Landscape Metrics datasets.

You have access to a search_datasets tool that can find relevant datasets based on user queries.

When a user asks about a topic:
1. Use the search_datasets tool to find the most relevant dataset
2. Present the top result with key information (title, description, units)
3. Answer any follow-up questions about the dataset metadata

Be concise and helpful!"""
)

# Agent tool: forwards a search to the MCP server's "search_datasets" tool.
#
# Parameters:
#   ctx   - run context; on a hit the best match is stored on ctx.deps.current_dataset.
#   query - free-text topic, forwarded unchanged to the MCP tool.
#   top_k - forwarded unchanged to the MCP tool as "top_k".
#
# Returns a dict:
#   hit  -> {'success': True, 'top_dataset', 'alternatives', 'message'}
#   miss -> {'success': False, 'message', 'error'}
#
# NOTE: pydantic-ai is documented to expose a tool's docstring to the model as the tool
# description, so the docstring itself is deliberately left unchanged.
@agent.tool
async def search_datasets(
    ctx: RunContext[AgentContext],
    query: str,
    top_k: int = 3
) -> dict:
    """Search for datasets related to the query."""
    with logfire.span("search_datasets", query=query, top_k=top_k):
        async with mcp_client:
            result = await mcp_client.call_tool(
                "search_datasets",
                {"query": query, "top_k": top_k}
            )

            data = result.data
            # The tool result may carry no structured payload (or a non-dict one);
            # report that as a failed search instead of crashing on `.get`.
            if not isinstance(data, dict):
                logfire.warning(
                    "unexpected_search_response",
                    query=query,
                    response_type=type(data).__name__,
                )
                return {
                    'success': False,
                    'message': 'No datasets found',
                    'error': 'Unexpected response from search_datasets tool'
                }

            datasets = data.get('datasets')
            if not (data.get('success') and datasets):
                logfire.warning("no_datasets_found", query=query)
                return {
                    'success': False,
                    'message': 'No datasets found',
                    'error': data.get('error', 'Unknown error')
                }

            best_dataset = datasets[0]
            ctx.deps.current_dataset = best_dataset

            # Read the title once with `.get` so a dataset without a title cannot
            # raise KeyError while building the message.
            title = best_dataset.get('title')
            logfire.info("dataset_found", dataset_title=title, query=query)

            return {
                'success': True,
                'top_dataset': best_dataset,
                # Slicing past the end already yields an empty list.
                'alternatives': datasets[1:],
                'message': f"Found: {title or 'Untitled dataset'}"
            }

print("‚úì Agent created with automatic Logfire logging")

### Step 5: Conversational Agent

In [None]:
class ConversationalAgent:
    """Wrap an agent with a plain-text conversation history and a per-request timeout.

    Earlier turns are kept as ``"User: ..."`` / ``"Assistant: ..."`` strings and are
    prepended to every new question, so the agent sees the whole conversation as a
    single prompt.
    """

    def __init__(self, agent, model_name="openai"):
        """Store the agent and choose the default timeout for ``model_name``.

        Args:
            agent: Object exposing an awaitable ``run(prompt, deps=...)``.
            model_name: Backend label; ``"nrp"`` gets a longer default timeout.
        """
        self.agent = agent
        self.model_name = model_name
        self.history = []
        # NRP Qwen3 needs longer timeout
        self.default_timeout = 180 if model_name == "nrp" else 60

        logfire.info("session_started", model_name=model_name, timeout=self.default_timeout)

    async def ask(self, question: str, timeout: Optional[int] = None) -> str:
        """Send ``question`` (with prior history) to the agent and return its reply.

        Failures are returned as ``"Error: ..."`` strings rather than raised, so the
        caller can display them directly. A failed turn is not added to the history.

        Args:
            question: The user's new message.
            timeout: Seconds to wait for the agent; ``None`` uses the default
                chosen in ``__init__``.

        Returns:
            The agent's reply, or an error description.
        """
        if timeout is None:
            timeout = self.default_timeout

        # Only the first 100 characters of the question are attached to the span.
        with logfire.span("ask_question", question=question[:100]):
            if self.history:
                full_input = "\n".join(self.history) + f"\nUser: {question}"
            else:
                full_input = f"User: {question}"

            try:
                # A fresh AgentContext is created for every request.
                result = await asyncio.wait_for(
                    self.agent.run(full_input, deps=AgentContext()),
                    timeout=timeout
                )
                response = result.output if hasattr(result, 'output') else str(result)

                self.history.append(f"User: {question}")
                self.history.append(f"Assistant: {response}")

                logfire.info("response_generated", response_length=len(response))
                return response

            except asyncio.TimeoutError:
                logfire.error("timeout", timeout=timeout)
                return f"Error: Request timed out after {timeout} seconds."
            except Exception as e:
                # Logfire's keyword for attaching the active exception is `_exc_info`;
                # a plain `exc_info=True` is just stored as an ordinary attribute and
                # the traceback is lost.
                logfire.error(
                    "error",
                    error_type=type(e).__name__,
                    error_message=str(e),
                    _exc_info=True,
                )
                return f"Error: {type(e).__name__}: {str(e)}"

    def clear_history(self):
        """Forget all previous turns."""
        self.history = []
        logfire.info("history_cleared")

# Module-level conversation wrapper used by the chat UI; passing MODEL lets it pick the
# matching default timeout.
conv_agent = ConversationalAgent(agent, model_name=MODEL)
print(f"‚úì Conversational agent ready with {MODEL}")

### Step 6: Chat Interface

In [None]:
class SimpleChatInterface:
    """ipywidgets chat window in front of a conversational agent.

    The ``agent`` passed in must provide an awaitable ``ask(question)`` that returns
    the reply text, and a ``clear_history()`` method.
    """

    def __init__(self, agent):
        """Build the widgets, wire the button callbacks and show the welcome message."""
        self.agent = agent
        # Rendered message widgets, oldest first; mirrored into `output_area.children`.
        self.messages = []

        self.output_area = widgets.VBox(
            layout=widgets.Layout(border='1px solid #ddd', height='400px', overflow_y='auto', padding='10px', margin='10px 0')
        )

        self.input_box = widgets.Textarea(
            placeholder='Ask about datasets...',
            layout=widgets.Layout(width='100%', height='80px')
        )

        self.send_button = widgets.Button(description='Send', button_style='primary')
        self.clear_button = widgets.Button(description='Clear', button_style='warning')
        self.status_label = widgets.HTML(value="‚úÖ Ready")

        self.send_button.on_click(self.on_send)
        self.clear_button.on_click(self.on_clear)

        button_row = widgets.HBox([self.send_button, self.clear_button, self.status_label])
        self.interface = widgets.VBox([
            widgets.HTML("<h3>ü§ñ Dataset Search Agent</h3>"),
            self.output_area,
            self.input_box,
            button_row
        ])

        # Welcome message
        self.add_message(
            "Welcome! I can help you find California Landscape Metrics datasets.\n\n"
            "Try asking:\n"
            "‚Ä¢ Find datasets about carbon turnover\n"
            "‚Ä¢ What datasets are available for burn probability?\n"
            "‚Ä¢ Tell me about the units used in this dataset\n"
            "‚Ä¢ What's the description of this dataset?",
            "system"
        )

    def add_message(self, text, role="user"):
        """Append one message bubble to the chat area.

        Args:
            text: Message content. It is treated as plain text and HTML-escaped
                before rendering.
            role: ``"user"``, ``"assistant"`` or ``"system"``; unknown roles are
                styled as ``"system"``.
        """
        # Local import so this class does not depend on edits to the import cell.
        import html

        timestamp = datetime.now().strftime("%H:%M:%S")

        # role -> (accent colour, icon, label, background colour)
        colors = {
            "user": ("#007bff", "üë§", "You", "#e7f3ff"),
            "assistant": ("#28a745", "ü§ñ", "Agent", "#e8f5e9"),
            "system": ("#6c757d", "‚ÑπÔ∏è", "System", "#f8f9fa")
        }

        color, icon, label, bg = colors.get(role, colors["system"])

        # User input and model output are untrusted: without escaping, any '<' or '&'
        # in the text would be interpreted as markup (breaking the bubble or injecting
        # HTML into the notebook page).
        safe_text = html.escape(str(text))

        message = widgets.HTML(
            value=f"""<div style='margin: 10px 0; padding: 10px; background: {bg}; border-radius: 8px; border-left: 4px solid {color};'>
                <div style='display: flex; justify-content: space-between; margin-bottom: 5px;'>
                    <strong style='color: {color};'>{icon} {label}</strong>
                    <span style='color: #999; font-size: 0.85em;'>{timestamp}</span>
                </div>
                <div style='white-space: pre-wrap;'>{safe_text}</div>
            </div>"""
        )

        self.messages.append(message)
        self.output_area.children = tuple(self.messages)

    def on_send(self, button):
        """Send-button callback: show the question, ask the agent, show the reply."""
        question = self.input_box.value.strip()
        if not question:
            return

        self.add_message(question, "user")
        self.input_box.value = ""

        # Lock the inputs while the request is in flight.
        self.send_button.disabled = True
        self.input_box.disabled = True
        self.status_label.value = "<span style='color: orange;'>‚è≥ Thinking...</span>"

        try:
            # Blocks this callback until the agent has answered. Presumably relies on
            # nest_asyncio having been applied, since the notebook's event loop is
            # already running when widget callbacks fire.
            response = asyncio.get_event_loop().run_until_complete(self.agent.ask(question))
            self.add_message(response, "assistant")
            self.status_label.value = "<span style='color: green;'>‚úÖ Ready</span>"
        except Exception as e:
            self.add_message(f"Error: {str(e)}", "system")
            self.status_label.value = "<span style='color: red;'>‚ùå Error</span>"
        finally:
            # Always re-enable the inputs, even after a failure.
            self.send_button.disabled = False
            self.input_box.disabled = False

    def on_clear(self, button):
        """Clear-button callback: wipe the visible chat and the agent's history."""
        self.messages = []
        self.agent.clear_history()
        self.output_area.children = tuple(self.messages)
        self.add_message("Chat cleared!", "system")

    def display(self):
        """Render the interface in the current cell, replacing any earlier output."""
        clear_output(wait=True)
        # Inside the method body `display` resolves to the module-level IPython
        # function, not to this method.
        display(self.interface)

print("‚úì Chat interface ready")

### Step 7: Launch Chat

In [None]:
# Build the widget UI around the shared conversational agent and render it in this cell.
chat = SimpleChatInterface(conv_agent)
chat.display()

### Step 8: View Logs

In [None]:
# Record a final marker event, then tell the user where to find the logs.
logfire.info("setup_complete", message="Visit https://logfire.pydantic.dev to view all logs")

# One print call with a newline separator emits the same text as one call per line.
print(
    "‚úì Setup complete!",
    "\nüìä View your logs at: https://logfire.pydantic.dev",
    "\nAll CLM-MCP communications are automatically logged:",
    "  ‚Ä¢ Dataset searches",
    "  ‚Ä¢ Agent responses",
    "  ‚Ä¢ Errors and timeouts",
    "  ‚Ä¢ Performance metrics",
    sep="\n",
)