# Module 1: Basics - Parsing LLM Outputs

This module teaches you how to:
1. Parse structured outputs from LLMs
2. Use Pydantic models for validation
3. Handle parsing errors
4. Extract specific information from responses

## Setup

In [None]:
from langchain_ollama import ChatOllama
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser, CommaSeparatedListOutputParser
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from pydantic import BaseModel, Field
from typing import List
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if any) before configuring the model client.
load_dotenv()

# Connection settings for the locally served Ollama chat model.
ollama_settings = {
    "model": "moondream:latest",
    "temperature": 0.7,
    "base_url": "http://localhost:11434",
}
llm = ChatOllama(**ollama_settings)

print("Setup complete! Using local Ollama model: moondream")

## Define Pydantic Models

Pydantic models define the structure of the output we expect from the LLM.

In [None]:
# Define Pydantic models for structured output

class Person(BaseModel):
    """Information about a person"""
    # Schema used by the PydanticOutputParser instances built from this class
    # elsewhere in the file (`pydantic_object=Person`).
    # NOTE(review): the class docstring and the Field descriptions presumably
    # end up in the parser's format instructions, so rewording them changes the
    # prompt sent to the model -- confirm before editing.

    # Required text field.
    name: str = Field(description="The person's full name")
    # Required integer field.
    age: int = Field(description="The person's age")
    # Required text field.
    occupation: str = Field(description="The person's job or role")
    # Required list of strings; joined with ", " when the parsed result is printed.
    hobbies: List[str] = Field(description="List of hobbies")

class Recipe(BaseModel):
    """Recipe information"""
    # Schema used by the recipe example (`PydanticOutputParser(pydantic_object=Recipe)`).
    # NOTE(review): the class docstring and the Field descriptions presumably
    # end up in the parser's format instructions -- confirm before rewording.

    # Required text field.
    name: str = Field(description="Name of the recipe")
    # Required list of strings; printed one per line as a bullet.
    ingredients: List[str] = Field(description="List of ingredients")
    # Required list of strings; printed as a numbered list starting at 1.
    steps: List[str] = Field(description="Cooking steps")
    # Required integer; the description asks the model for minutes.
    prep_time: int = Field(description="Preparation time in minutes")

# Confirmation message so the cell output shows that the model classes were defined.
print("Pydantic models defined!")

## Example 1: Pydantic Output Parser

Use `PydanticOutputParser` to get structured, validated output from the LLM.

In [None]:
# Build the parser that validates model output against the Person schema.
parser = PydanticOutputParser(pydantic_object=Person)

# Show the instruction text the parser generates; it is injected into the
# prompt through the {format_instructions} placeholder.
print("Format Instructions for LLM:", parser.get_format_instructions(), sep="\n")

In [None]:
# Chat template whose system turn carries the parser's format instructions.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. {format_instructions}"),
        ("human", "Tell me about a fictional person named Alex who is a software engineer."),
    ]
)

# Fill the placeholder, then send the rendered messages to the model.
person_instructions = parser.get_format_instructions()
formatted_prompt = prompt.format_messages(format_instructions=person_instructions)
response = llm.invoke(formatted_prompt)

print("Raw Response:", response.content, sep="\n")

In [None]:
# Parse the raw model text into a validated Person instance.
# Only the parse call sits inside `try`, so a problem in the reporting code
# below is never mislabelled as a parsing error.
try:
    parsed_output = parser.parse(response.content)
except Exception as e:
    # The model's text may not match the schema; report it and keep going.
    print(f"❌ Parsing error: {e}")
else:
    print("\n✅ Parsed Output:")
    print(f"  Name: {parsed_output.name}")
    print(f"  Age: {parsed_output.age}")
    print(f"  Occupation: {parsed_output.occupation}")
    print(f"  Hobbies: {', '.join(parsed_output.hobbies)}")

## Example 2: List Output Parser

`CommaSeparatedListOutputParser` extracts comma-separated lists from LLM output.

In [None]:
# Parser for replies that arrive as a comma-separated list
list_parser = CommaSeparatedListOutputParser()

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. {format_instructions}"),
        ("human", "List 5 programming languages."),
    ]
)

# Render the template with the list parser's instructions, then query the model.
list_instructions = list_parser.get_format_instructions()
formatted_prompt = prompt.format_messages(format_instructions=list_instructions)
response = llm.invoke(formatted_prompt)

print(f"Raw Response: {response.content}")

In [None]:
# Turn the raw reply into a list of items.
# Only the parse call is guarded; the printing runs in `else` on success.
try:
    parsed_list = list_parser.parse(response.content)
except Exception as e:
    print(f"❌ Parsing error: {e}")
else:
    print("\n✅ Parsed List:")
    # Number the items from 1 for display.
    for i, item in enumerate(parsed_list, 1):
        print(f"  {i}. {item}")

## Example 3: Structured Output Parser

Use `ResponseSchema` to define exactly what fields you want in the output.

In [None]:
# (field name, description) pairs for the book fields requested from the model
book_fields = [
    ("title", "Title of the book"),
    ("author", "Author of the book"),
    ("genre", "Genre of the book"),
    ("year", "Publication year"),
]
response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in book_fields
]

structured_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Show the instruction text generated from the schemas.
print("Format Instructions:", structured_parser.get_format_instructions(), sep="\n")

In [None]:
# Chat template for the book example; the system turn carries the format instructions.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a book expert. {format_instructions}"),
        ("human", "Tell me about a famous science fiction book."),
    ]
)

# Render the template with the structured parser's instructions and query the model.
book_instructions = structured_parser.get_format_instructions()
formatted_prompt = prompt.format_messages(format_instructions=book_instructions)
response = llm.invoke(formatted_prompt)

print(f"Raw Response:\n{response.content}")

In [None]:
# Parse the reply into a mapping of the requested fields.
# Only the parse call is guarded; the printing runs in `else` on success.
try:
    parsed_output = structured_parser.parse(response.content)
except Exception as e:
    print(f"❌ Parsing error: {e}")
else:
    print("\n✅ Parsed Output:")
    for key, value in parsed_output.items():
        print(f"  {key}: {value}")

## Example 4: Recipe Parser (Complex Pydantic Model)

Parse more complex structured data like recipes.

In [None]:
# Parser that validates model output against the Recipe schema.
recipe_parser = PydanticOutputParser(pydantic_object=Recipe)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a chef. {format_instructions}"),
        ("human", "Create a simple recipe for {dish}."),
    ]
)

# Values for the two template placeholders.
template_values = {
    "format_instructions": recipe_parser.get_format_instructions(),
    "dish": "chocolate chip cookies",
}
formatted_prompt = prompt.format_messages(**template_values)

response = llm.invoke(formatted_prompt)
print(f"Raw Response:\n{response.content}")

In [None]:
# Parse the reply into a validated Recipe instance.
# Only the parse call is guarded; the report runs in `else` on success.
try:
    recipe = recipe_parser.parse(response.content)
except Exception as e:
    print(f"❌ Parsing error: {e}")
    print("The LLM output might not match the expected format.")
else:
    print(f"\n🍪 Recipe: {recipe.name}")
    print(f"\n⏱️ Prep Time: {recipe.prep_time} minutes")
    print("\n🥄 Ingredients:")
    for ingredient in recipe.ingredients:
        print(f"  - {ingredient}")
    print("\n📝 Steps:")
    # Number the steps from 1 for display.
    for i, step in enumerate(recipe.steps, 1):
        print(f"  {i}. {step}")

## Example 5: Error Handling in Parsing

Always handle parsing errors gracefully since LLM outputs can be unpredictable.

In [None]:
def safe_parse(parser, response_content, prompt=None):
    """Parse LLM output, returning ``None`` instead of raising on failure.

    Args:
        parser: Object exposing ``parse(text)`` and, for the fallback attempt,
            ``parse_with_prompt(text, prompt)``.
        response_content: Raw text produced by the model.
        prompt: Optional prompt forwarded to ``parse_with_prompt`` for a second
            attempt. A falsy value (including the default ``None``) skips the
            fallback.

    Returns:
        Whatever the parser returns on success, or ``None`` when every
        attempt raised.

    Note:
        Failures are reported with ``print`` and never re-raised.
        NOTE(review): whether ``parse_with_prompt`` can succeed where ``parse``
        failed depends on the parser implementation -- confirm for the parser
        in use.
    """
    try:
        return parser.parse(response_content)
    except Exception as e:
        print(f"⚠️ Primary parsing failed: {e}")

    # Without a prompt there is nothing extra to hand to the fallback.
    if not prompt:
        return None

    try:
        return parser.parse_with_prompt(response_content, prompt)
    except Exception as e2:
        print(f"⚠️ Recovery also failed: {e2}")
        return None

# Test with a potentially problematic prompt: it gives the model little to go
# on, so the reply may not match the Person schema.
person_parser = PydanticOutputParser(pydantic_object=Person)

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. {format_instructions}"),
    ("human", "Tell me about someone.")
])

# format_prompt() yields a PromptValue, which is what parse_with_prompt()
# expects as its prompt argument; the plain message list is still derived
# from it for the model call.
prompt_value = prompt.format_prompt(
    format_instructions=person_parser.get_format_instructions()
)
formatted_prompt = prompt_value.to_messages()

response = llm.invoke(formatted_prompt)
print(f"Raw Response:\n{response.content}\n")

# Use safe parsing; it returns None when every parsing attempt failed.
result = safe_parse(person_parser, response.content, prompt_value)

# Compare against the None sentinel explicitly rather than relying on the
# truthiness of the parsed object.
if result is not None:
    print("\n✅ Successfully parsed:")
    print(f"  Name: {result.name}")
    print(f"  Age: {result.age}")
else:
    print("\n❌ Could not parse the response")

## Example 6: Using Parsers in Chains

Integrate parsers directly into LCEL (LangChain Expression Language) chains, so the parser runs as the final step of the pipeline.

In [None]:
# Create a complete chain with parsing
list_parser = CommaSeparatedListOutputParser()

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. {format_instructions}"),
    ("human", "List 5 {category}.")
])

# Chain: prompt -> llm -> parser
chain = prompt | llm | list_parser

# The instructions do not depend on the category, so compute them once
# instead of on every loop iteration.
format_instructions = list_parser.get_format_instructions()

# Test with different categories
categories = ["fruits", "countries", "programming concepts"]

for category in categories:
    # Guard only the chain call; a failure for one category must not stop the rest.
    try:
        result = chain.invoke({
            "category": category,
            "format_instructions": format_instructions,
        })
    except Exception as e:
        print(f"\n❌ Failed for {category}: {e}")
    else:
        print(f"\n📌 {category.upper()}:")
        for i, item in enumerate(result, 1):
            print(f"   {i}. {item}")

## Key Takeaways

- **Output Parsers** structure LLM responses into usable data
- **Pydantic models** provide type safety and validation
- **CommaSeparatedListOutputParser** extracts lists
- **StructuredOutputParser** handles custom schemas
- **Always handle parsing errors** gracefully
- Parsers can be integrated into **LCEL chains**

## Exercise: Create Your Own Parser

Create a Pydantic model for a movie and parse movie information from the LLM.

In [None]:
# Your turn! Create a Movie model and parser

class Movie(BaseModel):
    """Information about a movie"""
    # Schema for the exercise; `movie_parser` is built from this class.
    # NOTE(review): the class docstring and the Field descriptions presumably
    # end up in the parser's format instructions -- confirm before rewording.

    # Required text field.
    title: str = Field(description="Title of the movie")
    # Required text field.
    director: str = Field(description="Director's name")
    # Required integer field.
    year: int = Field(description="Release year")
    # Required text field.
    genre: str = Field(description="Movie genre")
    # Required float; the description asks for a 0-10 scale, but no bounds are
    # declared on the field itself.
    rating: float = Field(description="Rating out of 10")

# Parser that validates model output against the Movie schema.
movie_parser = PydanticOutputParser(pydantic_object=Movie)

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a movie expert. {format_instructions}"),
    ("human", "Tell me about a classic sci-fi movie.")
])

formatted_prompt = prompt.format_messages(
    format_instructions=movie_parser.get_format_instructions()
)

response = llm.invoke(formatted_prompt)
print(f"Raw Response:\n{response.content}\n")

# Only the parse call is guarded; the report runs in `else` on success.
try:
    movie = movie_parser.parse(response.content)
except Exception as e:
    print(f"❌ Parsing error: {e}")
else:
    print(f"\n🎬 Movie: {movie.title}")
    print(f"🎥 Director: {movie.director}")
    print(f"📅 Year: {movie.year}")
    print(f"🎭 Genre: {movie.genre}")
    print(f"⭐ Rating: {movie.rating}/10")