In [1]:
## Imports

# Core imports
import os
import json
from typing import List, Optional, Dict, Any
from enum import Enum

# Pydantic imports
from pydantic import BaseModel, Field, validator

# LangChain imports
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.output_parsers import DatetimeOutputParser
from langchain.output_parsers import EnumOutputParser
from langchain_openai import ChatOpenAI
from langchain.schema import OutputParserException

# Optional imports for enhanced functionality
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

In [None]:
## Setup

# Set your OpenAI API key
os.environ["OPENAI_API_KEY"] = " "

# Initialize LLM
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")



In [3]:
## Demo 1: Pydantic Output Parser

# Define Pydantic model for structured output
class PersonInfo(BaseModel):
    name: str = Field(description="person's full name")
    age: int = Field(description="person's age in years")
    occupation: str = Field(description="person's job or profession")
    skills: List[str] = Field(description="list of key skills")
    
    # Optional validator
    @validator('age')
    def age_must_be_positive(cls, v):
        if v <= 0:
            raise ValueError('Age must be positive')
        return v

# Create parser
parser = PydanticOutputParser(pydantic_object=PersonInfo)

# Create prompt with format instructions
prompt = PromptTemplate(
    template="Extract information about the following person:\n{query}\n{format_instructions}",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [4]:
# Example usage
def parse_person_info(text):
    formatted_prompt = prompt.format_prompt(query=text)
    output = llm.predict(formatted_prompt.to_string())
    
    try:
        parsed = parser.parse(output)
        return parsed
    except OutputParserException as e:
        print(f"Parsing error: {e}")
        # Use OutputFixingParser as fallback
        fix_parser = OutputFixingParser.from_llm(parser=parser, llm=llm)
        parsed = fix_parser.parse(output)
        return parsed

# Test
text = "John Smith is a 35-year-old software engineer skilled in Python, Docker, and AWS"
result = parse_person_info(text)
print(f"Name: {result.name}")
print(f"Age: {result.age}")
print(f"Occupation: {result.occupation}")
print(f"Skills: {result.skills}")

Name: John Smith
Age: 35
Occupation: Software Engineer
Skills: ['Python', 'Docker', 'AWS']


In [5]:
## Demo 2: List Output Parser

# Simple comma-separated list parser
list_parser = CommaSeparatedListOutputParser()

list_prompt = PromptTemplate(
    template="List 5 {category}:\n{format_instructions}",
    input_variables=["category"],
    partial_variables={"format_instructions": list_parser.get_format_instructions()}
)

def get_list(category):
    prompt_text = list_prompt.format(category=category)
    output = llm.predict(prompt_text)
    return list_parser.parse(output)

# Test
items = get_list("programming languages")
print("Languages:", items)

Languages: ['Java', 'Python', 'C++', 'JavaScript', 'Ruby']


In [6]:
## Demo 3: Structured Output Parser (Alternative to Pydantic)

# Define response schemas
response_schemas = [
    ResponseSchema(name="product", description="name of the product"),
    ResponseSchema(name="price", description="price in USD"),
    ResponseSchema(name="in_stock", description="whether item is in stock (true/false)")
]

structured_parser = StructuredOutputParser.from_response_schemas(response_schemas)

structured_prompt = PromptTemplate(
    template="Extract product information from: {text}\n{format_instructions}",
    input_variables=["text"],
    partial_variables={"format_instructions": structured_parser.get_format_instructions()}
)

def parse_product(text):
    prompt_text = structured_prompt.format(text=text)
    output = llm.predict(prompt_text)
    return structured_parser.parse(output)

# Test
product_text = "The new iPhone 15 Pro costs $999 and is currently available"
product_info = parse_product(product_text)
print("Product Info:", product_info)

Product Info: {'product': 'iPhone 15 Pro', 'price': '$999', 'in_stock': 'true'}


In [7]:
## Demo 4: Complex Nested Pydantic Model

class Address(BaseModel):
    street: str
    city: str
    state: str
    zip_code: str

class Company(BaseModel):
    name: str = Field(description="company name")
    industry: str = Field(description="primary industry")
    employees: int = Field(description="number of employees")
    headquarters: Address = Field(description="company headquarters location")
    departments: List[str] = Field(description="list of main departments")

company_parser = PydanticOutputParser(pydantic_object=Company)

company_prompt = PromptTemplate(
    template="Extract company information from: {text}\n{format_instructions}",
    input_variables=["text"],
    partial_variables={"format_instructions": company_parser.get_format_instructions()}
)

def parse_company(text):
    prompt_text = company_prompt.format(text=text)
    output = llm.predict(prompt_text)
    return company_parser.parse(output)

# Test
company_text = """
TechCorp is a software company with 500 employees headquartered at 
123 Main St, San Francisco, CA 94105. They operate in the technology 
industry with departments including Engineering, Sales, Marketing, and HR.
"""
company = parse_company(company_text)
print(f"Company: {company.name}")
print(f"Employees: {company.employees}")
print(f"HQ City: {company.headquarters.city}")

Company: TechCorp
Employees: 500
HQ City: San Francisco


In [8]:
## Demo 5: Enum Parser

class Priority(Enum):
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"

enum_parser = EnumOutputParser(enum=Priority)

enum_prompt = PromptTemplate(
    template="What priority level is this task: {task}\n{format_instructions}",
    input_variables=["task"],
    partial_variables={"format_instructions": enum_parser.get_format_instructions()}
)

def get_priority(task):
    prompt_text = enum_prompt.format(task=task)
    output = llm.predict(prompt_text)
    return enum_parser.parse(output)

# Test
priority = get_priority("Fix critical production bug")
print(f"Priority: {priority.value}")

Priority: high


In [9]:
## Quick Test Function

def test_all_parsers():
    print("Testing all parsers...")
    
    # Test each parser with simple examples
    try:
        # Pydantic
        person = parse_person_info("Alice Johnson, 28, data scientist, knows Python and SQL")
        print(f"1. Pydantic: {person.name} - {person.occupation}")
        
        # List
        langs = get_list("databases")
        print(f"2. List: {langs[:3]}")
        
        # Structured
        product = parse_product("MacBook Pro M3 available for $2499")
        print(f"3. Structured: {product}")
        
        # Enum
        prio = get_priority("Update documentation")
        print(f"4. Enum: {prio.value}")
        
    except Exception as e:
        print(f"Error: {e}")

# Run test
test_all_parsers()

Testing all parsers...
1. Pydantic: Alice Johnson - data scientist
2. List: ['1. MySQL', '2. PostgreSQL', '3. MongoDB']
3. Structured: {'product': 'MacBook Pro M3', 'price': '$2499', 'in_stock': 'true'}
4. Enum: medium


LangChain Output Parser Demos
Demo 1: Pydantic Output Parser

Extracts structured person information from text
Defines typed fields: name, age, occupation, skills list
Includes validation to ensure age is positive
Automatically fixes parsing errors using OutputFixingParser
Returns strongly-typed Python object with full IDE support

Demo 2: List Output Parser

Generates comma-separated lists from prompts
Simple format for brainstorming or enumeration tasks
Example: "List 5 programming languages"
Returns clean Python list ready for iteration
No complex structure needed

Demo 3: Structured Output Parser

Alternative to Pydantic using ResponseSchema definitions
Extracts product information: name, price, availability
Returns dictionary format
Good for quick prototypes without class definitions
Less type safety than Pydantic

Demo 4: Nested Pydantic Model

Handles complex company data with nested address object
Demonstrates multi-level data structures
Includes departments list and headquarters location
Shows how Pydantic handles relationships between objects
Useful for real-world hierarchical data

Demo 5: Enum Parser

Classifies text into predefined categories
Example: task priority (HIGH, MEDIUM, LOW)
Ensures output matches exact enum values
Prevents invalid classifications
Perfect for status fields or category assignments
