In [2]:
# Load variables from a .env file into the process environment; the API keys
# read through os.environ in the cells below are expected to come from there.
from dotenv import load_dotenv
load_dotenv()

True

## OpenAI

In [5]:
import os
from openai import OpenAI

# Build the client explicitly from the environment. Passing the key is
# optional: it is the SDK's default lookup and can be omitted.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# Send a single-turn user message to the chat completions endpoint.
chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "Say this is a test",
        }
    ],
)

# Show the reply so the cell has a visible result, like the Anthropic example
# that prints its message content.
print(chat_completion.choices[0].message.content)

## Anthropic

In [3]:
import os
from anthropic import Anthropic

# Anthropic client configured from the environment. Either variable may be
# unset, in which case None is handed to the SDK.
client = Anthropic(
    base_url=os.environ.get("ANTHROPIC_BASE_URL"),
    api_key=os.environ.get("ANTHROPIC_API_KEY"),
)

# One-turn conversation with max_tokens set to 1024.
message = client.messages.create(
    model="claude-3-opus-20240229",
    messages=[{"role": "user", "content": "Hello, Claude"}],
    max_tokens=1024,
)
print(message.content)

[TextBlock(text="Hello! It's nice to meet you. How are you doing today?", type='text')]


## LangChain

### Minimal examples

In [4]:
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_openai import ChatOpenAI

In [5]:
# Chat model shared by the `prompt | model | parser` chains in the cells below
# (temperature fixed at 0).
model = ChatOpenAI(temperature=0)

In [6]:
# Define your desired data structure.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator("setup")
    def question_ends_with_question_mark(cls, field):
        """Reject a setup that does not end with a question mark.

        Args:
            field: The candidate value of ``setup``.

        Returns:
            The value unchanged when it ends with "?".

        Raises:
            ValueError: If the value is empty or does not end with "?".
        """
        # endswith() also covers the empty string, where field[-1] would raise
        # IndexError rather than the ValueError used to signal a bad value.
        if not field.endswith("?"):
            raise ValueError("Badly formed question!")
        return field


# A query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Build the parser first so its format instructions can be pre-filled into the
# prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)
prompt = PromptTemplate(
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    input_variables=["query"],
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# prompt -> model -> parser; the final expression is the cell's displayed result.
chain = prompt | model | parser
chain.invoke(dict(query=joke_query))

Joke(setup="Why couldn't the bicycle stand up by itself?", punchline='Because it was two tired!')

In [7]:
# A second example whose schema includes a compound (list-typed) field.
class Actor(BaseModel):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of names of films they starred in")


actor_query = "Generate the filmography for a random actor."

# The parser supplies the format instructions that are pre-filled into the prompt.
parser = PydanticOutputParser(pydantic_object=Actor)
prompt = PromptTemplate(
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    input_variables=["query"],
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# prompt -> model -> parser; the final expression is the cell's displayed result.
chain = prompt | model | parser
chain.invoke(dict(query=actor_query))

Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'The Green Mile', 'Apollo 13', 'Philadelphia', 'Captain Phillips', 'Sully', 'The Da Vinci Code'])

In [None]:
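# Failing parse example (setup for retry handling): the cells below define a model whose validator rejects the LLM output.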

In [8]:
from pydantic import (
    BaseModel,
    ValidationError,
    ValidationInfo,
    field_validator,
)

In [9]:
# Local imports keep this cell self-contained. The notebook-level `Field` comes
# from langchain_core.pydantic_v1, while `BaseModel` here comes from `pydantic`
# (v2, as `field_validator` is used); a v2 model does not read a v1 `Field` as
# field metadata, so the v2 `Field` is imported under its own name.
from langchain_core.exceptions import OutputParserException
from pydantic import Field as FieldV2


# Example with a custom validator that the LLM output is expected to violate.
class Actor(BaseModel):
    name: str = FieldV2(description="name of an actor")
    country_origin: str = FieldV2(description="Country they were born in")

    @field_validator('country_origin')
    @classmethod
    def country_upper(cls, v: str) -> str:
        """Require the country to be written entirely in upper case.

        Args:
            v: The candidate value of ``country_origin``.

        Returns:
            The value unchanged when ``v.isupper()`` is true.

        Raises:
            ValueError: If the value is not all caps.
        """
        if not v.isupper():
            raise ValueError('Must be all caps!')
        return v


actor_query = "Generate the info for a random actor."

parser = PydanticOutputParser(pydantic_object=Actor)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | model | parser

# The validation failure is the point of this example: catch and display it so
# that a full "Run All" can continue past this cell.
try:
    print(repr(chain.invoke({"query": actor_query})))
except OutputParserException as exc:
    print(f"OutputParserException: {exc}")



OutputParserException: Failed to parse Actor from completion {"name": "Tom Hanks", "country_origin": "United States"}. Got: 1 validation error for Actor
country_origin
  Value error, Must be all caps! [type=value_error, input_value='United States', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/value_error

### Blog example

In [2]:
import feedparser
podcast_atom_link = "https://api.substack.com/feed/podcast/1084089.rss"  # latent space podcast
episode_title = "Why AI Agents Don't Work (yet) - with Kanjun Qiu of Imbue"
parsed = feedparser.parse(podcast_atom_link)

# Take the first entry whose title matches exactly. An empty or changed feed
# would otherwise surface as a bare "list index out of range"; raise the same
# exception type with a message that names what was being looked up.
episode = next((ep for ep in parsed.entries if ep['title'] == episode_title), None)
if episode is None:
    raise IndexError(f"No episode titled {episode_title!r} found in {podcast_atom_link}")

episode_summary = episode['summary']
print(episode_summary[:100])

<p><em>Thanks to the </em><em>over 11,000 people</em><em> who joined us for the first AI Engineer Su


In [3]:
from unstructured.partition.html import partition_html
# Partition the episode's HTML summary into elements. The summary is already a
# str (it is sliced and printed above), so it is passed directly instead of
# being rebuilt character by character with ''.join.
parsed_summary = partition_html(text=episode_summary)

# The transcript starts right after the element whose text is exactly
# "Transcript"; list.index raises ValueError when no such element exists.
element_texts = [x.text for x in parsed_summary]
start_of_transcript = element_texts.index("Transcript") + 1
print(f"First line of the transcript: {start_of_transcript}")

# Shorten the transcript for speed & cost.
max_transcript_chars = 3508
text = '\n'.join(element_texts[start_of_transcript:])
text = text[:max_transcript_chars]

First line of the transcript: 60


In [4]:
# Display the shortened transcript as the cell output.
text

"Alessio: Hey everyone, welcome to the Latent Space Podcast. This is Alessio, Partner and CTO at Residence at Decibel Partners, and I'm joined by my co-host Swyx, founder of Smol.ai. [00:00:19]\nSwyx: Hey, and today in the studio we have Kanjun from Imbue. Welcome. So you and I have, I guess, crossed paths a number of times. You're formerly named Generally Intelligent and you've just announced your rename, rebrand in huge, humongous ways. So congrats on all of that. And we're here to dive in into deeper detail on Imbue. We like to introduce you on a high level basis, but then have you go into a little bit more of your personal side. So you graduated your BS at MIT and you also spent some time at the MIT Media Lab, one of the most famous, I guess, computer hacking labs in the world. Then you graduated MIT and you went straight into BizOps at Dropbox, where you're eventually chief of staff, which is a pretty interesting role we can dive into later. And then it seems like the founder bug 

## Pydantic V1

In [10]:
from pydantic.v1 import BaseModel as BaseModelV1

In [11]:
# Same filmography example, this time declared on the pydantic.v1 base class.
class Actor(BaseModelV1):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of names of films they starred in")


actor_query = "Generate the filmography for a random actor."

# Parser first, so its format instructions can be pre-filled into the prompt.
parser = PydanticOutputParser(pydantic_object=Actor)
prompt = PromptTemplate(
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    input_variables=["query"],
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# prompt -> model -> parser; the final expression is the cell's displayed result.
chain = prompt | model | parser
chain.invoke(dict(query=actor_query))

Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'The Green Mile', 'Apollo 13', 'Philadelphia', 'Captain Phillips', 'Sully', 'The Da Vinci Code'])

## LangChain Anthropic

In [13]:
from langchain_anthropic import ChatAnthropic
# Claude chat model for the LangChain chain below.
# NOTE(review): unlike the raw Anthropic client earlier in this notebook, no
# base URL is passed here. If ANTHROPIC_BASE_URL points at a proxy, that may be
# why the chain below fails with a 401 — confirm.
model_anthropic = ChatAnthropic(model='claude-3-opus-20240229')

In [14]:
# Local import keeps this cell self-contained. By this point `BaseModel` has
# been re-imported from `pydantic` (v2), while the notebook-level `Field` still
# comes from langchain_core.pydantic_v1; a v2 model does not read a v1 `Field`
# as field metadata, so the v2 `Field` is imported under its own name.
from pydantic import Field as FieldV2


# The filmography example again, this time run through the Anthropic chat model.
class Actor(BaseModel):
    name: str = FieldV2(description="name of an actor")
    film_names: List[str] = FieldV2(description="list of names of films they starred in")


actor_query = "Generate the filmography for a random actor."

parser = PydanticOutputParser(pydantic_object=Actor)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | model_anthropic | parser

chain.invoke({"query": actor_query})



AuthenticationError: Error code: 401 - {'type': 'error', 'error': {'type': 'authentication_error', 'message': 'invalid x-api-key'}}

## Summary