In [1]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is what supplies OPENAI_API_KEY, which the
# OpenAI cell further down reads from os.environ — confirm.
load_dotenv()

True

In [3]:
import feedparser
# RSS feed of the Latent Space podcast.
podcast_atom_link = "https://api.substack.com/feed/podcast/1084089.rss"
# Exact title of the episode whose summary/transcript is analysed below.
episode_title = "Why AI Agents Don't Work (yet) - with Kanjun Qiu of Imbue"

parsed = feedparser.parse(podcast_atom_link)

# Take the first entry whose title matches exactly. Fail with a descriptive
# error (instead of a bare IndexError from `[...][0]`) when the episode is not
# present, e.g. because the feed could not be fetched or the title changed.
episode = next((ep for ep in parsed.entries if ep['title'] == episode_title), None)
if episode is None:
    raise LookupError(f"Episode {episode_title!r} not found in feed {podcast_atom_link}")

episode_summary = episode['summary']
# Preview only the first 100 characters; the summary is a long HTML string.
print(episode_summary[:100])

<p><em>Thanks to the </em><em>over 11,000 people</em><em> who joined us for the first AI Engineer Su


In [4]:
from unstructured.partition.html import partition_html
# Maximum number of transcript characters kept; shortening the transcript
# keeps the LLM examples below fast and cheap.
max_transcript_chars = 3508

# `episode_summary` is already a single HTML string, so it is passed directly
# (joining its characters with '' first was a no-op).
parsed_summary = partition_html(text=episode_summary)

# Locate the element whose text is exactly "Transcript" and start right after
# it. `.index` raises ValueError if no such element exists.
start_of_transcript = [x.text for x in parsed_summary].index("Transcript") + 1
# Note: this prints the element index at which the transcript starts.
print(f"First line of the transcript: {start_of_transcript}")

full_transcript = '\n'.join(t.text for t in parsed_summary[start_of_transcript:])
text = full_transcript[:max_transcript_chars]

First line of the transcript: 60


In [5]:
# Inspect the truncated transcript; this `text` is what the Marvin models
# in the "Blog example" section are called with.
text

"Alessio: Hey everyone, welcome to the Latent Space Podcast. This is Alessio, Partner and CTO at Residence at Decibel Partners, and I'm joined by my co-host Swyx, founder of Smol.ai. [00:00:19]\nSwyx: Hey, and today in the studio we have Kanjun from Imbue. Welcome. So you and I have, I guess, crossed paths a number of times. You're formerly named Generally Intelligent and you've just announced your rename, rebrand in huge, humongous ways. So congrats on all of that. And we're here to dive in into deeper detail on Imbue. We like to introduce you on a high level basis, but then have you go into a little bit more of your personal side. So you graduated your BS at MIT and you also spent some time at the MIT Media Lab, one of the most famous, I guess, computer hacking labs in the world. Then you graduated MIT and you went straight into BizOps at Dropbox, where you're eventually chief of staff, which is a pretty interesting role we can dive into later. And then it seems like the founder bug 

## OpenAI

In [5]:
import os
from openai import OpenAI

# Create the client from the key stored in the environment. Passing `api_key`
# explicitly matches the library default and could be left out.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Send a single user message as a smoke test. The response is stored in
# `chat_completion`; nothing is displayed by this cell.
chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say this is a test"}],
)

## Marvin

### Minimal examples

In [2]:
import marvin
from pydantic import BaseModel

In [10]:
# Target type for the minimal Marvin examples: a city/state pair.
class Location(BaseModel):
    city: str
    state: str

# Convert a free-form phrase into a Location instance; the result is
# displayed as the cell output.
marvin.cast("the big apple", target=Location)

Location(city='New York', state='New York')

In [11]:
# `Location` is reused from the `marvin.cast` cell above; the identical
# second definition that used to live here only shadowed the first one.

# Extract every location mentioned in the sentence. The result is a list of
# Location objects (displayed in the next cell).
locations = marvin.extract(
    "They've got a game in NY, then they go to DC before Los Angeles.",
    target=Location
)

In [12]:
# Show the Location objects returned by marvin.extract.
locations

[Location(city='New York', state='NY'),
 Location(city='Washington', state='DC'),
 Location(city='Los Angeles', state='CA')]

In [13]:
# `Location` is reused from the `marvin.cast` cell above; the identical
# redefinition that used to live here only shadowed the first one.

# Generate four Location objects that satisfy the instruction.
# NOTE: this rebinds `locations`, replacing the marvin.extract result.
locations = marvin.generate(
    n=4,
    target=Location,
    instructions="US cities named after presidents",
)

In [15]:
# Show the Location objects returned by marvin.generate.
locations

[Location(city='Jackson', state='Mississippi'),
 Location(city='Madison', state='Wisconsin'),
 Location(city='Jefferson City', state='Missouri'),
 Location(city='Lincoln', state='Nebraska')]

In [16]:
# The function has no body of its own: it is wrapped by `marvin.fn`, and the
# values printed in the next cell come from calling the wrapped function.
# NOTE(review): presumably Marvin builds its prompt from the signature and the
# docstring, which would make the docstring text runtime-relevant — confirm
# before rewording it.
@marvin.fn
def sentiment(text: str) -> float:
    """
    Returns a sentiment score for `text` on a 
    scale of -1.0 (negative) to 1.0 (positive)
    """

In [18]:
# Score two sample sentences, one per printed line; the recorded run gave
# 0.8 for the first and -0.2 for the second.
for phrase in ("I love Marvin!", "This example could use some work..."):
    print(sentiment(phrase))

0.8
-0.2


In [19]:
import marvin

# Wrap the remote photo so it can be handed to Marvin's beta extract call.
img = marvin.beta.Image('https://images.unsplash.com/photo-1548199973-03cce0bbc87b')

# Extract the dog breeds visible in the picture as strings and display them.
result = marvin.beta.extract(
    img,
    target=str,
    instructions='dog breeds',
)
result

['Pembroke Welsh Corgi', 'Yorkshire Terrier mix']

In [4]:
# Retry example
from pydantic import BaseModel, Field, field_validator


class Actor(BaseModel):
    # Both fields carry a description; `country_origin` is additionally
    # constrained by the validator below.
    name: str = Field(description="name of an actor")
    country_origin: str = Field(description="Country they were born in")

    @field_validator('country_origin')
    @classmethod
    def country_upper(cls, v: str) -> str:
        # Accept the value unchanged only when it is already upper-case;
        # anything else is rejected rather than silently normalised.
        if v.isupper():
            return v
        raise ValueError('Must be all caps!')


# Prompt passed to marvin.generate to produce Actor instances.
actor_query = "Generate the info for a random actor that is not from the US."

In [5]:
from pydantic import ValidationError

# Generate Actor instances from `actor_query`. In the recorded run the model
# answered with 'Ireland', which the `country_upper` validator rejects.
# The error is the point of this demonstration, so it is caught and printed
# rather than left to abort a "Restart & Run All".
try:
    actors = marvin.generate(
        target=Actor,
        instructions=actor_query,
    )
except ValidationError as exc:
    actors = None
    print(f"{type(exc).__name__}: {exc}")

# Displays the generated actors when validation happened to succeed;
# shows nothing when the error above was printed.
actors

ValidationError: 1 validation error for FormatResponse
value.0.country_origin
  Value error, Must be all caps! [type=value_error, input_value='Ireland', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/value_error

### Blog example

In [12]:
from marvin import model
from pydantic import BaseModel
from typing import Optional, List
from pydantic import Field

In [7]:
# Schema for a single person to extract.
class Person(BaseModel):
    name: str
    # `...` marks these fields as required even though their type allows None,
    # so a value (possibly None) must always be supplied explicitly.
    school: Optional[str] = Field(..., description="The school this person attended")
    company: Optional[str] = Field(..., description="The company this person works for")

# Decorating the class with `marvin.model` lets it be called with raw text,
# as done on the last line of this cell.
@model
class People(BaseModel):
    people: List[Person]
# Build a People instance from the truncated transcript and display it.
People(text)

People(people=[Person(name='Alessio', school=None, company='Decibel Partners'), Person(name='Swyx', school=None, company='Smol.ai'), Person(name='Kanjun', school='MIT', company='Imbue')])

In [8]:
# Same container as in the previous cell, this time passing explicit
# instructions to `model`.
# NOTE: this rebinds the name `People`, replacing the earlier definition.
@model(instructions="Get the following information from the text")
class People(BaseModel):
    people: List[Person]
# In the recorded run this produced the same result as the version without
# instructions.
People(text)



People(people=[Person(name='Alessio', school=None, company='Decibel Partners'), Person(name='Swyx', school=None, company='Smol.ai'), Person(name='Kanjun', school='MIT', company='Imbue')])

In [9]:
# A company, identified by name only.
class Company(BaseModel):
    name: str


# A paper reference; the description is part of the field definition.
class ResearchPaper(BaseModel):
    paper_name: str = Field(..., description="an academic paper reference discussed")


# Aggregate schema combining people, companies and research papers.
# `research_papers` has no default, so it must be supplied, but it may be None.
@model(instructions="Get the following information from the text")
class ExtractedInfo(BaseModel):
    people: List[Person]
    companies: List[Company]
    research_papers: Optional[List[ResearchPaper]]


# Run the combined extraction on the truncated transcript and display it.
ExtractedInfo(text)

ExtractedInfo(people=[Person(name='Alessio', school=None, company='Decibel Partners'), Person(name='Swyx', school=None, company='Smol.ai'), Person(name='Kanjun', school='MIT', company='Imbue')], companies=[Company(name='Decibel Partners'), Company(name='Smol.ai'), Company(name='Imbue'), Company(name='Dropbox'), Company(name='Ember'), Company(name='Sorceress'), Company(name='Generally Intelligent'), Company(name='MIT Media Lab'), Company(name='OpenAI')], research_papers=None)

## Pydantic V1

In [13]:
from pydantic.v1 import BaseModel as BaseModelV1

In [16]:
from pydantic.errors import PydanticInvalidForJsonSchema

# NOTE: this rebinds `Location` to a pydantic-v1 model, shadowing the
# pydantic-v2 `Location` defined in the minimal examples above.
class Location(BaseModelV1):
    city: str
    state: str

# In the recorded run Marvin could not generate a JSON schema for the
# pydantic-v1 model. That failure is the finding of this section, so it is
# caught and printed rather than left to abort a "Restart & Run All".
try:
    v1_location = marvin.cast("the big apple", target=Location)
except PydanticInvalidForJsonSchema as exc:
    v1_location = None
    print(f"{type(exc).__name__}: {exc}")

# Displays the cast result if it ever succeeds; shows nothing otherwise.
v1_location

PydanticInvalidForJsonSchema: Cannot generate a JsonSchema for core_schema.PlainValidatorFunctionSchema ({'type': 'with-info', 'function': <bound method BaseModel.validate of <class '__main__.Location'>>})

For further information visit https://errors.pydantic.dev/2.7/u/invalid-for-json-schema

## Summary

It looks like Marvin is really slick and easy to use so far.

PRO:
- instantiates Pydantic models with ease
- very simple and intuitive API
- supports assistants and more complex workflows, but is very simple at its core

CON:
- very much focussed on OpenAI
- model settings are not clearly exposed, although configuring them seems possible to some extent

Still to verify:
- how will it deal with Vizro Pydantic models
- "pydantic>=2.4.2"