# Construct pydantic model from text input

In [1]:
from pydantic_ai import Agent
agent = Agent(model="google-gla:gemini-2.5-flash")

result = await agent.run("Give me an IT employee working in sweden, kepp it short")
result

AgentRunResult(output='**Astrid Karlsson**\n**Role:** Cloud Solutions Architect\n**Location:** Gothenburg, Sweden\n**Details:** Works for a major SaaS company, specializing in AWS integrations. Known for her problem-solving skills and love for *fika* breaks.')

In [2]:
print(result.output)

**Astrid Karlsson**
**Role:** Cloud Solutions Architect
**Location:** Gothenburg, Sweden
**Details:** Works for a major SaaS company, specializing in AWS integrations. Known for her problem-solving skills and love for *fika* breaks.


In [3]:
from pydantic import BaseModel, Field

class EmployeeModel(BaseModel):
    name: str
    age: int
    salary: int = Field(gt=30_000, lt=50_000)
    position: str

result = await agent.run(
    "Give me an IT employee working in sweden", output_type=EmployeeModel
)
result

AgentRunResult(output=EmployeeModel(name='Bjorn', age=35, salary=45000, position='Software Engineer'))

In [4]:
employee = result.output
employee

EmployeeModel(name='Bjorn', age=35, salary=45000, position='Software Engineer')

In [5]:
employee.name, employee.age, employee.position

('Bjorn', 35, 'Software Engineer')

In [6]:
employee.model_dump()

{'name': 'Bjorn', 'age': 35, 'salary': 45000, 'position': 'Software Engineer'}

In [7]:
employee.model_dump_json()

'{"name":"Bjorn","age":35,"salary":45000,"position":"Software Engineer"}'

In [8]:
print(employee.model_dump_json(indent=2))

{
  "name": "Bjorn",
  "age": 35,
  "salary": 45000,
  "position": "Software Engineer"
}


`Several employees or a list of employees`

In [9]:
result = await agent.run(
    """Give me ten employees in AI and data engineering fields,
    roles can vary, but salary must be between 30000 and 50000""",
    output_type=list[EmployeeModel],
)
employees = result.output
employees

[EmployeeModel(name='Alice Smith', age=30, salary=45000, position='AI Engineer'),
 EmployeeModel(name='Bob Johnson', age=35, salary=48000, position='Data Engineer'),
 EmployeeModel(name='Charlie Brown', age=28, salary=40000, position='ML Engineer'),
 EmployeeModel(name='Diana Prince', age=32, salary=42000, position='Data Scientist (AI Focus)'),
 EmployeeModel(name='Ethan Hunt', age=38, salary=49000, position='Senior Data Engineer'),
 EmployeeModel(name='Fiona Gallagher', age=29, salary=38000, position='Junior AI Developer'),
 EmployeeModel(name='George Weasley', age=33, salary=46000, position='AI Research Engineer'),
 EmployeeModel(name='Hannah Abbott', age=31, salary=39000, position='Data Pipeline Engineer'),
 EmployeeModel(name='Ivan Drago', age=36, salary=47000, position='Machine Learning Operations Engineer'),
 EmployeeModel(name='Julia Roberts', age=27, salary=35000, position='AI Solutions Developer')]

In [10]:
len(employees)

10

In [11]:
for employee in employees:
    print(f"{employee.name =} and {employee.salary = }")

employee.name ='Alice Smith' and employee.salary = 45000
employee.name ='Bob Johnson' and employee.salary = 48000
employee.name ='Charlie Brown' and employee.salary = 40000
employee.name ='Diana Prince' and employee.salary = 42000
employee.name ='Ethan Hunt' and employee.salary = 49000
employee.name ='Fiona Gallagher' and employee.salary = 38000
employee.name ='George Weasley' and employee.salary = 46000
employee.name ='Hannah Abbott' and employee.salary = 39000
employee.name ='Ivan Drago' and employee.salary = 47000
employee.name ='Julia Roberts' and employee.salary = 35000


## CV or resume model - a more complex and nested model

In [12]:
class ExperienceModel(BaseModel):
    title: str
    company: str
    description: str
    start_year: int
    end_year: int


class EducationModel(BaseModel):
    title: str
    education_area: str
    school: str
    description: str
    start_year: int
    end_year: int

class CvModel(BaseModel):
    name: str
    age: int
    experiences: list[ExperienceModel]
    educations: list[EducationModel]

result = await agent.run(
    "Create a swedish person applying for a data engineering position",
    output_type=CvModel,
)
resume =result.output
resume

CvModel(name='Anna Karlsson', age=32, experiences=[ExperienceModel(title='Data Engineer', company='Swedbank', description='Developed and maintained data pipelines, built ETL processes, and worked with large-scale distributed systems.', start_year=2018, end_year=2023), ExperienceModel(title='Junior Data Engineer', company='Telia', description='Assisted in the development of data warehousing solutions and supported data migration projects.', start_year=2016, end_year=2018)], educations=[EducationModel(title='Master of Science in Computer Science', education_area='Data Engineering', school='KTH Royal Institute of Technology', description='Specialized in distributed systems and big data technologies.', start_year=2014, end_year=2016), EducationModel(title='Bachelor of Science in Information Systems', education_area='Computer Science', school='Uppsala University', description='Focused on database management and software development.', start_year=2011, end_year=2014)])

In [13]:
resume.experiences, resume.experiences[0].title

([ExperienceModel(title='Data Engineer', company='Swedbank', description='Developed and maintained data pipelines, built ETL processes, and worked with large-scale distributed systems.', start_year=2018, end_year=2023),
  ExperienceModel(title='Junior Data Engineer', company='Telia', description='Assisted in the development of data warehousing solutions and supported data migration projects.', start_year=2016, end_year=2018)],
 'Data Engineer')

## Optional postprocessing -> load into duckdb and unnest


In [14]:
import dlt

pipeline = dlt.pipeline(
    pipeline_name="resume_json_duckdb",
    destination=dlt.destinations.duckdb("cv.duckdb"),
    dataset_name="staging",
)

info = pipeline.run(data=[result.output.model_dump()], loader_file_format="jsonl", table_name="cv_entries")

print(info)

Pipeline resume_json_duckdb load step completed in 0.04 seconds
1 load package(s) were loaded to destination duckdb and into dataset staging
The duckdb destination used duckdb:////Users/plysch/Documents/github/ai_engineering_tps_de24/video_alongs/07_pydanticAI_fundamentals/cv.duckdb location to store data
Load package 1764011861.099592 is LOADED and contains no failed jobs


In [15]:
import duckdb

with duckdb.connect("cv.duckdb") as conn:
    desc = conn.sql("desc;").df()
    cv_entries = conn.sql("FROM staging.cv_entries").df()
    educations = conn.sql("FROM staging.cv_entries__educations").df()
    experiences = conn.sql("FROM staging.cv_entries__experiences").df()

desc

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,cv,staging,_dlt_loads,"[load_id, schema_name, status, inserted_at, sc...","[VARCHAR, VARCHAR, BIGINT, TIMESTAMP WITH TIME...",False
1,cv,staging,_dlt_pipeline_state,"[version, engine_version, pipeline_name, state...","[BIGINT, BIGINT, VARCHAR, VARCHAR, TIMESTAMP W...",False
2,cv,staging,_dlt_version,"[version, engine_version, inserted_at, schema_...","[BIGINT, BIGINT, TIMESTAMP WITH TIME ZONE, VAR...",False
3,cv,staging,cv_entries,"[name, age, _dlt_load_id, _dlt_id]","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
4,cv,staging,cv_entries__educaations,"[title, education_area, school, description, s...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, BIGINT, B...",False
5,cv,staging,cv_entries__educations,"[title, education_area, school, description, s...","[VARCHAR, VARCHAR, VARCHAR, VARCHAR, BIGINT, B...",False
6,cv,staging,cv_entries__experiences,"[title, company, description, start_year, end_...","[VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, VA...",False


In [16]:
cv_entries

Unnamed: 0,name,age,_dlt_load_id,_dlt_id
0,Bjorn Borg,45,1764011541.314197,dcqWA5Xlb1LZ/A
1,Anna Karlsson,32,1764011861.099592,u+fOjWY7hsKEiA


In [17]:
educations

Unnamed: 0,title,education_area,school,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,Specialized in distributed systems and big dat...,2014,2016,u+fOjWY7hsKEiA,0,zSHY037LemwVaQ
1,Bachelor of Science in Information Systems,Computer Science,Uppsala University,Focused on database management and software de...,2011,2014,u+fOjWY7hsKEiA,1,o5VStGKDHNYSDA


In [18]:
experiences

Unnamed: 0,title,company,description,start_year,end_year,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,Senior Data Engineer,Spotify,Developed and maintained data pipelines for va...,2018,2023,dcqWA5Xlb1LZ/A,0,5vNFo7a0GEV/jg
1,Data Engineer,Klarna,Built and optimized ETL processes for financia...,2014,2018,dcqWA5Xlb1LZ/A,1,GkJw51UU094IgA
2,Data Engineer,Swedbank,"Developed and maintained data pipelines, built...",2018,2023,u+fOjWY7hsKEiA,0,FW01Hwm9vPPqJw
3,Junior Data Engineer,Telia,Assisted in the development of data warehousin...,2016,2018,u+fOjWY7hsKEiA,1,uVDEp2WXMgEOaw


In [19]:
duckdb.sql("""
    SELECT
        cv.name,
        cv.age,
        ex.company,
        ex.description AS experience_description,
        ex.start_year AS experience_start_year,
        ex.end_year AS experience_end_year,
        e.title,
        e.education_area,
        e.school,
        e.start_year AS education_start_year,
        e.end_year AS education_end_year
    FROM cv_entries cv
    LEFT JOIN educations e ON cv._dlt_id = e._dlt_parent_id
    LEFT JOIN experiences ex ON cv._dlt_id = ex._dlt_parent_id


""").df()

Unnamed: 0,name,age,company,experience_description,experience_start_year,experience_end_year,title,education_area,school,education_start_year,education_end_year
0,Bjorn Borg,45,Spotify,Developed and maintained data pipelines for va...,2018,2023,,,,,
1,Bjorn Borg,45,Klarna,Built and optimized ETL processes for financia...,2014,2018,,,,,
2,Anna Karlsson,32,Swedbank,"Developed and maintained data pipelines, built...",2018,2023,Bachelor of Science in Information Systems,Computer Science,Uppsala University,2011.0,2014.0
3,Anna Karlsson,32,Telia,Assisted in the development of data warehousin...,2016,2018,Bachelor of Science in Information Systems,Computer Science,Uppsala University,2011.0,2014.0
4,Anna Karlsson,32,Swedbank,"Developed and maintained data pipelines, built...",2018,2023,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,2014.0,2016.0
5,Anna Karlsson,32,Telia,Assisted in the development of data warehousin...,2016,2018,Master of Science in Computer Science,Data Engineering,KTH Royal Institute of Technology,2014.0,2016.0
