In [None]:
%pip install -q python-dotenv

In [None]:
# Install python-dotenv if not already installed


# Load environment variables from .env file
from dotenv import load_dotenv
import os

load_dotenv()  # This will load variables from .env into the environment

# Example: Access your key (replace 'YOUR_KEY_NAME' with the actual key name)
api_key = os.getenv('LLAMA_CLOUD_API_KEY')
print("API Key loaded:", api_key is not None)

Note: you may need to restart the kernel to use updated packages.
API Key loaded: True


In [2]:
%pip install pydantic

Note: you may need to restart the kernel to use updated packages.


In [None]:
from llama_cloud_services import LlamaExtract
from pydantic import BaseModel, Field

# Initialize client
extractor = LlamaExtract()

# Define schema using Pydantic
class Resume(BaseModel):
    name: str = Field(description="Full name of candidate")
    email: str = Field(description="Email address")
    skills: list[str] = Field(description="Technical skills and technologies")


# Create extraction agent
agent = extractor.create_agent(name="resume-parser", data_schema=Resume)

# Extract data from document
result = agent.extract("data/cv fakedata1.pdf")

In [10]:
result.__dict__

{'id': 'f456b4b8-0416-46ad-934d-23c6084dcce1',
 'created_at': datetime.datetime(2025, 7, 9, 15, 10, 35, 444195, tzinfo=datetime.timezone.utc),
 'updated_at': datetime.datetime(2025, 7, 9, 15, 10, 54, 53678, tzinfo=datetime.timezone.utc),
 'extraction_agent_id': '9e5e4cac-745d-4ff8-aada-d74ac84e0971',
 'data_schema': {'additionalProperties': False,
  'properties': {'name': {'description': 'Full name of candidate',
    'type': 'string'},
   'email': {'description': 'Email address', 'type': 'string'},
   'skills': {'description': 'Technical skills and technologies',
    'items': {'type': 'string'},
    'type': 'array'}},
  'required': ['name', 'email', 'skills'],
  'type': 'object'},
 'config': ExtractConfig(priority=None, extraction_target=<ExtractTarget.PER_DOC: 'PER_DOC'>, extraction_mode=<ExtractMode.BALANCED: 'BALANCED'>, multimodal_fast_mode=False, system_prompt=None, use_reasoning=False, cite_sources=False, chunk_mode=<DocumentChunkMode.PAGE: 'PAGE'>, invalidate_cache=False),
 'fil

In [5]:
result.data

{'name': 'Manasi Goyal',
 'email': 'manasi@example.com',
 'skills': ['Design software',
  'Visual communication',
  'Branding',
  'Project management']}

In [6]:
from llama_cloud_services import LlamaExtract

# Initialize client
extractor = LlamaExtract()

# List all agents
agents = extractor.list_agents()

print(agents)

[ExtractionAgent(id=9e5e4cac-745d-4ff8-aada-d74ac84e0971, name=resume-parser)]


In [7]:
# Get specific agent
agent = extractor.get_agent(id="9e5e4cac-745d-4ff8-aada-d74ac84e0971") # or id as param

# Delete agent
extractor.delete_agent(agent.id)

In [14]:
agent = extractor.get_agent(name="resume-parser")

In [15]:
# Queue multiple files for extraction
jobs = await agent.queue_extraction(["data/cv fakedata1.pdf", "data/cv fakedata1.docx"])

Uploading files: 100%|██████████| 2/2 [00:00<00:00,  2.73it/s]
Creating extraction jobs: 100%|██████████| 2/2 [00:04<00:00,  2.14s/it]


In [17]:
# Check job status
for job in jobs:
    status = agent.get_extraction_job(job.id).status
    print(f"Job {job.id}: {status}")

Job 7403de4c-ed4d-45f1-a4c4-f6a80a890ccd: StatusEnum.SUCCESS
Job 7f8cf56a-461e-4ab9-92b0-427d52ead59e: StatusEnum.PENDING


In [18]:
# Get results when complete
results = [agent.get_extraction_run_for_job(job.id) for job in jobs]

for result in results:
    print(result.data)

{'name': 'Manasi Goyal', 'email': 'manasi@example.com', 'skills': ['Design software', 'Visual communication', 'Branding', 'Project management']}
None


In [None]:
from pydantic import BaseModel, Field

agent = extractor.get_agent(name="resume-parser")

# Define schema using Pydantic
class Resume(BaseModel):
    name: str = Field(description="Full name of candidate")
    email: str = Field(description="Email address")
    skills: list[str] = Field(description="Technical skills and technologies")
    location: str = Field(description="Location of candidate")
    education: str = Field(description="Education of candidate")

# Update schema
agent.data_schema = Resume

# Save changes
agent.save()

Uploading files: 100%|██████████| 1/1 [00:00<00:00,  1.11it/s]
Creating extraction jobs: 100%|██████████| 1/1 [00:01<00:00,  1.52s/it]
Extracting files: 100%|██████████| 1/1 [00:06<00:00,  6.38s/it]
Uploading files: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s]
Creating extraction jobs: 100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
Extracting files: 100%|██████████| 1/1 [00:09<00:00,  9.98s/it]


In [21]:
result = agent.extract("data/cv fakedata1.pdf")

In [22]:
result.data

{'name': 'Manasi Goyal',
 'email': 'manasi@example.com',
 'skills': ['Design software',
  'Visual communication',
  'Branding',
  'Project management'],
 'location': 'Pune, Maharashtra',
 'education': 'Master of Arts Graphic Design, Graphic Design Institute; BA Fine Arts Graphic Design, School of Fine Art'}

In [23]:
from llama_cloud_services import LlamaExtract
from llama_cloud.core.api_error import ApiError
from llama_cloud import ExtractConfig


extract = LlamaExtract(
    project_id="679a928f-239b-49ef-9668-3f06490822d3",
    organization_id="a6047f85-dddc-45e7-bc37-aad53a2e3a76",
)

try:
    agent = extract.get_agent(name="resume-parser")
    if agent:
        extract.delete_agent(agent_id=agent.id)

except ApiError as e:
    if e.status_code == 404:
        pass
    else:
        raise


# extract one object per page
extract_config = ExtractConfig(
    extraction_mode="FAST", # FAST, BALANCED, MULTIMODAL, PREMIUM
    system_prompt="this is an resume for the company 'ACME'",
    extraction_target="PER_PAGE", # PER_DOC, PER_PAGE
    use_reasoning=False,
    cite_sources=False
)

agent = extract.create_agent(name="resume-parser", data_schema=Resume, config=extract_config)

result = agent.extract("data/cv fakedata1.pdf")

Uploading files: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]
Creating extraction jobs: 100%|██████████| 1/1 [00:00<00:00,  1.26it/s]
Extracting files: 100%|██████████| 1/1 [00:18<00:00, 18.37s/it]


In [24]:
from pprint import pprint

pprint(result.data)

[{'education': 'Master of Arts Graphic Design, Graphic Design Institute; BA '
               'Fine Arts Graphic Design, School of Fine Art',
  'email': 'manasi@example.com',
  'location': 'Pune, Maharashtra',
  'name': 'Manasi Goyal',
  'skills': ['Design software',
             'Visual communication',
             'Branding',
             'Project management']}]
