In [1]:
import getpass
import os
from dotenv import load_dotenv

load_dotenv()

os.environ["LANGSMITH_TRACING"] = "true"

if "LANGSMITH_API_KEY" not in os.environ:
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass(
        prompt="Enter your LangSmith API Key (optional):"
    )

if "LANGSMITH_PROJECT" not in os.environ:
    os.environ["LANGSMITH_PROJECT"] = getpass.getpass(
        prompt="Enter your LangSmith Project Name (default = 'default'):"
    )
    if not os.environ.get("LANGSMITH_PROJECT"):
        os.environ["LANGSMITH_PROJECT"] = "default"

if not os.environ.get("COHERE_API_KEY"):
  os.environ["COHERE_API_KEY"] = getpass.getpass("Enter API key for Cohere: ")

In [None]:
# from langchain_core.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, Field
from typing import Optional
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chat_models import init_chat_model

# 1. Define your extraction schema
class Person(BaseModel):
    name: Optional[str] = Field(default=None,description="{The full name of the person}")
    hair_color: Optional[str] = Field(default=None,description="The person's hair color")
    height_in_meters: Optional[str] = Field(default=None,description="The person's height in meters")
    dummy_required_field: str = Field(description="dummy field no action needed.")

# 2. Create an output parser
parser = PydanticOutputParser(pydantic_object=Person)

# 3. Prompt Template
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert information extractor. Extract the following fields from the given input text. If any field is missing, use null. Respond in JSON format: {format_instructions}"),
    ("human", "{text}")
]).partial(format_instructions=parser.get_format_instructions())

# 4. Initialize Cohere LLM
llm = init_chat_model("command-r-plus", model_provider="cohere")

# 5. Create the chain
chain = prompt | llm | parser

# 6. Invoke with sample input
input_text = "Alice Johnson is 1.75 meters tall and has black hair."
result = chain.invoke({"text": input_text})

print(result)


name='Alice Johnson' hair_color='black' height_in_meters='1.75' dummy_field='dummy_value'


In [32]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality
# 2) Introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted.)
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

In [33]:
from langchain.chat_models import init_chat_model

llm = init_chat_model("command-r-plus", model_provider="cohere")

In [34]:
structured_llm = llm.with_structured_output(schema=Person)

In [35]:
text = "Alan Smith is 6 feet tall and has blond hair."
prompt = prompt_template.invoke({"text": text})
structured_llm.invoke(prompt)
# prompt

Person(name='Alan Smith', hair_color=None, height_in_meters=None, dummy_field='6 feet')

In [36]:
result = structured_llm.invoke(prompt)
data = result.model_dump(exclude_unset=True)
data.pop("dummy_required_field", None)
print(data)


{'name': 'Alan Smith', 'height_in_meters': None, 'dummy_field': 'null'}


In [37]:
from langchain.output_parsers import PydanticOutputParser
from langchain_core.runnables import RunnableLambda

parser = PydanticOutputParser(pydantic_object=Person)

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert information extractor. Extract the following fields from the text. Respond in JSON:\n{format_instructions}"),
    ("human", "{text}")
]).partial(format_instructions=parser.get_format_instructions())


format_prompt = RunnableLambda(lambda x: prompt.format_messages(**x))

llm = init_chat_model("command-r-plus", model_provider="cohere")
chain = prompt | llm | parser

In [38]:
# 6. Invoke with sample input
input_text = "Alice Johnson is 1.75 meters tall and has black hair."
result = chain.invoke({"text": input_text})

print(result)

name='Alice Johnson' hair_color='black' height_in_meters='1.75' dummy_field='dummy'


In [43]:
from pydantic import BaseModel, Field
from typing import List, Optional
from langchain_core.runnables import RunnableLambda

class Data(BaseModel):
    people: List[Person]# = Field(..., description="List of people")  # note: no default_factory

# Force schema rebuild
Data.model_rebuild()

prompt = ChatPromptTemplate.from_messages([
    (
        "system", 
        "You are an expert extractor. Extract all people mentioned in the input text with their name, hair color, and height (in meters). Don't extract the field dummy_required_field, just set it as text NA "
        "Return a JSON object with a 'people' field that contains a list of entries. If a field is unknown, use string missing. Format:\n{format_instructions}"
    ),
    ("human", "{text}")
]).partial(format_instructions=parser.get_format_instructions())

parser = PydanticOutputParser(pydantic_object=Data)

format_prompt = RunnableLambda(lambda x: prompt.format_messages(**x))

chain = format_prompt | llm | parser


In [44]:
input_text = """
John Smith has black hair and is 1.82 meters tall. 
Emily Davis has blonde hair and is 1.65 meters tall.
"""
result = chain.invoke({"text": input_text})
result


Data(people=[Person(name='John Smith', hair_color='black', height_in_meters='1.82', dummy_field='NA'), Person(name='Emily Davis', hair_color='blonde', height_in_meters='1.65', dummy_field='NA')])

In [45]:
input_text = """
 "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."
"""
chain.invoke({"text": input_text})

Data(people=[Person(name='Jeff', hair_color='black', height_in_meters='1.8288', dummy_field='NA'), Person(name='Anna', hair_color='black', height_in_meters='missing', dummy_field='NA')])

In [46]:
messages = [
    {"role": "user", "content": "2 ðŸ¦œ 2"},
    {"role": "assistant", "content": "4"},
    {"role": "user", "content": "2 ðŸ¦œ 3"},
    {"role": "assistant", "content": "5"},
    {"role": "user", "content": "3 ðŸ¦œ 4"},
]

response = llm.invoke(messages)
print(response.content)

6


In [None]:
from langchain_core.utils.function_calling import tool_example_to_messages

examples = [
    (
        "The ocean is vast and blue. It's more than 20,000 feet deep.",
        Data(people=[]),
    ),
    (
        "Fiona traveled far from France to Spain.",
        Data(people=[Person(name="Fiona", height_in_meters=None, hair_color=None,dummy_field='NA')]),
    ),
]


messages = []

for txt, tool_call in examples:
    if tool_call.people:
        # This final message is optional for some providers
        ai_response = "Detected people."
    else:
        ai_response = "Detected no people."
    messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))

  messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))


In [49]:
for message in messages:
    message.pretty_print()


The ocean is vast and blue. It's more than 20,000 feet deep.
Tool Calls:
  Data (fafc1c01-405d-4fb8-8b57-a92c30e8edd0)
 Call ID: fafc1c01-405d-4fb8-8b57-a92c30e8edd0
  Args:
    people: []

You have correctly called this tool.

Detected no people.

Fiona traveled far from France to Spain.
Tool Calls:
  Data (837968e6-9f33-4afe-9658-b60e77b19fb4)
 Call ID: 837968e6-9f33-4afe-9658-b60e77b19fb4
  Args:
    people: [{'name': 'Fiona', 'hair_color': None, 'height_in_meters': None, 'dummy_field': 'NA'}]

You have correctly called this tool.

Detected people.


In [52]:
message_no_extraction = {
    "role": "user",
    "content": "The solar system is large, but earth has only 1 moon.",
}

chain.invoke({"text": message_no_extraction})

# structured_llm = llm.with_structured_output(schema=Data)
# structured_llm.invoke([message_no_extraction])

Data(people=[])

In [54]:
chain.invoke({"text":messages + [message_no_extraction]})

Data(people=[Person(name='Fiona', hair_color='missing', height_in_meters='missing', dummy_field='NA')])