# Output Parsers in LangChain

## Install Libraries

In [None]:
!pip install --upgrade transformers accelerate langchain-huggingface

# Import Libraries

In [42]:
import torch
from pydantic import BaseModel, Field

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

In [11]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

In [41]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser, PydanticOutputParser

# Import Model

## Model Download

In [7]:
model_id = "unsloth/gemma-3-1b-it"
# model_id = "Qwen/Qwen3-4B-Instruct-2507"

tok = AutoTokenizer.from_pretrained(model_id)
raw_model_llm = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" ,
    offload_folder="offload",
    offload_state_dict=True
)

supports_json_mode = "json" in tok.chat_template.lower()
supports_tools = "tool" in tok.chat_template.lower() or "function" in tok.chat_template.lower()

print("JSON mode:", supports_json_mode)
print("Function/tool calling:", supports_tools)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/902 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

JSON mode: False
Function/tool calling: False


## Pipeline Build

In [8]:
pipe = pipeline(
    task = "text-generation",
    model = raw_model_llm,
    tokenizer = tok
)

model_llm = HuggingFacePipeline(
    pipeline=pipe,
    model_kwargs={
        "max_new_tokens": 200,
        "temperature": 0.0
    }
)

Device set to use cuda:0


In [9]:
model_llm.invoke("What is the Capital city of Bangladesh?")

'What is the Capital city of Bangladesh?\n\nThe capital city of Bangladesh is Dhaka.\n\nDo you want to know about other things about Bangladesh?\n'

## Make Chat Model in Langchain

In [10]:
model = ChatHuggingFace(llm=model_llm, verbose=True)
model.invoke("What is the Capital city of Bangladesh?")

AIMessage(content='<bos><start_of_turn>user\nWhat is the Capital city of Bangladesh?<end_of_turn>\n<start_of_turn>model\nThe capital city of Bangladesh is **Dhaka**.\n', additional_kwargs={}, response_metadata={}, id='lc_run--019b0be3-b4af-7012-9ad2-28189a47a5db-0')

# String Prompt Template

## Template Build

In [14]:
template1 = PromptTemplate(
    template = "What is the Capital city of {country}?",
    input_variables=['country']
)

template2 = PromptTemplate(
    template = "Tell me 3 lines of poem for following text \n{capital}",
    input_variables = ['capital']
)

## Method 1: Sequential Lineup (Traditional Method)

In [16]:
prompts1 = template1.invoke({'country': 'bangladesh'})
result1 = model.invoke(prompts1)

In [17]:
prompts1

StringPromptValue(text='What is the Capital city of bangladesh?')

In [18]:
result1

AIMessage(content='<bos><start_of_turn>user\nWhat is the Capital city of bangladesh?<end_of_turn>\n<start_of_turn>model\nThe capital city of Bangladesh is **Dhaka**.\n', additional_kwargs={}, response_metadata={}, id='lc_run--019b0bef-6dde-7aa0-a739-91534eb78e67-0')

In [20]:
prompts2 = template2.invoke({'capital': result1.content})
result2 = model.invoke(prompts2)

In [21]:
prompts2

StringPromptValue(text='Tell me 3 lines of poem for following text \n<bos><start_of_turn>user\nWhat is the Capital city of bangladesh?<end_of_turn>\n<start_of_turn>model\nThe capital city of Bangladesh is **Dhaka**.\n')

In [23]:
result2.content

'<bos><start_of_turn>user\nTell me 3 lines of poem for following text \n<bos><start_of_turn>user\nWhat is the Capital city of bangladesh?<end_of_turn>\n<start_of_turn>model\nThe capital city of Bangladesh is **Dhaka**.<end_of_turn>\n<start_of_turn>model\nOkay, here are three lines of poetry inspired by that information:\n\nA river flows, a vibrant hue,\nWhere ancient tales forever renew,\nDhaka stands, a city strong and true.'

In [25]:
result2.content.split('<start_of_turn>')[-1]

'model\nOkay, here are three lines of poetry inspired by that information:\n\nA river flows, a vibrant hue,\nWhere ancient tales forever renew,\nDhaka stands, a city strong and true.'

## Method 2: Using Chains

In [31]:
parser = StrOutputParser()

In [32]:
chain = template1 | model | parser | template2 | model
chain_result = chain.invoke({'country': 'bangladesh'})
chain_result.content

'<bos><start_of_turn>user\nTell me 3 lines of poem for following text \n<bos><start_of_turn>user\nWhat is the Capital city of bangladesh?<end_of_turn>\n<start_of_turn>model\nThe capital city of Bangladesh is **Dhaka**.<end_of_turn>\n<start_of_turn>model\nOkay, here are three lines of poetry inspired by that information:\n\nA river flows, a vibrant hue,\nWhere history sleeps, and dreams accrue,\nDhaka stands, a city strong and true.'

In [33]:
chain_result.content.split('<start_of_turn>')[-1]

'model\nOkay, here are three lines of poetry inspired by that information:\n\nA river flows, a vibrant hue,\nWhere history sleeps, and dreams accrue,\nDhaka stands, a city strong and true.'

# Json Output Parser

In [34]:
json_parser = JsonOutputParser()

In [35]:
json_template = PromptTemplate(
    template = "Give me 5 facts about {topic} \n{format_instruction}",
    input_variable = ['topic'],
    partial_variables = {'format_instruction': json_parser.get_format_instructions()}
)

## Prompts

In [36]:
json_prompts = json_template.invoke({'topic': 'bangladesh'})
json_prompts

StringPromptValue(text='Give me 5 facts about bangladesh \nReturn a JSON object.')

In [37]:
json_result = model.invoke(json_prompts)
json_result.content

'<bos><start_of_turn>user\nGive me 5 facts about bangladesh \nReturn a JSON object.<end_of_turn>\n<start_of_turn>model\n```json\n{\n  "facts": [\n    {\n      "fact": "Bangladesh is the world’s largest exporter of ready-made garments.",\n      "source": "World Bank"\n    },\n    {\n      "fact": "The Ganges River is the longest river in the world by length.",\n      "source": "National Geographic"\n    },\n    {\n      "fact": "Bangladesh is home to the world\'s largest mangrove forest, the Sundarbans.",\n      "source": "UNESCO"\n    },\n    {\n      "fact": "The country has a rich history of Mughal and British colonial influence, evident in its architecture and culture.",\n      "source": "Wikipedia - History of Bangladesh"\n    },\n    {\n      "fact": "Bangladesh is a parliamentary democracy with a constitution that guarantees freedom of speech and expression.",\n      "source": "Transparency International"\n    }\n  ]\n}\n```\n'

In [38]:
json_result.content.split('<start_of_turn>')[-1]

'model\n```json\n{\n  "facts": [\n    {\n      "fact": "Bangladesh is the world’s largest exporter of ready-made garments.",\n      "source": "World Bank"\n    },\n    {\n      "fact": "The Ganges River is the longest river in the world by length.",\n      "source": "National Geographic"\n    },\n    {\n      "fact": "Bangladesh is home to the world\'s largest mangrove forest, the Sundarbans.",\n      "source": "UNESCO"\n    },\n    {\n      "fact": "The country has a rich history of Mughal and British colonial influence, evident in its architecture and culture.",\n      "source": "Wikipedia - History of Bangladesh"\n    },\n    {\n      "fact": "Bangladesh is a parliamentary democracy with a constitution that guarantees freedom of speech and expression.",\n      "source": "Transparency International"\n    }\n  ]\n}\n```\n'

## using chains

In [39]:
json_chain = json_template | model | json_parser
json_result2 = json_chain.invoke({'topic': 'bangladesh'})
type(json_result2)

dict

In [40]:
json_result2

{'facts_about_bangladesh': [{'fact': "Bangladesh is the world's most densely populated country, with over 170 million people.",
   'source': 'Worldometer'},
  {'fact': "The country is known as the 'Pearl of the East' due to its geographical location and rich cultural heritage.",
   'source': 'Wikipedia'},
  {'fact': 'Bangladesh is a landlocked country, with its location primarily along the Ganges River and the Brahmaputra River.',
   'source': 'World Atlas'},
  {'fact': 'The country has a long history of Islamic influence, particularly in its culture and traditions.',
   'source': 'Bangladesh Heritage'},
  {'fact': 'Bangladesh is a major producer of rice and jute, which are crucial agricultural commodities.',
   'source': 'FAO - Food and Agriculture Organization of the United Nations'}]}

# Pydantic Output Parser

In [43]:
class Person(BaseModel):
  name: str = Field(description='Name of the person')
  age: int = Field(ge=18, description='Age of that person')
  city: str = Field(description = "City which this man belongs from")

In [44]:
pydantic_parser = PydanticOutputParser(pydantic_object=Person)

## Template

In [47]:
pydantic_template = PromptTemplate(
    template = "Generate the name, age and city of a fictional {place} person \n {format_instruction}",
    input_variables = ['place'],
    partial_variables={'format_instruction': pydantic_parser.get_format_instructions()}
)

## Prompts

In [48]:
pydantic_prompts = pydantic_template.invoke({'place': 'bangladesh'})
pydantic_prompts

StringPromptValue(text='Generate the name, age and city of a fictional bangladesh person \n The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"description": "Name of the person", "title": "Name", "type": "string"}, "age": {"description": "Age of that person", "minimum": 18, "title": "Age", "type": "integer"}, "city": {"description": "City which this man belongs from", "title": "City", "type": "string"}}, "required": ["name", "age", "city"]}\n```')

## Traditional Method

In [54]:
pydantic_result

AIMessage(content='<bos><start_of_turn>user\nGenerate the name, age and city of a fictional bangladesh person \n The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"description": "Name of the person", "title": "Name", "type": "string"}, "age": {"description": "Age of that person", "minimum": 18, "title": "Age", "type": "integer"}, "city": {"description": "City which this man belongs from", "title": "City", "type": "string"}}, "required": ["name", "age", "city"]}\n```<end_of_turn>\n<start_of_turn>model\n```json\n{\n  "properties": {\n    "name": {\n      "descrip

In [None]:
pydantic_result = model.invoke(pydantic_prompts)
final_pydantic_result = pydantic_parser.invoke(pydantic_result.content)
final_pydantic_result

## Chains

In [None]:
pydantic_chain = pydantic_template | model | pydantic_parser
pydantic_chain_result = pydantic_chain.invoke('bangladesh')
pydantic_chain_result