# Structured Output in LangChain

## Install Libraries

In [91]:
!pip install --upgrade transformers accelerate langchain-huggingface



# Import Libraries

In [92]:
from typing import TypedDict, Optional, Annotated, Literal
from pydantic import BaseModel, Field

In [93]:
import torch

In [94]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

In [95]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

# Model Build

## Model Download

In [96]:
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
# model_id = "Qwen/Qwen3-4B-Instruct-2507"

tok = AutoTokenizer.from_pretrained(model_id)
raw_model_llm = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" ,
    offload_folder="offload",
    offload_state_dict=True
)

supports_json_mode = "json" in tok.chat_template.lower()
supports_tools = "tool" in tok.chat_template.lower() or "function" in tok.chat_template.lower()

print("JSON mode:", supports_json_mode)
print("Function/tool calling:", supports_tools)

JSON mode: True
Function/tool calling: True


## Pipeline Build

In [97]:
pipe = pipeline(
    task = "text-generation",
    model = raw_model_llm,
    tokenizer = tok
)

model_llm = HuggingFacePipeline(
    pipeline=pipe,
    model_kwargs={
        "max_new_tokens": 200,
        "temperature": 0.0
    }
)

Device set to use cuda:0


In [98]:
model_llm.invoke("What is the Capital city of Bangladesh?")

'What is the Capital city of Bangladesh? The capital city of Bangladesh is Dhaka. It was established as a town in 1632 by Shah Jalal, the son of Sultan Muzaffar Hossain of Bengal. It became the seat of power after independence from British rule in 1947 and has since become one of the largest cities in South Asia. Dhaka is also known for its rich cultural heritage, including its famous Sultanganj Mosque and National Museum. Today, it is home to over 10 million people and serves as the center of politics, culture, education, business, and finance in Bangladesh. \n\nThe history of Dhaka dates back to ancient times when it was an important trading port on the Ganges River. Over time, it developed into a major commercial and administrative center, with several rulers establishing their capitals there. In 1538, the Mughals conquered Dhaka and made it their headquarters, but they lost control soon after.\n\nIn 1765, the Marathas took control of the region and built the present-day Fort Maidan

# Make Chat Model in Langchain

In [99]:
model = ChatHuggingFace(llm=model_llm, verbose=True)

In [100]:
model.invoke("What is the Capital city of Bangladesh?")

AIMessage(content='<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nWhat is the Capital city of Bangladesh?<|im_end|>\n<|im_start|>assistant\nThe capital city of Bangladesh is Dhaka.', additional_kwargs={}, response_metadata={}, id='lc_run--019b0900-e127-71d2-b7ec-2acb861cf736-0')

# Review

In [101]:
review = """
The S-Pen integration is a great touch for note-taking and quick sketches, though I don't use it often. What really blew me away is the 200MP camera—the night mode is stunning, capturing crisp, vibrant images even in low light. Zooming up to 100x actually works well for distant objects, but anything beyond 30x loses quality.
However, the weight and size make it a bit uncomfortable for one-handed use. Also, Samsung’s One UI still comes with bloatware—why do I need five different Samsung apps for things Google already provides? The $1,300 price tag is also a hard pill to swallow.

Review by Mr. X
"""

# `TypedDict`

In [102]:
class Review(TypedDict):
  key_themes: Annotated[list[str], "Write down the key themes, main points of this review"]
  summary: Annotated[str, "Write down the short summary of this review"]
  sentiment: Annotated[Literal['Positive', 'Negative'], "Return sentiment of this review"]
  reviewer: Annotated[Optional[str], "Write the name of the reviewer"]

In [103]:
typeddict_structed_model = model.with_structured_output(Review)
typeddict_result = typeddict_structed_model.invoke(review)

In [104]:
typeddict_result

In [105]:
review_text = "The product quality was excellent, shipping was delayed, and customer support was friendly. Reviewer: John Doe."

result = typeddict_structed_model.invoke(review_text)
print(result)

None


# `Pydantic`

In [106]:
# schema
class Review(BaseModel):

    key_themes: list[str] = Field(description="Write down all the key themes discussed in the review in a list")
    summary: str = Field(description="A brief summary of the review")
    sentiment: Literal["pos", "neg"] = Field(description="Return sentiment of the review either negative, positive or neutral")
    pros: Optional[list[str]] = Field(default=None, description="Write down all the pros inside a list")
    cons: Optional[list[str]] = Field(default=None, description="Write down all the cons inside a list")
    name: Optional[str] = Field(default=None, description="Write the name of the reviewer")


structured_model = model.with_structured_output(Review)

result = structured_model.invoke("""I recently upgraded to the Samsung Galaxy S24 Ultra, and I must say, it’s an absolute powerhouse! The Snapdragon 8 Gen 3 processor makes everything lightning fast—whether I’m gaming, multitasking, or editing photos. The 5000mAh battery easily lasts a full day even with heavy use, and the 45W fast charging is a lifesaver.

The S-Pen integration is a great touch for note-taking and quick sketches, though I don't use it often. What really blew me away is the 200MP camera—the night mode is stunning, capturing crisp, vibrant images even in low light. Zooming up to 100x actually works well for distant objects, but anything beyond 30x loses quality.

However, the weight and size make it a bit uncomfortable for one-handed use. Also, Samsung’s One UI still comes with bloatware—why do I need five different Samsung apps for things Google already provides? The $1,300 price tag is also a hard pill to swallow.

Pros:
Insanely powerful processor (great for gaming and productivity)
Stunning 200MP camera with incredible zoom capabilities
Long battery life with fast charging
S-Pen support is unique and useful

Review by Nitish Singh
""")

print(result)

NotImplementedError: Pydantic schema is not supported for function calling

# `Json_Schema`

In [107]:
# schema
json_schema = {
  "title": "Review",
  "type": "object",
  "properties": {
    "key_themes": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Write down all the key themes discussed in the review in a list"
    },
    "summary": {
      "type": "string",
      "description": "A brief summary of the review"
    },
    "sentiment": {
      "type": "string",
      "enum": ["pos", "neg"],
      "description": "Return sentiment of the review either negative, positive or neutral"
    },
    "pros": {
      "type": ["array", "null"],
      "items": {
        "type": "string"
      },
      "description": "Write down all the pros inside a list"
    },
    "cons": {
      "type": ["array", "null"],
      "items": {
        "type": "string"
      },
      "description": "Write down all the cons inside a list"
    },
    "name": {
      "type": ["string", "null"],
      "description": "Write the name of the reviewer"
    }
  },
  "required": ["key_themes", "summary", "sentiment"]
}


structured_model = model.with_structured_output(json_schema)

result = structured_model.invoke("""I recently upgraded to the Samsung Galaxy S24 Ultra, and I must say, it’s an absolute powerhouse! The Snapdragon 8 Gen 3 processor makes everything lightning fast—whether I’m gaming, multitasking, or editing photos. The 5000mAh battery easily lasts a full day even with heavy use, and the 45W fast charging is a lifesaver.

The S-Pen integration is a great touch for note-taking and quick sketches, though I don't use it often. What really blew me away is the 200MP camera—the night mode is stunning, capturing crisp, vibrant images even in low light. Zooming up to 100x actually works well for distant objects, but anything beyond 30x loses quality.

However, the weight and size make it a bit uncomfortable for one-handed use. Also, Samsung’s One UI still comes with bloatware—why do I need five different Samsung apps for things Google already provides? The $1,300 price tag is also a hard pill to swallow.

Pros:
Insanely powerful processor (great for gaming and productivity)
Stunning 200MP camera with incredible zoom capabilities
Long battery life with fast charging
S-Pen support is unique and useful

Review by Nitish Singh
""")

print(result)

None


# TypedDict + Json_Schema

In [108]:
from typing import TypedDict, Optional, Annotated, Literal
import json

# Define nested TypedDicts to match the model's actual output structure
class FeedbackDetails(TypedDict):
  overall_rating: str
  shipping_status: str
  customer_support: str

class ReviewerInfo(TypedDict):
  name: str

# Adapt the main TypedDict schema to precisely match the model's actual output structure
class Review(TypedDict):
  product: str
  feedback: FeedbackDetails
  reviewer: ReviewerInfo

# Prompt with explicit instruction, emphasizing JSON output
review_text = "The product was excellent, shipping was delayed, and customer support was friendly. Reviewer: John Doe."
explicit_prompt = f"{review_text}\n\nExtract the information above into a JSON object strictly conforming to the 'Review' schema.\n"

# Get the raw output from the model without the structured output wrapper
raw_response = model.invoke(explicit_prompt)
print("Raw Model Response (AIMessage object):")
print(raw_response)

# Extract content from AIMessage
if hasattr(raw_response, 'content'):
    model_output_content = raw_response.content
    print("\nModel Output Content:")
    print(model_output_content)

    # Attempt to parse the content as JSON
    try:
        # Look for the JSON block, often models wrap it in markdown
        json_start = model_output_content.find('```json')
        json_end = model_output_content.find('```', json_start + 1)

        if json_start != -1 and json_end != -1:
            json_string = model_output_content[json_start + len('```json'):json_end].strip()
            parsed_json = json.loads(json_string)
            # Now convert the parsed JSON dict to the TypedDict object
            # This should now work with the adapted schema
            typed_dict_result = Review(**parsed_json)
            print("\nManually Parsed TypedDict Result (after conversion to Review object):")
            print(typed_dict_result)
        else:
            print("\nNo '```json' block found in the model's output. The model might not be generating JSON as expected.")
            typed_dict_result = None
    except json.JSONDecodeError as e:
        print(f"\nJSON parsing failed: {e}")
        typed_dict_result = None
    except TypeError as e:
        print(f"\nType conversion to TypedDict failed: {e}. Check if the schema matches the model's output.")
        typed_dict_result = None
    except Exception as e:
        print(f"\nAn unexpected error occurred during parsing: {e}")
        typed_dict_result = None
else:
    print("\nRaw response does not have a 'content' attribute.")
    typed_dict_result = None

# The final result is either the parsed TypedDict or None
# print(typed_dict_result) # You can uncomment this to see the final object

Raw Model Response (AIMessage object):
content='<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nThe product was excellent, shipping was delayed, and customer support was friendly. Reviewer: John Doe.\n\nExtract the information above into a JSON object strictly conforming to the \'Review\' schema.\n<|im_end|>\n<|im_start|>assistant\n```json\n{\n  "Reviewer": {\n    "Name": "John Doe",\n    "Rating": null,\n    "Comments": [\n      "Product was excellent.",\n      "Shipping was delayed.",\n      "Customer support was friendly."\n    ]\n  }\n}\n```' additional_kwargs={} response_metadata={} id='lc_run--019b0901-693a-74f3-940e-935efa54303c-0'

Model Output Content:
<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
The product was excellent, shipping was delayed, and customer support was friendly. Reviewer: John Doe.

Extract the information above into a JSO

In [109]:
raw_response

AIMessage(content='<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nThe product was excellent, shipping was delayed, and customer support was friendly. Reviewer: John Doe.\n\nExtract the information above into a JSON object strictly conforming to the \'Review\' schema.\n<|im_end|>\n<|im_start|>assistant\n```json\n{\n  "Reviewer": {\n    "Name": "John Doe",\n    "Rating": null,\n    "Comments": [\n      "Product was excellent.",\n      "Shipping was delayed.",\n      "Customer support was friendly."\n    ]\n  }\n}\n```', additional_kwargs={}, response_metadata={}, id='lc_run--019b0901-693a-74f3-940e-935efa54303c-0')