In [None]:
# Install LangChain library
# LangChain helps us connect Large Language Models (LLMs) with prompts, chains, and tools
# LangChain → Framework to build AI applications using LLMs
!pip install langchain

# Install OpenAI wrapper (optional, future-ready)
# Useful if we switch from Gemini to OpenAI models later
# Wrapper → A layer that connects external models with LangChain
!pip install langchain_openai

# Install Google Gemini integration for LangChain
# Enables us to use Google’s Gemini models inside LangChain
!pip install langchain_google_genai

In [68]:
import langchain
from langchain_google_genai import ChatGoogleGenerativeAI ,GoogleGenerativeAI

In [29]:
import os
from google.colab import userdata
# Read the secret named 'Gemini' from Colab's user data and expose it to this
# process as the GEMINI_API_KEY environment variable.
gen = userdata.get('Gemini')
os.environ["GEMINI_API_KEY"] = gen

In [30]:
# Chat model handle for Gemini; used by the LLM cells in this notebook.
model = ChatGoogleGenerativeAI(model="gemini-3-flash-preview")

In [31]:
!pip install --upgrade pymupdf



In [32]:
import typing
import pydantic

In [33]:
from typing import TypedDict # Parent class that gives your class exactly one ability: type hints (for type checking only)

In [34]:
class DataFormat(TypedDict):
    """Dictionary shape with a string ``name`` and an integer ``age``."""

    name: str
    age: int

In [35]:
# Build a DataFormat value; the displayed result is a plain dict.
DataFormat(name="Shrutika", age=23)

{'name': 'Shrutika', 'age': 23}

In [36]:
from pydantic import BaseModel

### BaseModel from Pydantic is a smart Python class that:

- (Data validation) Checks your data – makes sure the types match (int, str, etc.)

- (Data coercion) Converts automatically – changes data to the correct type if possible

- Handles missing values – allows defaults or optional fields

- (Serialization) Easily converts to dict/JSON – ready for APIs or saving (`model_dump()` / `model_dump_json()`)

- Supports nested data – models inside models

In [37]:
class Schema(BaseModel):
    # Minimal Pydantic model: both fields are required.
    name: str
    age: int

In [38]:
# Valid input: both values already have the declared types.
Schema(name="Shrutika", age=23)

Schema(name='Shrutika', age=23)

#### The above example demonstrates Pydantic’s data validation and parsing features.

1. Data validation: Ensures the field matches the expected type.

2. Data parsing: Attempts to convert the input into the correct type.

In [39]:

# Incorrect input: "23a" is not a valid integer.
# Pydantic tries to parse "23a" into an integer, but the string contains
# letters, so the conversion fails and a ValidationError is raised.
# The error is caught and printed so this demonstration does not abort "Run All".
try:
    Schema(name="Shrutika", age="23a")
except pydantic.ValidationError as exc:
    print(f"ValidationError: {exc}")

ValidationError: 1 validation error for Schema
age
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='23a', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/int_parsing

- Validation = checking if the data type is correct

- Parsing = trying to convert it to the right type

- Error occurs when parsing fails

In [40]:
# Correct way: the string "23" can be parsed as an integer, so it becomes age=23.
Schema(name="Shrutika", age="23")

Schema(name='Shrutika', age=23)

In [41]:
class Schema(BaseModel):
    name: str
    age: int = 10  # default value, used when age is not supplied

    # Note: TypedDict (from typing) doesn't support default values


# age is omitted here, so the default of 10 is used.
Schema(name="Shrutika")

Schema(name='Shrutika', age=10)

In [42]:
# 3. Pydantic classes support data coercion: automatic type conversion

In [43]:
class Schema(BaseModel):
    # Same shape as before: required name, age defaulting to 10.
    name: str
    age: int = 10

In [44]:
# The string "23" is coerced to the integer 23.
Schema(name="Shrutika", age="23")

Schema(name='Shrutika', age=23)

In [45]:
# 21.7 has a fractional part, so it cannot be converted to an int without
# losing data; Pydantic rejects it with a ValidationError.
# The error is caught and printed so this demonstration does not abort "Run All".
try:
    Schema(name="Shrutika", age=21.7)
except pydantic.ValidationError as exc:
    print(f"ValidationError: {exc}")

ValidationError: 1 validation error for Schema
age
  Input should be a valid integer, got a number with a fractional part [type=int_from_float, input_value=21.7, input_type=float]
    For further information visit https://errors.pydantic.dev/2.12/v/int_from_float

### The above error occurs due to type validation failure.

- Comparing int and float, float is the wider type: it can hold fractional values that int cannot.

- Converting an int to float does not cause any data loss.

- However, converting a float to int can result in data loss.

- Since the given value cannot be safely converted to an int, validation fails and the error occurs.

In [46]:
from typing import Annotated

In [47]:
class Schema(BaseModel):
    # A bare string inside Annotated is accepted, but it is not used by Pydantic.
    name: Annotated[str, "extract the name"]
    age: int

# This code does not raise any error, but the plain-string message is not passed on to the Pydantic model as additional information.

In [48]:
from pydantic import Field

In [49]:
class Schema(BaseModel):
    # Put the description in Field(...) inside Annotated; always use this form.
    name: Annotated[str, Field(description="extract the name")]
    age: int

In [50]:
class Schema(BaseModel):
    # Both fields carry a default and a description; age must also be greater than 20 (gt=20).
    name: str = Field(default="Shrutika", description="extract the name")
    age: int = Field(default=23, description="extract the age if the age>20", gt=20)

In [52]:
# age=22 satisfies the gt=20 constraint, so the model is created.
Schema(name="Shrutika", age=22)

Schema(name='Shrutika', age=22)

In [54]:
# An error occurs because the age input must be greater than 20 (gt=20).
# The error is caught and printed so this demonstration does not abort "Run All".
try:
    Schema(age=19, name="Shrutika")
except pydantic.ValidationError as exc:
    print(f"ValidationError: {exc}")

ValidationError: 1 validation error for Schema
age
  Input should be greater than 20 [type=greater_than, input_value=19, input_type=int]
    For further information visit https://errors.pydantic.dev/2.12/v/greater_than

### When to use what?
1. Use Annotated when:

- Building API schemas

- Working with FastAPI

- Creating LLM tool inputs

- You want required fields

2. Use Field directly when:

- You need default values

- You need validation rules

- You’re doing data modeling or config objects

In [55]:
from typing import Optional,Literal

In [56]:
class Schema(BaseModel):
    # name, cons and position have defaults; age and skills must be supplied.
    name: str = Field(default="Shrutika", description="extract the name")
    age: int = Field(description="extract the age if the age>20", gt=20)
    skills: list[str] = Field(description="extract the skills")
    cons: Optional[str] = Field(default="na", description="extract the cons if existed")
    # Literal restricts position to exactly these three strings.
    position: Literal["Ceo", "hr", "trainer"] = Field(default="hr")


In [58]:
# Supply every field explicitly, including the ones that have defaults.
Schema(
    name="shrutika",
    age=21,
    skills=["agentic ai", "deep learning"],
    cons="lots of breaks",
    position="hr",
)

Schema(name='shrutika', age=21, skills=['agentic ai', 'deep learning'], cons='lots of breaks', position='hr')

## Create an LLM that extracts structured information about a person and then generates a concise summary from that information.

In [88]:
# Gemini chat model used for the extraction example below.
model = ChatGoogleGenerativeAI(model="gemini-3-flash-preview")

In [89]:
# Ask the model for free-form text; this is the unstructured input for the extraction step.
result = model.invoke("Tell me in detail about ms dhoni")

In [80]:
# Inspect the content of the model's response.
result.content

[{'type': 'text',
  'text': 'Mahendra Singh Dhoni, popularly known as **MS Dhoni** or **Mahi**, is widely regarded as one of the greatest cricket captains and finishers in the history of the sport. His journey from a small-town boy in Ranchi to a global sporting icon is one of the most inspiring stories in Indian history.\n\nHere is a detailed look at the life and career of MS Dhoni:\n\n---\n\n### 1. Early Life and Background\n*   **Birth:** Born on July 7, 1981, in Ranchi, Bihar (now in Jharkhand).\n*   **Family:** His father, Pan Singh, worked in a junior management position at MECON.\n*   **Initial Interests:** Interestingly, cricket was not his first love. He was an excellent football goalkeeper and also played badminton at the district level.\n*   **The Switch:** On the advice of his school coach, he tried his hand at cricket as a wicket-keeper.\n*   **The Struggle:** Before making it big, Dhoni worked as a **Traveling Ticket Examiner (TTE)** at Kharagpur railway station from 2001

In [93]:
# Reduce the model response to plain text for the extraction step.
# The isinstance guard keeps this cell safe to re-run: once `result` is already
# a string there is nothing left to unwrap (previously a second run failed
# because a str has no `.content`).
if not isinstance(result, str):
    content = result.content
    # Content may be a plain string, or a list of blocks such as
    # [{'type': 'text', 'text': ...}]; in the list case take the first block's text.
    result = content if isinstance(content, str) else content[0]['text']

In [91]:
class Schema(BaseModel):
    # Structured fields to extract from text about a person; each description
    # is the instruction attached to that field.
    name: str = Field(description="extract the name")
    # Default is None (not 0) so "age not found" cannot be mistaken for a real age.
    age: Optional[int] = Field(default=None, description=" extract the age if the text contains the age")
    role: str = Field(description="extract the role")
    summary: str = Field(description="generate the summary")

In [92]:
# Wrap the chat model so its output is returned in the shape of Schema.
fm = model.with_structured_output(Schema)

In [95]:
# Run structured extraction over the text held in `result`.
out = fm.invoke(result)

In [97]:
# Extracted name field.
out.name

'Mahendra Singh Dhoni'

In [102]:
out.age  # None: nothing is displayed for this cell

In [103]:
# Generated summary field.
out.summary

"Mahendra Singh Dhoni, famously known as MS Dhoni, is a legendary Indian cricketer and the only captain to win all three major ICC trophies. His career progressed from working as a Train Ticket Examiner to becoming a global sporting icon, leading India to the 2007 T20 World Cup, 2011 World Cup, and 2013 Champions Trophy titles. Renowned for his 'Captain Cool' demeanor, finishing abilities, and lightning-fast wicketkeeping, he also led the Chennai Super Kings to five IPL titles before retiring from international cricket in 2020."

In [105]:
# Convert the result to a plain Python dict.
out.model_dump()

{'name': 'Mahendra Singh Dhoni',
 'age': None,
 'role': 'Former Indian Cricket Captain and Wicketkeeper-batsman',
 'summary': "Mahendra Singh Dhoni, famously known as MS Dhoni, is a legendary Indian cricketer and the only captain to win all three major ICC trophies. His career progressed from working as a Train Ticket Examiner to becoming a global sporting icon, leading India to the 2007 T20 World Cup, 2011 World Cup, and 2013 Champions Trophy titles. Renowned for his 'Captain Cool' demeanor, finishing abilities, and lightning-fast wicketkeeping, he also led the Chennai Super Kings to five IPL titles before retiring from international cricket in 2020."}

In [106]:
# Serialize the result to a JSON string.
out.model_dump_json()

'{"name":"Mahendra Singh Dhoni","age":null,"role":"Former Indian Cricket Captain and Wicketkeeper-batsman","summary":"Mahendra Singh Dhoni, famously known as MS Dhoni, is a legendary Indian cricketer and the only captain to win all three major ICC trophies. His career progressed from working as a Train Ticket Examiner to becoming a global sporting icon, leading India to the 2007 T20 World Cup, 2011 World Cup, and 2013 Champions Trophy titles. Renowned for his \'Captain Cool\' demeanor, finishing abilities, and lightning-fast wicketkeeping, he also led the Chennai Super Kings to five IPL titles before retiring from international cricket in 2020."}'

In [107]:
!pip install langchain_huggingface

Collecting langchain_huggingface
  Downloading langchain_huggingface-1.2.0-py3-none-any.whl.metadata (2.8 kB)
Collecting huggingface-hub<1.0.0,>=0.33.4 (from langchain_huggingface)
  Downloading huggingface_hub-0.36.2-py3-none-any.whl.metadata (15 kB)
Downloading langchain_huggingface-1.2.0-py3-none-any.whl (30 kB)
Downloading huggingface_hub-0.36.2-py3-none-any.whl (566 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m566.4/566.4 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface-hub, langchain_huggingface
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface_hub 1.3.7
    Uninstalling huggingface_hub-1.3.7:
      Successfully uninstalled huggingface_hub-1.3.7
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
transformers 5.0.0 requires huggingface-hub<2.0,>=1.3.0, 

In [109]:
from google.colab import userdata
# Read the secret named 'hf1' from Colab's user data.
hf = userdata.get('hf1')

In [110]:
import os
# Expose the token read above to this process as HF_TOKEN.
os.environ["HF_TOKEN"] = hf

In [111]:
from langchain_huggingface import HuggingFaceEndpoint,ChatHuggingFace

In [113]:
# Endpoint for the deepseek-ai/DeepSeek-R1 repository on Hugging Face.
endpoint = HuggingFaceEndpoint(repo_id="deepseek-ai/DeepSeek-R1")

In [116]:
# Wrap the endpoint in LangChain's chat-model interface.
deepseek = ChatHuggingFace(llm=endpoint)

In [117]:
# Requesting structured output with a Pydantic schema is not supported here and
# raises NotImplementedError. The error is caught and printed so this
# demonstration does not abort "Run All".
try:
    print(deepseek.with_structured_output(Schema))
except NotImplementedError as exc:
    print(f"NotImplementedError: {exc}")

NotImplementedError: Pydantic schema is not supported for function calling

In [118]:
# As the NotImplementedError shows, this DeepSeek chat model (through ChatHuggingFace) does not accept a Pydantic schema for structured output.
# So for this kind of LLM we can't use structured output; instead we are going to use an output parser.

#### What is an output parser? It is a technique provided by LangChain that lets us convert the unstructured data returned by an LLM into structured or semi-structured data.

#### Main point: most closed-source LLMs support structured output, but most of the open-source LLMs that we access from Hugging Face do not; in that case we need to use an output parser.

#### Types of Output parser:
1. Comma-Separated Output parser
2. JSON Output parser
3. Pydantic Output parser
4. Time delta Output parser
5. String Output parser