# <font color=red>LangChain:  Using the PydanticOutputParser </br>to derive Pydantic Models from the output of LLMs</font>
- https://docs.langchain.com/docs

<h4>
We use the PydanticOutputParser to parse output from the LLM into a Pydantic Model.</br>
We ask the LLM to produce output conforming to a JSON schema for the Pydantic Model.</br>  
This gives us a nice structured result providing a comfortable programming model.
</h4>
<span style="font-family:'Comic Sans MS', cursive, sans-serif; font-size:18px;"><font color=orange>
## Demo 1 - Use an OpenAI gpt-x model withOUT a chain, just directly using the model
</font></span></br></br>
The OpenAI models are good at producing output that conforms to a provided JSON schema.</br>

In [None]:
import sys, os, time, json

from pydantic import BaseModel, Field, validator

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser


# Structured record the LLM answer is parsed into.
# NOTE: the Field descriptions below end up in the JSON schema that
# PydanticOutputParser puts into the prompt, so the model reads this wording.
# (Kept as comments rather than a class docstring: a docstring on a pydantic
# model is emitted as the schema "description" and would alter the prompt.)
class Info(BaseModel):
    first_name:  str = Field(description="first name of the person")
    last_name:   str = Field(description="last name  of the person")
    age_died:    int = Field(description="age of the person when they died")
    spouse_name: str = Field(description="name of the person's spouse")
    dog_name:    str = Field(description="name of the person's dog")     # often not found
    widget_name: str = Field(description="name of the person's widget")  # no value expected (None)

    @validator("age_died")  # just for demo validation
    @classmethod            # explicit classmethod; better to have this in pydantic v2
    def check_age_died(cls, age):
        # Accept ages from 1 through 101 inclusive; anything else is treated
        # as a probable extraction error and fails validation.
        if age < 1 or age > 101:
            raise ValueError("probable wrong value for age_died ")
        return age


# ---- Demo 1: call the chat model directly (no chain), then parse the reply ----

# The parser supplies the schema-based formatting instructions that are
# substituted into the {format_instructions} slot of the prompt below.
pydantic_parser = PydanticOutputParser(pydantic_object=Info)
format_instructions = pydantic_parser.get_format_instructions()

template_string = """
You are a helpful assistant providing information about American history.
Use the formatting instructions below to provide the answers to user queries.

QUERY:
{query}

FORMATTING_INSTRUCTIONS:
{format_instructions}
If you can not find a value for a field, then assign it the value "None".
"""
prompt = ChatPromptTemplate.from_template(template=template_string)

# temperature=0.0 keeps the answers as repeatable as the API allows.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)
print("MODELNAME", llm.model_name)

# Fill both template slots to get the chat messages sent to the model.
query = "How old was the first president of the USA when he died?"
messages = prompt.format_messages(
    query=query,
    format_instructions=format_instructions,
)
print("MSGS", messages)

# Direct model call: the reply is a message object whose text is in .content.
output = llm(messages)
print("OUTCONTENT", output.content)

# Turn the model's text into a validated Info instance.
info1 = pydantic_parser.parse(output.content)
print("info1TYPE", type(info1))
print("info1CONTENT", info1)
print("info1", info1.first_name, info1.last_name, info1.age_died)

#### LangChain also provides parsers that repair or retry malformed output:
####     OutputFixingParser
####     RetryOutputParser
#### But I have not needed them using OpenAI.
#### I used them with some success in the Mistral demo.

<span style="font-family:'Comic Sans MS', cursive, sans-serif; font-size:18px;"><font color=orange>
## Demo 2 - Use an OpenAI gpt-x model WITH a chain
</font></span></br>

In [None]:
import sys, os, time, json

from pydantic import BaseModel, Field, validator

from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser


# Structured record the LLM answer is parsed into.
# NOTE: the Field descriptions below end up in the JSON schema that
# PydanticOutputParser puts into the prompt, so the model reads this wording.
# (Kept as comments rather than a class docstring: a docstring on a pydantic
# model is emitted as the schema "description" and would alter the prompt.)
class Info(BaseModel):
    first_name:  str = Field(description="first name of the person")
    last_name:   str = Field(description="last name  of the person")
    age_died:    int = Field(description="age of the person when they died")
    spouse_name: str = Field(description="name of the person's spouse")
    dog_name:    str = Field(description="name of the person's dog")     # often not found
    widget_name: str = Field(description="name of the person's widget")  # no value expected (None)

    @validator("age_died")  # just for demo validation
    @classmethod            # explicit classmethod; better to have this in pydantic v2
    def check_age_died(cls, age):
        # Accept ages from 1 through 101 inclusive; anything else is treated
        # as a probable extraction error and fails validation.
        if age < 1 or age > 101:
            raise ValueError("probable wrong value for age_died ")
        return age


# ---- Demo 2: same question, but prompt + model + parser are wired into a chain ----

# The parser supplies the schema-based formatting instructions that are
# substituted into the {format_instructions} slot of the prompt below.
pydantic_parser = PydanticOutputParser(pydantic_object=Info)
format_instructions = pydantic_parser.get_format_instructions()

template_string = """
You are a helpful assistant providing information about American history.
Use the formatting instructions below to provide the answers to user queries.

QUERY:
{query}

FORMATTING_INSTRUCTIONS:
{format_instructions}
If you can not find a value for a field, then assign it the value "None".
"""
prompt = ChatPromptTemplate.from_template(template=template_string)

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)
print("MODELNAME", llm.model_name)

# The chain is given the parser as its output_parser, so predict() is
# expected to hand back the parsed object (the type is printed below).
chain = LLMChain(
    llm=llm,
    prompt=prompt,
    output_parser=pydantic_parser,
)

query = "How old was the first president of the USA when he died?"

info1 = chain.predict(
    query=query,
    format_instructions=format_instructions,
)
print("DBG", type(info1))

print("info1TYPE", type(info1))
print("info1CONTENT", info1)
print("info1", info1.first_name, info1.last_name, info1.age_died)

#### LangChain also provides parsers that repair or retry malformed output:
####     OutputFixingParser
####     RetryOutputParser
#### But I have not needed them using OpenAI.
#### I used them with some success in the Mistral demo.

<span style="font-family:'Comic Sans MS', cursive, sans-serif; font-size:18px;"><font color=orange>
## Demo 3 - Use the Mistral model
</font></span></br>
Mistral-7B uses a Llama-2-style architecture (it is not a Llama-2 fine-tune), and Zephyr is a fine-tune of Mistral-7B.</br>
They are not quite as good as the OpenAI models at following a JSON schema 
to produce output which can be parsed into a Pydantic Model.  </br>
But, they do an adequate job, and a little re-try logic typically gets the job done.

In [None]:
import sys, os, time, json

from pydantic import BaseModel, Field, validator

from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Structured record the LLM answer is parsed into.
# NOTE: the Field descriptions below end up in the JSON schema that
# PydanticOutputParser puts into the prompt, so the model reads this wording.
# (Kept as comments rather than a class docstring: a docstring on a pydantic
# model is emitted as the schema "description" and would alter the prompt.)
class Info(BaseModel):
    first_name:  str = Field(description="first name of the person")
    last_name:   str = Field(description="last name  of the person")
    age_died:    int = Field(description="age of the person when they died")
    spouse_name: str = Field(description="name of the person's spouse")
    dog_name:    str = Field(description="name of the person's dog")     # often not found
    widget_name: str = Field(description="name of the person's widget")  # no value expected (None)

    @validator("age_died")  # demo validation
    @classmethod            # explicit classmethod; better to have this in pydantic v2
    def check_age_died(cls, age):
        # Accept ages from 1 through 101 inclusive; anything else is treated
        # as a probable extraction error and fails validation.
        if age < 1 or age > 101:
            raise ValueError("probable wrong value for age_died ")
        return age

## this top part is setting up to use Mistral with LangChain instead of gpt-x ########

# Relative path, so presumably a local checkpoint directory; the same id is
# used to load both the weights and the tokenizer.
model_id = "./Mistral-7B-Instruct-v0.1"

# Step 1: the model.  (torch_dtype=torch.float16 was tried here and left off.)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')
model.eval()
print("MODELID", model_id)

# Step 2: the tokenizer that matches the model.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Step 3: a text-generation pipeline around model + tokenizer.
# The generation settings are collected in one place and unpacked into the call.
generation_settings = {
    "max_new_tokens": 200,
    "temperature": 0.1,
    "do_sample": True,
    "repetition_penalty": 1.1,
    "return_full_text": True,
    "device_map": 'auto',
}
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

# Step 4 (last): wrap the pipeline so LangChain can use it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

## conclude setting up to use Mistral with LangChain instead of gpt-x ########

# ---- Demo 3: prompt, parser and chain for the Mistral-backed LLM ----

# The parser supplies the schema-based formatting instructions that are
# substituted into the {format_instructions} slot of the prompt below.
pydantic_parser = PydanticOutputParser(pydantic_object=Info)
format_instructions = pydantic_parser.get_format_instructions()

template_string = """
You are a helpful assistant providing information about American history.
Use the formatting instructions below to provide the answers to user queries.

QUERY:
{query}

FORMATTING_INSTRUCTIONS:
{format_instructions}
"""
# The model sometimes uses '' for values it cannot find.  Adding this line to
# the template did not help, so it is left out here:
#    If you can not find a value for a field, then assign it the value "None".

prompt = ChatPromptTemplate.from_template(template=template_string)

# The parser is attached to the chain as its output_parser.
chain = LLMChain(
    llm=llm,
    prompt=prompt,
    output_parser=pydantic_parser,
)

query = "How old was the first president of the USA when he died?"

# Ask the question and parse the answer into an Info object.  The local model
# does not always emit output the Pydantic parser accepts, so on failure we
# escalate through two repair strategies before giving up:
#   1. OutputFixingParser         - has the LLM repair the malformed output
#   2. RetryWithErrorOutputParser - re-asks using the original prompt, the bad
#                                   completion and the parse error
from langchain.output_parsers import OutputFixingParser, RetryWithErrorOutputParser

try:  # this code usually works fine
    info1 = chain.predict(query=query, format_instructions=format_instructions)
    print("info1CONTENT",info1)
    print("info1TYPE",type(info1))
except Exception:  # not a bare except: Ctrl-C / SystemExit must still get through
    print("\n**** TRYING FIX\n")
    fix_parser = OutputFixingParser.from_llm(parser=pydantic_parser, llm=llm)
    fix_chain = LLMChain(llm=llm, prompt=prompt, output_parser=fix_parser)
    try:  # this code has run before with success at getting an answer
        info1 = fix_chain.predict(query=query, format_instructions=format_instructions)
        print("info1CONTENT",info1)
        print("info1TYPE",type(info1))
    except Exception:
        print("\n**** DOING RETRY\n")
        retry_parser = RetryWithErrorOutputParser.from_llm(parser=pydantic_parser, llm=llm)
        # A retry parser cannot serve as a chain's output_parser: its parse()
        # raises NotImplementedError because it also needs the prompt.  So the
        # chain is built WITHOUT a parser to obtain the raw completion, and the
        # completion plus the formatted prompt go to parse_with_prompt().
        retry_chain = LLMChain(llm=llm, prompt=prompt)
        prompt_value = prompt.format_prompt(query=query, format_instructions=format_instructions)
        try:
            completion = retry_chain.predict(query=query, format_instructions=format_instructions)
            info1 = retry_parser.parse_with_prompt(completion, prompt_value)
            print("info1CONTENT",info1)
            print("info1TYPE",type(info1))
        except Exception:
            print("**** UNABLE to handle query:",query)
            # Same status as the old exit(-1), without relying on the
            # site/IPython-provided exit() helper.
            raise SystemExit(-1)