# Output Parsers
* Format the answer of the LLM.

In [1]:
#pip install python-dotenv

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables into the process environment.
# The return value of load_dotenv() is deliberately discarded into `_`.
_ = load_dotenv(find_dotenv())
# Indexing os.environ raises KeyError when OPENAI_API_KEY is not set, so a
# missing key fails here rather than at the first model call.
openai_api_key = os.environ["OPENAI_API_KEY"]

#### Install LangChain

If you are using the pre-loaded poetry shell, you do not need to install the following package because it is already pre-loaded for you:

In [3]:
#!pip install langchain

## Connect with an LLM

If you are using the pre-loaded poetry shell, you do not need to install the following package because it is already pre-loaded for you:

In [4]:
#!pip install langchain-openai

* NOTE: Since it is currently the best LLM on the market, we will use OpenAI by default. You will see how to connect with other open-source LLMs like Llama3 or Mistral in a later lesson.

## LLM Model
* The trend before the launch of chatGPT-4.
* See LangChain documentation about LLM Models [here](https://python.langchain.com/v0.1/docs/modules/model_io/llms/).

In [2]:
from langchain_openai import OpenAI

# LLM (non-chat) model wrapper, created with the library defaults since no
# arguments are passed.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment loaded
# above -- confirm.
llmModel = OpenAI()

## Chat Model
* The general trend after the launch of chatGPT-4.
    * Frequently known as "Chatbot". 
    * Conversation between Human and AI.
    * Can have a system prompt defining the tone or the role of the AI. 
* See LangChain documentation about Chat Models [here](https://python.langchain.com/v0.1/docs/modules/model_io/chat/).
* By default we will work with ChatOpenAI. See [here](https://python.langchain.com/v0.1/docs/integrations/chat/openai/) the LangChain documentation page about it.

In [3]:
from langchain_openai import ChatOpenAI

# Chat model pinned to an explicit model snapshot; used later by the
# Pydantic/Joke chain.
chatModel = ChatOpenAI(model="gpt-3.5-turbo-0125")

## Using Llama 3

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Same .env loading as at the top of the notebook, repeated so this section
# can be run on its own. No key is read into a variable here.
# NOTE(review): presumably ChatGroq reads its API key from the environment --
# confirm.
_ = load_dotenv(find_dotenv())

In [2]:
#!pip install langchain-groq

In [4]:
from langchain_groq import ChatGroq

# Groq-hosted chat model; this instance is the one used by the JSON chain.
# Note the XML section later creates a second instance named `llamachatmodel`
# (all lower case) with the same model id.
llamaChatModel = ChatGroq(
    model="llama3-70b-8192"
)

## Parsing Outputs
* See the corresponding LangChain Documentation page [here](https://python.langchain.com/v0.1/docs/modules/model_io/output_parsers/).
* Language models output text. But sometimes we would like to have those answers in a different format, like a JSON dictionary or an XML document. In order to achieve that, we use the Output Parsers.

In [5]:
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers.json import SimpleJsonOutputParser

# The JSON requirement (an object with an `answer` key) is written directly
# into the prompt text; the parser's own format instructions are not injected.
json_prompt = PromptTemplate.from_template(
    "Return a JSON object with an `answer` key that answers the following question: {question}"
)

# Parser applied to the model reply at the end of the chain.
json_parser = SimpleJsonOutputParser()

# prompt -> model -> parser; invoke with a dict containing the `question` key.
json_chain = json_prompt | llamaChatModel | json_parser

#### The previous prompt template includes the parser instructions

In [6]:
# Display the parser's built-in format instructions. As the output shows, they
# are a generic sentence; the `answer`-key requirement lives in the prompt text.
json_parser.get_format_instructions()

'Return a JSON object.'

In [8]:
# The input key must match the `{question}` placeholder of json_prompt.
json_chain.invoke({"question": "breif about china country?"})

{'answer': {'name': 'China',
  'capital': 'Beijing',
  'population': 'over 1.44 billion',
  'area': 'approximately 9.6 million square kilometers',
  'government': 'One-party socialist republic',
  'economy': 'Second-largest economy in the world',
  'language': 'Mandarin Chinese (official), many other languages and dialects',
  'religion': 'Buddhism, Taoism, Islam, Christianity, and others',
  'history': 'One of the oldest civilizations in the world, with a history dating back over 4,000 years',
  'landmarks': 'Great Wall of China, Forbidden City, Terracotta Warriors, and many others'}}

In [9]:
# Same chain, different question: only the `answer` key is requested by the
# prompt, so the keys nested under it differ from the previous run's output.
json_chain.invoke({"question": "breif about india country?"})

{'answer': {'name': 'India',
  'capital': 'New Delhi',
  'location': 'South Asia',
  'population': 'over 1.38 billion people',
  'language': 'Hindi, English, and 22 other official languages',
  'religion': 'Hinduism, Islam, Christianity, Sikhism, Buddhism, and Jainism',
  'area': '3,287,590 square kilometers (1,269,219 sq mi)',
  'currency': 'Indian rupee (INR)',
  'known_for': 'Diverse culture, rich history, vibrant cities, beautiful landscapes, and spiritual heritage'}}

#### Optionally, you can use Pydantic to define a custom output format

In [7]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

In [8]:
# Define a Pydantic object with the desired output format: two string fields.
# The Field descriptions are passed to the parser via `pydantic_object=Joke`.
# NOTE(review): kept as `#` comments on purpose -- a class docstring would
# presumably become part of the generated schema/format instructions; confirm.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

In [9]:
# Define the parser from the Pydantic object so its format instructions
# describe the Joke fields.
parser = JsonOutputParser(pydantic_object=Joke)

# Inject the parser's format instructions into the prompt as a partial
# variable, so only `query` has to be supplied at invoke time.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Create a chain with the prompt, the OpenAI chat model and the parser.
# Note: `parser`, `prompt` and `chain` are rebound by later sections of this
# notebook, so this cell must be re-run before re-using the Joke chain.
chain = prompt | chatModel | parser

chain.invoke({"query": "Tell me a joke."})

{'setup': "Why couldn't the bicycle stand up by itself?",
 'punchline': 'Because it was two tired!'}

## XML parser

In [10]:
# Load environment variables from .env so the API key is available to the
# model client. This repeats the loading done in earlier sections.

import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

In [11]:
from langchain_groq import ChatGroq
# Second ChatGroq instance with the same model id as `llamaChatModel` above.
# The XML, CSV and custom-parser sections all reference this lower-case name.
llamachatmodel=ChatGroq(model="llama3-70b-8192")

In [None]:
from langchain_core.output_parsers import XMLOutputParser
from langchain_core.prompts import PromptTemplate
#prompt=PromptTemplate(template="Return an XML object with an `answer` key that answers the following question: {question}")
# XML output parser created without a `tags` hint (compare the tagged `parser`
# defined in the next cell).
xml_parser=XMLOutputParser() ## Initialize XML Output Parser

In [41]:
# XML parser with the expected tag names; its format instructions are injected
# into the prompt below. This rebinds `parser` and `prompt` from the Joke
# section.
parser = XMLOutputParser(tags=["movies", "actor", "film", "name", "genre"])
prompt = PromptTemplate(
    template="""{query}\n{format_instructions}""",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)




In [35]:
!pip install defusedxml



In [42]:
# Parse with the same `parser` instance whose format instructions (including
# the tag list) were injected into `prompt`, so the prompt and the parsing
# step cannot drift apart.
chain = prompt | llamachatmodel | parser

In [56]:
# Run the XML chain; the input key matches the `{query}` placeholder of the
# prompt. The parsed result is kept for the JSON/XML conversions below.
llm_output=chain.invoke({"query":"breif about bahubali movie ?"})

In [57]:
# Display the parsed result: a nested structure of dicts and lists, as shown
# in the output below.
llm_output

{'movies': [{'film': [{'name': 'Bahubali'},
    {'genre': 'Fantasy Action'},
    {'actor': [{'name': 'Prabhas'}, {'role': 'Baahubali/Shivudu'}]},
    {'actor': [{'name': 'Rana Daggubati'}, {'role': 'Bhallala Deva'}]},
    {'actor': [{'name': 'Anushka Shetty'}, {'role': 'Devasena'}]},
    {'brief': 'Baahubali is a two-part Indian fantasy action film directed by S. S. Rajamouli. The film is set in the fictional kingdom of Mahishmati and tells the story of Shivudu, an young man who sets out on a journey to claim his rightful place as the king of Mahishmati.'}]}]}

In [62]:
import json

In [63]:
# Serialise the parsed chain output to a JSON string, pretty-printed with a
# 4-space indent.
json_data = json.dumps(llm_output, indent=4)

# print() is used so the newlines are rendered instead of shown as "\n" in
# the string repr.
print(json_data)

{
    "movies": [
        {
            "film": [
                {
                    "name": "Bahubali"
                },
                {
                    "genre": "Fantasy Action"
                },
                {
                    "actor": [
                        {
                            "name": "Prabhas"
                        },
                        {
                            "role": "Baahubali/Shivudu"
                        }
                    ]
                },
                {
                    "actor": [
                        {
                            "name": "Rana Daggubati"
                        },
                        {
                            "role": "Bhallala Deva"
                        }
                    ]
                },
                {
                    "actor": [
                        {
                            "name": "Anushka Shetty"
                        },
                        {
             

In [64]:
import xml.etree.ElementTree as ET
def dict_to_xml(tag, dictionary):
    """
    Build an XML element named ``tag`` from a (possibly nested) dictionary.

    Each key becomes a child element:
      * a dict value is converted recursively into a nested element;
      * a list value yields one child element *per item*, all named after the
        key (dict items recurse, any other item becomes text via ``str``);
      * any other value becomes a child whose text is ``str(value)``.

    Returns the newly created ``xml.etree.ElementTree.Element``.
    """
    node = ET.Element(tag)

    def _attach(name, payload):
        # Dicts become nested elements; everything else is stored as text.
        if isinstance(payload, dict):
            node.append(dict_to_xml(name, payload))
        else:
            ET.SubElement(node, name).text = str(payload)

    for name, payload in dictionary.items():
        # Treat a scalar/dict as a one-item sequence so lists and single
        # values share the same code path.
        entries = payload if isinstance(payload, list) else [payload]
        for entry in entries:
            _attach(name, entry)

    return node

# Build the XML tree from the parsed chain output, wrapped in a <root> element.
movies_payload = {'movies': llm_output['movies']}
root = dict_to_xml('root', movies_payload)

# ElementTree provides the serialisation API around the root element.
tree = ET.ElementTree(root)

# Binary mode: ElementTree writes encoded bytes to the file object.
with open('bahubali.xml', 'wb') as xml_file:
    tree.write(xml_file)
    

## CSV parser

In [None]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate
#from langchain_openai import ChatOpenAI

# Parser that turns a comma separated reply into a Python list.
output_parser = CommaSeparatedListOutputParser()

format_instructions = output_parser.get_format_instructions()
prompt = PromptTemplate(
    # The final sentence discourages the chat model from prepending an
    # introduction (e.g. "Here are five ...:"), which would otherwise end up
    # inside the first list item after splitting on commas.
    template=(
        "List five {subject}.\n{format_instructions}\n"
        "Respond with the comma separated values only, "
        "without any introduction or explanation."
    ),
    input_variables=["subject"],
    partial_variables={"format_instructions": format_instructions},
)

# prompt -> Groq chat model -> list parser; invoke with a `subject` key.
chain = prompt | llamachatmodel | output_parser

In [66]:
# The input key must match the `{subject}` placeholder of the list prompt.
chain.invoke({"subject": "ice cream flavors"})

['Here are five ice cream flavors:\n\nVanilla',
 'Chocolate',
 'Strawberry',
 'Cookies and Cream',
 'Mint Chocolate Chip']

## Customised output parsers (JSON, XML, CSV, etc.)

In [None]:
from langchain.output_parsers import (
    CommaSeparatedListOutputParser,
    XMLOutputParser,
    #SimpleJsonOutputParser,
)
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain.prompts import PromptTemplate
#from langchain.chains import LLMChain
from langchain.llms import BaseLLM  # Replace with your actual LLM class


def get_output_parser(output_format):
    """
    Create a fresh output parser for ``output_format``.

    Supported values:
      * ``"list"`` -> ``CommaSeparatedListOutputParser``
      * ``"xml"``  -> ``XMLOutputParser`` with the movie-related tag list
      * ``"json"`` -> ``SimpleJsonOutputParser``

    Raises:
        ValueError: if ``output_format`` is none of the values above.
    """
    # Reject unknown formats up front so the happy path below stays flat.
    if output_format not in ("list", "xml", "json"):
        raise ValueError(f"Unsupported output format: {output_format}")

    if output_format == "list":
        return CommaSeparatedListOutputParser()
    if output_format == "json":
        return SimpleJsonOutputParser()
    # Only "xml" can reach this point.
    return XMLOutputParser(tags=["movies", "actor", "film", "name", "genre"])


# Template type used when the caller keeps ``template_type="default"``,
# keyed by output format.
_DEFAULT_TEMPLATE_TYPE = {"list": "list", "xml": "query", "json": "json"}

# Name of the prompt input variable each template type expects.
_TEMPLATE_INPUT_KEY = {"list": "subject", "query": "query", "json": "question"}


def _resolve_template_type(output_format, template_type):
    """Map the ``"default"`` placeholder to the template matching ``output_format``."""
    if template_type == "default":
        return _DEFAULT_TEMPLATE_TYPE.get(output_format, template_type)
    return template_type


def generate_prompt_template(output_format, template_type="default"):
    """
    Build the prompt template for the given output format and template type.

    Args:
        output_format: ``"list"``, ``"xml"`` or ``"json"``; selects the parser
            whose format instructions are injected into the prompt.
        template_type: ``"list"`` (expects a ``subject`` input), ``"query"``
            (expects ``query``), ``"json"`` (expects ``question``), or
            ``"default"`` to pick the template that matches ``output_format``
            (list -> "list", xml -> "query", json -> "json").

    Returns:
        A ``PromptTemplate``. The "json" template carries its JSON requirement
        in the prompt text itself and does not use the format instructions.

    Raises:
        ValueError: for an unsupported ``output_format`` (raised by
            ``get_output_parser``) or an unsupported ``template_type``.
    """
    # Validate the output format first, so a bad format is reported before a
    # bad template type.
    parser = get_output_parser(output_format)
    format_instructions = parser.get_format_instructions()

    # Previously the "default" value fell through to the ValueError below,
    # which made the default argument unusable.
    template_type = _resolve_template_type(output_format, template_type)

    if template_type == "list":
        return PromptTemplate(
            template="List five {subject}.\n{format_instructions}",
            input_variables=["subject"],
            partial_variables={"format_instructions": format_instructions},
        )
    elif template_type == "query":
        return PromptTemplate(
            template="{query}\n{format_instructions}",
            input_variables=["query"],
            partial_variables={"format_instructions": format_instructions},
        )
    elif template_type == "json":
        return PromptTemplate.from_template(
            "Return a JSON object with an `answer` key that answers the following question: {question}"
        )
    else:
        raise ValueError(f"Unsupported template type: {template_type}")


def get_custom_output(llm_model: BaseLLM, input_data, output_format, template_type="default"):
    """
    Run ``input_data`` through prompt -> model -> parser and return the parsed result.

    Args:
        llm_model: the model placed in the middle of the chain. It is annotated
            as ``BaseLLM``, but the examples in this notebook pass a
            ``ChatGroq`` chat model.
        input_data: value substituted into the template's single input
            variable (``subject``, ``query`` or ``question``).
        output_format: ``"list"``, ``"xml"`` or ``"json"``.
        template_type: see ``generate_prompt_template``; ``"default"`` selects
            the template that matches ``output_format``.

    Returns:
        Whatever the selected output parser produces for the model reply.

    Raises:
        ValueError: for an unsupported ``output_format`` or ``template_type``.
    """
    # Step 1: build the prompt. This also validates both arguments, so the
    # lookups below cannot fail on unsupported values.
    prompt_template = generate_prompt_template(output_format, template_type)
    template_type = _resolve_template_type(output_format, template_type)

    # Step 2: assemble the chain with a fresh parser for the requested format.
    parser = get_output_parser(output_format)
    chain = prompt_template | llm_model | parser

    # Step 3: invoke with the input variable name the chosen template expects.
    return chain.invoke({_TEMPLATE_INPUT_KEY[template_type]: input_data})


# Example usage: one call per supported output format, using the Groq chat
# model (`llamachatmodel`) created in the XML section.
if __name__ == "__main__":
    # (input_data, output_format, template_type)
    examples = [
        ("fruits", "list", "list"),                        # comma-separated list
        ("What are some popular movies?", "xml", "query"),  # XML output
        ("Brief about China country?", "json", "json"),     # JSON output
    ]

    for input_data, output_format, template_type in examples:
        print(get_custom_output(llamachatmodel, input_data, output_format, template_type=template_type))

['Apple', 'Banana', 'Mango', 'Orange', 'Watermelon']
{'movies': [{'film': [{'name': 'The Shawshank Redemption'}, {'genre': 'Drama'}, {'actor': 'Morgan Freeman'}]}, {'film': [{'name': 'The Godfather'}, {'genre': 'Crime'}, {'actor': 'Marlon Brando'}]}, {'film': [{'name': 'The Dark Knight'}, {'genre': 'Action'}, {'actor': 'Christian Bale'}]}, {'film': [{'name': 'Inception'}, {'genre': 'Science Fiction'}, {'actor': 'Leonardo DiCaprio'}]}, {'film': [{'name': 'The Lord of the Rings: The Return of the King'}, {'genre': 'Fantasy'}, {'actor': 'Elijah Wood'}]}]}
{'answer': {'country': 'China', 'capital': 'Beijing', 'population': '1.443 billion (2022 est.)', 'language': 'Mandarin Chinese (official), other dialects', 'religion': 'Buddhism, Taoism, Islam, Christianity', 'area': '9,712,963 km² (3,755,767 sq mi)', 'government': 'One-party socialist republic', 'economy': 'Second-largest economy in the world, driven by manufacturing, technology, and services', 'currency': 'Renminbi (RMB)', 'history': "

## How to execute the code from Visual Studio Code
* In Visual Studio Code, see the file 005-output-parsers.py
* In terminal, make sure you are in the directory of the file and run:
    * python 005-output-parsers.py