In [1]:
import json
import os

from langchain.callbacks import get_openai_callback
from langchain.chat_models import AzureChatOpenAI
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import PromptTemplate
from langchain.schema import BaseMessage

In [2]:
# Initialize all the API-related environment variables
# CA bundle path exported through REQUESTS_CA_BUNDLE (relative to the notebook's working directory).
os.environ["REQUESTS_CA_BUNDLE"] = r"../ca-bundle-full.crt"

# Fall back to the generic OPENAI_* variables when the AZURE_* ones are unset or empty.
# os.environ only accepts str values, so the fallback is copied only when it actually
# exists; assigning None would raise TypeError right here. When neither variable is set
# the AZURE_* variable is simply left alone.
for _azure_var, _fallback_var in (
    ("AZURE_OPENAI_API_KEY", "OPENAI_API_KEY"),
    ("AZURE_OPENAI_ENDPOINT", "OPENAI_ENDPOINT"),
):
    if not os.getenv(_azure_var):
        _fallback_value = os.getenv(_fallback_var)
        if _fallback_value is not None:
            os.environ[_azure_var] = _fallback_value

# Deployment name: OPENAI_ENGINE when set and non-empty, otherwise "gpt-35-turbo".
engine = os.getenv("OPENAI_ENGINE") or "gpt-35-turbo"
openai_api_version = "2023-03-15-preview"

In [3]:
def get_player_info_text(text: str) -> "BaseMessage":
    '''
        Ask the model for the player's name and date of birth as free-form text.

        The chain has no output parser, so the reply is returned as the chat
        model's message object, not a plain string. The answer text is in its
        ``content`` attribute and the caller has to parse it.

        Args:
            text: Passage describing the player.

        Returns:
            The chat model's reply message.
    '''
    prompt = PromptTemplate(
        template="""I would be giving you player information. 
                    Return the player name and DateOfBirth\n{text}.""",
        input_variables=["text"],
    )
    azure_chat_open_ai = AzureChatOpenAI(openai_api_version=openai_api_version,
                                         azure_deployment=engine)

    chain = prompt | azure_chat_open_ai
    # The callback accumulates the cost of the calls made inside the block.
    with get_openai_callback() as cb:
        response = chain.invoke({"text": text})
        print(f"Total Cost (USD): ${cb.total_cost:.4f}")

    return response

In [4]:
def get_player_info_json(text: str) -> "BaseMessage":
    '''
        Ask the model for the player's name and date of birth as JSON text.

        The JSON shape is only requested in the prompt; nothing validates it,
        so the reply may not be well-formed JSON. The chain has no output
        parser, so the reply is returned as the chat model's message object
        and the caller has to parse its ``content`` attribute.

        Args:
            text: Passage describing the player.

        Returns:
            The chat model's reply message.
    '''
    prompt = PromptTemplate(
        template="""I would be giving you player information. 
                    Return the response in json format for the below fields
                    "Name": string // Name of the player
                    "DateOfBirth": string // Date of birth of the player in DD/MM/YYYY
                    \n{text}.""",
        input_variables=["text"],
    )
    azure_chat_open_ai = AzureChatOpenAI(openai_api_version=openai_api_version,
                                         azure_deployment=engine)

    chain = prompt | azure_chat_open_ai
    # The callback accumulates the cost of the calls made inside the block.
    with get_openai_callback() as cb:
        response = chain.invoke({"text": text})
        print(f"Total Cost (USD): ${cb.total_cost:.4f}")

    return response

In [5]:
def get_player_info_structured(text: str) -> dict:
    '''
        Ask the model for the player's name and date of birth and return them parsed.

        A StructuredOutputParser built from the response schemas supplies the
        format instructions for the prompt and is also the last step of the
        chain, so no custom string-to-JSON parsing is needed.

        Args:
            text: Passage describing the player.

        Returns:
            The parsed reply, with the keys "Name" and "DateOfBirth".
    '''
    response_schemas = [
        ResponseSchema(name="Name", description="Name of the player."),
        ResponseSchema(name="DateOfBirth", description="Date of birth of the player in DD/MM/YYYY"),
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

    # The instructions are bound as a partial variable, so callers supply only {text}.
    format_instructions = output_parser.get_format_instructions()
    prompt = PromptTemplate(
        template="""I would be giving you player information. 
                    {format_instructions}\n{text}.""",
        input_variables=["text"],
        partial_variables={"format_instructions": format_instructions}
    )
    azure_chat_open_ai = AzureChatOpenAI(openai_api_version=openai_api_version,
                                         azure_deployment=engine)

    chain = prompt | azure_chat_open_ai | output_parser
    # The callback accumulates the cost of the calls made inside the block.
    with get_openai_callback() as cb:
        response = chain.invoke({"text": text})
        print(f"Total Cost (USD): ${cb.total_cost:.4f}")

    return response

In [6]:
# Sample biography passed to each get_player_info_* function in the demo cell.
# The indentation of the continuation lines is inside the literal, so it is part of
# the text sent to the model.
player = """Sachin Ramesh Tendulkar, (/ˌsʌtʃɪn tɛnˈduːlkər/ ⓘ; pronounced [sətɕin teːɳɖulkəɾ]; 
            born 24 April 1973) is an Indian former international cricketer who captained the Indian national team. 
            He is widely regarded as one of the greatest batsmen in the history of cricket. 
            Hailed as the world's most prolific batsman of all time, 
            he is the all-time highest run-scorer in both ODI and Test cricket with more than 18,000 runs and 15,000 runs, respectively.
            He also holds the record for receiving the most player of the match awards in international cricket.
            Tendulkar was a Member of Parliament, Rajya Sabha by nomination from 2012 to 2018."""

In [7]:
# Run every extraction variant on the sample passage: print its banner, then its result.
demo_runs = (
    ("---------------------Response in text--------------------", get_player_info_text),
    ("---------------------Response in json--------------------", get_player_info_json),
    ("---------------------Response in structured--------------------", get_player_info_structured),
)
for banner, extract in demo_runs:
    print(banner)
    print(extract(player))

---------------------Response in text--------------------
Total Cost (USD): $0.0004
content='Player Name: Sachin Ramesh Tendulkar\nDate of Birth: 24 April 1973'
---------------------Response in json--------------------
Total Cost (USD): $0.0004
content='{\n  "Name": "Sachin Ramesh Tendulkar",\n  "DateOfBirth": "24/04/1973"\n}'
---------------------Response in structured--------------------
Total Cost (USD): $0.0005
{'Name': 'Sachin Ramesh Tendulkar', 'DateOfBirth': '24/04/1973'}
