In [1]:
import os
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the OpenAI API key from a local text file and export it through the
# OPENAI_API_KEY environment variable.

with open('openai_api_key_.txt', 'r') as f:
    # Strip surrounding whitespace: text files usually end with a newline,
    # and f.read() would otherwise carry that "\n" into the key value.
    api_key = f.read().strip()

os.environ['OPENAI_API_KEY'] = api_key

In [5]:
# Register an in-memory LLM cache, then create the completion model and the
# chat model with their default settings.

set_llm_cache(InMemoryCache())

llm = OpenAI()
chat = ChatOpenAI()

In [7]:
# Build a comma-separated-list parser and keep the instructions it wants the
# model to follow.

from langchain.output_parsers import CommaSeparatedListOutputParser

output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()

In [8]:
# Show the instructions captured from the CSV parser in the previous cell
format_instructions

'Your response should be a list of comma separated values, eg: `foo, bar, baz`'

In [9]:
# Put the user's question and the parser's format instructions into a single
# human message, so the model sees both.

from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate

human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

prompt = chat_prompt.format_prompt(
    human_message="What are the different datatypes in python ?",
    format_instructions=output_parser.get_format_instructions(),
)

prompt

ChatPromptValue(messages=[HumanMessage(content='What are the different datatypes in python ?\nYour response should be a list of comma separated values, eg: `foo, bar, baz`')])

In [10]:
# Convert the formatted prompt into a list of chat messages, using the same
# to_messages() accessor as the other cells in this notebook, and send it to
# the chat model.
messages = prompt.to_messages()

response = chat(messages=messages)

# The reply is plain text at this point; it is parsed in the next cell.
print(response.content)


int, float, str, bool, list, tuple, dictionary


In [11]:
# Let the CSV parser split the model's reply into a Python list
output = output_parser.parse(response.content)
output

['int', 'float', 'str', 'bool', 'list', 'tuple', 'dictionary']

## What to do when the parser fails?

In [13]:
from langchain.output_parsers import DatetimeOutputParser

# Replace the CSV parser with one that expects a datetime string, and print
# the instructions it produces for the model.
output_parser = DatetimeOutputParser()
format_instructions = output_parser.get_format_instructions()

print(format_instructions)

Write a datetime string that matches the 
            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 1200-04-23T17:21:55.546654Z, 0319-06-17T14:45:03.898989Z, 1393-09-24T08:41:39.724579Z


In [15]:
# Same prompt layout as before: the question followed by the format instructions
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

prompt = chat_prompt.format_prompt(
    human_message="When did India get independence ?",
    format_instructions=output_parser.get_format_instructions(),
)

messages = prompt.to_messages()


In [17]:
# Ask the chat model and display the full message object it returns
response = chat(messages=messages)
response

AIMessage(content='India got independence on 1947-08-15T00:00:00.000000Z.')

In [18]:
# Demonstrate a parser failure: if the model wraps the timestamp in extra
# words, the datetime parser cannot parse the reply. The error is caught and
# printed so the failure is shown without stopping a "Restart & Run All".

from langchain.schema import OutputParserException

try:
    output = output_parser.parse(response.content)
except OutputParserException as parse_error:
    # Print in "<ExceptionName>: <message>" form, like a traceback's last line.
    print(f"{type(parse_error).__name__}: {parse_error}")
else:
    # Parsing succeeded (the reply was already a bare datetime string).
    print(output)

OutputParserException: Could not parse datetime string: India got independence on 1947-08-15T00:00:00.000000Z.

In [19]:
# Wrap the datetime parser in an OutputFixingParser, built from the chat model,
# and parse the same reply again through the wrapper.

from langchain.output_parsers import OutputFixingParser

fixing_parser = OutputFixingParser.from_llm(
    llm=chat,
    parser=output_parser,
)

fixed_output = fixing_parser.parse(response.content)
fixed_output

datetime.datetime(1947, 8, 15, 0, 0)

In [21]:
# Fixing might not always work, so try up to four times before giving up.
#
# Only Exception subclasses are caught, so KeyboardInterrupt / SystemExit still
# stop the cell. If every attempt fails, the last error is re-raised rather than
# silently displaying a stale `fixed_output` left over from an earlier cell.
# NOTE(review): with the in-memory LLM cache enabled, repeated attempts on the
# same input may return the same cached response — confirm the retries are useful.

last_error = None

for attempt in range(1, 5):
    try:
        fixed_output = fixing_parser.parse(response.content)
    except Exception as err:
        # `err` is unbound once the except block ends, so keep a reference.
        last_error = err
    else:
        break
else:
    # The loop finished without a successful parse.
    raise last_error

fixed_output

datetime.datetime(1947, 8, 15, 0, 0)

## Custom Parsers

#### Structured Output Parser

In [22]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

# One ResponseSchema per field the model is asked to return
response_schemas = [
    ResponseSchema(
        name="answer",
        description="answer to the user's question",
    ),
    ResponseSchema(name="psuedo code", description="psuedo code to the user's question"),
]

# Build the parser from the schemas and print the instructions it generates
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"answer": string  // answer to the user's question
	"psuedo code": string  // psuedo code to the user's question
}
```


In [26]:
# Question plus the structured-output instructions, combined in one human message
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

prompt = chat_prompt.format_prompt(
    human_message="write a code to mind the missing smallest positive in a list: [1, 2, 3]",
    format_instructions=output_parser.get_format_instructions(),
)

prompt

ChatPromptValue(messages=[HumanMessage(content='write a code to mind the missing smallest positive in a list: [1, 2, 3]\nThe output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"answer": string  // answer to the user\'s question\n\t"psuedo code": string  // psuedo code to the user\'s question\n}\n```')])

In [27]:
# Send the prompt to the chat model, then let the structured parser turn the
# reply into a dict keyed by the schema names.
messages = prompt.to_messages()
response = chat(messages=messages)

output = output_parser.parse(response.content)
output

{'answer': 'The missing smallest positive in the list [1, 2, 3] is 4.',
 'psuedo code': "Initialize a variable 'smallest' to 1. Iterate through the list and check if 'smallest' is present. If it is, increment 'smallest' by 1. Once the loop ends, 'smallest' will be the missing smallest positive."}