In [1]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# Presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

In [2]:
# Chat model used by the cells below; the model id is named so it is easy to swap.
MODEL_NAME = "gpt-4o-mini"
model = ChatOpenAI(model=MODEL_NAME)

# Plain-text parser: reduces the model reply to its text content (a str).
parser = StrOutputParser()

In [3]:
# Hand-written prompt asking for a JSON string with two keys: "sentiment"
# (one of POSITIVE / NEUTRAL / NEGATIVE) and "themes" (a list of theme labels).
# {input_review} is the only placeholder and is filled in by PromptTemplate.
# The example theme labels are spelled correctly on purpose: the model may copy
# them verbatim into its answer.
review_compression_prompt = """
Give sentiment and themes for the given input review.

Give output as json with "sentiment" key and value should be either of [POSITIVE, NEUTRAL, NEGATIVE]. 
And another key "themes" with list of themes talked about in the review, for example "Product Issue", "Packaging Issue", etc.

Strictly only give json output as string and nothing else. Do not give markdown.

Input Review:
```
{input_review}
```
"""

In [4]:
# Sample customer review; later cells reuse it as the running example.
input_review = "This bedsheet is beautiful and looks just like in the picture. The print is very pretty. It is really comfortable and smooth. Fabric is very nice , color doesn’t fade even after the wash. It fits our queen size bed perfectly ( it can be easy used for King size bed) . Go for it, value for money."

# Wrap the raw prompt text so its {input_review} placeholder can be filled.
prompt_template = PromptTemplate(template=review_compression_prompt)

# Fill the placeholder and print the exact text that will be sent to the model.
template_values = {"input_review": input_review}
input_prompt = prompt_template.invoke(template_values)
print(input_prompt.text)


Give sentiment and themes for the given input review.

Give output as json with "sentiment" key and value should be either of [POSITIVE, NEUTRAL, NEGATIVE]. 
And another key "themes" with list of themes talked about in the review, for example "Producut Issue", "Packaging Issue", etc.

Strictly only give json output as string and nothing else. Do not give markdown.

Input Review:
```
This bedsheet is beautiful and looks just like in the picture. The print is very pretty. It is really comfortable and smooth. Fabric is very nice , color doesn’t fade even after the wash. It fits our queen size bed perfectly ( it can be easy used for King size bed) . Go for it, value for money.
```



In [5]:
# Send the rendered prompt to the chat model, then reduce the reply to text.
# `parser` is still the StrOutputParser here, so `compressed_review` is a str
# that merely *contains* JSON — it has not been parsed into a dict.
result = model.invoke(input_prompt)
compressed_review = parser.invoke(result)

print(compressed_review)

{"sentiment":"POSITIVE","themes":["Product Quality","Comfort","Value for Money","Design"]}


In [6]:
# Deliberate failure demo: `compressed_review` is a JSON *string* at this point,
# so key lookup raises TypeError. The error is caught and printed (instead of
# left unhandled) so that Restart & Run All can continue past this cell.
try:
    print(compressed_review["sentiment"])
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: string indices must be integers, not 'str'

# JSON output parser and format instructions through LangChain

Reference: https://python.langchain.com/v0.1/docs/modules/model_io/output_parsers/types/json/

In [7]:
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field
from enum import Enum

In [8]:
# Closed set of sentiment labels accepted in the structured output.
# Mixing in `str` makes every member a plain string as well as an Enum member.
# NOTE(review): documented with comments rather than a docstring, because a
# class docstring may be copied into the generated JSON schema — confirm before
# converting.
class Sentiment(str, Enum):
    positive = 'POSITIVE'
    neutral = 'NEUTRAL'
    negative = 'NEGATIVE'


# Schema for the structured result requested from the model: one sentiment
# label plus a list of themes. It is handed to JsonOutputParser, whose format
# instructions embed the JSON schema generated from this class.
# NOTE(review): `themes` is an untyped `list`, so the generated schema leaves
# its items unconstrained ("items": {}); consider `list[str]` if themes must be
# strings.
class CompressedReview(BaseModel):
    sentiment: Sentiment = Field(description="gives sentiment value for the review")
    themes: list = Field(description="themes expressed in the review")

In [9]:
# Rebinds `parser` (previously the StrOutputParser) to a JSON parser tied to the
# CompressedReview schema; every cell below this point uses the JSON parser.
parser = JsonOutputParser(pydantic_object=CompressedReview)

In [10]:
# The schema-derived format instructions are bound once as a partial variable,
# so only the review text has to be supplied when the prompt is invoked.
format_instructions = parser.get_format_instructions()
review_template = (
    "Give sentiment and themes for a review.\n"
    "{format_instructions}\n"
    "Input Review:\n"
    "```\n"
    "{input_review}\n"
    "```\n"
)
prompt = PromptTemplate(
    template=review_template,
    input_variables=["input_review"],
    partial_variables={"format_instructions": format_instructions},
)

In [11]:
# Show the instructions the JSON parser derives from the CompressedReview schema.
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Sentiment": {"enum": ["POSITIVE", "NEUTRAL", "NEGATIVE"], "title": "Sentiment", "type": "string"}}, "properties": {"sentiment": {"$ref": "#/$defs/Sentiment", "description": "gives sentiment value for the review"}, "themes": {"description": "themes expressed in the review", "items": {}, "title": "Themes", "type": "array"}}, "required": ["sentiment", "themes"]}
```


In [12]:
# Render the full prompt for the sample review to inspect what the model receives.
formatted_prompt = prompt.invoke({"input_review": input_review})
print(formatted_prompt.text)

Give sentiment and themes for a review.
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Sentiment": {"enum": ["POSITIVE", "NEUTRAL", "NEGATIVE"], "title": "Sentiment", "type": "string"}}, "properties": {"sentiment": {"$ref": "#/$defs/Sentiment", "description": "gives sentiment value for the review"}, "themes": {"description": "themes expressed in the review", "items": {}, "title": "Themes", "type": "array"}}, "required": ["sentiment", "themes"]}
```
Input Review:
```
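This bedsheet is beautiful and looks just like in the picture. The print is very pretty. It is really comfortable and smooth. Fabric is very nice , color doesn’t fade even after the wash. It fits our queen size bed perfectly ( it can be easy used for King size bed) . Go for it, value for money.
```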

In [13]:
# Same three steps as before — render, call the model, parse — but `parser` is
# now the JSON parser, so the final value is a dict rather than a str.
review_inputs = {"input_review": input_review}
formatted_prompt = prompt.invoke(review_inputs)
result = model.invoke(formatted_prompt)
compressed_review = parser.invoke(result)

In [14]:
# Display the parsed result: the JSON parser returns a Python dict.
compressed_review

{'sentiment': 'POSITIVE',
 'themes': ['Aesthetics', 'Comfort', 'Quality', 'Value for Money', 'Fit']}

In [15]:
# Key lookup works now because the parsed result is a dict, not a string.
compressed_review["sentiment"]

'POSITIVE'

In [16]:
# TODO: add the second part of the prompt (actionable insights); StrOutputParser is enough there because that output is a plain string.