In [3]:
import os
from typing import Literal

import pandas as pd
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain.schema import SystemMessage
from langchain_community.chat_models.azureml_endpoint import (
    AzureMLChatOnlineEndpoint,
    AzureMLEndpointApiType,
    LlamaChatContentFormatter,
)
from langchain_mistralai.chat_models import ChatMistralAI

# Run python-dotenv's loader before the os.getenv() lookups below, so that
# values kept in a dotenv file (presumably a local `.env` — confirm location)
# are available in the process environment.
load_dotenv()

def chat_completions_url(endpoint):
    """Build the chat-completions URL for a deployment's base endpoint.

    Args:
        endpoint: Base endpoint URL as read from the environment, or ``None``
            / empty string when the variable is not configured.

    Returns:
        ``endpoint`` with ``/v1/chat/completions`` appended (a single trailing
        slash on the base URL is not duplicated), or ``None`` when no endpoint
        is configured.
    """
    # os.getenv returns None for unset variables; concatenating that with a
    # string would raise TypeError and abort the whole configuration cell even
    # if the Llama deployments are never used.
    if not endpoint:
        return None
    return endpoint.rstrip("/") + "/v1/chat/completions"


# Credentials and endpoints are read from the environment; any of them may be
# None when the corresponding variable is not set.
OPEN_AI_API_KEY = os.getenv("OPEN_AI_API_KEY")

MISTRAL_SMALL_ENDPOINT = os.getenv("MISTRAL_SMALL_ENDPOINT")
MISTRAL_SMALL_API_KEY = os.getenv("MISTRAL_SMALL_API_KEY")

MISTRAL_LARGE_ENDPOINT = os.getenv("MISTRAL_LARGE_ENDPOINT")
MISTRAL_LARGE_API_KEY = os.getenv("MISTRAL_LARGE_API_KEY")

LLAMA3_8B_INSTRUCT_ENDPOINT = os.getenv("LLAMA3_8B_INSTRUCT_ENDPOINT")
LLAMA3_8B_INSTRUCT_API_KEY = os.getenv("LLAMA3_8B_INSTRUCT_API_KEY")
LLAMA3_8B_INSTRUCT_ENDPOINT_URL = chat_completions_url(LLAMA3_8B_INSTRUCT_ENDPOINT)

LLAMA3_70B_INSTRUCT_ENDPOINT = os.getenv("LLAMA3_70B_INSTRUCT_ENDPOINT")
LLAMA3_70B_INSTRUCT_API_KEY = os.getenv("LLAMA3_70B_INSTRUCT_API_KEY")
LLAMA3_70B_INSTRUCT_ENDPOINT_URL = chat_completions_url(LLAMA3_70B_INSTRUCT_ENDPOINT)

## 1. Data

In [6]:
# Read the full Yelp review CSV, then keep its first 100 rows as a small
# working sample for the cells that follow.
yelp_reviews_path = "./inputs/yelp.csv"
df = pd.read_csv(yelp_reviews_path)
df_sample = df.head(n=100)

In [8]:
# Display the `text` column of the sample as the cell output.
df_sample['text']

0     My wife took me here on my birthday for breakf...
1     I have no idea why some people give bad review...
2     love the gyro plate. Rice is so good and I als...
3     Rosie, Dakota, and I LOVE Chaparral Dog Park!!...
4     General Manager Scott Petello is a good egg!!!...
                            ...                        
95         Awesome subs clean and friendly well priced.
96    Had dinner and brunch, not on the same day...t...
97    This is a very interesting place.  Don't go he...
98    I LOVE Chic Nails!\n\nI used to go to Tip & To...
99    After the Padres Spring Training game, we had ...
Name: text, Length: 100, dtype: object

## 2. Output Structure

In [19]:
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Optional, List
from langchain.output_parsers import PydanticOutputParser


# Closed set of labels shared by every aspect field. Typing the fields as a
# ``Literal`` (instead of ``str`` plus an ``enum=`` keyword, which pydantic v1
# only copies into the JSON schema) makes validation reject any other value,
# while the generated schema still advertises the same four choices.
SentimentLabel = Literal["positive", "negative", "neutral", "none"]


class ReviewInfo(BaseModel):
    """Information extracted from the text.

    Attributes:
        summary: Free-text one-sentence summary of the review.
        food: Sentiment label for the food.
        service: Sentiment label for the service.
        price: Sentiment label for the pricing.
        ambience: Sentiment label for the ambience.

    Each sentiment attribute must be one of ``"positive"``, ``"negative"``,
    ``"neutral"`` or ``"none"`` (``"none"`` meaning the aspect is not
    mentioned); any other value fails validation.
    """
    summary: str = Field(
        description="A one-sentence summary of the review, maximum 50 words."
    )
    food: SentimentLabel = Field(
        description="Classify the customer sentiment regarding the food of the restaurant in the review as positive, negative, or neutral. If there is no mention of food, return none.",
    )
    service: SentimentLabel = Field(
        description="Classify the customer sentiment regarding the service of the restaurant in the review as positive, negative, or neutral. If there is no mention of service, return none.",
    )
    price: SentimentLabel = Field(
        description="Classify the customer evaluation regarding the pricing of the restaurant in the review as positive, negative, or neutral. If there is no mention of pricing, return none.",
    )
    ambience: SentimentLabel = Field(
        description="Classify the customer sentiment regarding the ambience of the restaurant in the review as positive, negative, or neutral. If there is no mention of ambience, return none.",
    )

# Chat prompt for the extraction task: a fixed system instruction followed by
# the review text, which is supplied at invoke time as `user_input`.
review_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent string literals are concatenated verbatim, so the first
            # one must end with a space to keep the two sentences separated.
            "You are an expert extraction algorithm, specialized in restaurant review and customer sentiment analysis. "
            "Only extract relevant information from the text as specified by the provided JSON schema. Do not generate any new information or extra characters outside of the JSON schema.",
        ),
        ("human", "{user_input}"),
    ]
)

# Parser bound to the ReviewInfo model.
pydantic_parser = PydanticOutputParser(pydantic_object=ReviewInfo)

In [10]:
# Trial run against the small Mistral deployment: configure the chat model,
# ask it for ReviewInfo-structured output, and feed it from the review prompt.
mistral_small_settings = {
    "endpoint": MISTRAL_SMALL_ENDPOINT,
    "mistral_api_key": MISTRAL_SMALL_API_KEY,
}
llm = ChatMistralAI(**mistral_small_settings)
structured_llm = llm.with_structured_output(schema=ReviewInfo)
chain = review_prompt | structured_llm

In [17]:
# Show the review text stored under row label 15 of the sample.
df_sample.loc[15, "text"]

'Was it worth the 21$ for a salad and small pizza? Absolutely not! Bad service. Maybe the guys grandma died I don\'t know. I want to tell you what really made me mad about the experience. We order the small pizza and salad and the guys could have cared less and took our $ and we sat down. We were looking around and hmm, there\'s a sign saying "x large pizza and large salad only 23$". Wow that would have been nice if the guy told us that. I left hungry, mad and unsatisfied. \n\nTo the owner: teach your employees the value of upselling and telling the specials. Something so small can affect a customers experience negatively. \n\nAnd your salads are severely overpriced \n\nWon\'t go back unless I\'m desperate.'

In [18]:
# Run the extraction chain on the review at row label 15 and display the
# result as a plain dict.
sample_text = df_sample.loc[15, "text"]
chain_input = {"user_input": sample_text}
review_structured = chain.invoke(chain_input)
review_structured.dict()

{'summary': 'The restaurant served overpriced food with poor service and did not inform about better deals.',
 'food': 'negative',
 'service': 'negative',
 'price': 'negative',
 'ambience': 'none'}

In [21]:
# Display the object returned by the chain itself (its repr), as opposed to
# the dict form shown in the previous cell.
review_structured

ReviewInfo(summary='The restaurant served overpriced food with poor service and did not inform about better deals.', food='negative', service='negative', price='negative', ambience='none')

In [23]:
# validate the output with pydantic parser:
# not necessary, as the output is already validated by the chain
# we can add a try-except block to catch any errors