In [1]:
import os
from typing import Literal

import pandas as pd
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain.schema import SystemMessage
from langchain_community.chat_models.azureml_endpoint import (
    AzureMLChatOnlineEndpoint,
    AzureMLEndpointApiType,
    LlamaChatContentFormatter,
)
from langchain_mistralai.chat_models import ChatMistralAI

# Run python-dotenv first so the lookups below can see anything it loads.
load_dotenv()

# Each key is None when its environment variable is not set.
# NOTE(review): neither constant is passed to a client in the cells shown here;
# presumably ChatMistralAI picks its key up from the environment - confirm.
OPENAI_API_KEY = os.environ.get("OPEN_AI_API_KEY")
MISTRAL_API_KEY = os.environ.get("MISTRAL_SMALL_API_KEY")


## 1. Data

In [2]:
# Read the review CSV, then keep only the first 100 rows as a small working sample.
df = pd.read_csv("./inputs/yelp.csv")
df_sample = df.head(n=100)

In [3]:
# Preview the review text column of the sample.
df_sample["text"]

0     My wife took me here on my birthday for breakf...
1     I have no idea why some people give bad review...
2     love the gyro plate. Rice is so good and I als...
3     Rosie, Dakota, and I LOVE Chaparral Dog Park!!...
4     General Manager Scott Petello is a good egg!!!...
                            ...                        
95         Awesome subs clean and friendly well priced.
96    Had dinner and brunch, not on the same day...t...
97    This is a very interesting place.  Don't go he...
98    I LOVE Chic Nails!\r\n\r\nI used to go to Tip ...
99    After the Padres Spring Training game, we had ...
Name: text, Length: 100, dtype: object

## 2. Output Structure

In [4]:
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Optional, List
from langchain.output_parsers import PydanticOutputParser


# Closed label set shared by every aspect field of ReviewInfo.
Sentiment = Literal["positive", "negative", "neutral", "none"]


class ReviewInfo(BaseModel):
    """Information extracted from the text.

    Structured result for a single restaurant review: a short summary, one
    sentiment label per aspect (food, service, price, ambience) and an
    optional free-text note.

    The aspect fields are typed as ``Literal`` rather than ``str`` with
    ``Field(enum=...)``: in pydantic v1 extra ``Field`` keyword arguments are
    only copied into the generated JSON schema and are never validated, so a
    model reply such as ``"mixed"`` used to be accepted silently. With
    ``Literal`` the schema still advertises the same ``enum`` values, and an
    out-of-set value now raises a validation error.
    """
    # One-sentence recap of the review.
    summary: str = Field(
        description="A one-sentence summary of the review, maximum 50 words."
    )
    # Sentiment about the food; "none" when food is not mentioned.
    food: Sentiment = Field(
        description="Classify the customer sentiment regarding the food of the restaurant in the review as positive, negative, or neutral. If there is no mention of food, return none.",
    )
    # Sentiment about the service; "none" when service is not mentioned.
    service: Sentiment = Field(
        description="Classify the customer sentiment regarding the service of the restaurant in the review as positive, negative, or neutral. If there is no mention of service, return none.",
    )
    # Evaluation of the pricing; "none" when pricing is not mentioned.
    price: Sentiment = Field(
        description="Classify the customer evaluation regarding the pricing of the restaurant in the review as positive, negative, or neutral. If there is no mention of pricing, return none.",
    )
    # Sentiment about the ambience; "none" when ambience is not mentioned.
    ambience: Sentiment = Field(
        description="Classify the customer sentiment regarding the ambience of the restaurant in the review as positive, negative, or neutral. If there is no mention of ambience, return none.",
    )
    # Anything else useful to the business owner; optional, so it may be omitted.
    other: Optional[str] = Field(
        description="Extract any other useful information from the review text to the business owner. If there is no other information, return an empty string."
    )

# Prompt shared by the extraction cells: a fixed system instruction plus the
# raw review text supplied as ``user_input``.
# Python joins adjacent string literals verbatim, so the first sentence ends
# with an explicit space; without it the prompt read "...analysis.Only extract...".
review_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm, specialized in restaurant review and customer sentiment analysis. "
            "Only extract relevant information from the text as specified by the provided JSON schema. Do not generate any new information or extra characters outside of the JSON schema."
        ),
        ("human", "{user_input}"),
    ]
)

# Output parser bound to the ReviewInfo schema (not referenced by the cells shown here).
pydantic_parser = PydanticOutputParser(pydantic_object=ReviewInfo)

### Using `with_structured_output`

In [5]:
# Baseline run: Mistral Small, with ReviewInfo as the structured-output schema.
llm = ChatMistralAI(model="mistral-small-2402")
chain = review_prompt | llm.with_structured_output(ReviewInfo)
# Display the composed runnable to inspect the prompt and the bound tool schema.
chain

ChatPromptTemplate(input_variables=['user_input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert extraction algorithm, specialized in restaurant review and customer sentiment analysis.Only extract relevant information from the text as specified by the provided JSON schema. Do not generate any new information or exrta characters outside of the JSON schema.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['user_input'], template='{user_input}'))])
| RunnableBinding(bound=ChatMistralAI(client=<httpx.Client object at 0x000001C1592F9050>, async_client=<httpx.AsyncClient object at 0x000001C15853D3D0>, mistral_api_key=SecretStr('**********'), model='mistral-small-2402'), kwargs={'tools': [{'type': 'function', 'function': {'name': 'ReviewInfo', 'description': 'Information extracted from the text.', 'parameters': {'type': 'object', 'properties': {'summary': {'description': 'A one-sentence summary of the review, m

In [6]:
# Take the review stored under row label 15, run it through the chain,
# and show the parsed ReviewInfo as a plain dict.
sample_text = df_sample.loc[15, 'text']
review_structured = chain.invoke({"user_input": sample_text})
review_structured.dict()

{'summary': 'Customer was unhappy with the food, service, and pricing, and felt the salads were overpriced.',
 'food': 'negative',
 'service': 'negative',
 'price': 'negative',
 'ambience': 'none',
 'other': 'The customer suggests teaching employees about upselling and informing customers about specials.'}

### Using tool calling

In [25]:
# ReviewInfo is the only tool offered to the model.
tools = [ReviewInfo]
llm = ChatMistralAI(model="mistral-small-2402")
# tool_choice="any" forces the model to call at least one tool
# (supported by OpenAI, MistralAI, FireworksAI, and Groq).
llm_with_tools = llm.bind_tools(tools, tool_choice="any")

In [26]:
# Build the request as a list of chat messages so the system instruction keeps
# its own role. ``ChatPromptTemplate.format`` would instead flatten the prompt
# into a single "System: ...\nHuman: ..." string sent as one human message.
query = review_prompt.format_messages(user_input=sample_text)
ai_message = llm_with_tools.invoke(query)

In [28]:
# Raw tool-call payload attached to the model reply.
ai_message.additional_kwargs["tool_calls"]

[{'id': 'C7Dp77JDH',
  'function': {'name': 'ReviewInfo',
   'arguments': '{"summary": "The customer was disappointed with the service, pricing, and quality of food.", "food": "negative", "service": "negative", "price": "negative", "ambience": "none", "other": "The customer suggests the employees should upsell and inform about specials. Also, the customer finds the salads overpriced."}'}}]

In [34]:
# retry with another supported model
llm = ChatMistralAI(model="open-mixtral-8x22b")
llm_with_tools = llm.bind_tools(tools, tool_choice="any")
# Message list (not a flattened string) so the system prompt keeps its role.
query = review_prompt.format_messages(user_input=sample_text)
# Reset first: if the call fails, the cell must not display the reply left
# over from an earlier cell as though it came from this model.
ai_message = None
try:
    ai_message = llm_with_tools.invoke(query)
except Exception as e:
    # The error text is the point of the demo, so report it instead of raising.
    print(e)
# Kept as the cell's last expression so a successful call still renders its
# tool calls; evaluates to None (no output) when the call failed.
ai_message.additional_kwargs['tool_calls'] if ai_message is not None else None

[{'id': 'csG0rYxTa',
  'function': {'name': 'ReviewInfo',
   'arguments': '{"summary": "The customer was disappointed with their experience due to poor service and overpriced food, and felt they could have saved money with a special offer.", "food": "negative", "service": "negative", "price": "negative", "ambience": "none", "other": "The customer suggests the owner should train employees on upselling and informing customers about specials."}'}}]

In [31]:
# Test with a model that does not support function calling.
# The Mistral API rejects the request with an explicit error in that case,
# which makes this a reliable check of whether a model supports function
# calling, with no second-guessing needed.

llm = ChatMistralAI(model="open-mistral-7b")
chain = review_prompt | llm.with_structured_output(schema=ReviewInfo)
sample_text = df_sample['text'][15]
try:
    review_structured = chain.invoke({"user_input": sample_text})
    # Print explicitly: a bare expression inside ``try`` is discarded, so a
    # successful call would otherwise look the same as no call at all.
    print(review_structured.dict())
except Exception as e:
    print(e)


llm_with_tools = llm.bind_tools(tools, tool_choice="any")
# Message list (not a flattened string) so the system prompt keeps its role.
query = review_prompt.format_messages(user_input=sample_text)
try:
    ai_message = llm_with_tools.invoke(query)
    print(ai_message.additional_kwargs['tool_calls'])
except Exception as e:
    print(e)

Error response 400 while fetching https://api.mistral.ai/v1/chat/completions: {"object":"error","message":"Function calling is not enabled for this model","type":"invalid_request_error","param":null,"code":null}
Error response 400 while fetching https://api.mistral.ai/v1/chat/completions: {"object":"error","message":"Function calling is not enabled for this model","type":"invalid_request_error","param":null,"code":null}


In [23]:
# Show the keyword arguments that with_structured_output binds on the first step of its runnable.
llm.with_structured_output(ReviewInfo).first.kwargs

{'tools': [{'type': 'function',
   'function': {'name': 'ReviewInfo',
    'description': 'Information extracted from the text.',
    'parameters': {'type': 'object',
     'properties': {'summary': {'description': 'A one-sentence summary of the review, maximum 50 words.',
       'type': 'string'},
      'food': {'description': 'Classify the customer sentiment regarding the food of the restaurant in the review as positive, negative, or neutral. If there is no mention of food, return none.',
       'enum': ['positive', 'negative', 'neutral', 'none'],
       'type': 'string'},
      'service': {'description': 'Classify the customer sentiment regarding the service of the restaurant in the review as positive, negative, or neutral. If there is no mention of service, return none.',
       'enum': ['positive', 'negative', 'neutral', 'none'],
       'type': 'string'},
      'price': {'description': 'Classify the customer evaluation regarding the pricing of the restaurant in the review as positiv

In [None]:
# To-dos:
# - Try "mistral-7b" on Snowflake: it returns a result as if nothing is wrong.
# - The two models on Azure under the serverless API are Small and Large; both are commercial models and support function calling.