In [1]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
import boto3
import os
from dotenv import load_dotenv
import pandas as pd
import random
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage,SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_experimental.synthetic_data import create_data_generation_chain
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_openai import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser

In [2]:
# load_dotenv() runs first; the credentials are then read from os.environ.
load_dotenv()

# S3 client authenticated with the DEV_* key pair taken from the environment.
s3r = boto3.client(
    's3',
    aws_access_key_id=os.environ["DEV_ACCESS_KEY"],
    aws_secret_access_key=os.environ["DEV_SECRET_ACCESS_KEY"],
)

### Download test data from the dev S3 bucket

In [None]:
# Local file name; the S3 object key is this name under a fixed prefix.
name = "df_test_v1.xlsx"
fullname = "swapnil/form_selection_core/excel/" + name

# Copy the object from the bucket into the working directory as `name`.
s3r.download_file(
    Bucket="datainsights-shared-coupadev-com",
    Key=fullname,
    Filename=name,
)

## load test data

In [3]:
# Read the workbook (same file name the download cell writes) into a
# DataFrame and preview its first two rows.
test_data_path = "df_test_v1.xlsx"
test_data = pd.read_excel(test_data_path)
test_data.head(2)

Unnamed: 0,instance,easy_form_widget_response_id,updated_at,easy_form_id,easy_form_widget_id,easy_form_response_id,user_submitted_description,easy_form_widget_response_type,backing_attribute,field_name,...,subject_type,status,easy_form_name,easy_form_model,easy_form_status,easy_form_description,form_type,requistion_line_id,header_id,channel
0,cbre,,2023-11-01 11:09:43,,,,"""Warning Trip Hazard"" Sign 210mm x 148mm",,,,...,,,,,,,freeform,5889876.0,3013667.0,freeform
1,monash,,2023-09-11 00:20:35,,,,DEBDEN DAYPLANNER refil,,,,...,,,,,,,freeform,1091278.0,479534.0,freeform


### generate synthetic data

In [88]:
def initialize_llm(**kwargs):
    """Build an ``AzureChatOpenAI`` client configured from environment variables.

    Args:
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``AzureChatOpenAI`` (for example ``temperature``).

    Returns:
        The constructed ``AzureChatOpenAI`` instance.

    Raises:
        KeyError: If any of the ``AZURE_OPENAI_*`` environment variables read
            below is not set.
    """
    azure_settings = {
        "deployment_name": os.environ["AZURE_OPENAI_DEPLOYMENT"],
        "api_key": os.environ["AZURE_OPENAI_API_KEY"],
        "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
        "api_version": os.environ["AZURE_OPENAI_API_VERSION"],
    }
    return AzureChatOpenAI(**azure_settings, **kwargs)


# LLM handed to the data-generation chain.
llm = initialize_llm(temperature=0.75)

### define personas

In [89]:
# Writing style attached to each requester persona, keyed by persona name.
personas_attributes = {
    'verbose': 'writes longer complete sentences that are friendlier and generally include punctuation.',
    'concise': 'writes direct queries with minimal non-essential text. You usually omit capitalization and filler phrases',
    'step-by-step': 'summarizes the goal of each step before explaining the detailed instructions.',
    'casual': 'uses informal language that may not directly reference all the details',
    'formal': 'likes to provide detailed information and formal language.',
    'spelling-mistake': 'often makes spelling mistakes',
}

# Persona names in declaration order (dicts preserve insertion order).
all_personas = list(personas_attributes)

In [90]:
# One "name = description" entry per persona. Entries are separated by a
# newline followed by a single space; nothing trails the final entry.
personas_attributes_string = "\n ".join(
    "{} = {}".format(persona_name, persona_style)
    for persona_name, persona_style in personas_attributes.items()
)

print(personas_attributes_string)

verbose = writes longer complete sentences that are friendlier and generally include punctuation.
 concise = writes direct queries with minimal non-essential text. You usually omit capitalization and filler phrases
 step-by-step = summarizes the goal of each step before explaining the detailed instructions.
 casual = uses informal language that may not directly reference all the details
 formal = likes to provide detailed information and formal language.
 spelling-mistake = often makes spelling mistakes


In [91]:
# Generation prompt. It has two placeholders, {description} and
# {persona_description}, which must be supplied when the chain is run.
prompt = PromptTemplate.from_template("""You are an employee in a corporate business and you have the responsibility to purchase or procure goods or services for your organization.
You will be provided with a description of the item or service you need to request.
Your task is to present this request to an AI Assistant.
Use any crucial information such as quantity, amount, business justification, etc., from the provided fields to create a succinct one-line request.
Keep your request short and simple. Use the given description wisely and incorporate only the significant details.
You may add more information if necessary. If there are any extra preferences provided, utilize them while framing your sentence.
Instructions:
-You are a user whose persona aligns with the given persona description. Behave accordingly.
Item or Service Description: {description}
Persona Description: {persona_description}
Request:""")

# Chain combining `llm` with the prompt above; the outputs shown in this
# notebook are dicts that echo the inputs and add a 'text' key.
generate = create_data_generation_chain(llm, prompt)

In [111]:
# Use entry 10 of the `user_submitted_description` column, with double quotes
# stripped, as the item description for this example.
item_description =test_data['user_submitted_description'][10].replace('"','')
# NOTE(review): no random seed is set in this cell, so the persona picked here
# can differ from run to run.
persona = random.choice(all_personas)
persona_description = personas_attributes[persona]
item_description, persona,persona_description

('Vanguard House - To remove damaged traffic signs and replace with new including 168mm bolt bollard',
 'spelling-mistake',
 'often makes spelling mistakes')

In [123]:
# Stream the generation chain and print every chunk it yields.
# `chunk` keeps the last yielded value after the loop; later cells read
# chunk['description'], chunk['persona_description'] and chunk['text'].
for chunk in generate.stream({
                "description": item_description,
                "persona_description":persona_description,
            }):
    print(chunk, end="")

{'description': 'Vanguard House - To remove damaged traffic signs and replace with new including 168mm bolt bollard', 'persona_description': 'often makes spelling mistakes', 'text': 'Please procure a Vanguard House service to remove damaged traffic signs and replace them with new ones, including a 168mm bolt bollard.'}

In [126]:
# Same inputs as the streaming cell, run as a single blocking call; the
# returned dict is displayed as the cell output and is not stored.
generate.invoke({
                "description": item_description,
                "persona_description":persona_description,
            })

{'description': 'Vanguard House - To remove damaged traffic signs and replace with new including 168mm bolt bollard',
 'persona_description': 'often makes spelling mistakes',
 'text': 'Hi AI Assistant, please procure a Vanguard House to remove damaged traffic signs and replace them with new ones, including a 168mm bolt bollard. Thank you!'}

In [113]:
# Inspect `chunk`, the loop variable left bound by the `generate.stream(...)` loop.
chunk

{'description': 'Vanguard House - To remove damaged traffic signs and replace with new including 168mm bolt bollard',
 'persona_description': 'often makes spelling mistakes',
 'text': 'Dear AI Assistant, please assist in procuring a Vanguard House service to replace damaged traffic signs and install new ones, including a 168mm bolt bollard. Thank you.'}

### reflection

In [114]:
# Second LLM instance used by the reflection chain: near-zero temperature
# (1e-9) and streaming enabled.
llm_reflection = initialize_llm(temperature=1e-9 , streaming=True)

In [115]:
# Critique prompt. Placeholders: {personas_attributes_string}, {description},
# {persona_description}, {text} and {format_instructions}.
reflection_template="""You are proficient in analyzing, critiquing, and recommending enhancements to the training data used for educating a chatbot on procurement requests.
The chatbot should be capable of generalizing an array of user inquiries. It's assumed that the chatbot will only cater to users with the following attributes.
{personas_attributes_string}
Below are the specifics regarding the generated query along with the original description utilized to generate the query:
Original Item or Service Description: {description}
Persona Description (Individual making the request): {persona_description}
Generated Query: {text}.

Your task is to analyse and report whether the 'Generated Query' follows the exact attributes of the Persona Description
\n{format_instructions}\n"""

In [116]:
# Inputs for the reflection prompt: the persona catalogue plus the three
# fields of the last chunk produced by the streaming cell.
input_reflection = {
    'personas_attributes_string': personas_attributes_string,
    'description': chunk['description'],
    'persona_description': chunk['persona_description'],
    'text': chunk['text'],
}
input_reflection

{'personas_attributes_string': 'verbose = writes longer complete sentences that are friendlier and generally include punctuation.\n concise = writes direct queries with minimal non-essential text. You usually omit capitalization and filler phrases\n step-by-step = summarizes the goal of each step before explaining the detailed instructions.\n casual = uses informal language that may not directly reference all the details\n formal = likes to provide detailed information and formal language.\n spelling-mistake = often makes spelling mistakes',
 'description': 'Vanguard House - To remove damaged traffic signs and replace with new including 168mm bolt bollard',
 'persona_description': 'often makes spelling mistakes',
 'text': 'Dear AI Assistant, please assist in procuring a Vanguard House service to replace damaged traffic signs and install new ones, including a 168mm bolt bollard. Thank you.'}

In [117]:
# Structured verdict that the reflection parser turns the LLM reply into:
# three free-text fields (observation, reason_for_observation, action) and a
# boolean `valid` flag.
# NOTE(review): the Field descriptions appear verbatim in the printed format
# instructions, so the "atributes" spelling reaches the prompt; change it only
# deliberately, since it alters the text sent to the model.
class IsQueryValid(BaseModel):
    observation: str = Field(description="Suggestion to improve the generated query take to make the query accurately follow persona atributes")
    reason_for_observation:str = Field(description="detailed reason for the specific observation")
    action: str = Field(description="action to take based on observation")
    valid: bool = Field(description="Whether the generated query for item/service description accurately follows persona atributes")

# Set up a parser + inject instructions into the prompt template.
# The parser targets the IsQueryValid model.
reflection_parser = PydanticOutputParser(pydantic_object=IsQueryValid)

# Show the format-instruction text the parser generates.
print(reflection_parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"observation": {"title": "Observation", "description": "Suggestion to improve the generated query take to make the query accurately follow persona atributes", "type": "string"}, "reason_for_observation": {"title": "Reason For Observation", "description": "detailed reason for the specific observation", "type": "string"}, "action": {"title": "Action", "description": "action to take based on observation", "type": "string"}, "valid": {"title": "Valid", "description": "Whether the generated query for item/service description accurat

In [118]:
# Re-display the reflection inputs built earlier (inspection only).
input_reflection

{'personas_attributes_string': 'verbose = writes longer complete sentences that are friendlier and generally include punctuation.\n concise = writes direct queries with minimal non-essential text. You usually omit capitalization and filler phrases\n step-by-step = summarizes the goal of each step before explaining the detailed instructions.\n casual = uses informal language that may not directly reference all the details\n formal = likes to provide detailed information and formal language.\n spelling-mistake = often makes spelling mistakes',
 'description': 'Vanguard House - To remove damaged traffic signs and replace with new including 168mm bolt bollard',
 'persona_description': 'often makes spelling mistakes',
 'text': 'Dear AI Assistant, please assist in procuring a Vanguard House service to replace damaged traffic signs and install new ones, including a 168mm bolt bollard. Thank you.'}

In [119]:
# Prompt for the reflection step. Four variables are supplied per call;
# {format_instructions} is pre-filled from the parser as a partial variable.
reflection_prompt = PromptTemplate(
    template=reflection_template,
    input_variables=['personas_attributes_string','description', 'persona_description', 'text'],
    partial_variables={"format_instructions": reflection_parser.get_format_instructions()},
)

In [120]:
# Pipeline: prompt -> low-temperature LLM -> parser for IsQueryValid.
reflection_chain = reflection_prompt | llm_reflection | reflection_parser

In [121]:
# Run the reflection chain on the hand-built example inputs and display the
# parsed result.
reflection_result = reflection_chain.invoke(input_reflection)
reflection_result

IsQueryValid(observation='The generated query does not accurately follow the persona attributes.', reason_for_observation='The generated query is too formal and does not contain any spelling mistakes, which contradicts the persona description of often making spelling mistakes.', action='Revise the generated query to include spelling mistakes and use more informal language.', valid=False)

### create graph

In [None]:
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage

# State schema passed to StateGraph below: a single `messages` field annotated
# as a sequence of BaseMessage with `operator.add` attached as metadata.
# NOTE(review): presumably the graph uses `operator.add` to add each node's
# returned list onto the existing one — confirm against the LangGraph docs.
class AgentState(TypedDict):
    messages:Annotated[Sequence[BaseMessage],operator.add]
    

In [125]:
# Inspect `chunk` again; it still holds the last value from the streaming loop.
chunk

{'description': 'Vanguard House - To remove damaged traffic signs and replace with new including 168mm bolt bollard',
 'persona_description': 'often makes spelling mistakes',
 'text': 'Please procure a Vanguard House service to remove damaged traffic signs and replace them with new ones, including a 168mm bolt bollard.'}

In [129]:
# Re-display the parsed reflection result (inspection only).
reflection_result


IsQueryValid(observation='The generated query does not accurately follow the persona attributes.', reason_for_observation='The generated query is too formal and does not contain any spelling mistakes, which contradicts the persona description of often making spelling mistakes.', action='Revise the generated query to include spelling mistakes and use more informal language.', valid=False)

In [131]:
def generate_synthetic_query(state):
    """Run the generation chain on the fields stored under ``state['messages']``.

    Reads the ``description``, ``persona_description`` and ``text`` entries,
    passes them to ``generate.invoke`` and returns the resulting ``'text'``
    value wrapped in a one-element list under the ``"messages"`` key.

    NOTE(review): ``state['messages']`` is indexed by string keys here, while
    ``AgentState`` annotates it as a sequence of messages — confirm which
    shape is intended.
    NOTE(review): ``text`` is required here although the generation prompt
    only has ``{description}`` and ``{persona_description}`` placeholders —
    confirm whether it is needed.
    """
    current = state['messages']
    chain_inputs = {
        field: current[field]
        for field in ('description', 'persona_description', 'text')
    }
    return {"messages": [generate.invoke(chain_inputs)['text']]}

def reset_state(state):
    """Placeholder: accepts ``state``, does nothing and returns ``None``."""
    return None

def formatted_reflection_res(reflection_result):
    """Render a reflection result as a list of ``"<field> - <value>"`` strings.

    Args:
        reflection_result: Object exposing ``observation``,
            ``reason_for_observation``, ``action`` and ``valid`` attributes.

    Returns:
        A list of four strings, one per attribute, in the order listed above.
    """
    field_order = ('observation', 'reason_for_observation', 'action', 'valid')
    return [
        f'{field} - {getattr(reflection_result, field)}'
        for field in field_order
    ]

def reflect(state):
    """Critique the generated query held in ``state`` with the reflection chain.

    Args:
        state: Graph state whose ``'messages'`` entry provides the
            ``description``, ``persona_description`` and ``text`` values to
            critique.

    Returns:
        ``{"messages": [...]}`` where the list holds the reflection result
        rendered by ``formatted_reflection_res``.

    Raises:
        KeyError: If one of the three required entries is missing.

    NOTE(review): ``state['messages']`` is indexed by string keys here, while
    ``AgentState`` annotates it as a sequence of messages — confirm which
    shape is intended.
    """
    messages = state['messages']

    # Build the chain input from the state being processed. The notebook-level
    # `input_reflection` dict belongs to one hand-run example; invoking the
    # chain with it would critique that same example on every call.
    args = {
        # The reflection prompt declares this variable as required; it is the
        # same persona catalogue for every request.
        'personas_attributes_string': personas_attributes_string,
        'description': messages['description'],
        'persona_description': messages['persona_description'],
        'text': messages['text'],
    }
    reflection_res = reflection_chain.invoke(args)
    formatted_res = formatted_reflection_res(reflection_res)
    return {"messages": formatted_res}

In [None]:
def should_continue(state):
    """Choose the next edge based on the most recent message.

    Args:
        state: Mapping whose ``'messages'`` entry is a non-empty sequence; the
            last item must expose an ``additional_kwargs`` container.

    Returns:
        ``"continue"`` when the last message's ``additional_kwargs`` contains
        ``"function_call"``, otherwise ``"end"``.
    """
    newest = state['messages'][-1]
    wants_tool = "function_call" in newest.additional_kwargs
    return "continue" if wants_tool else "end"

def call_model(state):
    """Invoke ``llm_with_tools`` on the messages accumulated in ``state``.

    Returns:
        ``{"messages": [response]}`` — the response is wrapped in a list so it
        gets added to the existing list.

    NOTE(review): ``llm_with_tools`` is not defined in the cells visible
    here — confirm it exists before this function is called.
    """
    reply = llm_with_tools.invoke(state['messages'])
    return {"messages": [reply]}

def call_tool(state):
    """Run the tool requested by the last message and wrap the result.

    Reads the ``function_call`` entry (``name`` and JSON-encoded
    ``arguments``) from the last message's ``additional_kwargs``, invokes
    ``tool_executor`` with it, and returns the stringified response as a
    ``FunctionMessage`` inside ``{"messages": [...]}``.

    NOTE(review): ``ToolInvocation``, ``json``, ``tool_executor`` and
    ``FunctionMessage`` are not imported or defined in the cells visible
    here — confirm they are provided before this function is called.
    """
    messages = state['messages']
    last_message = messages[-1]
    
    # Raises KeyError if the last message carries no "function_call" entry.
    action = ToolInvocation(
        tool = last_message.additional_kwargs["function_call"]["name"],
        tool_input = json.loads(last_message.additional_kwargs["function_call"]["arguments"]),
    )
    
    response  = tool_executor.invoke(action)
    
    # use the response to create a function message
    function_message = FunctionMessage(content = str(response), name = action.tool)
    
    # we return a list that will get added to the existing list
    return {"messages":[function_message]}

In [None]:
# Two-node graph: "agent" (call_model) and "action" (call_tool).
# NOTE(review): `StateGraph` and `END` are not imported in the cells visible
# here — confirm the import before running this cell.
# NOTE(review): generate_synthetic_query / reflect defined above are not added
# as nodes here — confirm whether that is intended.
workflow = StateGraph(AgentState)

# create nodes
workflow.add_node("agent",call_model)
workflow.add_node("action",call_tool)

workflow.set_entry_point("agent")

# add conditional edge
# should_continue returns "continue" or "end"; the mapping routes those to the
# "action" node or to END respectively.
workflow.add_conditional_edges(
    "agent",
    should_continue,
    {"continue":"action",
    "end":END
    }
)

# add a normal edge
# because we always want to go to the agent after going to the action
workflow.add_edge("action","agent")

app = workflow.compile()

### Archive

In [None]:
# Archived variant of the critique prompt, with an explicit spelling-mistake
# example added.
# NOTE(review): running this cell rebinds `reflection_template`, the same name
# the active reflection prompt is built from; the already-built
# `reflection_prompt` object is not re-created by this cell.
reflection_template="""You are proficient in analyzing, critiquing, and recommending enhancements to the training data used for educating a chatbot on procurement requests.
The chatbot should be capable of generalizing an array of user inquiries. It's assumed that the chatbot will only cater to users with the following attributes.
{personas_attributes_string}
For instance, if the Persona Description indicates that a user tends to make spelling errors, then at least one spelling mistake should be present in the generated query, otherwise, the query would be deemed invalid.
Similarly, if the persona possesses different traits, the generated query should align with those traits.
Below are the specifics regarding the generated query along with the original description utilized to generate the query:
Original Item or Service Description: {description}
Persona Description (Individual making the request): {persona_description}
Generated Query: {text}.
\n{format_instructions}\n"""

In [None]:


# Archived call that also passes the persona name.
# NOTE(review): the generation prompt has no {persona} placeholder, so the
# "persona" entry is presumably not used in the rendered prompt — confirm.
ret = generate.invoke({
                "description": item_description,
                "persona": persona,
                "persona_description":persona_description,
            })