In [2]:
from rag_functions_toolUse import run_loop
import json
# Sample conversation seed.
# NOTE(review): this list is not passed to run_loop below; the call supplies
# its own prompt text instead.
message_list = [
    {
        "role": "user",
        "content": [ { "text": "Show me all complaints in region MC after 15th June 2024" } ]
    }
]

# System prompt handed to run_loop as its second argument.
system_prompts = """You are an AI assistant within a corporate bank in the Complaints team. 
Your role is to retrieve and analyze complaints based on user queries. 
Use the provided tools to filter, retrieve, and generate responses."""


# run_loop here is the one imported from rag_functions_toolUse and is called
# with two arguments (prompt, system prompt) - no tool list is passed.
messages = run_loop("Summarise the complaints in region MC",system_prompts)

# Pretty-print the returned messages as indented JSON.
print("\nMESSAGES:\n")
print(json.dumps(messages, indent=4))


Cannot extract filters, using default settings.

MESSAGES:



NameError: name 'json' is not defined

In [3]:
messages

[{'role': 'user',
  'content': [{'text': 'Summarise the complaints in region MC'}]},
 {'role': 'assistant',
  'content': [{'text': '<thinking> To summarize the complaints in region MC, I need to first identify the appropriate filters for the complaints based on the region. Then, I will retrieve the complaints data using these filters and finally generate a summary response based on the retrieved data. </thinking>\n'},
   {'toolUse': {'toolUseId': 'tooluse_gbRHHbCUS5yPgvMwHlk6_Q',
     'name': 'identify_complaints_filters',
     'input': {'filter_dict': {'metadata_filter': {'client_region': ['MC']},
       'k_filter': 5}}}}]},
 {'role': 'user',
  'content': [{'toolResult': {'toolUseId': 'tooluse_gbRHHbCUS5yPgvMwHlk6_Q',
     'content': [{'json': {'filter_terms': {'filter_dict': {'metadata_filter': {'client_region': ['MC']},
          'k_filter': 5}}}}]}}]},
 {'role': 'assistant',
  'content': [{'text': '<thinking> Now that I have the filter terms, I will use them to retrieve the complai

In [None]:
import boto3, json, math
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain.embeddings import BedrockEmbeddings
import pandas as pd

# Initialize the Bedrock runtime client
bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name='us-east-1',
)

# Create an instance of BedrockEmbeddings using the Bedrock client
embeddings = BedrockEmbeddings(
    client=bedrock_client,
    model_id="amazon.titan-embed-text-v2:0"
)

try:
    # Load the previously created FAISS vector store for complaints.
    # SECURITY NOTE: allow_dangerous_deserialization=True unpickles the stored
    # docstore; only point this at index files you created yourself.
    vector_store = FAISS.load_local('complaints.vs', embeddings, allow_dangerous_deserialization=True)
except Exception as load_error:
    # `except Exception` instead of a bare `except:` so a kernel interrupt is
    # not swallowed, and the reason for the fallback is visible to the reader.
    print(f"Could not load 'complaints.vs' ({load_error!r}); rebuilding the vector store from complaints_fake.csv")

    # Only needed on the rebuild path; FAISS itself is already imported above.
    from langchain_core.documents import Document

    df = pd.read_csv(r'complaints_fake.csv')

    # One Document per CSV row: the complaint text is embedded, the remaining
    # columns travel as metadata so they can be used for filtering later.
    documents = [
        Document(
            page_content=row['complaint_text'],
            metadata={
                "client_name": row['client_name'],
                "client_region": row['client_region'],
                "theme": row['theme'],
                "complaint_date": row['complaint_date'],
            },
        )
        for _, row in df.iterrows()
    ]
    vector_store = FAISS.from_documents(documents=documents, embedding=embeddings)


In [None]:

def _unwrap_filter_terms(filter_terms):
    """Strip 'filter_terms' / 'filter_dict' wrapper keys surrounding the filter payload."""
    unwrapped = filter_terms
    while isinstance(unwrapped, dict) and 'metadata_filter' not in unwrapped:
        for wrapper_key in ('filter_terms', 'filter_dict'):
            if wrapper_key in unwrapped:
                unwrapped = unwrapped[wrapper_key]
                break
        else:
            # No known wrapper key left: nothing more to peel off.
            break
    return unwrapped


def getContext(user_prompt:str,filter_terms:dict):
    """
    Retrieve the complaints matching the user's query from the module-level `vector_store`.

    Parameters:
    user_prompt (str): The user's query, used as the retrieval query.
    filter_terms (dict): Expected to hold 'metadata_filter' (metadata filter for the
        vector store) and 'k_filter' (number of documents to return). The payload may
        also arrive wrapped as {'filter_dict': {...}} or
        {'filter_terms': {'filter_dict': {...}}}, which is the shape the
        identify_complaints_filters tool round-trip produces; wrappers are removed first.

    Returns:
    pd.DataFrame: One row per retrieved document - its metadata columns plus a
        'complaint_text' column - with a fresh 0..n-1 index. Empty if nothing is retrieved.

    If the filters cannot be extracted, a default retriever returning up to 100
    unfiltered documents is used instead.
    """
    try:
        # Extract the metadata filter and the number of documents to return
        filter_payload = _unwrap_filter_terms(filter_terms)
        metadata_filter = filter_payload['metadata_filter']
        k_filter = filter_payload['k_filter']
        print(f"filter by metadata: {metadata_filter} and k: {k_filter}")

        # Create a retriever with the extracted filters.
        # NOTE(review): filter values are passed through unchanged; confirm the
        # vector store supports any comparison-style values (e.g. date ranges).
        retriever = vector_store.as_retriever(search_kwargs={'filter': metadata_filter, 'k':k_filter})
    except Exception as filter_error:
        print('cannot extract out filters so going default')
        # Surface the cause so a silent fallback can be diagnosed
        print(f"reason: {filter_error!r}")
        # Create a retriever with default settings if filters cannot be extracted
        retriever = vector_store.as_retriever(search_kwargs= {'k':100})

    # Retrieve the documents using the retriever
    docs = retriever.invoke(user_prompt)

    # Build each row from a *copy* of the metadata: updating doc.metadata in place
    # would leak 'complaint_text' into the documents held by the vector store.
    records = [{**doc.metadata, 'complaint_text': doc.page_content} for doc in docs]

    # Single construction instead of concatenating one-row frames in a loop
    return pd.DataFrame(records)

In [None]:

def getResponse(user_prompt:str,context_df:str):
    """
    Ask the Bedrock model to answer the user's query from the supplied complaints context.

    Parameters:
    user_prompt (str): The user's query; sent as the only user message.
    context_df (str): The retrieved complaints context. It is interpolated as text
        between the <context> tags of the system prompt.

    Returns:
    str: The text of the first content block of the model's reply.
    """
    # The context travels in the system prompt rather than in the user turn
    rag_system_message = f"""
    System: You are an AI assistant in a corporate bank and your job is to answer the users query around complaints using only the context only you should mainly use the complaint_text column to generate answer but can use other columns to check the user query. 
    Human: Here is a set of context, contained in <context> tags:
    
    <context>
    {context_df}
    </context>
    
     If you don't know the answer, just say that you don't know, don't try to make up an answer.
    """

    # Single-turn conversation holding just the user's question
    conversation = [{"role": "user", "content": [{"text": user_prompt}]}]

    # Token cap and temperature for the generation
    generation_settings = {"maxTokens": 2000, "temperature": 0.1}

    # No toolConfig here: this call only generates text
    reply = bedrock_client.converse(
        modelId="amazon.nova-pro-v1:0",
        messages=conversation,
        system=[{'text': rag_system_message}],
        inferenceConfig=generation_settings,
    )

    first_block = reply['output']['message']['content'][0]
    return first_block['text']

In [None]:
import boto3
import json

# Initialize the Bedrock runtime client
# NOTE(review): this repeats the client construction from the vector-store
# setup cell (same service name and region) and rebinds `bedrock_client`.
bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name='us-east-1',
)

def call_bedrock(message_list, system_prompts, tool_list):
    """
    Send one request to the Bedrock Converse API with the given tools enabled.

    Parameters:
    message_list (list): Conversation messages to send to the model.
    system_prompts (str): System prompt text; wrapped into a single system block.
    tool_list (list): Tool specifications made available to the model.

    Returns:
    dict: The response returned by `bedrock_client.converse`, unmodified.
    """
    request = {
        "modelId": "amazon.nova-pro-v1:0",           # model to invoke
        "messages": message_list,                    # conversation so far
        "system": [{'text': system_prompts}],        # system prompt block
        "inferenceConfig": {
            "maxTokens": 2000,                       # generation length cap
            "temperature": 0.1,                      # low randomness
        },
        "toolConfig": {"tools": tool_list},          # tools the model may call
    }
    return bedrock_client.converse(**request)

# Define the tool list.
# Each entry is a Converse `toolSpec`; `inputSchema.json` is a JSON Schema, so
# property types use the JSON Schema names ("object", "string") rather than
# Python type names ("dict", "str").
tool_list =[
    {
        "toolSpec": {
            "name": "identify_complaints_filters",
            "description": """Your job is to first look at the user's query and determine if it requires filtering the complaints first based off the following columns
            client_name - name of the company making the complaint, you will need to use the like filter since users might not type the exact name
            client_region - possible values ['MC','LC','ICB'] where MC = mid-corporate, LC = Large corporate, ICB = International corporate
            complaint_date - date of the complaint

            Note: if you're not sure then don't output anything. This is used for metadata filter for the vectorstore. There are two outputs x and y, where x is the column filters and y is the number of documents to return - if user doesnt specify then default is 100

            Example1: user_query: Show me all complaints in region MC
            output: {"metadata_filter": {"client_region": "MC"}, "k_filter": 100}

            Example2: user_query: Show me all complaints in region LC and after 15th June 2024
            output: {"metadata_filter": {"client_region": "LC", "complaint_date": ">2024-06-15"}, "k_filter": 100}


            Example3: user_query: Show me 5 complaints in regions MC and LC
            output: {"metadata_filter": {"client_region": ["MC", "LC"]}, "k_filter": 5}  """,
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "filter_dict": {
                            "type": "object",
                            "description": """output filter as a dict e.g. {"metadata_filter": {"client_region": ["MC", "LC"]}, "k_filter": 5} """
                        }
                    },
                    "required": ["filter_dict"]
                }
            }
        }
    },
    {
        "toolSpec": {
            "name": "get_complaintsData",
            "description": """This function retrieves the context (complaints) based on the user's query.""",
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "user_query": {
                            "type": "string",
                            "description": """original user query"""
                        },
                         "filter_terms": {
                            "type": "object",
                            "description": """filter term from the output of tool identify_complaints_filters"""
                        }
                    },
                    "required": ["user_query","filter_terms"]
                }
            }
        }
    },
    {
        "toolSpec": {
            "name": "generateResponse",
            "description": """This function generates a response based on the user's query and the retrieved context (complaints) from the tool get_complaintsData.""",
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "user_query": {
                            "type": "string",
                            "description": """original user query"""
                        },
                         "context_df": {
                            "type": "string",
                            "description": """context df from the output of tool get_complaintsData"""
                        }
                    },
                    "required": ["user_query","context_df"]
                }
            }
        }
    }
]

# Define the message list and system prompts
# NOTE(review): message_list is not passed to run_loop anywhere in this notebook;
# run_loop builds its own message list from the prompt argument.
message_list = [
    {
        "role": "user",
        "content": [ { "text": "Show me all complaints in region MC after 15th June 2024" } ]
    }
]

# System prompt text; call_bedrock wraps it into the `system` block of the request.
system_prompts = """You are an AI assistant within a corporate bank in the Complaints team. 
Your role is to retrieve and analyze complaints based on user queries. 
Use the provided tools to filter, retrieve, and generate responses."""

# # Process tool uses iteratively
# final_response = process_tool_uses(message_list, system_prompts, tool_list)
# print(f"Final response: {final_response}")

In [None]:
def _run_tool(tool_name, tool_input):
    """Execute one requested tool and return the `content` list for its toolResult."""
    if tool_name == 'identify_complaints_filters':
        # The model already produced the filters; echo them back as the result
        return [{ "json": { "filter_terms": tool_input } }]

    if tool_name == 'get_complaintsData':
        context_df = getContext(tool_input['user_query'], tool_input['filter_terms'])
        # Serialise the frame as CSV text so it fits in a JSON tool result
        return [{ "json": { "context_df": context_df.to_csv(index=None) } }]

    if tool_name == 'generateResponse':
        response = getResponse(tool_input['user_query'], tool_input['context_df'])
        return [{ "text": response }]

    raise ValueError(f"Unknown tool requested: {tool_name!r}")


def handle_response(response_message):
    """
    Run every tool the model requested and build the follow-up user message.

    Parameters:
    response_message (dict): The assistant message from the Converse response;
        its 'content' list may contain 'toolUse' blocks.

    Returns:
    dict | None: A user-role message holding one 'toolResult' block per 'toolUse'
        block, or None when the assistant message requested no tools.

    A tool failure (missing input key, exception inside the tool, unknown tool
    name) is reported back to the model as a toolResult with status "error"
    instead of raising, so every toolUse block gets a matching toolResult.
    """
    follow_up_content_blocks = []

    for content_block in response_message['content']:
        if 'toolUse' not in content_block:
            continue

        tool_use_block = content_block['toolUse']
        tool_use_id = tool_use_block['toolUseId']

        try:
            tool_result = {
                "toolUseId": tool_use_id,
                "content": _run_tool(tool_use_block['name'], tool_use_block['input']),
            }
        except Exception as e:
            # The previous `except ToolError` referenced an undefined name, so any
            # failure surfaced as NameError instead of an error toolResult.
            tool_result = {
                "toolUseId": tool_use_id,
                "content": [ { "text": repr(e) } ],
                "status": "error",
            }

        follow_up_content_blocks.append({ "toolResult": tool_result })

    if not follow_up_content_blocks:
        return None

    return {
        "role": "user",
        "content": follow_up_content_blocks,
    }


In [None]:
def run_loop(prompt,system_prompts, tool_list):
    """
    Drive the model/tool conversation until the model stops requesting tools.

    Parameters:
    prompt (str): The user's request; becomes the first user message.
    system_prompts (str): System prompt forwarded to call_bedrock.
    tool_list (list): Tool specifications forwarded to call_bedrock.

    Returns:
    list: The full conversation - the initial user message, every assistant
        reply, and every tool-result follow-up message, in order.
    """
    MAX_LOOPS = 10  # hard cap on the number of model calls

    conversation = [
        {"role": "user", "content": [{"text": prompt}]},
    ]
    iterations = 0

    while True:
        reply = call_bedrock(conversation, system_prompts, tool_list)['output']['message']
        conversation.append(reply)
        iterations += 1

        # The cap is checked before tools run, so the reply that hits the limit
        # is recorded but its tool requests are not executed.
        if iterations >= MAX_LOOPS:
            print(f"Hit loop limit: {iterations}")
            break

        follow_up = handle_response(reply)
        if follow_up is None:
            # No tool requests left: the last reply is the final answer
            break

        conversation.append(follow_up)

    return conversation


In [None]:
# Sample conversation seed.
# NOTE(review): not passed to run_loop below, which builds its own message list
# from the prompt argument.
message_list = [
    {
        "role": "user",
        "content": [ { "text": "Show me all complaints in region MC after 15th June 2024" } ]
    }
]

# System prompt handed to run_loop as its second argument.
system_prompts = """You are an AI assistant within a corporate bank in the Complaints team. 
Your role is to retrieve and analyze complaints based on user queries. 
Use the provided tools to filter, retrieve, and generate responses."""


# Run the tool-use loop with the notebook's tool_list; returns the full conversation.
messages = run_loop("Summarise the complaints in region MC",system_prompts, tool_list)

# Pretty-print the conversation as indented JSON.
print("\nMESSAGES:\n")
print(json.dumps(messages, indent=4))


In [None]:
len(messages)

In [None]:
messages[-1]