In [None]:
import boto3, json, math
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain.embeddings import BedrockEmbeddings
import boto3
import pandas as pd
# Bedrock runtime client handed to the embeddings wrapper below; the region is pinned to us-east-1.
bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name='us-east-1',
)
# Embedding model used to embed queries against the saved index.
# NOTE(review): presumably the same model that was used to build 'complaints.vs' — confirm.
embeddings = BedrockEmbeddings(
            client=bedrock_client, 
            model_id="amazon.titan-embed-text-v2:0"
        )
# SECURITY: allow_dangerous_deserialization=True opts in to deserializing the saved store from disk
# (pickle-based in LangChain's FAISS wrapper); only load 'complaints.vs' from a trusted source.
vector_store = FAISS.load_local('complaints.vs',embeddings,allow_dangerous_deserialization=True)

In [1]:
from rag_functions import getContext,getResponse

  embeddings = BedrockEmbeddings(


In [2]:
# Natural-language question passed to getContext / getResponse in the following cells.
query_query = 'Summarise the complaints by the client region'

In [3]:
# Retrieve the complaints relevant to the query using getContext imported from rag_functions.
# NOTE(review): this call passes only the query, while the getContext defined later in this
# notebook also requires vector_store and tool_list — the imported version presumably differs; confirm.
contxt_df = getContext(query_query)

{'ResponseMetadata': {'RequestId': '9424ff23-f42e-40da-8a96-ae399fbcb64c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 18 Jan 2025 21:12:11 GMT', 'content-type': 'application/json', 'content-length': '600', 'connection': 'keep-alive', 'x-amzn-requestid': '9424ff23-f42e-40da-8a96-ae399fbcb64c'}, 'RetryAttempts': 0}, 'output': {'message': {'role': 'assistant', 'content': [{'text': '<thinking> The user wants to summarize complaints by the client region. This means I need to filter complaints based on the client region. Since the user did not specify a particular region or number of documents, I will use the default settings. </thinking>\n'}, {'toolUse': {'toolUseId': 'tooluse_nHCh0WqNTIOU8GSXoriF_Q', 'name': 'identify_complaints_filters', 'input': {'x': {'client_region': ['MC', 'LC', 'ICB']}, 'y': 100}}}]}}, 'stopReason': 'tool_use', 'usage': {'inputTokens': 841, 'outputTokens': 168, 'totalTokens': 1009}, 'metrics': {'latencyMs': 1373}}
filter by metadata: {'client_region': ['MC'

In [5]:
# Generate the answer with the imported getResponse, passing the query and the value returned by getContext.
response = getResponse(query_query,contxt_df)

In [6]:
# Show the generated answer as the cell output.
response

"Here is a summary of the complaints by client region:\n\n### ICB (International Corporate Banking)\n- **Payments**:\n  - Xi Solutions Group: A scheduled payment was not executed, causing a breach of contract with a client.\n  - Xi Prime Solutions: A scheduled payment was not executed, causing a breach of contract with a client.\n  - Lambda Trade Solutions: A payment to a foreign supplier was not completed on time, resulting in a fine.\n  - Lambda Prime Trade: A payment to a foreign supplier was not completed on time, resulting in a fine.\n  - Beta Prime Ltd: An international payment was not completed on time, resulting in a penalty from the recipient.\n  - Rho Global Corp: An international payment was incorrectly converted, resulting in a financial loss.\n\n### MC (Mainstream Corporate)\n- **Customer Service**:\n  - Omega Holdings Ltd: Unable to reach a representative for over a week regarding an account issue.\n  - Tech Innovators Corp: The customer service team was unresponsive to q

In [None]:
def call_bedrock(message_list, system_prompts, tool_list, extract_filter=True,
                 model_id="amazon.nova-pro-v1:0", max_tokens=2000, temperature=0.2):
    """Send a conversation to a Bedrock model through the Converse API.

    Args:
        message_list: Converse-format messages (list of ``{"role", "content"}`` dicts).
        system_prompts: System prompt text; it is wrapped into a single system block.
        tool_list: Tool specifications to offer to the model. Only sent when
            ``extract_filter`` is true and the list is non-empty.
        extract_filter: When true, attach ``tool_list`` as the request's ``toolConfig``
            so the model can answer with a tool-use block; when false, send a plain
            chat request.
        model_id: Bedrock model identifier.
        max_tokens: ``maxTokens`` value of the inference configuration.
        temperature: ``temperature`` value of the inference configuration.

    Returns:
        The response object returned by ``converse``, unchanged.
    """
    # A fresh default-configured session and client are created on every call.
    bedrock = boto3.Session().client(service_name='bedrock-runtime')

    # Both request variants share everything except the optional tool configuration,
    # so the request is built once instead of duplicating the whole call.
    request = {
        "modelId": model_id,
        "messages": message_list,
        "system": [{'text': system_prompts}],
        "inferenceConfig": {
            "maxTokens": max_tokens,
            "temperature": temperature,
        },
    }

    # A missing or empty tool list is not a usable toolConfig, so it is left out
    # rather than sent as {"tools": None} / {"tools": []}.
    if extract_filter and tool_list:
        request["toolConfig"] = {"tools": tool_list}

    return bedrock.converse(**request)


In [None]:

# Tool specification offered to the model so it can turn a user query into retrieval
# parameters: "x" (metadata filter dict) and "y" (number of documents).
# The examples in the description use the same "x"/"y" keys the schema requires.
# NOTE(review): the description asks for "like" matching on client_name and ">" date
# comparisons; the filter dict is passed verbatim to the vector store retriever in
# getContext, and whether the store honours those operators is not verified here.
tool_list = [
    {
        "toolSpec": {
            "name": "identify_complaints_filters",
            "description": """Your job is to first look at the user's query and determine if it requires filtering the complaints first based off the following columns
            client_name - name of the company making the complaint, you will need to use the like filter since users might not type in the exact name
            client_region - possible values ['MC','LC','ICB'] where MC = mid-corporate, LC = Large corporate, ICB = International corporate
            complaint_date - date of the complaint

            Note: if you're not sure then don't output anything. This is used for metadata filter for the vectorstore. There are two outputs x and y, where x is the column filters and y is the number of documents to return - if user doesnt specify then default is 100

            Example1: user_query: Show me all complaints in region MC
            output: {"x": {"client_region": "MC"}, "y": 100}

            Example2: user_query: Show me all complaints in region LC and after 15th June 2024
            output: {"x": {"client_region": "LC", "complaint_date": ">2024-06-15"}, "y": 100}

            Example3: user_query: Show me 5 complaints in regions MC and LC
            output: {"x": {"client_region": ["MC", "LC"]}, "y": 5}
            """,
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "x": {
                            # "object" is the JSON Schema type for a key/value mapping ("dict" is not a JSON Schema type).
                            "type": "object",
                            "description": """output filter as a dict e.g. {"client_region": ["MC", "LC"]}  or {"client_region": "MC"}"""
                        },
                        "y": {
                            "type": "integer",
                            "description": """Filter for the number of documents to return. if user query doesnt specify then default is 100"""
                        }
                    },
                    "required": ["x", "y"]
                }
            }
        }
    }
]


In [None]:
def _extract_retrieval_args(filter_response, default_k=100):
    """Return ``(metadata_filter, k)`` from the first usable toolUse block, or None if there is none."""
    try:
        content_blocks = filter_response['output']['message']['content']
    except (KeyError, TypeError):
        return None

    # The toolUse block is not always at a fixed position (a leading text block is
    # optional), so every content block is inspected instead of indexing content[1].
    for block in content_blocks:
        if not isinstance(block, dict) or 'toolUse' not in block:
            continue
        tool_input = block['toolUse'].get('input')
        if not isinstance(tool_input, dict) or not isinstance(tool_input.get('x'), dict):
            continue
        try:
            k = int(tool_input.get('y', default_k))
        except (TypeError, ValueError):
            k = default_k
        if k <= 0:
            k = default_k
        return tool_input['x'], k
    return None


def getContext(user_prompt:str,vector_store,tool_list):
    """Retrieve the complaints relevant to ``user_prompt`` as a DataFrame.

    The model is first asked (via the tool in ``tool_list``) to derive a metadata
    filter and a document count from the query; those are then used to query the
    vector store. If no usable tool call comes back, an unfiltered search with
    k=100 is used instead.

    Args:
        user_prompt: The user's natural-language query.
        vector_store: Vector store exposing ``as_retriever(search_kwargs=...)``.
        tool_list: Tool specifications forwarded to ``call_bedrock``.

    Returns:
        A DataFrame with one row per retrieved document: the document's metadata
        fields plus a ``complaint_text`` column holding the page content. Empty
        when nothing is retrieved.
    """
    system_message_filter = """You are an AI assistant within a corporate bank in the Complaints team. Your role is to retrieve back the complaints based off the user query. 
    Your first job is always to breakdown the user query (using the tool identify_complaints_filters) """
    message_list = [
            {
                "role": "user",
                "content": [ { "text": user_prompt } ]
            }
        ]

    filter_response = call_bedrock(message_list=message_list,system_prompts=system_message_filter,tool_list=tool_list,extract_filter=True)

    retrieval_args = _extract_retrieval_args(filter_response)
    if retrieval_args is None:
        print('cannot extract out filters so going default')
        search_kwargs = {'k': 100}
    else:
        metadata_filter, k_filter = retrieval_args
        print(f"filter by metadata: {metadata_filter} and k: {k_filter}")
        # LangChain's FAISS store only fetches `fetch_k` candidates (default 20) before
        # applying a metadata filter, so without raising it a filtered search can never
        # return more than 20 documents regardless of k. Use the index size when known.
        index_size = getattr(getattr(vector_store, 'index', None), 'ntotal', 0)
        if not isinstance(index_size, int):
            index_size = 0
        search_kwargs = {
            'filter': metadata_filter,
            'k': k_filter,
            'fetch_k': max(k_filter, index_size, 20),
        }

    retriever = vector_store.as_retriever(search_kwargs=search_kwargs)
    docs = retriever.invoke(user_prompt)

    # Build fresh row dicts: updating doc.metadata in place would also modify the
    # documents held by the vector store. One DataFrame is built at the end instead
    # of concatenating row by row.
    records = [{**doc.metadata, 'complaint_text': doc.page_content} for doc in docs]
    return pd.DataFrame(records)

In [None]:
def getResponse(user_prompt:str,vector_store,tool_list):
    """Answer ``user_prompt`` from retrieved complaints.

    Fetches the context frame with ``getContext``, embeds it as CSV inside the
    system prompt, sends the user's query to the model without tools, and
    returns the text of the first content block of the reply.
    """
    context_df = getContext(user_prompt,vector_store,tool_list)
    context_csv = context_df.to_csv(index=False)

    rag_system_message = f"""
    System: You are an AI assistant in a corporate bank and your job is to answer the users query around complaints using only the context only you should mainly use the complaint_text column to generate answer but can use other columns to check the user query. 
    Human: Here is a set of context, contained in <context> tags:
    
    <context>
    {context_csv}
    </context>
    
     If you don't know the answer, just say that you don't know, don't try to make up an answer.
    """

    # The user's question is the only conversation turn; the context travels in the system prompt.
    user_turn = {"role": "user", "content": [{"text": user_prompt}]}

    reply = call_bedrock(
        message_list=[user_turn],
        system_prompts=rag_system_message,
        tool_list=None,
        extract_filter=False,
    )
    first_block = reply['output']['message']['content'][0]
    return first_block['text']

In [None]:
# Alternative example query.
# NOTE(review): when the cells run top to bottom this value is immediately overwritten by the next cell.
query_query='is there any complaints relating to supply chain for clients in MC region'

In [None]:
# Query used by the getResponse call in the next cell.
query_query = 'Summarise the complaints by the client region'

In [None]:
# End-to-end run: getResponse retrieves the context itself (via getContext) and returns the model's answer text.
response = getResponse(user_prompt=query_query,vector_store=vector_store,tool_list=tool_list)


In [None]:
# Show the generated answer as the cell output.
response

In [None]:
class ToolError(Exception):
    """Signals that a tool-use request could not be turned into a tool result."""


In [None]:
def get_tool_result(tool_use_block):
    """Extract the filter arguments from a Converse ``toolUse`` block.

    Args:
        tool_use_block: The ``toolUse`` dict from a model response; expected to
            carry ``name`` and an ``input`` dict with keys ``x`` and ``y``.

    Returns:
        ``[x, y]`` — the two values of the tool input, in that order.

    Raises:
        ToolError: If ``input`` is missing, is not a mapping, or lacks ``x``/``y``.
    """
    tool_use_name = tool_use_block['name']

    print(f"Using tool {tool_use_name}")

    try:
        tool_input = tool_use_block['input']
        return [tool_input['x'], tool_input['y']]
    except (KeyError, TypeError) as exc:
        # Only malformed-input errors are translated; the original cause is kept for debugging.
        raise ToolError("Tooluse input does not contain valid filter terms") from exc


In [None]:
def handle_response(response_message):
    """Build the follow-up user message answering the tool calls in a model reply.

    Each ``toolUse`` content block is resolved with ``get_tool_result``. A
    successful (non-None) result becomes a JSON ``toolResult`` block; a
    ``ToolError`` becomes a ``toolResult`` block with ``status`` "error" whose
    text is the exception's repr. Returns the user message wrapping those
    blocks, or None when the reply produced no tool results.
    """
    tool_results = []

    for block in response_message['content']:
        if 'toolUse' not in block:
            continue

        tool_use = block['toolUse']
        try:
            value = get_tool_result(tool_use)
        except ToolError as err:
            tool_results.append({
                "toolResult": {
                    "toolUseId": tool_use['toolUseId'],
                    "content": [{"text": repr(err)}],
                    "status": "error",
                }
            })
        else:
            if value is not None:
                tool_results.append({
                    "toolResult": {
                        "toolUseId": tool_use['toolUseId'],
                        "content": [{"json": {"result": value}}],
                    }
                })

    if not tool_results:
        return None
    return {"role": "user", "content": tool_results}


In [None]:
# Run the tool-use conversation loop for the current query and keep the resulting message list.
# NOTE(review): run_loop is defined in the last cell of this notebook, so on a fresh
# top-to-bottom run this cell raises NameError unless that cell has been executed first.
test = run_loop(query_query,tool_list)

In [None]:
# Show the message list returned by run_loop as the cell output.
test

In [None]:

# Scratch copy of the RAG system prompt that getResponse builds internally.
# NOTE(review): master_df is not assigned at notebook level in the visible cells (it only
# exists as a local inside getContext), so this cell depends on leftover kernel state.
rag_system_message = f"""
System: You are an AI assistant in a corporate bank and your job is to answer the users query around complaints using only the context only you should mainly use the complaint_text column to generate answer but can use other columns to check the user query. 
Human: Here is a set of context, contained in <context> tags:

<context>
{master_df.to_csv(index=False)}
</context>

 If you don't know the answer, just say that you don't know, don't try to make up an answer.
"""



In [None]:

# NOTE(review): this cell repeats the rag_system_message assignment of the preceding cell
# verbatim, and like it relies on a notebook-level master_df that no visible cell defines.
rag_system_message = f"""
System: You are an AI assistant in a corporate bank and your job is to answer the users query around complaints using only the context only you should mainly use the complaint_text column to generate answer but can use other columns to check the user query. 
Human: Here is a set of context, contained in <context> tags:

<context>
{master_df.to_csv(index=False)}
</context>

 If you don't know the answer, just say that you don't know, don't try to make up an answer.
"""



In [None]:
def run_loop(prompt, tool_list):
    """Drive a tool-use conversation with the model and return the full message list.

    Starting from the user's prompt, the model is called repeatedly. Each reply is
    appended to the conversation; if ``handle_response`` produces a follow-up
    (tool results), it is appended and the model is called again. The loop ends
    when a reply needs no follow-up, or after MAX_LOOPS model calls, in which
    case the last reply is kept but not processed.
    """
    MAX_LOOPS = 6
    system_message_filter = """You are an AI assistant within a corporate bank in the Complaints team. Your role is to retrieve back the complaints based off the user query. 
Your first job is always to breakdown the user query (using the tool identify_complaints_filters) to determine if you need to filter the metadata in the vectorstore first"""

    conversation = [{"role": "user", "content": [{"text": prompt}]}]

    for iteration in range(1, MAX_LOOPS + 1):
        reply = call_bedrock(
            message_list=conversation,
            system_prompts=system_message_filter,
            tool_list=tool_list,
        )['output']['message']
        conversation.append(reply)

        # The reply of the final permitted call is recorded but not acted upon.
        if iteration >= MAX_LOOPS:
            print(f"Hit loop limit: {iteration}")
            break

        tool_feedback = handle_response(reply)
        if tool_feedback is None:
            # Nothing left to answer: the last reply is the final response.
            break
        conversation.append(tool_feedback)

    return conversation
