In [134]:
import boto3, json, math
import faiss
import pandas as pd
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain.embeddings import BedrockEmbeddings
import boto3
# Bedrock runtime client used for the embedding calls; the region is pinned to us-east-1.
bedrock_client = boto3.client(service_name="bedrock-runtime", region_name="us-east-1")

# Embedding wrapper around the Titan text-embedding model, sharing the client above.
embeddings = BedrockEmbeddings(client=bedrock_client, model_id="amazon.titan-embed-text-v2:0")

# Load the saved FAISS index from the local 'complaints.vs' path.
# NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based loading;
# only point this at an index file you created or otherwise trust.
vector_store = FAISS.load_local(
    "complaints.vs",
    embeddings,
    allow_dangerous_deserialization=True,
)

In [213]:
def call_bedrock(message_list, system_prompts, tool_list=None,
                 model_id="amazon.nova-pro-v1:0", max_tokens=2000, temperature=0.2):
    """Send one Converse request to Bedrock and return the raw response.

    Args:
        message_list: Conversation turns, passed through as ``messages``.
        system_prompts: System prompt text; it is wrapped as ``[{'text': ...}]``.
        tool_list: Optional list of tool specs. When ``None`` or empty, no
            ``toolConfig`` is sent at all, so an empty list no longer produces
            a request with an empty ``tools`` array.
        model_id: Model identifier passed as ``modelId``.
        max_tokens: Value for ``inferenceConfig.maxTokens``.
        temperature: Value for ``inferenceConfig.temperature``.

    Returns:
        Whatever ``bedrock.converse`` returns.
    """
    # A fresh default session/client is created per call; no region is set here,
    # so the region comes from the ambient boto3 configuration.
    session = boto3.Session()
    bedrock = session.client(service_name='bedrock-runtime')

    # Build the request once; the only optional part is the tool configuration.
    request = {
        "modelId": model_id,
        "messages": message_list,
        "system": [{'text': system_prompts}],
        "inferenceConfig": {
            "maxTokens": max_tokens,
            "temperature": temperature,
        },
    }
    if tool_list:
        request["toolConfig"] = {"tools": tool_list}

    return bedrock.converse(**request)


In [140]:

# Tool specification (operator-style filters) offered to the model so it can turn a
# user query into a metadata filter for the vector store.
# NOTE(review): a later cell assigns `tool_list` again, so this version only takes
# effect if that cell is skipped.
# Fixes relative to the previous text: Example 2 now matches its query (region LC,
# date condition on complaint_date instead of a repeated client_region key), and the
# schema type for `x` is the JSON Schema type "object" rather than "dict".
tool_list =[
    {
        "toolSpec": {
            "name": "identify_complaints_filters",
            "description": """Your job is to first look at the query's query and determine if it requires filtering the complaints first based off the following columns 
            client_name - name of the company making the complaint, you will need to use the like filter since users might not type if the exact name
            client_region - possible values ['MC','LC','ICB'] where MC= mid-corporate, LC = Large corporate, ICB = International corprorate
            complaint_date - date of the complaint

            Note: if you're not sure then don't output anything. This is used for metadata filter for the vectorstore. You will need to use the following
            $eq (equals)
            $neq (not equals)
            $gt (greater than)
            $lt (less than)
            $gte (greater than or equal)
            $lte (less than or equal)
            $in (membership in list)
            $nin (not in list)
            $and (all conditions must match)
            $or (any condition must match)
            $not (negation of condition)

            Example1: user_query: Show me all complaints in region MC
            output: = {"client_region": {"$eq": "MC"}}

            Example2: user_query: Show me all complaints in region LC and after 15th June 2024
            output: = {"client_region": {"$eq": "LC"},"complaint_date": {"$gt": "2024-06-15"}}

            
            """,
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "x": {
                            "type": "object",
                            "description": """output filter as a dict e.g. {"client_region": {"$eq": "LC"},"complaint_date": {"$gt": "2024-06-15"}}"""
                        }
                    },
                    "required": ["x"]
                }
            }
        }}
]


In [141]:

# Tool specification (x = metadata filter, y = number of documents) offered to the
# model so it can turn a user query into retriever search arguments.
# Fixes relative to the previous text: the example filters are now well-formed JSON,
# Example 2 matches its query (region LC), the date condition uses an operator dict
# instead of the literal string ">2024-06-15" (which a metadata filter would compare
# for equality), and the schema type for `x` is the JSON Schema type "object".
tool_list =[
    {
        "toolSpec": {
            "name": "identify_complaints_filters",
            "description": """Your job is to first look at the query's query and determine if it requires filtering the complaints first based off the following columns 
            client_name - name of the company making the complaint, you will need to use the like filter since users might not type if the exact name
            client_region - possible values ['MC','LC','ICB'] where MC= mid-corporate, LC = Large corporate, ICB = International corprorate
            complaint_date - date of the complaint

            Note: if you're not sure then don't output anything. This is used for metadata filter for the vectorstore. There are two outputs x and y, where x is the column filters and y is the number of documents to return - if user doesnt specify then default is 100
            For date comparisons on complaint_date use one of $gt, $gte, $lt, $lte with a date written as YYYY-MM-DD

            Example1: user_query: Show me all complaints in region MC
            output:
            x = {"client_region": "MC"}
            y=100

            Example2: user_query: Show me all complaints in region LC and after 15th June 2024
            output: 
            x = {"client_region": "LC","complaint_date": {"$gt": "2024-06-15"}}
            y=100

            Example3: user_query: Show me 5 complaints in regions MC and LC
            output:
            x = {"client_region": ["MC","LC"]}
            y = 5
            
            """,
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "x": {
                            "type": "object",
                            "description": """output filter as a dict e.g. {"client_region": ["MC","LC"]}  or {"client_region": "MC"}"""
                        },
                        "y": {
                            "type": "integer",
                            "description": """Filter for the number of documents to return. if user query doesnt specify then default is 100"""
                        }
                    },
                    "required": ["x","y"]
                }
            }
        }}
]


In [152]:
# Example question that drives the rest of the notebook.
query_example = 'is there any complaints relating to payments for clients in MC region'

# System prompt for the filter-extraction call.
# NOTE(review): the prompt mentions a tool named retrieve_from_vectorstore; no tool
# with that name appears in the visible tool_list cells - confirm this is intended.
system_message = """You are an AI assistant within a corporate bank in the Complaints team. Your role is to retrieve back the complaints based off the user query. 
Your first job is always to breakdown the user query (using the tool identify_complaints_filters) to determine if you need to filter the metadata in the vectorstore first if yes, then this is passed to the tool retrieve_from_vectorstore"""

# Conversation history: a single user turn whose only content block is the example query.
message_list = [dict(role="user", content=[dict(text=query_example)])]



In [153]:
# First model call: the example query is sent with the filter tool on offer.
response = call_bedrock(
    message_list=message_list,
    system_prompts=system_message,
    tool_list=tool_list,
)

In [159]:
# Build the retriever from the tool-call arguments in the model response.
# The tool call is located by scanning the content blocks for a 'toolUse' entry rather
# than assuming it sits at a fixed position, and only the lookup failures that mean
# "no usable tool call" trigger the fallback (other errors are no longer swallowed).
try:
    tool_input = next(
        block['toolUse']['input']
        for block in response['output']['message']['content']
        if 'toolUse' in block
    )
    metadata_filter = tool_input['x']
    k_filter = tool_input['y']
except (KeyError, IndexError, TypeError, StopIteration):
    # No tool call, or one without both 'x' and 'y': fall back to an unfiltered search.
    print('cannot extract out filters so going default')
    retriever = vector_store.as_retriever(search_kwargs= {'k':100})
else:
    print(f"filter by metadata: {metadata_filter} and k: {k_filter}")
    retriever = vector_store.as_retriever(search_kwargs={'filter': metadata_filter, 'k':k_filter})

filter by metadata: {'client_region': 'MC'} and k: 100


In [160]:
# Query the current `retriever` with the example question and keep the returned documents.
docs = retriever.invoke(query_example)

In [161]:
# Show the retrieved documents as the cell output.
docs

[Document(metadata={'client_name': 'Gamma Prime Corp.', 'client_region': 'MC', 'theme': 'Account Management', 'complaint_date': '2024-04-16'}, page_content="Our account was incorrectly linked to another client's account, causing confusion."),
 Document(metadata={'client_name': 'Tech Innovators Corp.', 'client_region': 'MC', 'theme': 'Customer Service', 'complaint_date': '2024-05-13'}, page_content='The customer service team was unresponsive to our queries regarding account discrepancies.'),
 Document(metadata={'client_name': 'Omega Holdings Ltd.', 'client_region': 'MC', 'theme': 'Customer Service', 'complaint_date': '2024-04-19'}, page_content='We have been unable to reach a representative for over a week regarding our account issue.')]

In [104]:
# Rebuild `retriever` with a fixed {'client_region': 'MC'} metadata filter (no 'k' given)
# and query it with a supply-chain question.
# NOTE(review): this reassigns the `retriever` and `docs` set by the preceding cells, and
# this cell's execution count (104) is lower than its neighbours - on a top-to-bottom
# run the later cells would use these results instead; confirm it belongs here.
retriever = vector_store.as_retriever(search_kwargs={'filter':{'client_region':'MC'} })
docs = retriever.invoke('is there any complaints relating to supply chain for clients in MC region')

In [199]:
# Flatten the retrieved documents into one DataFrame: one row per document, with the
# metadata fields as columns plus a 'complaint_text' column holding the page content.
# Each record is a fresh dict, so doc.metadata is not modified, and the frame is built
# in a single step, so re-running the cell yields the same result instead of appending
# to (or requiring) a pre-existing master_df.
complaint_records = [
    {**doc.metadata, 'complaint_text': doc.page_content}
    for doc in docs
]
master_df = pd.DataFrame(complaint_records)

In [211]:

# System prompt for the answer-generation call: master_df is serialised to CSV
# (without the index column) and embedded between <context> tags.
# NOTE(review): the text carries "System:" / "Human:" labels although the whole string
# is later passed as the system prompt - confirm that is intended.
rag_system_message = f"""
System: You are an AI assistant in a corporate bank and your job is to answer the users query around complaints using only the context only you should mainly use the complaint_text column to generate answer but can use other columns to check the user query. 
Human: Here is a set of context, contained in <context> tags:

<context>
{master_df.to_csv(index=False)}
</context>

 If you don't know the answer, just say that you don't know, don't try to make up an answer.
"""



In [214]:
# Second model call: same user message, now with the context-bearing system prompt
# and no tools offered.
response = call_bedrock(
    message_list=message_list,
    system_prompts=rag_system_message,
    tool_list=None,
)


In [215]:
# Show the value returned by call_bedrock as the cell output.
response

{'ResponseMetadata': {'RequestId': 'd4164b30-3466-43d7-8a77-8e63c126c7a2',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Sat, 18 Jan 2025 16:49:16 GMT',
   'content-type': 'application/json',
   'content-length': '785',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'd4164b30-3466-43d7-8a77-8e63c126c7a2'},
  'RetryAttempts': 0},
 'output': {'message': {'role': 'assistant',
   'content': [{'text': 'Based on the provided context, there are no complaints specifically relating to payments for clients in the MC region. The complaints listed are about account management and customer service issues. Here is the breakdown:\n\n1. **Gamma Prime Corp.** - Complaint about account management (incorrect linking of accounts).\n2. **Tech Innovators Corp.** - Complaint about customer service (unresponsive team regarding account discrepancies).\n3. **Omega Holdings Ltd.** - Complaint about customer service (unable to reach a representative).\n\nNone of these complaints mention issues relate