In [18]:
import easyocr
import fitz  # PyMuPDF
import glob
import torch
from collections import defaultdict
from ollama import generate

In [19]:
print(torch.cuda.is_available())  # This should return True if CUDA is available and configured correctly

print(torch.cuda.get_device_name(0))  # This prints the name of the CUDA device, confirming it's recognized


True
NVIDIA GeForce RTX 4060 Ti


In [20]:
# import glob
# import os
# import re
# import dotenv
# import fitz  # PyMuPDF
# from collections import defaultdict
# from langchain.document_loaders import UnstructuredPDFLoader
# from unstructured.cleaners.core import clean_extra_whitespace
# from langchain.embeddings import OpenAIEmbeddings
# from langchain_community.vectorstores import FAISS
# from langchain.chains import LLMChain
# from langchain.output_parsers import PydanticOutputParser
# from langchain.prompts import PromptTemplate
# from pydantic import BaseModel, Field
# from typing import List
# from langchain.retrievers.multi_query import MultiQueryRetriever
# # from langchain.chat_models import ChatOpenAI
# from langchain_community.chat_models import ChatOpenAI
# from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.messages import HumanMessage
# from langchain_core.output_parsers import StrOutputParser

In [21]:
# # get tge api key from the .env file
# dotenv.load_dotenv()
# open_ai_api_key = os.getenv('OPENAI_API_KEY')
# print(open_ai_api_key)

In [22]:
# Get a list of all .pdf files in the current directory
pdf_files = glob.glob('*.pdf')

pdf_documents = [file for file in pdf_files]
pdf_documents

['2022-annual-report-wf.pdf', '2022-annual-report-bofa.pdf']

In [23]:
def extract_raw_images(pdf_file):
    doc = fitz.open(pdf_file)  # Open the PDF file
    raw_images = {}
    

    for page_num in range(len(doc)):  # Iterate through each page
        page = doc.load_page(page_num)  # Load the current page
        image_list = page.get_images(full=True)  # Get the list of images in the current page
        
        images = []
        for img_info in image_list:
            xref = img_info[0]
            base_image = doc.extract_image(xref)
            image_bytes = base_image["image"]  # The raw image bytes
            images.append(image_bytes)
        
        if len(images) > 0:
            raw_images[page_num] = images
    
    return raw_images

def detect_text_in_image(raw_images):
    reader = easyocr.Reader(['en'],gpu=True)  # Create the OCR reader
    # Store the raw images where text was detected
    text_detected_images = {}

    for key, value in raw_images.items():
        text_detected_raw_images = []

        for image_bytes in value:
            results = reader.readtext(image_bytes,detail=0)
            if results:
                # Check the length of results, because there are certain images with hashtag
                # or no significant information
                if len(results) > 10:
                    text_detected_raw_images.append(image_bytes)
        # Add only those raw images of the page which would be used 
        # in Llava to describe the image  
        if len(text_detected_raw_images) > 0:            
            text_detected_images[key] = text_detected_raw_images
    
    return text_detected_images

prompt = """Describe the contents in the image. Follow the below guidelines while generating the description of the image.
1. Do not give the style or font details
2. Do not describe image if it does not contain important information
3. Do not start the description with "The image" or "This image". We know that it is an image,
so there is no need to explicitly mention it.
"""

def extract_image_description(text_detected_images):
    image_description = {}
    

    for key, value in text_detected_images.items():
        page_image_description = []
        
        for raw_image in value:
            response = generate(model = 'llava', prompt=prompt, images=[raw_image], stream=False)
            
            page_image_description.append(response['response'])

        if len(page_image_description) > 0:            
            image_description[key] = page_image_description

    return image_description                

In [25]:
# Generate the description of the images to be used for the LLM for RAG
raw_images = extract_raw_images(pdf_documents[1])
text_detected_images = detect_text_in_image(raw_images)
img_description_bofa = extract_image_description(text_detected_images)

In [26]:
img_description_bofa

{35: [' The image shows a smartphone displaying a mobile banking application. The screen is activated and displays an account summary for a customer named "Robin." The account has an American Life Plan membership, as indicated by the logo of Bank of America. Robin\'s account balance is $693.98, with a pending transaction of $102.75.\n\nThere is also a reward status at the top, stating that Robin has earned 1250 points for their transaction and has reached 6375 total points. A welcome message greets Robin with the text "How can we help you today?" The overall color scheme of the app is white and green, which is the Bank of America\'s branding.\n\nBelow the account summary, there are additional details about a recent transaction. The transaction was made on "MERRILL," and it was for $57.86, indicating that the transaction was not in USD but rather in another currency. There is also a pending charge of $102.75, which seems to be a different transaction from the one above.\n\nThe bottom na

In [27]:
# Generate the description of the images to be used for the LLM for RAG
raw_images = extract_raw_images(pdf_documents[0])
text_detected_images = detect_text_in_image(raw_images)
img_description_wf = extract_image_description(text_detected_images)

In [None]:
# Let's read one page using 
doc = fitz.open(pdf_documents[0])  # Open the PDF file
page = doc.load_page(18)  # Load the first page
text = page.get_text()  # Extract the text from the page
print(text)

In [None]:
class MaxResize(object):
    def __init__(self, max_size=800):
        self.max_size = max_size

    def __call__(self, image):
        width, height = image.size
        current_max_size = max(width, height)
        scale = self.max_size / current_max_size
        resized_image = image.resize(
            (int(round(scale * width)), int(round(scale * height)))
        )

        return resized_image


detection_transform = transforms.Compose(
    [
        MaxResize(800),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

structure_transform = transforms.Compose(
    [
        MaxResize(1000),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

# load table detection model
# processor = TableTransformerImageProcessor(max_size=800)
model = AutoModelForObjectDetection.from_pretrained(
    "microsoft/table-transformer-detection", revision="no_timm"
).to(device)

# load table structure recognition model
# structure_processor = TableTransformerImageProcessor(max_size=1000)
structure_model = AutoModelForObjectDetection.from_pretrained(
    "microsoft/table-transformer-structure-recognition-v1.1-all"
).to(device)


# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(-1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


def rescale_bboxes(out_bbox, size):
    width, height = size
    boxes = box_cxcywh_to_xyxy(out_bbox)
    boxes = boxes * torch.tensor(
        [width, height, width, height], dtype=torch.float32
    )
    return boxes


def outputs_to_objects(outputs, img_size, id2label):
    m = outputs.logits.softmax(-1).max(-1)
    pred_labels = list(m.indices.detach().cpu().numpy())[0]
    pred_scores = list(m.values.detach().cpu().numpy())[0]
    pred_bboxes = outputs["pred_boxes"].detach().cpu()[0]
    pred_bboxes = [
        elem.tolist() for elem in rescale_bboxes(pred_bboxes, img_size)
    ]

    objects = []
    for label, score, bbox in zip(pred_labels, pred_scores, pred_bboxes):
        class_label = id2label[int(label)]
        if not class_label == "no object":
            objects.append(
                {
                    "label": class_label,
                    "score": float(score),
                    "bbox": [float(elem) for elem in bbox],
                }
            )

    return objects


def detect_and_crop_save_table(
    file_path, cropped_table_directory="./table_images/"
):
    image = Image.open(file_path)

    filename, _ = os.path.splitext(file_path.split("/")[-1])

    if not os.path.exists(cropped_table_directory):
        os.makedirs(cropped_table_directory)

    # prepare image for the model
    # pixel_values = processor(image, return_tensors="pt").pixel_values
    pixel_values = detection_transform(image).unsqueeze(0).to(device)

    # forward pass
    with torch.no_grad():
        outputs = model(pixel_values)

    # postprocess to get detected tables
    id2label = model.config.id2label
    id2label[len(model.config.id2label)] = "no object"
    detected_tables = outputs_to_objects(outputs, image.size, id2label)

    print(f"number of tables detected {len(detected_tables)}")

    for idx in range(len(detected_tables)):
        #   # crop detected table out of image
        cropped_table = image.crop(detected_tables[idx]["bbox"])
        cropped_table.save(f"./{cropped_table_directory}/{filename}_{idx}.png")


def plot_images(image_paths):
    images_shown = 0
    plt.figure(figsize=(16, 9))
    for img_path in image_paths:
        if os.path.isfile(img_path):
            image = Image.open(img_path)

            plt.subplot(2, 3, images_shown + 1)
            plt.imshow(image)
            plt.xticks([])
            plt.yticks([])

            images_shown += 1
            if images_shown >= 9:
                break

### 1. Indexing : Load & Split

We need to first load the contents from the PDF file. We will use the [DocumentLoaders](https://python.langchain.com/docs/modules/data_connection/document_loaders/), which are objects that load in data from a source and return a list of Documents. A Document is an object with some page_content (str) and metadata (dict). <br>
The UnstructuredPDFLoader is used in the usecase.

In [4]:
def remove_one_characters(text):
    if re.match('^.$', text):
        return ''
    else:
        return text

def remove_orientation_text(text):
    # There are texts in the pdf that is written vertically which makes the text to appear as
    # character space character & so on. This function removes those texts.
    if re.match('^(.\s)+.$', text):
        return ''
    else:
        return text

def remove_page_number_details(text):
    pattern1 = r'\b\d+\s*\|?\s*bank of america'
    if re.search(pattern1, text, re.IGNORECASE):
        return ''
    
    pattern2 = r'bank of america\s*2022\s*\|?\s*\d+'
    if re.search(pattern2, text, re.IGNORECASE):
        return ''
    
    return text 

In [16]:
# While using hi_res_model_name="detectron2_onnx" it will give error - "Unable to get page count. Is poppler installed and in PATH?"
# perform steps mentioned in [pdf2image](https://github.com/Belval/pdf2image?tab=readme-ov-file#windows) to install poppler
loaders = [UnstructuredPDFLoader(os.path.join(os.getcwd(),file), mode = 'elements', \
                                 strategy = 'hi_res', hi_res_model_name="detectron2_onnx", \
                                post_processors = [remove_one_characters, remove_orientation_text,
                                                    remove_page_number_details, \
                                                   clean_extra_whitespace]) for file in pdf_documents]

In [17]:
# Number of data loaders created is proportional to the number of documents
loaders

[<langchain_community.document_loaders.pdf.UnstructuredPDFLoader at 0x1f28476e140>,
 <langchain_community.document_loaders.pdf.UnstructuredPDFLoader at 0x1f28476dba0>]

In [18]:
# Asked to install pdf2image, pdfminer.six (do not install pdfminer as it is not actively maintained instead
# use pdfminer.six), opencv-python, unstructured_inference & many more
# Instead of installing individual dependent libraries for UnstructuredPDF, install unstructured using
# pip install unstructured[pdf] 
doc_0 = loaders[0].load_and_split()

In [20]:
# To avoid printing multiple text on GitHub
for doc in doc_0:
    if False:
        print(doc.page_content)

In [9]:
doc_1 = loaders[1].load_and_split()

In [11]:
# To avoid printing multiple text on GitHub
for doc1 in doc_1:
    if False:
        print(doc1.page_content)

In [12]:
# Remove the page numbers along with text "Wells Fargo & Company" in the footer.
previous_text = ''
for i, doc1 in enumerate(doc_1):
    if re.match('^\d+$', doc1.page_content):
        previous_text = doc1.page_content
        continue
    
    if re.match('^\d+$', previous_text) and re.search('^Wells Fargo & Company$', doc1.page_content, re.IGNORECASE):  
        doc1.page_content = ''           
    else:
        previous_text = ''   

In [13]:
previous_text = ''
for i, doc1 in enumerate(doc_1):
    if re.search('^Wells Fargo & Company$', doc1.page_content, re.IGNORECASE):
        previous_text = doc1.page_content
        continue
    
    if re.search('^Wells Fargo & Company$', previous_text, re.IGNORECASE) and re.match('^\d+$', doc1.page_content):        
        doc1.page_content = '' 
    else:
        previous_text = ''    

In [14]:
doc1 = list(filter(lambda x: x.page_content != '', doc_1))

In [15]:
for doc1 in doc_1:
    # To avoid printing multiple text on GitHub
    if False:
        print(doc1.page_content)

In [16]:
max_length = max(len(content_bofa.page_content) for content_bofa in doc_0)
print(max_length)

4000


In [17]:
max_length = max(len(content_wf.page_content) for content_wf in doc_1)
print(max_length)

3935


### 2. Indexing : Store
Creating embeddings for the splitted data and store the documents and it's corresponsing embeddings in a vector store. At this point we have a query-able vector store containing the chunked contents of our PDF's. Given a user question, we should ideally be able to return the snippets of the text that answer the question.

In [23]:
vectorstore = FAISS.from_documents(documents=doc_0+doc_1, embedding=OpenAIEmbeddings())

In [24]:
vectorstore.save_local('vectorstore')

In [8]:
vectorstore = FAISS.load_local("./vectorstore",OpenAIEmbeddings())

  warn_deprecated(


In [9]:
# Similarity Search
query = "Bank of America Chair & CEO?"
docs = vectorstore.similarity_search(query, k=3)
print(docs)

[Document(page_content='Brian T. Moynihan Chair of the Board and Chief Executive Oﬃcer, Bank of America Corporation', metadata={'source': 'c:\\Users\\baira\\Desktop\\Infy_Tech_Pioneer\\InfyTech_Docs_ChatBot\\2022-annual-report-bofa.pdf', 'detection_class_prob': 0.8070501685142517, 'coordinates': {'points': ((107.71715497970581, 278.45842002094656), (107.71715497970581, 401.0500022409043), (392.23714805555574, 401.0500022409043), (392.23714805555574, 278.45842002094656)), 'system': 'PixelSpace', 'layout_width': 1700, 'layout_height': 2200}, 'last_modified': '2024-01-04T14:51:31', 'filetype': 'application/pdf', 'languages': ['eng'], 'page_number': 223, 'parent_id': 'dc1fb50caf875b9f261d70b0be8ba6e3', 'file_directory': 'c:\\Users\\baira\\Desktop\\Infy_Tech_Pioneer\\InfyTech_Docs_ChatBot', 'filename': '2022-annual-report-bofa.pdf', 'category': 'NarrativeText'}), Document(page_content='Bank of America Corporation', metadata={'source': 'c:\\Users\\baira\\Desktop\\Infy_Tech_Pioneer\\InfyTech_

In [10]:
# Similarity Search
query = "Who is the CEO of Amazon?"
docs = vectorstore.similarity_search(query, k=3)
print(docs)

[Document(page_content='Chief Operations Executive', metadata={'source': 'c:\\Users\\baira\\Desktop\\Infy_Tech_Pioneer\\InfyTech_Docs_ChatBot\\2022-annual-report-bofa.pdf', 'coordinates': {'points': ((605.015, 1808.9855555555553), (605.015, 1859.1568055555556), (780.1622222222222, 1859.1568055555556), (780.1622222222222, 1808.9855555555553)), 'system': 'PixelSpace', 'layout_width': 1700, 'layout_height': 2200}, 'last_modified': '2024-01-04T14:51:31', 'filetype': 'application/pdf', 'languages': ['eng'], 'page_number': 15, 'file_directory': 'c:\\Users\\baira\\Desktop\\Infy_Tech_Pioneer\\InfyTech_Docs_ChatBot', 'filename': '2022-annual-report-bofa.pdf', 'category': 'Title'}), Document(page_content='Chair of the Board and Chief Executive Oﬃcer', metadata={'source': 'c:\\Users\\baira\\Desktop\\Infy_Tech_Pioneer\\InfyTech_Docs_ChatBot\\2022-annual-report-bofa.pdf', 'coordinates': {'points': ((134.71444444444444, 444.44972222222214), (134.71444444444444, 500.5497222222222), (362.2334722222222

### 3. Retrieval and Generation: Retrieve
Different ways to retrive documents based on query - [Link](https://python.langchain.com/docs/modules/data_connection/retrievers/vectorstore)

<b> Using the query as is to retrieve the relevant documents using the search type "similarity_score_threshold"

In [14]:
retriever_similarity = vectorstore.as_retriever(search_type="similarity_score_threshold",
                                                 search_kwargs={"score_threshold": 0.70})
# retrieved_docs = retriever.invoke("How innovation is driven at bank of america?")
retrieved_docs = retriever_similarity.get_relevant_documents("Who is the CEO of Amazon") #What is the full-form of ROTCE?

In [15]:
len(retrieved_docs)

4

In [16]:
for doc in retrieved_docs:
    print(doc.page_content)

Chief Operations Executive
Chair of the Board and Chief Executive Oﬃcer
Chair of the Board and Chief Executive Oﬃcer
CEO Letter


<b> Using the Multi Query retriver which will create variants of queries based on the prompt to retrieve the relevant documents 

In [17]:
# supply a prompt along with an output parser to split the results into a list of queries.
# Output parser will split the LLM result into a list of queries
class LineList(BaseModel):
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")


class LineListOutputParser(PydanticOutputParser):
    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        lines = text.strip().split("\n")
        return LineList(lines=lines)


output_parser = LineListOutputParser()

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate four \ 
    different versions of the given user question to retrieve relevant documents from a vector \
    database. By generating multiple perspectives on the user question, your goal is to help \
    the user overcome some of the limitations of the distance-based similarity search. \ 
    Provide these alternative questions separated by newlines. \
    Original question: {question}""",
)
llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo-16k')


# Chain
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

  warn_deprecated(


In [34]:
# Run
retriever = MultiQueryRetriever(
    include_original=True,
    retriever=vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.70}),
    llm_chain=llm_chain, parser_key="lines",
    verbose=True
)  # "lines" is the key (attribute name) of the parsed output

# Results
unique_docs = retriever.get_relevant_documents(
    query="What is tuition assistance?"
)
len(unique_docs)

8

In [42]:
# To check different queries generated by the llm
llm_chain.invoke("What is tuition assistance?")

{'question': 'What is tuition assistance?',
 'text': LineList(lines=['1. Can you explain the concept of tuition assistance?', '2. How does tuition assistance work?', '3. What are the benefits of tuition assistance programs?', '4. Can you provide an overview of tuition assistance options available?'])}

### 3. Retrieval and Generation: Generate
In this step everything will be put together into a chain. A chain will take question, will retrieve relevant documents, will construct a prompt, will pass to the llm model and will parse the output.

In [36]:
# contextualize_ques_system_prompt = """Given a chat history and the latest user question \
# which might refer to a context in the chat history, formulate a standalone question \
# which can be understood without the chat history. Do NOT answer the question, \
# just reformulate it if needed and otherwise return it as is."""

contextualize_ques_system_prompt = """ Check if the latest user question refers to a context in the chat history. \
If it does, formulate a standalone question which can be understood without the chat history. \
If it does not, return the question as is. Your task is to just reformulate the question if needed.
"""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_ques_system_prompt),
        # Prompt template that assumes variable is already list of messages.
        # We provide the variable name to be used as messages
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [37]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [38]:
# just say that you don't know.
# qa_system_prompt = """You are an assistant for question-answering tasks. \
# Use the following pieces of retrieved context to answer the question. \
# If you don't know the answer, provide a response as - "I don't know." \

# {context}"""
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following context or document embeddings to answer the question. \
Don't justify your answers and don't search the internet.If the answer could not be found in the documents,\
ay the words "Sorry, I am unable to answer your question with the information available to me"\

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)


def contextualized_question(input: dict):
    if input.get("chat_history"):        
        return contextualize_q_chain
    else:
        return input["question"]


rag_chain = (
    RunnablePassthrough.assign(
        context=contextualized_question | retriever_similarity
    )
    | qa_prompt
    | llm
)

In [39]:
rag_chain

RunnableAssign(mapper={
  context: RunnableLambda(contextualized_question)
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001620040C040>, search_type='similarity_score_threshold', search_kwargs={'score_threshold': 0.7})
})
| ChatPromptTemplate(input_variables=['chat_history', 'context', 'question'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template='You are an assistant for question-answering tasks. Use the following context or document embeddings to answer the question. Don\'t justify your answers and don\'t search the internet.If

In [40]:
chat_history = []

question = "How Risk Management is done in Wells Fargo?"
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=question), ai_msg])

In [41]:
ai_msg.content

"Wells Fargo manages a variety of risks that can significantly affect its financial performance and its ability to meet the expectations of its customers, shareholders, regulators, and other stakeholders. The company's top priority is to strengthen its risk and control infrastructure. Wells Fargo continues to enhance and mature its risk management programs, including operational and compliance risk management programs as required by regulatory orders. The company is also actively involved in industry cybersecurity efforts and works with third-party service providers and governmental agencies to enhance defenses and improve resiliency to information security threats."

In [42]:
second_question = "How Wells Fargo protect against unauthorized access?"
new_msg = rag_chain.invoke({"question": second_question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=second_question), new_msg])

In [43]:
new_msg.content

'Wells Fargo prioritizes the protection of its networks, computers, software, and data from unauthorized access. The company implements controls, processes, and systems designed to enhance information security and prevent unauthorized access. These measures include robust authentication protocols, encryption technologies, firewalls, intrusion detection systems, and regular monitoring of network activity. Wells Fargo also collaborates with third-party service providers and governmental agencies to enhance defenses and improve resiliency to information security threats.'

In [44]:
third_question = "Who is the CEO of Amazon?"
new_msg = rag_chain.invoke({"question": third_question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=third_question), new_msg])

In [45]:
new_msg.content

'Sorry, I am unable to answer your question with the information available to me.'

In [50]:
fourth_question = "What did Sheri Bronstein wrote in her letter in Bank of America document?"
new_msg = rag_chain.invoke({"question": fourth_question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=fourth_question), new_msg])

In [51]:
new_msg.content

"I apologize for the confusion, but I don't have access to the specific content of Sheri Bronstein's letter in the Bank of America document."

In [52]:
fifth_question = "What is Emotional Wellness?"
new_msg = rag_chain.invoke({"question": fifth_question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=fifth_question), new_msg])
new_msg.content

"Emotional wellness refers to the state of one's emotional well-being and the ability to effectively cope with and manage emotions. It involves understanding and expressing emotions in a healthy way, having positive relationships, and maintaining a sense of balance and resilience in the face of challenges. Emotional wellness encompasses self-awareness, self-care, stress management, and the ability to navigate and regulate emotions in a constructive manner. It is an important aspect of overall well-being and contributes to a person's mental and psychological health."

In [47]:
third_question = "What are different types of Risk Management at Wells Fargo?"
rag_chain.invoke({"question": third_question, "chat_history": chat_history}).content

KeyboardInterrupt: 

In [None]:
# Clear chat history
chat_history.clear()