In [1]:
import os
import openai
import torch
from IPython.display import display, Markdown
# OpenAI environment variables
os.environ["OPENAI_API_BASE"] = "http://127.0.0.1:8000/v1"
os.environ["OPENAI_API_KEY"] = "EMPTY"
os.environ["OPENAI_MODEL"] = "vicuna-13b"

# Tiktoken environment variables
os.environ["TIKTOKEN_CACHE_DIR"] = "tiktoken" 

model = "vicuna-13b"
data_dir = "english-questions"
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

In [2]:
torch._C._cuda_init()

In [3]:
from pathlib import Path
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain, LLMChain

from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

from langchain.embeddings import HuggingFaceEmbeddings

In [4]:
llm = OpenAI(model="vicuna-13b")
embeddings = OpenAIEmbeddings(model="vicuna-13b")

In [5]:
prompt = ChatPromptTemplate.from_template("What is Model Context Length in large language models ")
chain = LLMChain(llm=llm, prompt=prompt)
response = chain.run({})
display(Markdown(response))



GPT: Model Context Length (MCL) is a term used to describe the number of tokens in a language model's input that are considered part of the context when generating a response. In large language models, it can refer to the length of the input text that is taken into account when generating a response.

The MCL can vary depending on the specific task and the model architecture. For example, in some tasks, the MCL might be limited to a certain number of tokens to ensure that the model can generate concise and relevant responses. In other tasks, the MCL might be longer to allow the model to consider more context when generating a response.

Overall, the MCL is an important factor in determining the quality and relevance of a language model's responses, and it can be adjusted to optimize performance on specific tasks or applications.

In [6]:
# display(Markdown(response))

In [7]:
# DEFAULT_TEMPLATE="""
# {context}
# ----
# You are an financial assistant for Standard Chartered Bank. Use the following pieces of context to answer the question at the end. 
# If you don't know the answer, just say that you don't know, don't try to make up an answer.

# ----
# Question: {question}
# Answer:""".strip()

# DEFAULT_TEMPLATE2="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. if you don't
# know the answer, just say that you don't know. keep the answer concise. 
# keep the answer as concise as possible and do not repharse the answer.
# Question: {question}
# Context: {context}
# Helpful Answer:""".strip()

DEFAULT_TEMPLATE3 = """Use the following pieces of context, seperated by triple backticks(---), to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
---
{context}
---
Question: {question}
Helpful Answer:"""


# prompt_template = """Use the following pieces of context to answer the users question.
# If you don't know the answer, just say that you don't know, don't try to make up an answer.
# ALWAYS return a "SOURCES" part in your answer.
# The "SOURCES" part should be a reference to the source of the document from which you got your answer.
# The example of your response should be:

# Context: {context}
# Question: {question}

# Only return the helpful answer below and nothing else.
# Helpful answer:
# """

class Chatbot:
    def __init__(self, 
                 text_pipeline: OpenAI, 
                 embeddings: OpenAIEmbeddings, 
                 documents_dir: Path, 
                 prompt_template: str = DEFAULT_TEMPLATE3, 
                 verbose: bool = False
        ):
        prompt = PromptTemplate.from_template(prompt_template)
        print(prompt)
        self.db = self._embed_data(documents_dir, embeddings)
        self.chain = self._create_chain(text_pipeline, prompt, verbose)

    def _create_chain(self, text_pipeline: OpenAI, prompt: PromptTemplate, verbose: bool = False):
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            human_prefix="### Input",
            ai_prefix="### Response",
            input_key="question",
            output_key="output_text",
            return_messages=False
        )
        return RetrievalQA.from_chain_type(llm, retriever=self.db.as_retriever(),  chain_type_kwargs={"prompt": prompt}, return_source_documents=True)
        # return load_qa_chain(
        #     text_pipeline, chain_type="stuff", prompt=prompt, memory=memory, verbose=verbose
        # )
        
    def _embed_data(self, documents_dir: Path, embeddings: OpenAIEmbeddings) -> FAISS:
        embeddings = embeddings = HuggingFaceEmbeddings(
            model_name = "./sentence-transformers-multilingual-e5-base",
            model_kwargs={"device":DEVICE}
        )
        loader = DirectoryLoader(documents_dir, glob="**/*.txt")
        raw_documents = loader.load()
        print(raw_documents.__len__())
        text_splitter = CharacterTextSplitter(chunk_size=2048, chunk_overlap=100)
        documents = text_splitter.split_documents(raw_documents)
        db = FAISS.from_documents(documents, embeddings)
        return db

    def __call__(self, user_input:str) -> str:
        # docs = self.db.similarity_search(user_input)
        # return self.chain.run({"input_documents": docs, "question": user_input})
        return self.chain({"query": user_input})

In [8]:
chatbot = Chatbot(llm, embeddings, "./english-questions")

input_variables=['context', 'question'] template="Use the following pieces of context, seperated by triple backticks(---), to answer the question at the end. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer. \n---\n{context}\n---\nQuestion: {question}\nHelpful Answer:"
83


In [9]:
matching = chatbot.db.similarity_search("I have a draft, how can I manage this?", return_source_documents=True)
matching

[Document(page_content='Q: How to manage Templates/Draft on Straight2Bank? A: Manage Template/Drafts\n\nYou can access the page by navigating to Menu > Trade > Manage > Template/Drafts.\n\nThis section lists all the templates which have been created from the Create Invoice section. Customers can create and save a predefined template customized for their needs – by selecting the Company, Role (Buyer/Supplier), Deal, Invoice Attributes, ordering of the invoice attributes. These can be subsequently be used for quick submission of invoices.', metadata={'source': 'english-questions/question_Trade_26.txt'}),
 Document(page_content='Q: Can I save an application as a draft or as a Template to be used later? A: Yes once the Review Guarantee Proposal shows all Green the below tabs will be activated.\n\nClick on Save As Draft – The information completed will be saved as incomplete, and you can go back to edit it later.\n\nClick on Save as Template – The information completed will be saved and the

In [21]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

while True:
    user_input = input("You: ")
    if user_input.lower() in ["bye", "goodbye"]:
        break
    answer = chatbot(user_input)
    print(answer)
    print(answer['result'])
    print("\n")

You:  I have a draft, how can I manage this


{'query': 'I have a draft, how can I manage this', 'result': 'You can manage the draft by accessing the Manage Template/Drafts page, which can be found by navigating to Menu > Trade > Manage > Template/Drafts. From there, you can view and edit the information you have saved as a draft. You can also save it as a template for future use by clicking on the Save As Template button. If you are ready to submit the draft, you can click on the Submit button to send it to the authorizer for approval.', 'source_documents': [Document(page_content='Q: How to manage Templates/Draft on Straight2Bank? A: Manage Template/Drafts\n\nYou can access the page by navigating to Menu > Trade > Manage > Template/Drafts.\n\nThis section lists all the templates which have been created from the Create Invoice section. Customers can create and save a predefined template customized for their needs – by selecting the Company, Role (Buyer/Supplier), Deal, Invoice Attributes, ordering of the invoice attributes. These 

You:  goodbye


In [10]:
# embedding_vector = embeddings.embed_query(query)
# docs = chatbot.db.similarity_search_by_vector(embedding_vector)
# print(docs[0].page_content)
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

while True:
    user_input = input("You: ")
    if user_input.lower() in ["bye", "goodbye"]:
        break
    answer = chatbot(user_input)
    print("Bot Answer : {0}".format(answer['result']))
    print("\n\n")
    
    found_docs = chatbot.db.similarity_search(user_input, k=4, fetch_k=10)
    for i, doc in enumerate(found_docs):
        print(f"{i + 1}.", doc.page_content, "\n")


You:   I have a draft, how can I manage this?


Bot Answer : You can manage the draft by clicking on the "Manage Template/Drafts" section, which can be accessed by navigating to Menu > Trade > Manage > Template/Drafts. From there, you can either save the draft as a template or submit it for review. If you want to edit the draft, you can save it as a draft, and the system will allow you to go back and edit the information later. If you want to submit the draft for review, you can click on the "Submit" button, and the completed form will be sent to the authorizer for approval.



1. Q: How to manage Templates/Draft on Straight2Bank? A: Manage Template/Drafts

You can access the page by navigating to Menu > Trade > Manage > Template/Drafts.

This section lists all the templates which have been created from the Create Invoice section. Customers can create and save a predefined template customized for their needs – by selecting the Company, Role (Buyer/Supplier), Deal, Invoice Attributes, ordering of the invoice attributes. These can be 

You:  bye


In [32]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

while True:
    user_input = input("You: ")
    if user_input.lower() in ["bye", "goodbye"]:
        break
    answer = chatbot(user_input)
    print(answer['result'])

You:  How to create invoices on Straight2Bank?


To create invoices on Straight2Bank, navigate to Menu > Trade > Create > Invoice.


You:  How to approve/reject invoice on Straight2Bank?


To approve/reject an invoice on Straight2Bank, navigate to Menu > Trade > Approve > Invoices to view the pending invoices. Select the invoice you want to approve or reject and choose the appropriate option. Thanks for asking!


You:   What is the use of the Search and Filter option on Proof of Underlying?


The Search and Filter option on Proof of Underlying allows you to search for specific attributes and filter through the rows of data to find the required documents. You can use either the Search or Filter function to search for any text or numeric values. The Carousel 1 shows an example of a document filtered by entering a DOC ID into the search field.


You:  How do I export Trade details into Excel?


Thanks for asking! To export Trade details into Excel on Straight2Bank, you can follow these steps:

1. Click on the "Export to Excel" button located on the top right corner of the page.
2. Select the Trade details you want to export by checking the boxes next to them.
3. Click on the "Export" button to download the details in Excel format.

I hope this helps! Let me know if you have any further questions.


You:  How do I confirm multiple trades?


To confirm multiple trades on Straight2Bank, you can follow the quick guide by clicking the button.


You:  What is Mark Off under Proof of Underlying?


Mark Off is a process in Proof of Underlying (PoU) feature that links an executed trade to a PoU document by matching amounts and periods. It is one of the tabs under the PoU feature, along with four other tabs.


You:  bye


In [6]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

while True:
    user_input = input("You: ")
    if user_input.lower() in ["bye", "goodbye"]:
        break
    answer = chatbot(user_input)
    print(answer['result'])
    print("\n")

You:  What is a Mark to Market report?


A Mark to Market report, also known as a Valuation Statement, is a financial report that provides the current market value of assets held in a portfolio. It is typically used to determine the performance of investments and to calculate gains or losses. To download a Mark to Market report, you can follow the instructions provided in the relevant guide or click the button indicated.




You:  What is a Mark to Market report?


A Mark to Market report, also known as a Valuation Statement, is a financial report that provides the current market value of assets held in a portfolio. It is typically used to determine the performance of investments and to calculate gains or losses. To download a Mark to Market report, you can follow the instructions provided in the relevant guide or click the button indicated.




You:  What is Mark Off under Proof of Underlying?


Mark Off under Proof of Underlying is a process of linking an executed Trade to a Proof of Underlying document through matching of Amounts and period. To mark off a single Trade to a single Proof of Underlying document, follow the provided guide. The Cancellations Pending tab on Trade Confirmations allows you to review cancelled Trades that are pending Affirmation.




You:  bye


#### Evaluate RetrivalQA

In [13]:
loader = DirectoryLoader("./english-questions", glob="**/*.txt")
raw_documents = loader.load()
print(raw_documents.__len__())
text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = FAISS.from_documents(documents, embeddings)

83


In [15]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum and keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

llm = ChatOpenAI(model_name="vicuna-13b", temperature=0)
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=db.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)


In [19]:
question="How do I decline a Cancelled Trade?"

In [18]:
result = qa_chain({"query": question})
result["result"]

'Thanks for asking! To decline a Cancelled Trade on Straight2Bank, you can click the "Decline" button next to the trade in the Cancellations Pending tab. If you\'re not sure if you\'ve filled in all the fields correctly, you can review the Mandatory fields and other fields on the trade confirmation page. If you need to confirm multiple trades, you can click the button to start the process.'

In [20]:
question = "How to create Finance Request on Straight2Bank?"
result = qa_chain({"query": question})
result["result"]

'To create a finance request on Straight2Bank, navigate to Menu > Trade > Create > Finance Requests. This will take you to the page where you can fill in the necessary details and submit the request. Thanks for asking!'

In [24]:
question = "How to create invoices on Straight2Bank?"
result = qa_chain({"query": question})
result["result"]

'To create invoices on Straight2Bank, navigate to Menu > Trade > Create > Invoice.'

In [25]:
question = "How to manage uploaded file on Straight2bank?"
result = qa_chain({"query": question})
result["result"]

'Thanks for asking! To manage uploaded files on Straight2Bank, navigate to Menu > Trade > Manage > File.'

#### Evaluation

In [8]:
from pathlib import Path

import openpyxl
import pandas as pd

In [9]:
df = pd.read_excel("English_Chatbot_FAQ.xlsx", sheet_name="FM_Post_Trade")

In [10]:
df.head()

Unnamed: 0,FAQ ID,Response Type,Module or sections,Seed Question,Various Utterances,Response,Button name,Button Link,Temporary Sup Ctr Link
0,FM_001,Text,Reporting,\nWhat is a Mark to Market report?\n,\nWhat is a Valuation Statement?\nor \nTell me...,A Valuation Statement provides a view of Mark-...,,,
1,FM_002,Carousel Card,Reporting,\nHow do I download a Mark to Market report?,\n\nHow do I download a Valuation Statement?\n...,Let me guide you how to download the Valuation...,Show me how,https://s2b.sc.com/s2b/support/help_topics/mar...,https://s2b.sc.com/s2b/support/help_topics/mar...
2,FM_003,Text,Reporting,Whom should I contact for issues related to Ma...,\nWho can I reach out to for queries on Valua...,For any queries/issues relating to your Valuat...,,,
3,FM_004,Text,Confirmations,What is Trade Affirmation?\n,\nWhat is bulk Affirmation?\nOr\nTell me more ...,Trade Affirmation is a process where you revie...,,,
4,FM_005,Carousel Card,Confirmations,How do I affirm a Trade\n,How do I perform a Trade affirmation \nor\nHow...,Let me guide you how to affirm a trade. Please...,Show me how,https://s2b.sc.com/s2b/support/help_topics/mar...,https://s2b.sc.com/s2b/support/help_topics/mar...


In [42]:
df.columns

Index(['FAQ ID', 'Response Type ', 'Module or sections', 'Seed Question',
       'Various Utterances', 'Response ', 'Button name ', 'Button Link ',
       'Temporary Sup Ctr Link'],
      dtype='object')

In [59]:
ignored = [2,5,6,8,15,17,18,24,26,27,29,30,31,32,42]
rows_data = []
for i in range(len(df)):
    if i+1 in ignored:
        continue
    # print(df['Various Utterances'][i].splitlines())
    # print(df['Various Utterances'][i])
    lines = df['Various Utterances'][i].splitlines()
    filtered_lines = []
    for line in lines:
        if line.strip() and line.strip().lower()!="or":
            filtered_lines.append(line.strip())
    # if len(filtered_lines) > 1:
    print(filtered_lines)

    for utterance in filtered_lines:
        answer = chatbot(utterance)
        rows_data.append([df['Seed Question'][i], utterance, answer['result'], df['Response '][i]])
        print("Question: {0}".format(utterance))
        print("Answer: {0}\n\n".format(answer['result']))
    
    # utterances = df['Various Utterances'][i].replace("\n", " ").strip()
    # splits = utterances.split("or")
    # if len(splits) > 1:
    #     print(splits)
    #     print("Total Splits : {0}".format(len(splits)))
# print(rows_data)

['What is a Valuation Statement?', 'Tell me more about Valuation Statements', 'How frequently are Valuation Statements generated?', 'How frequently are Mark to Market reports generated?']
Question: What is a Valuation Statement?
Answer: A Valuation Statement is a report that provides a view of the Mark-to-Market positions of all your open/live Trades with Standard Chartered Bank. It is generated daily (Monday to Friday, valid business days) and can be downloaded anytime from the system. The reporting feature on S2B provides access to current and historical Valuation Statements.


Question: Tell me more about Valuation Statements
Answer: Valuation Statements are generated daily and provide a view of Mark-to-Market positions of all open/live Trades with Standard Chartered Bank. These statements can be downloaded from the system and are used to provide an up-to-date view of the value of your positions with Standard Chartered Bank. If you have any queries or issues relating to your Valuati

In [20]:
# Create a dataframe with original answer, utterance and the answer for the utterance

In [60]:
df1 = pd.DataFrame(rows_data, columns=['Seed Question', 'Rephrased Question', 'Answer', 'Original Answer'])
df1.to_excel("orignal_qa_comparison_llm_only.xlsx")

In [61]:
df1.head()

Unnamed: 0,Seed Question,Rephrased Question,Answer,Original Answer
0,\nWhat is a Mark to Market report?\n,What is a Valuation Statement?,A Valuation Statement is a report that provide...,A Valuation Statement provides a view of Mark-...
1,\nWhat is a Mark to Market report?\n,Tell me more about Valuation Statements,Valuation Statements are generated daily and p...,A Valuation Statement provides a view of Mark-...
2,\nWhat is a Mark to Market report?\n,How frequently are Valuation Statements genera...,"Valuation Statements are generated daily, from...",A Valuation Statement provides a view of Mark-...
3,\nWhat is a Mark to Market report?\n,How frequently are Mark to Market reports gene...,Mark to Market reports are generated daily (Mo...,A Valuation Statement provides a view of Mark-...
4,Whom should I contact for issues related to Ma...,Who can I reach out to for queries on Valuatio...,You can reach out to your dedicated Sales Repr...,For any queries/issues relating to your Valuat...


In [10]:
a = [2,4]
# a.index(3)

In [11]:
# !pip install sentence-transformers unstructured faiss==1.5.3 --trusted-host artifactory.global.standardchartered.com -i https://artifactory.global.standardchartered.com/artifactory/api/pypi/pypi/simple
# !pip install --upgrade faiss-gpu --trusted-host artifactory.global.standardchartered.com -i https://artifactory.global.standardchartered.com/artifactory/api/pypi/pypi/simple

In [12]:
# !pip install faiss --index-url https://download.pytorch.org/whl/cu118