In [1]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import DirectoryLoader

In [2]:
loader = DirectoryLoader("./docs/", glob="**/*.pdf")

In [3]:
documents = loader.load()

In [4]:
text_splitter = CharacterTextSplitter(
    separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
)

In [5]:
texts = text_splitter.split_documents(documents)

Created a chunk of size 1838, which is longer than the specified 1000
Created a chunk of size 1195, which is longer than the specified 1000
Created a chunk of size 1094, which is longer than the specified 1000
Created a chunk of size 1204, which is longer than the specified 1000
Created a chunk of size 1579, which is longer than the specified 1000
Created a chunk of size 1161, which is longer than the specified 1000
Created a chunk of size 1005, which is longer than the specified 1000
Created a chunk of size 1461, which is longer than the specified 1000
Created a chunk of size 1160, which is longer than the specified 1000
Created a chunk of size 1524, which is longer than the specified 1000
Created a chunk of size 1067, which is longer than the specified 1000
Created a chunk of size 1127, which is longer than the specified 1000
Created a chunk of size 1010, which is longer than the specified 1000
Created a chunk of size 1220, which is longer than the specified 1000
Created a chunk of s

In [6]:
print(len(texts))

7622


In [7]:
embeddings = OpenAIEmbeddings()

In [8]:
knowledge_base = FAISS.from_documents(texts, embeddings)

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.


In [9]:
knowledge_base.save_local(".", "mf-ug-index")

In [10]:
embeddings2 = OpenAIEmbeddings()

In [11]:
knowledge = FAISS.load_local(".", embeddings2, "mf-ug-index")

In [12]:
user_question = (
    "What is ACheck, when is it used, what are the requirements for using ACheck?"
)

In [13]:
resultset = knowledge.similarity_search(user_question)

In [14]:
print(len(resultset))

4


In [15]:
for x in resultset:
    print(x)

page_content='When do you use the ACheck application?\nAs soon as you receive an application request and the necessary tax identification numbers and/or social security numbers, enter the information into ACheck.\nIf you have not yet identified all parties at this stage, you must enter all Key Principals, Principals, and Guarantors of the Borrower into ACheck as soon as you identify them.\nIf more than 90 days pass between the initial ACheck and Commitment Date, you must repeat an ACheck.\nHow do you view ACheck results?\nThe ACheck application will provide either a “Continue Processing” or “Do Not Process” electronic response instantaneously.\n“Continue Processing” Response\nYou must receive a “Continue Processing” response in order to proceed with the application.\nThis does not mean that the Borrower, Key Principal, Principal, Guarantor, or Principal is approved; you are still required to complete full Mortgage Loan credit underwriting.\n“Do Not Process” Response' metadata={'source'

In [16]:
llm = ChatOpenAI(temperature=0, model='gpt-4')

In [17]:
chain = load_qa_chain(llm, chain_type="stuff")

In [18]:
response = chain.run(input_documents=resultset, question=user_question)
print(response)

ACheck is a lender due diligence application used for the Borrower, Key Principal, and Principal. It is also known as Applicant Experience Check. 

ACheck is used as soon as you receive an application request and the necessary tax identification numbers and/or social security numbers. At this stage, you must enter all Key Principals, Principals, and Guarantors of the Borrower into ACheck as soon as you identify them. If more than 90 days pass between the initial ACheck and Commitment Date, you must repeat an ACheck.

The requirements for using ACheck are that for both initial applications, Supplemental Mortgage Loans, and any Transfer/Assumption, you must perform an ACheck and receive a "Continue Processing" response for the Borrower, each Key Principal of the Borrower, each Principal of the Borrower, each Guarantor, and any person who owns or controls an entity Key Principal.
