Source document: https://www.federalreserve.gov/publications/files/2024-march-supervisory-stress-test-methodology.pdf

In [1]:
# Load the dotenv IPython extension, then read the variables defined in ../.env.
# NOTE(review): presumably this supplies the OpenAI credentials used by the
# ChatOpenAI / OpenAIEmbeddings clients below — confirm.
%load_ext dotenv
%dotenv ../.env

In [2]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

### Create the chat model

In [3]:
from langchain_openai import ChatOpenAI

# Chat model that generates the final answer in the RAG chain.
CHAT_MODEL_NAME = "gpt-3.5-turbo-0125"
llm = ChatOpenAI(model=CHAT_MODEL_NAME)



### Load the source PDF

In [5]:
from langchain_community.document_loaders import PyMuPDFLoader

# Read the methodology PDF from the notebook's working directory into `docs`.
PDF_PATH = "./2024-march-supervisory-stress-test-methodology.pdf"
loader = PyMuPDFLoader(PDF_PATH)
docs = loader.load()


In [6]:
# Split the loaded documents into overlapping chunks for embedding.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(docs)

In [7]:
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader

import os
from os.path import  join



# Build the FAISS index once and cache it on disk; later runs load the cache.
# An existing "faiss_index" directory is reused as-is, so it will not reflect
# later changes to the PDF or to the chunking — delete it to force a rebuild.
FAISS_INDEX_DIR = "faiss_index"
embeddings = OpenAIEmbeddings()

if os.path.exists(FAISS_INDEX_DIR):
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # serialized data from disk; this should only be used on an index this
    # notebook wrote itself, never on an index from an untrusted source.
    vectorstore = FAISS.load_local(
        FAISS_INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )
else:
    vectorstore = FAISS.from_documents(splits, embeddings)
    vectorstore.save_local(FAISS_INDEX_DIR)
retriever = vectorstore.as_retriever()



In [8]:
# Pull the shared "rlm/rag-prompt" RAG prompt template from the LangChain prompt hub.
prompt = hub.pull("rlm/rag-prompt")


In [10]:
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate

# Locally defined RAG prompt: a single human message with {question} and
# {context} placeholders.
# NOTE(review): no other cell visible in this notebook references `my_prompt`.
RAG_TEMPLATE = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Keep the answer concise."
    "\nQuestion: {question} \nContext: {context} \nAnswer:"
)
RAG_TEMPLATE_VARIABLES = ('context', 'question')

rag_human_message = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=list(RAG_TEMPLATE_VARIABLES),
        template=RAG_TEMPLATE,
    )
)
my_prompt = ChatPromptTemplate(
    input_variables=list(RAG_TEMPLATE_VARIABLES),
    messages=[rag_human_message],
)

In [12]:
def format_docs(docs):
    """Return the page_content of every document in `docs`, joined by blank lines."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


## RAG chain over the local index, returning cited sources

In [13]:
from langchain_core.runnables import RunnableParallel

# Step 1 of the answer chain: replace the list of retrieved documents under
# "context" with one formatted string, leaving the other keys untouched.
stringify_context = RunnablePassthrough.assign(
    context=lambda inputs: format_docs(inputs["context"])
)

# NOTE(review): this chain uses `prompt` (pulled from the hub), not the locally
# defined `my_prompt` — confirm which one is intended.
rag_chain_from_docs = stringify_context | prompt | llm | StrOutputParser()

# Retrieve documents and pass the question through side by side, then add the
# generated answer under "answer" so the retrieved documents stay available
# in the result for citing.
retrieve_with_question = RunnableParallel(
    {
        "context": retriever,
        "question": RunnablePassthrough(),
    }
)
rag_chain_with_source = retrieve_with_question.assign(answer=rag_chain_from_docs)




In [18]:
from IPython.display import clear_output


In [14]:
def ask_question(question):
    """Run `question` through rag_chain_with_source and print the result.

    Prints the question, the generated answer, and one line per retrieved
    document giving its source and a preview of its text. Documents longer
    than 40 characters are abbreviated to their first 20 and last 10
    characters; shorter ones are printed in full.
    """
    result = rag_chain_with_source.invoke(question)
    print("question = ", result["question"])
    print("answer = ", result['answer'])
    print("Documents used:")
    for document in result['context']:
        text = document.page_content
        preview = text[:20] + "..." + text[-10:] if len(text) > 40 else text
        print("\tsource: " + document.metadata['source'] + "\t" + preview)

In [21]:
# Interactive question loop: read questions from stdin until the user enters
# "q" or "quit", answer each one through rag_chain_with_source, print the
# formatted result, and append the same text to history.txt.
with open("history.txt", 'a', encoding='utf8') as f:
    while True:
        question = input().strip()
        # Case-fold only for the quit check; the question itself is forwarded
        # unchanged so acronyms and proper nouns keep their casing for
        # retrieval and for the model.
        if question.lower() in ('q', 'quit'):
            break
        if not question:
            # Blank line: nothing to ask, so skip the retrieval + LLM call.
            continue
        clear_output(wait=True)
        ans = rag_chain_with_source.invoke(question)
        parts = [
            "question = " + ans["question"] + "\n",
            "answer = " + ans['answer'] + "\n",
            "Documents used:" + "\n",
        ]
        for d in ans['context']:
            parts.append("\tsource: " + d.metadata['source'])
            if 'page' in d.metadata:
                parts.append("\t page: " + str(d.metadata['page']))
            parts.append("\n\t\t")
            content = d.page_content
            if len(content) > 40:
                # Long chunk: show only its first 40 and last 30 characters.
                parts.append(content[:40] + "..." + content[-30:])
            else:
                # Short chunk: show it whole. The source is already on the
                # header line above, so it is not repeated here.
                parts.append(content)
            parts.append("\n\n")
        parts.append("-----------------------------\n")
        to_write = "".join(parts)
        print(to_write)
        f.write(to_write)
        # Flush after every answer so the history file is current even if the
        # kernel is stopped while the loop is waiting for input.
        f.flush()

question = how does the fed utilize mortgage information 
answer = The Federal Reserve utilizes mortgage information by mapping FICO® Scores as an input to its first-lien mortgage loss model, using them as the most widely used credit scores in historical data. The Fed has updated its supervisory models for the 2024 stress test, including changes to the first-lien mortgage model to control for macroeconomic data variations during the COVID-19 pandemic. The models rely on detailed portfolio data to provide independent assessments of each firm's capital adequacy under stress.
Documents used:
	source: ./2024-march-supervisory-stress-test-methodology.pdf	1 The Federal Reserve maps to FICO® Scor...visory Stress Test Methodology

	source: ./2024-march-supervisory-stress-test-methodology.pdf	For the 2024 stress test, the Federal Re...riality of the GMS dependence.

	source: ./2024-march-supervisory-stress-test-methodology.pdf	Income at origination
borrowerincome
Bor...visory Stress Test Method

In [22]:
# Display the raw result of the last question asked in the loop above
# (keys used elsewhere in this notebook: 'context', 'question', 'answer').
ans

{'context': [Document(page_content='1 The Federal Reserve maps to FICO® Scores as an input to its domestic first-lien mortgage loss model, because these scores are the most\nwidely used commercially available credit scores in the historical data used for estimation.\nTable 30 shows the modeled loss rates for the six groups of loans for the supervisory severely\nadverse scenario. Each entry in the table shows the portfolio-level (average) estimated loss rate\nfor the loans in one of the six groups, as well as the median and 25th and 75th percentiles of the\nestimated loan-level loss rates.\n78\n2024 Supervisory Stress Test Methodology', metadata={'source': './2024-march-supervisory-stress-test-methodology.pdf', 'file_path': './2024-march-supervisory-stress-test-methodology.pdf', 'page': 83, 'total_pages': 100, 'format': 'PDF 1.7', 'title': '2024 Supervisory Stress Test Methodology', 'author': 'Federal Reserve Board', 'subject': '', 'keywords': '', 'creator': 'XPP', 'producer': 'PDFlib+P

In [30]:
# Preview of the "documents used" listing for the most recent `ans`:
# one header line per retrieved document (source, plus page when present),
# followed by an abbreviated excerpt of its text.
parts = []
for d in ans['context']:
    parts.append("\tsource: " + d.metadata['source'])
    if 'page' in d.metadata:
        parts.append("\t" + str(d.metadata['page']))
    parts.append("\n\t\t")
    content = d.page_content
    if len(content) > 40:
        # Long chunk: show only its first 40 and last 30 characters.
        parts.append(content[:40] + "..." + content[-30:])
    else:
        # Short chunk: show it whole. The source is already on the header
        # line above, so it is not repeated here.
        parts.append(content)
    parts.append("\n\n")
parts.append("-----------------------------\n")
to_write = "".join(parts)
print(to_write)
        

	source: ./2024-march-supervisory-stress-test-methodology.pdf	83
		1 The Federal Reserve maps to FICO® Scor...visory Stress Test Methodology

	source: ./2024-march-supervisory-stress-test-methodology.pdf	94
		For the 2024 stress test, the Federal Re...riality of the GMS dependence.

	source: ./2024-march-supervisory-stress-test-methodology.pdf	91
		Income at origination
borrowerincome
Bor...visory Stress Test Methodology

	source: ./2024-march-supervisory-stress-test-methodology.pdf	8
		The Federal Reserve’s models rely on det...driven by differences in firm-

-----------------------------

