In [None]:
import torch
# Choose the compute device: Apple's Metal backend ("mps") when this PyTorch
# build and machine support it, otherwise the CPU.
# torch.backends.mps offers no `enabled` / `max_concurrency` switches to set;
# checking availability is all that is needed here.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'


In [1]:
from CAQA import *
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before any
# model is built (presumably the API token the hosted models need — TODO confirm).
load_dotenv()
# Questions put to the QA system: three about Fannie Mae's loan book,
# followed by two about events in 2023.
queries = [
    "What is the total amount of loans funded by Fannie Mae since 2009?",
    "What percentage of Fannie Mae's single-family guaranty book comprises loans acquired since 2009?",
    "What was the percentage of delinquent or foreclosed single-family loans in Fannie Mae's book as of December 31, 2013?",
    "What is the status of the COVID-19 pandemic as of June 2023?",
    "What new technologies have been introduced in 2023?",
]

# Repo ids of the candidate large language models.
llm_list = [
    "google/flan-t5-xxl",
    "tiiuae/falcon-7b-instruct",
    "mosaicml/mpt-7b",
    "bigscience/bloom-560m",
]

# Repo ids of the candidate embedding models.
embedding_model_list = [
    "hkunlp/instructor-xl",
]


In [3]:
# Start from a builder created with no arguments.
caqa_builder = CAQABuilder()

# Configure it through the chained setters: first entry of each model list,
# plus the generation parameters passed to the LLM.
costomized_builder = (
    caqa_builder
    .set_llm(llm_list[0])
    .set_embedding_model(embedding_model_list[0])
    .set_llm_params(temperature=0.1, max_new_tokens=500)
)

# Build the QA system from the configured builder.
myCAQA = costomized_builder.build()


  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
max_seq_length  512


Using embedded DuckDB with persistence: data will be stored in: /Users/wangzhuohan/Desktop/year-2/CAQA


In [4]:
# Report which models this CAQA instance is using.
print(f"*******Embedding model used:  {myCAQA.embedding_model}*******")
print(f"*******Large Language Model used:  {myCAQA.llm_repo_id}*******")

# Batch pass: answer every predefined query in order.
for query in queries:
    # The result is unpacked into the answer text and its source documents;
    # only the answer is printed, source_docs stays available for inspection.
    answer, source_docs = myCAQA.generate_response(query)
    print(f"Question: {query}")
    print(f"Answer: {answer}\n")
    print("*****************")

# Interactive pass: keep answering typed queries until the user types "exit".
while True:
    try:
        query = input("\nEnter a query: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Stdin was closed or the kernel was interrupted while waiting for
        # input: leave the loop quietly instead of ending with a traceback.
        break

    # Accept "exit" regardless of letter case or surrounding whitespace.
    if query.lower() == "exit":
        break

    if not query:
        # Nothing was typed; do not spend a model call on an empty prompt.
        continue

    # Get the answer from the chain
    answer, source_docs = myCAQA.generate_response(query)
    print(f"Answer: {answer}\n")




*******Embedding model used:  hkunlp/instructor-xl*******
*******Large Language Model used:  google/flan-t5-xxl*******
Question: What is the total amount of loans funded by Fannie Mae since 2009?
Answer: $4.1 trillion

*****************
Question: What percentage of Fannie Mae's single-family guaranty book comprises loans acquired since 2009?
Answer: 77 percent

*****************
Question: What was the percentage of delinquent or foreclosed single-family loans in Fannie Mae's book as of December 31, 2013?
Answer: 0.10

*****************
Question: What is the status of the COVID-19 pandemic as of June 2023?
Answer: I don't know

*****************
Question: What new technologies have been introduced in 2023?
Answer: I don't know

*****************
