In [2]:
! pip install -U sentence-transformers



In [3]:
from langchain import PromptTemplate, LLMChain
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores.faiss import FAISS

In [4]:
# Location of the converted GPT4All model weights, relative to the notebook.
gpt4all_path = './models/gpt4all-converted.bin' 

# Callback manager wrapping a stdout streaming handler; it is handed to the LLM below.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# Loader for the text file that is later split into chunks and indexed.
loader = TextLoader('./docs/shortened_sotu.txt')
# Constructing the LLM loads the model file (see the llama_model_load log in the cell output).
llm = GPT4All(model=gpt4all_path, callback_manager=callback_manager, verbose=True)


llama_model_load: loading model from './models/gpt4all-converted.bin' - please wait ...
llama_model_load: n_vocab = 32001
llama_model_load: n_ctx   = 512
llama_model_load: n_embd  = 4096
llama_model_load: n_mult  = 256
llama_model_load: n_head  = 32
llama_model_load: n_layer = 32
llama_model_load: n_rot   = 128
llama_model_load: f16     = 2
llama_model_load: n_ff    = 11008
llama_model_load: n_parts = 1
llama_model_load: type    = 1
llama_model_load: ggml map size = 4017.70 MB
llama_model_load: ggml ctx size =  81.25 KB
llama_model_load: mem required  = 5809.78 MB (+ 2052.00 MB per state)
llama_model_load: loading tensors from './models/gpt4all-converted.bin'
llama_model_load: model size =  4017.27 MB / num tensors = 291
llama_init_from_file: kv self size  =  512.00 MB


In [5]:
# Embedding model shared by index creation and index loading. No model name is
# passed, so the library default applies (the recorded run loaded
# sentence-transformers/all-mpnet-base-v2 on a CUDA device).
emb = HuggingFaceEmbeddings()

  from .autonotebook import tqdm as notebook_tqdm


[2023-05-04 21:59:41,230] {SentenceTransformer.py:66} INFO - Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
[2023-05-04 21:59:42,367] {SentenceTransformer.py:105} INFO - Use pytorch device: cuda


In [6]:
def split_chunks(sources, chunk_size=256, chunk_overlap=32):
    """Split loaded documents into overlapping chunks.

    Args:
        sources: Documents to split, passed straight to the splitter's
            ``split_documents``.
        chunk_size: Chunk size given to ``RecursiveCharacterTextSplitter``.
            Defaults to 256.
        chunk_overlap: Overlap between consecutive chunks given to
            ``RecursiveCharacterTextSplitter``. Defaults to 32.

    Returns:
        A new list containing the chunks produced by the splitter.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    # list() copies the splitter's result, so callers always get a fresh list.
    return list(splitter.split_documents(sources))


def create_index(chunks):
    """Build a FAISS vector store from document chunks.

    Each chunk contributes its ``page_content`` as the text to embed and its
    ``metadata`` as the accompanying record. Embeddings come from the
    notebook-level ``emb`` object.

    Returns:
        The store returned by ``FAISS.from_texts``.
    """
    page_texts = []
    page_metadatas = []
    for chunk in chunks:
        page_texts.append(chunk.page_content)
        page_metadatas.append(chunk.metadata)

    return FAISS.from_texts(page_texts, emb, metadatas=page_metadatas)


def similarity_search(query, index, k=4):
    """Retrieve the documents in ``index`` most similar to ``query``.

    Args:
        query: Text to search for.
        index: Any object exposing ``similarity_search(query, k=...)`` whose
            results carry ``page_content`` and ``metadata`` attributes.
        k: Number of results to request from the index. Defaults to 4.

    Returns:
        A ``(matched_docs, sources)`` tuple. ``matched_docs`` is the index's
        result unchanged; ``sources`` is a list of dicts with the keys
        ``"page_content"`` and ``"metadata"``, one per matched document.
    """
    matched_docs = index.similarity_search(query, k=k)
    # Plain-dict view of each hit, convenient for display or serialization.
    sources = [
        {
            "page_content": doc.page_content,
            "metadata": doc.metadata,
        }
        for doc in matched_docs
    ]
    return matched_docs, sources

In [7]:
from pathlib import Path

index_path = Path("models/state_of_the_union_index_gpu_sentence_transformers")
str_path = str(index_path)

# FAISS.save_local() writes a *directory* containing index.faiss and index.pkl.
# Testing the folder itself with is_file() is therefore always False and would
# rebuild the index on every run; look for the saved files instead.
index_is_saved = all(
    (index_path / file_name).is_file() for file_name in ("index.faiss", "index.pkl")
)

if not index_is_saved:
    print('No index found. Creating index...')
    # Load the source text, chunk it, embed it, and persist the result.
    docs = loader.load()
    chunks = split_chunks(docs)
    index = create_index(chunks)
    index.save_local(str_path)
else:
    print(f'Index found. Loading index from {index_path}...')
    # NOTE(review): load_local unpickles index.pkl; only load indexes this
    # notebook (or another trusted source) created.
    index = FAISS.load_local(str_path, emb)



No index found. Creating index...


Batches: 100%|██████████| 2/2 [00:01<00:00,  1.42it/s]

[2023-05-04 21:59:45,375] {loader.py:54} INFO - Loading faiss with AVX2 support.
[2023-05-04 21:59:45,376] {loader.py:58} INFO - Could not load library with AVX2 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx2'")
[2023-05-04 21:59:45,377] {loader.py:64} INFO - Loading faiss.
[2023-05-04 21:59:45,410] {loader.py:66} INFO - Successfully loaded faiss.





In [8]:
# The question is used twice: here to retrieve similar chunks from the index,
# and later as the input passed to the LLM chain.
question = "Summarize the comments about NATO and its purpose."
# matched_docs: the retrieved documents; sources: their content/metadata as dicts.
matched_docs, sources = similarity_search(question, index)

Batches: 100%|██████████| 1/1 [00:00<00:00, 71.67it/s]


In [9]:
import pickle

In [10]:
# Prompt layout: the retrieved context first, then the question, ending with a
# step-by-step cue. {context} and {question} are the two template variables.
template = """
Please use the following context to answer questions.
Context: {context}
---
Question: {question}
Answer: Let's think step by step."""

# Join the text of the retrieved documents, separated by newlines.
context = "\n".join([doc.page_content for doc in matched_docs])
# Pre-fill {context} so only {question} remains to be supplied when the prompt is used.
prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)


In [12]:
# Persist the context-filled prompt. The with-block guarantees the file handle
# is flushed and closed even if pickling raises, instead of leaking it.
with open("models/prompt.pkl", "wb") as prompt_file:
    pickle.dump(prompt, prompt_file)

: 

In [None]:
# Combine the context-filled prompt with the local model.
llm_chain = LLMChain(prompt=prompt, llm=llm)

# Run the chain on the question and print the returned answer.
# NOTE(review): llm was built with a stdout-streaming callback, so the answer
# may appear twice (streamed, then printed) — confirm and drop one if so.
print(llm_chain.run(question))