### Initializing the text splitter and splitting the document

`gpt4all1.pdf` is the first paper listed in the GPT4All GitHub repository. Download it and place it next to this notebook before running the cells below.

In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.embeddings.gpt4all import GPT4AllEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import NLTKTextSplitter


# Read the source PDF. A plain-text source could be loaded instead with
# TextLoader("mlops.txt").
loader = PyPDFLoader("gpt4all1.pdf")
documents = loader.load()

# Recursive character splitter; chunk length is measured with len().
# NLTKTextSplitter() can be swapped in here for the split_documents call.
text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    length_function=len,
    chunk_overlap=20,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)

### Putting the split text into the vector database

In [2]:
from langchain.vectorstores import FAISS

# Embed every chunk with the GPT4All embedding model and index the result in FAISS.
embedder = GPT4AllEmbeddings()
db = FAISS.from_documents(documents=texts, embedding=embedder)
# Optional: db.save_local('db') to persist the index.

Found model file at  /Users/random/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin


objc[1795]: Class GGMLMetalClass is implemented in both /Users/random/miniconda3/envs/llm/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x12fb14228) and /Users/random/miniconda3/envs/llm/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x12f9dc228). One of the two will be used. Which one is undefined.


### Testing

In [3]:
# Ask the index for the five chunks most similar to the query.
query = "What is gpt4all"
docs = db.similarity_search(query=query, k=5)

In [1]:
# Show the first hit. `docs` is created by the similarity-search cell above, so
# that cell must run first; on a fresh kernel this line raises NameError otherwise.
best_match = docs[0]
print(best_match.page_content)

NameError: name 'docs' is not defined

### Loading the model

In [5]:
def retrieve_info(query, k=3):
    """Return the text of the chunks most similar to ``query``.

    Args:
        query: Search string passed to the vector store.
        k: Number of chunks to retrieve. Defaults to 3, the value that was
            previously hard-coded.

    Returns:
        A list holding the ``page_content`` of each hit, in the order the
        vector store returned them.
    """
    # `db` is the notebook-level FAISS index built in an earlier cell.
    hits = db.similarity_search(query, k=k)
    return [hit.page_content for hit in hits]


In [6]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from pathlib import Path

# The model files sit in the per-user GPT4All cache directory. Building the path
# from the home directory avoids hard-coding one machine's absolute path.
model_dir = Path.home() / ".cache" / "gpt4all"
# Alternative model file: "ggml-model-gpt4all-falcon-q4_0.bin"
local_path = str(
    model_dir / "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"  # replace with your desired model file
)

# Callback handler handed to the LLM below (used together with streaming=True).
callbacks = [StreamingStdOutCallbackHandler()]

# NOTE(review): the load log for this model reports n_ctx = 2048, far below
# max_tokens=16000 — confirm this value is intended.
llm = GPT4All(
    model=local_path, backend="llama", verbose=True, streaming=True, callbacks=callbacks, max_tokens=16000
)

# Prompt with two variables: the retrieved context and the user's question.
template = """You are a helpful AI assistant and provide the answer for the question based on the given context and your existing knowledge.
Context:{context}
>>QUESTION<<{message}
>>ANSWER<<"""

prompt = PromptTemplate(input_variables=["context", "message"], template=template)

llm_chain = LLMChain(llm=llm, prompt=prompt)

Found model file at  /Users/random/.cache/gpt4all/wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin


llama.cpp: using Metal
llama.cpp: loading model from /Users/random/.cache/gpt4all/wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32001
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_head_kv  = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 5.0e-06
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.11 MB
llama_model_load_internal: mem required  = 7477.73 MB (+ 1600.00 MB per state)
lla

In [7]:
def generate_response(message):
    """Answer ``message`` using chunks retrieved from the vector store as context.

    Args:
        message: The user's question. It is used both as the retrieval query
            and as the ``message`` prompt variable.

    Returns:
        Whatever ``llm_chain.run`` returns for the filled-in prompt.
    """
    chunks = retrieve_info(message)
    # Join the chunks into plain text. Passing the list itself would put its
    # Python repr (brackets, quotes, escaped newlines) into the prompt.
    context = "\n\n".join(chunks)
    return llm_chain.run(message=message, context=context)

### Checking the response

In [8]:
# Run the full retrieve-then-generate pipeline (actual GPU inference).
question = "gpt4all"
result = generate_response(message=question)

GPT4All is an open source, large scale chatbot trained over a curated corpus of assistant interactions. The project aims to promote reproducibility and collaboration in AI research by releasing data, training code, and final model weights for the community to build upon. GPT4All was developed using LLaMA, which has a non-commercial license, and based on Ope

If we comment out the context part of the `generate_response` function, the model starts hallucinating because it has no idea what gpt4all is. Once the relevant context from the vector database is supplied, however, the model produces good results and can clearly summarize what the paper is about.