In [14]:
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path="../models/nous-hermes-llama-2-7b.q4_0.gguf",
    temperature=0,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    n_ctx=4096,
    verbose=True,  # Verbose is required to pass to the callback manager
)

llama_model_loader: loaded meta data with 18 key-value pairs and 291 tensors from ../models/nous-hermes-llama-2-7b.q4_0.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = nous-hermes-llama-2-7b.ggmlv3.q4_0.bin
llama_model_loader: - kv   2:                        general.description str              = converted from legacy GGJTv3 format
llama_model_loader: - kv   3:                       llama.context_length u32              = 2048
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 11008
llama_model_loa

In [19]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain.text_splitter import Language
from langchain_community.vectorstores import Chroma

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})

loader_python = GenericLoader.from_filesystem(
                    "../docs",
                    glob="**/*",
                    suffixes=[".py"],
                    exclude=["**/non-utf8-encoding.py"],
                    parser=LanguageParser(language=Language.PYTHON, parser_threshold=500),
                )

# interpret information in the documents
documents_python = loader_python.load()
splitter_python = RecursiveCharacterTextSplitter.from_language(language=Language.PYTHON, chunk_size=2000, chunk_overlap=200)
texts_python = splitter_python.split_documents(documents_python)

db = Chroma.from_documents(texts_python, embeddings)
retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 8})

In [21]:
from langchain_core.prompts import PromptTemplate
from langchain.chains import StuffDocumentsChain, LLMChain, RetrievalQA

prompt_template = """You are a documentation agent. Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. Give detailed description of what each function in a file does.
2. If you don't know the answer, don't try to make up an answer. Just say **I do not know that**.
3. If you find the answer, write the answer in with as much technical detail as possible.

{context}

Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template) 
llm_chain = LLMChain(llm=llm, prompt=QA_CHAIN_PROMPT)

document_prompt = PromptTemplate(
    input_variables=["page_content"],
    template="{page_content}"
)

document_prompt = PromptTemplate(
    input_variables=["page_content", "source"],
    template="Context:\ncontent:{page_content}\nsource:{source}",
)

chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_prompt=document_prompt,
    document_variable_name="context"
)

qa = RetrievalQA(
    combine_documents_chain=chain,
    callbacks=None,
    verbose=True,
    retriever=retriever,
    return_source_documents=True,
)

res = qa("What does the sample.py file do?")
print(res['result'])



  warn_deprecated(
Number of requested results 20 is greater than number of elements in index 6, updating n_results = 6
Llama.generate: prefix-match hit




[1m> Entering new RetrievalQA chain...[0m
 The sample.py file defines two functions, one called 'addNumbers()' and another called 'printEnv()'. The 'addNumbers()' function takes two arguments and returns their sum, while the 'printEnv()' function prints out the value of the environment variable 'TEST'. The sample.py file is not used for anything specific and is just a simple example of how functions can be defined and used in Python.


llama_print_timings:        load time =     514.27 ms
llama_print_timings:      sample time =       9.33 ms /    87 runs   (    0.11 ms per token,  9326.76 tokens per second)
llama_print_timings: prompt eval time =   28442.72 ms /   565 tokens (   50.34 ms per token,    19.86 tokens per second)
llama_print_timings:        eval time =    6799.86 ms /    86 runs   (   79.07 ms per token,    12.65 tokens per second)
llama_print_timings:       total time =   35475.20 ms /   651 tokens



[1m> Finished chain.[0m
 The sample.py file defines two functions, one called 'addNumbers()' and another called 'printEnv()'. The 'addNumbers()' function takes two arguments and returns their sum, while the 'printEnv()' function prints out the value of the environment variable 'TEST'. The sample.py file is not used for anything specific and is just a simple example of how functions can be defined and used in Python.


In [None]:
res = qa("What does the sample2.py file do?")
print(res['result'])