# <font color=red>LangChain:  Llama-2</font>
- https://docs.langchain.com/docs

<span style="font-family:'Comic Sans MS', cursive, sans-serif;"><font color=orange>
## Llama-2
</font></span>

### This demo shows how to use LangChain with Llama-2, which is currently somewhat different from using it with OpenAI models, because LangChain does not have built-in support for Llama-2 at the moment.
The demo assumes you have a local copy of Llama-2-7b-chat-hf installed.<br>
It uses a file named 83332.12.txt, which contains genomic data.<br>
It places the data into a ChromaDB vector database and uses the database in conjunction with Llama-2 to answer queries.

In [None]:
from langchain.vectorstores import Chroma
## from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline

# for device detection and llama-2
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load the single text file that the demo answers questions about.
loader = TextLoader("./83332.12.txt")
documents = loader.load()

# Split strictly on newlines so that each non-empty line of the file becomes
# one chunk.  chunk_size is the smallest positive value: every line is at
# least that long, so lines are never merged together, and the splitter is
# not asked to accept a zero chunk size (which the earlier version relied on).
# NOTE(review): assumes the splitter drops empty pieces before the size check,
# as in the LangChain release this was written against -- confirm on upgrade.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n"],
    keep_separator=False,
    chunk_size=1,
    chunk_overlap=0,
    length_function=len,
    is_separator_regex=False,
    # add_start_index=True,
)
texts = text_splitter.split_documents(documents)

# Local sentence-transformers embeddings (used here instead of OpenAI's).
# Run them on the GPU when one is available and fall back to the CPU
# otherwise, rather than failing on machines without CUDA.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": embedding_device}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Build the vector store; no persist directory is given, so nothing is persisted.
vectordb = Chroma.from_documents(documents=texts, embedding=embeddings)

# Retrieve the 2 closest chunks per query instead of the default 4.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})

# ---- Llama-2 language model ----
# OpenAI alternative, kept for reference:
#   llm = ChatOpenAI(model_name="gpt-4", temperature=0.0, max_tokens=128)

# Step 1: load the model from the local checkpoint directory.
model_id = "./Llama-2-7b-chat-hf"
model_config = transformers.AutoConfig.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    device_map="auto",
    # When loading from the Hugging Face hub instead of a local directory:
    #   trust_remote_code=True,
    #   use_auth_token=hf_key,
)
model.eval()  # inference only
print(f"DEVICE {next(model.parameters()).device}")

# Step 2: load the matching tokenizer from the same directory.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Step 3: combine model and tokenizer into a text-generation pipeline.
generation_settings = {
    "max_new_tokens": 128,
    "temperature": 0.3,
    "repetition_penalty": 1.1,
    "return_full_text": True,
}
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    **generation_settings,
)

# Step 4: wrap the pipeline so LangChain can use it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

# Question-answering chain: the "stuff" chain type puts all retrieved chunks
# into the prompt at once.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    # return_source_documents=True,  # left off; the source is already known
)

# Expected answer: Mycobacterium tuberculosis H37Rv
query = "Which genome has ID 83332.12 ?"

# Run the query and print the answer under a "RESPONSE" header line.
llm_response = qa_chain(query)
print("RESPONSE", llm_response["result"], sep="\n")