In [None]:
!pip install langchain
!pip install pypdf
!pip install unstructured
!pip install sentence_transformers
!pip install pinecone-client
!pip install llama-cpp-python
!pip install huggingface_hub
!pip install faiss-cpu

# import all the libraries

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader
import magic
import os
import nltk
from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone, FAISS
from sentence_transformers import SentenceTransformer
from langchain.chains.question_answering import load_qa_chain
import pinecone
import os
from langchain.llms import HuggingFaceHub

# load the file and extract the text

In [None]:
# Read the source PDF through LangChain's PDF loader and keep the result in
# `documents` for the splitting step below.
PDF_PATH = "/content/platform.pdf"
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

# Show the first loaded document as a quick sanity check.
documents[0]

# splitting in small chunks/parts

In [None]:
# Splitter configuration: small chunks (size 100) with an overlap of 10 between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=10,
    chunk_size=100,
)

In [None]:
# Break the loaded documents into chunks; `docs` is what gets embedded and indexed.
docs = text_splitter.split_documents(documents)

In [None]:
# Number of chunks produced by the splitter.
len(docs)

# Setting the API keys

In [None]:
# Credentials must never be written into the notebook. They are read from the
# environment; the Hugging Face token is prompted for (without echo) only when
# the environment does not already provide it.
# NOTE: any token that was previously committed in this cell should be revoked.
from getpass import getpass

if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face Hub API token: ")

# Pinecone settings come from the environment; the key is None when it is unset.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'us-west4-gcp-free')

# embedding

In [None]:
# Embedding model used to vectorise both the chunks and the queries.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

# Load the chunks into a FAISS vector store

In [None]:
# Build the FAISS index from the chunks, embedding each one with `embeddings`.
docsearch = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)


# Similarity search: `k` is the number of most similar chunks to return

In [None]:
# Sanity-check retrieval: fetch the k=10 chunks most similar to the query.
# The hits are stored under their own name so that `docs` (the full list of
# chunks that was indexed) is not overwritten; re-running the indexing cell or
# the `len(docs)` cell afterwards therefore still sees every chunk.
query = "Explain Optiflow plan"
similar_docs = docsearch.similarity_search(query, k=10)
similar_docs

# LLM: use a quantized model because computational power is limited

In [None]:
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose

In [None]:
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from huggingface_hub import hf_hub_download
from langchain.chains.question_answering import load_qa_chain

In [None]:
# Token-wise streaming: the handler writes generated tokens to stdout, and the
# manager is what gets handed to the LLM.
stdout_streamer = StreamingStdOutCallbackHandler()
callback_manager = CallbackManager([stdout_streamer])
# Reminder: the LLM has to be created with verbose=True for the callback
# manager to be used.

# llm model name

In [None]:
# File name of the quantized weights (.bin format) inside the repository.
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"
# Hugging Face Hub repository that hosts the file.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"

In [None]:
# Download the weights file from the Hub; `model_path` is what LlamaCpp is given.
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)

In [None]:
# Tuning knobs: adjust both to the model and to the GPU's VRAM pool.
n_gpu_layers = 40  # change based on the model and the available VRAM
n_batch = 256  # should be between 1 and n_ctx; consider the VRAM of the GPU

# Gather the constructor arguments first, then load the model from them.
llm_settings = dict(
    model_path=model_path,
    n_ctx=1024,
    max_tokens=256,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    callback_manager=callback_manager,
    verbose=True,
)
llm = LlamaCpp(**llm_settings)

# Create a LangChain chain: chain the models together

In [None]:
# Question-answering chain of type "stuff" built on top of the local LLM.
chain = load_qa_chain(
    llm,
    chain_type="stuff",
)

In [None]:
# Retrieve the chunks most similar to the question, then let the QA chain
# answer the question from those chunks.
query = "Explain the second phase"
docs = docsearch.similarity_search(query)
chain.run(question=query, input_documents=docs)

## Prompt engineering

In [None]:
from langchain import PromptTemplate

In [None]:
from langchain import PromptTemplate

# Prompt text with two placeholders, {context} and {question}. It is written
# line by line; the resulting string is the same as a single block of text.
custom_prompt_template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Explain the answer in 2-3 sentence.\n"
    "Helpful answer:\n"
)

# Wrap the text in a PromptTemplate that declares both placeholders.
prompt_template = PromptTemplate(
    template=custom_prompt_template,
    input_variables=["context", "question"],
)

In [None]:
# Ask the LLM directly through the custom prompt.
# The context is retrieved for *this* question (previously the chunks fetched
# for an unrelated earlier query were reused), and only the chunks' text is
# inserted into the prompt instead of the repr of a list of Document objects.
automation_question = "Explain the Document Automation Solution"
automation_docs = docsearch.similarity_search(automation_question)
automation_context = "\n\n".join(doc.page_content for doc in automation_docs)

print(llm(
    prompt_template.format(
        context=automation_context,
        question=automation_question,
    )
))

In [None]:
# Ask the LLM directly through the custom prompt.
# The context is retrieved for the exact question being asked, and only the
# chunks' text is inserted into the prompt instead of the repr of a list of
# Document objects.
second_phase_question = "Explain the second phase of optiflow"
second_phase_docs = docsearch.similarity_search(second_phase_question)
second_phase_context = "\n\n".join(doc.page_content for doc in second_phase_docs)

print(llm(
    prompt_template.format(
        context=second_phase_context,
        question=second_phase_question,
    )
))