In [None]:
import dotenv
import os

from genai.model import GenerateParams
from genai import Credentials
from genai.extensions.langchain import LangChainInterface

from langchain.chains.summarize import load_summarize_chain
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader

In [None]:
# Splitter settings: both values are passed to the RecursiveCharacterTextSplitter
# built further down in this notebook.
chunk_size, chunk_overlap = 2000, 100

In [None]:
# Load environment variables via python-dotenv, then read the service
# credentials from the environment.
dotenv.load_dotenv()

api_key = os.getenv("GENAI_KEY")       # None when the variable is not set
api_endpoint = os.getenv("GENAI_API")  # None when the variable is not set
creds = Credentials(api_key, api_endpoint)

# Generation settings; `params` is handed to the LLM wrapper created below.
generation_settings = {
    "decoding_method": "greedy",
    "max_new_tokens": 300,
    "min_new_tokens": 15,
    "repetition_penalty": 2,
}
params = GenerateParams(**generation_settings)

# Splitter used to cut the loaded documents into chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)

In [None]:
# LLM wrapper configured with the credentials and generation parameters
# defined earlier in the notebook.
llm = LangChainInterface(
    model="meta-llama/llama-2-13b",
    credentials=creds,
    params=params,
)

# Summarization chain (map_reduce). Note: the name `chain` is rebound to a
# question-answering chain in a later cell of this notebook.
chain = load_summarize_chain(chain_type="map_reduce", llm=llm)

# Disabled smoke test for the summarization chain:
# doc = Document(page_content="generate a story about cat")
# summary = chain.run([doc])
# print(summary)

In [None]:
from pathlib import Path

# Collect chunked documents from every PDF under the menu directory.
menu_dir = Path('../../menu/')
docs = []

for path in menu_dir.rglob('*.pdf'):
    # rglob() descends into subdirectories, so use the path it yields rather
    # than rebuilding one from `path.name` (which would drop any subdirectory
    # component and point at a file that is not there).
    pdf_path = str(path)
    print(pdf_path)
    loader = PyPDFLoader(pdf_path)
    data = loader.load()
    # Split the loaded pages into chunks and accumulate them for indexing.
    docs.extend(text_splitter.split_documents(data))

In [None]:
# Embedding model (instructor-large, run on CPU) used to vectorize the chunks.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs=dict(device="cpu"),
)

# Build the FAISS vector store from the chunked documents.
db = FAISS.from_documents(docs, embeddings)

In [None]:
# Read the questions, one per line, from sample.txt. The `with` block closes
# the file handle as soon as the lines are read (the bare open() previously
# left it open for the life of the kernel).
with open("sample.txt", "r") as file:
    # readlines() keeps each line's trailing newline.
    questions = file.readlines()

print(questions)

In [None]:
# Question-answering chain that "stuffs" the retrieved chunks into one prompt.
chain = load_qa_chain(llm, chain_type="stuff")

for query in questions:
    # Lines read from the questions file still carry their trailing newline,
    # and the file may contain blank lines; clean both up so the retriever and
    # the LLM never receive an empty or newline-terminated question.
    query = query.strip()
    if not query:
        continue

    print("Q:"+query)
    # Retrieve the 3 chunks most similar to the question.
    resultdocs = db.similarity_search(query, k=3)
    # for res in resultdocs:
    #     print(res)
    answer = chain.run(input_documents=resultdocs, question=query)
    print("A:"+answer)