In [1]:
!pip install langchain
!pip install chromadb
!pip install pdfplumber
!pip install tiktoken
!pip install lxml
!pip install torch
!pip install transformers
!pip install accelerate
!pip install sentence-transformers
!pip install einops
!pip install xformers
!pip install flask-ngrok



In [8]:
from langchain.document_loaders import PDFPlumberLoader,  TextLoader
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter
from transformers import pipeline
from langchain.prompts import PromptTemplate
from flask import Flask, request, jsonify
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from chromadb.errors import InvalidDimensionException
import torch
from transformers import AutoTokenizer
from flask_ngrok import run_with_ngrok
import re
import os

# Initialize the Flask app
app = Flask(__name__)
run_with_ngrok(app)

# Embedding model
EMB_SBERT_MPNET_BASE = "sentence-transformers/all-mpnet-base-v2"
model = SentenceTransformer(EMB_SBERT_MPNET_BASE)

LLM_FLAN_T5_BASE = "google/flan-t5-base"

config = {"persist_directory":None,
          "load_in_8bit":False,
          "embedding" : EMB_SBERT_MPNET_BASE,
          "llm":LLM_FLAN_T5_BASE,
          }

def create_sbert_mpnet():
        device = "cuda" if torch.cuda.is_available() else "cpu"
        return HuggingFaceEmbeddings(model_name=EMB_SBERT_MPNET_BASE, model_kwargs={"device": device})


def create_flan_t5_base(load_in_8bit=False):
        # Wrap it in HF pipeline for use with LangChain
        model="google/flan-t5-base"
        tokenizer = AutoTokenizer.from_pretrained(model)
        return pipeline(
            task="text2text-generation",
            model=model,
            tokenizer = tokenizer,
            max_new_tokens=500,
            do_sample = True, #check this
            model_kwargs={"device_map": "auto", "load_in_8bit": load_in_8bit, "max_length": 512, "temperature": 0.5}
        )

embedding = create_sbert_mpnet()
load_in_8bit = config["load_in_8bit"]
llm = create_flan_t5_base(load_in_8bit=load_in_8bit)

# Pre-Load the PDF document and prepare data
pdf_path = "/content/The 2011 Cricket World Cup.pdf"
loader = PDFPlumberLoader(pdf_path)
documents = loader.load()

try:
    docsearch = Chroma.from_documents(documents=documents, embedding=embedding)
except InvalidDimensionException:
    Chroma().delete_collection()
    docsearch = Chroma.from_documents(documents=documents, embedding=embedding)

# Split documents and create text snippets
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
text_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=10)
texts = text_splitter.split_documents(texts)

persist_directory = config["persist_directory"]
vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)

hf_llm = HuggingFacePipeline(pipeline=llm)
retriever = vectordb.as_retriever(search_kwargs={"k": 4})
qa = RetrievalQA.from_chain_type(llm=hf_llm, chain_type="stuff", retriever=retriever)

# Defining a default prompt for flan models
if config["llm"] == LLM_FLAN_T5_BASE:
    question_t5_template = """
    context: {context}
    question: {question}
    answer:
    """
    QUESTION_T5_PROMPT = PromptTemplate(
        template=question_t5_template, input_variables=["context", "question"]
    )
    qa.combine_documents_chain.llm_chain.prompt = QUESTION_T5_PROMPT

@app.route('/answer', methods=['POST'])
def get_answer():
    try:
        data = request.json

        question = data.get('question', '')

        qa.combine_documents_chain.verbose = True
        qa.return_source_documents = True
        res = qa({"query": question})

        return jsonify({"answer": res['result']}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


 * Running on http://cece-35-204-190-88.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


Both `max_new_tokens` (=500) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)




[1m> Entering new StuffDocumentsChain chain...[0m


INFO:werkzeug:127.0.0.1 - - [26/Sep/2023 08:07:10] "POST /answer HTTP/1.1" 200 -



[1m> Finished chain.[0m


Both `max_new_tokens` (=500) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)




[1m> Entering new StuffDocumentsChain chain...[0m


INFO:werkzeug:127.0.0.1 - - [26/Sep/2023 08:08:15] "POST /answer HTTP/1.1" 200 -



[1m> Finished chain.[0m


Both `max_new_tokens` (=500) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
INFO:werkzeug:127.0.0.1 - - [26/Sep/2023 08:08:50] "POST /answer HTTP/1.1" 200 -




[1m> Entering new StuffDocumentsChain chain...[0m

[1m> Finished chain.[0m


Both `max_new_tokens` (=500) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)




[1m> Entering new StuffDocumentsChain chain...[0m


INFO:werkzeug:127.0.0.1 - - [26/Sep/2023 08:09:04] "POST /answer HTTP/1.1" 200 -



[1m> Finished chain.[0m
