In [21]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate

In [22]:
# Import pysqlite3 so that it is registered in sys.modules.
__import__('pysqlite3')
import sys
# Re-register the loaded pysqlite3 module under the name 'sqlite3' and drop
# its 'pysqlite3' entry, so that a later `import sqlite3` resolves to
# pysqlite3 instead of the standard-library module.
# NOTE(review): presumably needed because the Chroma vector store used later
# requires a newer SQLite than the system provides — confirm.
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

## Create a summary chain using map reduce

In [23]:
# Summarization prompt: the text to summarize is substituted for {text}, and
# the trailing cue asks the model for a concise summary in German.
prompt_template = (
    "Write a concise summary of the following:\n\n\n"
    "{text}\n\n\n"
    "CONCISE SUMMARY IN GERMAN:"
)
PROMPT = PromptTemplate(input_variables=["text"], template=prompt_template)

In [24]:
from langchain.chains.summarize import load_summarize_chain
from langchain.llms import HuggingFaceTextGenInference

import os

# Endpoint of the text-generation inference server. It is read from the
# INFERENCE_SERVER_URL environment variable when that is set, so the notebook
# can be pointed at a server without editing this cell. When the variable is
# unset the value falls back to the original empty placeholder, which has to
# be replaced with a real URL before the chains below can be used.
inference_server_url = os.environ.get("INFERENCE_SERVER_URL", "")

# LLM client for the inference server; the keyword arguments are the
# generation settings used for every call made through this object.
llm = HuggingFaceTextGenInference(
    inference_server_url=inference_server_url,
    max_new_tokens=512,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.1,
    repetition_penalty=1.175,
)

# Map-reduce summarization chain. The same PROMPT is used for both the map
# step and the combine step.
summary_chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=PROMPT,
    combine_prompt=PROMPT,
)

In [25]:
from langchain.chains import AnalyzeDocumentChain

# Wrap the summary chain in an AnalyzeDocumentChain, which uses it as its
# combine_docs_chain.
summarize_document_chain = AnalyzeDocumentChain(
    combine_docs_chain=summary_chain,
)

In [26]:
from langchain.document_loaders import PyPDFDirectoryLoader

# Folder (relative to the working directory) handed to the PDF loader.
pdf_folder_path = "pdfs"

# Build the directory loader and load the documents it finds.
loader = PyPDFDirectoryLoader(pdf_folder_path)
docs = loader.load()

In [27]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks using chunk_size=1024 and
# chunk_overlap=40.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=40,
)
all_splits = text_splitter.split_documents(docs)

In [28]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Build a Chroma vector store from the chunks, embedding them with a
# HuggingFaceEmbeddings instance constructed with its default settings.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=HuggingFaceEmbeddings(),
)

In [30]:
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
# Expose the vector store as a retriever and build a "stuff"-type
# RetrievalQA chain on top of it, using the same LLM as the summary chain.
db_retriever = vectorstore.as_retriever()
langchain_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db_retriever,
)

In [31]:
def summarize_output(topic):
    """Query the retrieval QA chain about ``topic`` and summarize its answer.

    ``topic`` is passed to ``langchain_qa``; the ``'result'`` entry of the
    response is then run through ``summarize_document_chain``.

    Args:
        topic: The query handed to ``langchain_qa``.

    Returns:
        Whatever ``summarize_document_chain.run`` returns for the QA result.
    """
    qa_response = langchain_qa(topic)
    return summarize_document_chain.run(qa_response['result'])

In [32]:
# Topic passed to summarize_output in the next cell.
query = "Langchain concepts"

In [33]:
# Run the QA-then-summarize helper for the query. As the last expression in
# the cell, its return value is shown as the cell output.
summarize_output(query)

'\n\nDas LangChain-Rahmenwerk ist eine Basis für zwei Schlüsselprinzipien: Datenbewusstsein und Agentik. Mit dem Datenbewusstsein wird ein Sprachmodell mit anderen Quellen von Daten verbunden, während die Agentik erlaubt, dass ein Sprachmodell seine Umgebung interagieren kann. Dies ermöglicht Entwicklern, mehr mächtige und unterschiedliche Anwendungen zu entwickeln als nur einen Sprachmodell anzurufen.'