# Summary Generator


Generate a summary starting from all the previously created documentation files.

## 0. Imports

In [29]:
import os
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import DirectoryLoader, UnstructuredMarkdownLoader
from langchain_text_splitters import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.prompts.prompt import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain



## 1. Loading Files

In [30]:
# Directory containing the previously generated Markdown documentation.
path = r"C:\Users\reply\Desktop\root-cause-analysis-asset\markdown-gpt-3.5"

# Load every top-level *.md file in that directory with the Markdown loader,
# displaying a progress bar while loading.
loader = DirectoryLoader(
    path,
    loader_cls=UnstructuredMarkdownLoader,
    show_progress=True,
    glob="./*.md",
)
docs = loader.load()

100%|██████████| 10/10 [00:01<00:00,  6.94it/s]


In [31]:
# Chat model with an explicit model name and a low sampling temperature.
# NOTE(review): `llm` is rebound further down with different settings.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo-1106",
    temperature=0.1,
)


In [32]:
#import runnable sequence
from langchain_core.runnables import RunnableSequence

In [33]:
# Pull the shared map / reduce prompts from the LangChain Hub, then wrap each
# one in an LLMChain driven by the current `llm`.
# NOTE(review): map_prompt, reduce_prompt, map_chain and reduce_chain are all
# reassigned later in this notebook from hand-written templates, so these
# hub-based versions appear to be unused - confirm before removing.
map_prompt = hub.pull("rlm/map-prompt")
reduce_prompt = hub.pull("rlm/reduce-prompt")
map_chain = LLMChain(prompt=map_prompt, llm=llm)
reduce_chain = LLMChain(prompt=reduce_prompt, llm=llm)




In [34]:
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain_text_splitters import CharacterTextSplitter

# Rebind `llm`: temperature 0 and no explicit model name. Chains constructed
# after this point use this instance.
llm = ChatOpenAI(
    temperature=0,
)

In [35]:

# --- Prompt texts -----------------------------------------------------------
# Both templates expose a single `{docs}` placeholder.

# Map step: describe the contents of one batch of documents.
map_template = """The following is a set of documents:
{docs}
Based on this list of documents, please make a short description of their contents.
Helpful Answer:"""

# Reduce step: merge the per-batch descriptions into one final summary.
reduce_template = """The following is a set of summaries:
{docs}
Please distill these summaries into a final, consolidated summary of the overall contents. Ensure the final summary captures the main points and key details from each document.
Helpful Answer:"""

# --- Prompt objects ---------------------------------------------------------
map_prompt = PromptTemplate.from_template(map_template)
reduce_prompt = PromptTemplate.from_template(reduce_template)

# --- LLM chains (one per step) ----------------------------------------------
map_chain = LLMChain(prompt=map_prompt, llm=llm)
reduce_chain = LLMChain(prompt=reduce_prompt, llm=llm)


In [36]:
# Build the reduce side of the pipeline.

# LLM chain that applies the reduce prompt.
reduce_chain = LLMChain(prompt=reduce_prompt, llm=llm)

# Joins a list of documents into one string and hands it to `reduce_chain`
# through the prompt's `docs` variable.
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="docs",
    llm_chain=reduce_chain,
)

# Combines the mapped documents, reducing them iteratively when needed.
# The same stuffing chain serves both as the final combine step and as the
# collapse step used if the documents exceed the context available to
# `StuffDocumentsChain`; documents are grouped into at most 4000 tokens.
reduce_documents_chain = ReduceDocumentsChain(
    token_max=4000,
    collapse_documents_chain=combine_documents_chain,
    combine_documents_chain=combine_documents_chain,
)

In [37]:
# Full pipeline: run the map chain over every document, then combine the
# results with the reduce pipeline.
#   - document_variable_name: the variable in the map chain's prompt that
#     receives the documents.
#   - return_intermediate_steps=False: the map-step results are not included
#     in the output.
map_reduce_chain = MapReduceDocumentsChain(
    document_variable_name="docs",
    llm_chain=map_chain,
    reduce_documents_chain=reduce_documents_chain,
    return_intermediate_steps=False,
)

In [38]:
def split_documents(doc, chunk_size=250, chunk_overlap=30):
    """Split one Markdown document into chunks in two passes.

    The text in ``doc.page_content`` is first split on level 1-3 Markdown
    headers (with ``strip_headers=False``), and the resulting sections are
    then split again by a recursive character splitter.

    Args:
        doc: Object exposing the Markdown text as ``page_content``.
        chunk_size: ``chunk_size`` given to the character splitter.
        chunk_overlap: ``chunk_overlap`` given to the character splitter.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for the header-level sections.
    """
    header_levels = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
    ]

    # Pass 1: cut the raw text at the configured header levels.
    header_sections = MarkdownHeaderTextSplitter(
        headers_to_split_on=header_levels,
        strip_headers=False,
    ).split_text(doc.page_content)

    # Pass 2: cut the header sections down by character count.
    size_splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    return size_splitter.split_documents(header_sections)

In [39]:
# Split every loaded document into chunks and collect them in one flat list,
# tagging each chunk with the name of the file it came from.
all_splits = []
for doc in docs:
    # Read the path from the explicit "source" key instead of relying on it
    # being the first metadata value, and compute the file name once per
    # document rather than once per chunk. Only the base name is kept, so the
    # chunks do not carry the full local path.
    filename = os.path.basename(doc.metadata["source"])

    splits = split_documents(doc, chunk_size=500, chunk_overlap=0)
    for doc_split in splits:
        # The metadata is replaced, not merged: anything the splitters
        # attached to the chunk is discarded on purpose.
        doc_split.metadata = {'source': filename}
    all_splits.extend(splits)

In [40]:
# Run the map-reduce summarisation over all chunks. The chunks are passed
# under the chain's document input key; the summary is later read from
# result["output_text"].
result = map_reduce_chain.invoke({"input_documents": all_splits})


In [41]:
def format_text(text):
    """Return *text* with a line break after each sentence-ending period.

    Every occurrence of a period followed by a space (``". "``) is replaced
    by a period followed by a newline. Periods not followed by a space
    (for example a trailing period) are left untouched.

    Args:
        text: The string to reformat.

    Returns:
        A new string; the argument itself is not modified.
    """
    # Work on the parameter rather than on a notebook-level variable, so the
    # function works on a fresh kernel and always formats what it was given.
    return text.replace(". ", ".\n")

In [42]:
# Take the final summary out of the chain result and reformat it.
summary_text = result["output_text"]
a = format_text(summary_text)

In [43]:
# Save the summary held in `a` to a file.
# str.replace returns a new string instead of modifying `a`, so the result is
# kept and that is what gets written.
formatted_summary = a.replace(". ", ".\n")

# Write as UTF-8 explicitly so non-ASCII characters in the model output do not
# depend on the platform's default encoding.
with open("output.txt", "w", encoding="utf-8") as file:
    file.write(formatted_summary)

In [44]:
# Prompt used to update an already existing summary with new material.
# It has two placeholders: `existing_summary` and `new_docs`.
refine_template = """We have an existing summary that needs to be updated based on new information.
                        Existing Summary:
                        {existing_summary}

                        New Information:
                        {new_docs}

                        Please refine the existing summary to incorporate the new information accurately and comprehensively.
                        Helpful Answer:"""

# Wrap the template in a prompt object and bind it to the current `llm`.
refine_prompt = PromptTemplate.from_template(refine_template)
refine_chain = LLMChain(prompt=refine_prompt, llm=llm)


In [45]:
# If the summary file has already been created, just update it.
# NOTE(review): the body of this `if` is missing, so the cell fails with
# "SyntaxError: incomplete input". Presumably it should pass the saved summary
# through `refine_chain` - confirm the intended behaviour and complete it.
if os.path.exists("output.txt"):
    

SyntaxError: incomplete input (3087848325.py, line 3)