In [19]:
import os
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.messages import HumanMessage

In [3]:
PROMPT = """
Your task is to summarize this markdown text into an one paragraph text, keeping the main key points.
Discard information that is not relevant.
Discard any symbles that are not relevant for the semantic structure.
The text contain a kind of documentation, try to keep the examples highligted with a ``` between quotes (") among the paragraph. 

Text to summarize: {}
"""

In [13]:
documents = []
for (root, dirs, files) in os.walk("../documents"):
    for file in files:
        pwd = f'{root}/{file}'
        with open(pwd, 'r', encoding='utf-8') as f:
            content = f.read()
        documents.append({
            'name': file,
            'content': content
        })

In [5]:
print(documents[:3])

[{'path': '../documents/introduction.md', 'content': "# Introduction to AI Agents\n\n\nAgents are revolutionizing the way we approach complex tasks, leveraging the power of large language models (LLMs) to work on our behalf and achieve remarkable results. In this guide we will dive into the fundamentals of AI agents, exploring their capabilities, design patterns, and potential applications.\n\n## What is an Agent?\n\n![Agent Components](../../img/agents/agent-components.png)\n\nIn this guide, we refer to an agent as an LLM-powered system designed to take actions and solve complex tasks autonomously. Unlike traditional LLMs, AI agents go beyond simple text generation. They are equipped with additional capabilities, including:\n\n- **Planning and reflection:** AI agents can analyze a problem, break it down into steps, and adjust their approach based on new information.\n- **Tool access:** They can interact with external tools and resources, such as databases, APIs, and software applicati

In [20]:
llm = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0.0
)

In [21]:
new_path = '../documents/sum'
for index, doc in enumerate(documents):
    messages = [
        HumanMessage(content=PROMPT.format(doc['content']))
    ]

    print(f'Summarizing {index} doc(s) named: {doc['name']}')
    res = llm.invoke(messages)

    content = res.content.split('</think>')[-1]

    new_pwd = f'{new_path}/{doc['name']}'
    print(f'Saving {index} doc(s) to {new_pwd}')
    with open(new_pwd, 'w', encoding='utf-8') as f:
        f.write(content)


Summarizing 0 doc(s) named: introduction.md
Saving 0 doc(s) to ../documents/sum/introduction.md
Summarizing 1 doc(s) named: coding.md
Saving 1 doc(s) to ../documents/sum/coding.md
Summarizing 2 doc(s) named: context-caching.md
Saving 2 doc(s) to ../documents/sum/context-caching.md
Summarizing 3 doc(s) named: finetuning-gpt4o.md
Saving 3 doc(s) to ../documents/sum/finetuning-gpt4o.md
Summarizing 4 doc(s) named: function_calling.md
Saving 4 doc(s) to ../documents/sum/function_calling.md
Summarizing 5 doc(s) named: generating_textbooks.md
Saving 5 doc(s) to ../documents/sum/generating_textbooks.md
Summarizing 6 doc(s) named: generating.md
Saving 6 doc(s) to ../documents/sum/generating.md
Summarizing 7 doc(s) named: pf.md
Saving 7 doc(s) to ../documents/sum/pf.md
Summarizing 8 doc(s) named: synthetic_rag.md
Saving 8 doc(s) to ../documents/sum/synthetic_rag.md
Summarizing 9 doc(s) named: workplace_casestudy.md
Saving 9 doc(s) to ../documents/sum/workplace_casestudy.md
Summarizing 10 doc(s) 