In [9]:
from llama_index.schema import MetadataMode
from llama_index import SimpleDirectoryReader
from helpers.markdown_docs_reader import MarkdownDocsReader

# Loading only MD files

In [6]:
def load_markdown_docs(filepath):
    """Load only the Markdown (``.md``) files found under a directory.

    Args:
        filepath: Directory to scan; sub-directories are searched recursively.

    Returns:
        The result of ``SimpleDirectoryReader.load_data()`` for the matched
        files, with every ``.md`` file parsed by ``MarkdownDocsReader``.
    """
    loader = SimpleDirectoryReader(
        input_dir=filepath,
        # Allow-list rather than a deny-list of extensions: a deny-list lets
        # any file type that was not anticipated (e.g. .yaml, .svg, .gif)
        # slip through, which contradicts "markdown only".
        required_exts=[".md"],
        file_extractor={".md": MarkdownDocsReader()},
        recursive=True,
    )

    return loader.load_data()

In [14]:
# Load every documentation section with the same markdown-only loader.
# The order of the target names matches the order of the directories below.
(
    getting_started_docs,
    community_docs,
    data_docs,
    agent_docs,
    model_docs,
    query_docs,
    supporting_docs,
    tutorials_docs,
    contributing_docs,
) = [
    load_markdown_docs(section_dir)
    for section_dir in (
        "data/docs/getting_started",
        "data/docs/community",
        "data/docs/core_modules/data_modules",
        "data/docs/core_modules/agent_modules",
        "data/docs/core_modules/model_modules",
        "data/docs/core_modules/query_modules",
        "data/docs/core_modules/supporting_modules",
        "data/docs/end_to_end_tutorials",
        "data/docs/development",
    )
]

# Meta Data

In [15]:
# Render the sixth agent document with every metadata entry included
# (MetadataMode.ALL), using the templates the document currently carries.
print(agent_docs[5].get_content(metadata_mode=MetadataMode.ALL))

File Name: data/docs/core_modules/agent_modules/agents/root.md
Content Type: text
Header Path: Data Agents/Concept/Tool Abstractions
Links: 
file_path: data/docs/core_modules/agent_modules/agents/root.md
file_name: root.md
file_type: None
file_size: 2340
creation_date: 2023-12-17
last_modified_date: 2023-12-17
last_accessed_date: 2023-12-17

You can learn more about our Tool abstractions in our Tools section.


### 01. access meta-data

In [16]:
# The metadata is also available directly as an attribute of the document.
print(agent_docs[0].metadata)

{'File Name': 'data/docs/core_modules/agent_modules/agents/modules.md', 'Content Type': 'text', 'Header Path': 'Module Guides', 'Links': '', 'file_path': 'data/docs/core_modules/agent_modules/agents/modules.md', 'file_name': 'modules.md', 'file_type': None, 'file_size': 619, 'creation_date': '2023-12-17', 'last_modified_date': '2023-12-17', 'last_accessed_date': '2023-12-17'}


### 02. you can even change how meta-data is rendered

In [19]:
# Wrapper used when a document's text is rendered together with its metadata.
text_template = "Content Metadata:\n{metadata_str}\n\nContent:\n{content}"

# How a single metadata entry is rendered, and the string placed between entries.
metadata_template = "{key}: {value},"
# NOTE: the spelling "seperator" is intentional here - it is the attribute name
# the document objects expose, so do not "correct" it to "separator".
metadata_seperator = " "

# The documents are mutated in place, so there is no need to write them back
# into the list.
for doc in agent_docs:
    doc.text_template = text_template
    doc.metadata_template = metadata_template
    doc.metadata_seperator = metadata_seperator

In [21]:
print(agent_docs[0].get_content(metadata_mode=MetadataMode.ALL))

Content Metadata:
File Name: data/docs/core_modules/agent_modules/agents/modules.md, Content Type: text, Header Path: Module Guides, Links: , file_path: data/docs/core_modules/agent_modules/agents/modules.md, file_name: modules.md, file_type: None, file_size: 619, creation_date: 2023-12-17, last_modified_date: 2023-12-17, last_accessed_date: 2023-12-17,

Content:
These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.


### 03. advanced meta-data customization

In [22]:
# Hide the "File Name" entry from the LLM-facing rendering of the first
# agent document, then show what that rendering looks like.
first_agent_doc = agent_docs[0]
first_agent_doc.excluded_llm_metadata_keys = ["File Name"]
llm_view = first_agent_doc.get_content(metadata_mode=MetadataMode.LLM)
print(llm_view)

Content Metadata:
Content Type: text, Header Path: Module Guides, Links: , file_path: data/docs/core_modules/agent_modules/agents/modules.md, file_name: modules.md, file_type: None, file_size: 619, creation_date: 2023-12-17, last_modified_date: 2023-12-17, last_accessed_date: 2023-12-17,

Content:
These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.
