# Documents & Metadata

In [10]:
import os
import sys


In [11]:
# Display the kernel's current working directory.
os.getcwd()

'/Users/raoofmac/Documents/coding/learning/genai/rag/llama_bottoms_up_approaches'

In [44]:
from utils.markdown_docs_reader import MarkdownDocsReader
from llama_index.core import SimpleDirectoryReader


def load_markdown_docs(filepath):
    """Load only the markdown (``.md``) files found under a directory.

    The directory is scanned recursively and every ``.md`` file is parsed
    with ``MarkdownDocsReader``.

    Args:
        filepath: Path of the directory to scan.

    Returns:
        The result of ``SimpleDirectoryReader.load_data()`` for the
        markdown files that were found.
    """
    loader = SimpleDirectoryReader(
        input_dir=filepath,
        # Whitelist markdown rather than blacklisting a fixed set of
        # extensions: a blacklist silently lets through any file type that
        # is not listed (e.g. .yaml, .svg, .gif), which contradicts the
        # "markdown only" contract of this helper.
        required_exts=[".md"],
        file_extractor={".md": MarkdownDocsReader()},
        recursive=True,
    )

    return loader.load_data()

In [45]:
## Load our documents from each folder.

# NOTE: absolute, machine-specific location of the checkout that holds the docs/ tree.
base_path = '/Users/raoofmac/Documents/coding/learning/genai'


def _docs_under(subdir):
    """Load the markdown docs stored in `subdir`, given relative to `base_path`."""
    return load_markdown_docs(os.path.join(base_path, subdir))


# One document list per documentation section.
getting_started_docs = _docs_under("docs/getting_started")
community_docs = _docs_under("docs/community")
data_docs = _docs_under("docs/core_modules/data_modules")
agent_docs = _docs_under("docs/core_modules/agent_modules")
model_docs = _docs_under("docs/core_modules/model_modules")
query_docs = _docs_under("docs/core_modules/query_modules")
supporting_docs = _docs_under("docs/core_modules/supporting_modules")
tutorial_docs = _docs_under("docs/end_to_end_tutorials")
contributing_docs = _docs_under("docs/development")

In [46]:
## MetadataMode selects which metadata is included when a document's content is rendered
from llama_index.core.schema import MetadataMode

In [47]:
# Render the first getting-started document with metadata_mode=MetadataMode.ALL,
# so its metadata is printed ahead of the content.
print(getting_started_docs[0].get_content(metadata_mode=MetadataMode.ALL))

File Name: /Users/raoofmac/Documents/coding/learning/genai/docs/getting_started/concepts.md
Content Type: code
Header Path: High-Level Concepts
file_path: /Users/raoofmac/Documents/coding/learning/genai/docs/getting_started/concepts.md
file_name: concepts.md
file_type: None
file_size: 5045
creation_date: 2024-02-18
last_modified_date: 2024-02-18
last_accessed_date: 2024-02-18

```{tip}
If you haven't, install and complete starter tutorial before you read this. It will make a lot more sense!
```


In [48]:
# Inspect the raw metadata dict attached to the first agent document.
print(agent_docs[0].metadata)

{'File Name': '/Users/raoofmac/Documents/coding/learning/genai/docs/core_modules/agent_modules/agents/modules.md', 'Content Type': 'text', 'Header Path': 'Module Guides', 'Links': '', 'file_path': '/Users/raoofmac/Documents/coding/learning/genai/docs/core_modules/agent_modules/agents/modules.md', 'file_name': 'modules.md', 'file_type': None, 'file_size': 619, 'creation_date': '2024-02-18', 'last_modified_date': '2024-02-18', 'last_accessed_date': '2024-02-18'}


In [49]:
# Overall layout of a rendered document: a metadata header, then the content body.
text_template = "Content Metadata:\n{metadata_str}\n\nContent:\n{content}"

# Each metadata entry is rendered as "key: value," and entries are joined by one space.
metadata_template = "{key}: {value},"
metadata_seperator = " "  # spelling matches the attribute assigned on each document below

# Apply the same formatting to every agent document.
for doc in agent_docs:
    (
        doc.text_template,
        doc.metadata_template,
        doc.metadata_seperator,
    ) = (text_template, metadata_template, metadata_seperator)



In [50]:
# Re-render the first agent document with all metadata to see how it is laid out
# by the text/metadata templates currently set on the document.
print(agent_docs[0].get_content(metadata_mode=MetadataMode.ALL))

Content Metadata:
File Name: /Users/raoofmac/Documents/coding/learning/genai/docs/core_modules/agent_modules/agents/modules.md, Content Type: text, Header Path: Module Guides, Links: , file_path: /Users/raoofmac/Documents/coding/learning/genai/docs/core_modules/agent_modules/agents/modules.md, file_name: modules.md, file_type: None, file_size: 619, creation_date: 2024-02-18, last_modified_date: 2024-02-18, last_accessed_date: 2024-02-18,

Content:
These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.


### Advanced Customization

In [55]:
# Hide the "File Name" key from the metadata rendered in LLM mode,
# then print that view of the first agent document.
agent_docs[0].excluded_llm_metadata_keys = ["File Name"]
print(agent_docs[0].get_content(metadata_mode=MetadataMode.LLM))

Content Metadata:
Content Type: text, Header Path: Module Guides, Links: , file_path: /Users/raoofmac/Documents/coding/learning/genai/docs/core_modules/agent_modules/agents/modules.md, file_name: modules.md, file_type: None, file_size: 619, creation_date: 2024-02-18, last_modified_date: 2024-02-18, last_accessed_date: 2024-02-18,

Content:
These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.


In [54]:
# Hide the "File Name" key from the metadata rendered in EMBED mode,
# then print that view of the first agent document.
agent_docs[0].excluded_embed_metadata_keys = ["File Name"]
print(agent_docs[0].get_content(metadata_mode=MetadataMode.EMBED))

Content Metadata:
Content Type: text, Header Path: Module Guides, Links: , file_path: /Users/raoofmac/Documents/coding/learning/genai/docs/core_modules/agent_modules/agents/modules.md, file_name: modules.md, file_type: None, file_size: 619, creation_date: 2024-02-18, last_modified_date: 2024-02-18, last_accessed_date: 2024-02-18,

Content:
These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.
