diff --git a/content/get-started/installation/streamlit-playground.md b/content/get-started/installation/streamlit-playground.md index f0fbf8d87..260098328 100644 --- a/content/get-started/installation/streamlit-playground.md +++ b/content/get-started/installation/streamlit-playground.md @@ -1,6 +1,7 @@ --- title: Use Streamlit Playground in your browser slug: /get-started/installation/streamlit-playground +description: Quick start guide to Streamlit using the Streamlit Playground - no installation required. --- # Use Streamlit Playground in your browser diff --git a/content/kb/FAQ/_index.md b/content/kb/FAQ/_index.md index 2883462f5..7e70108b3 100644 --- a/content/kb/FAQ/_index.md +++ b/content/kb/FAQ/_index.md @@ -1,6 +1,7 @@ --- title: FAQ slug: /knowledge-base/using-streamlit +description: Explore answers to frequently asked questions about developing a Streamlit app. --- # FAQ diff --git a/content/kb/_index.md b/content/kb/_index.md index 822337496..ff7106083 100644 --- a/content/kb/_index.md +++ b/content/kb/_index.md @@ -1,6 +1,7 @@ --- title: Knowledge Base slug: /knowledge-base +description: Explore troubleshooting guides for common problems. --- # Knowledge base diff --git a/content/kb/dependencies/_index.md b/content/kb/dependencies/_index.md index 9ed119344..638b2f50c 100644 --- a/content/kb/dependencies/_index.md +++ b/content/kb/dependencies/_index.md @@ -1,6 +1,7 @@ --- title: Installing dependencies slug: /knowledge-base/dependencies +description: Explore common dependency and environment problems, and see possible solutions. --- # Installing dependencies diff --git a/content/kb/deployments/_index.md b/content/kb/deployments/_index.md index 208f3b8c3..f00396cc4 100644 --- a/content/kb/deployments/_index.md +++ b/content/kb/deployments/_index.md @@ -1,6 +1,7 @@ --- title: Deployment Issues slug: /knowledge-base/deploy +description: Explore common deployment problems and solutions. 
--- # Deployment-related questions and errors diff --git a/makefile b/makefile index 60b7854ab..f3bea7d4b 100644 --- a/makefile +++ b/makefile @@ -11,11 +11,15 @@ start: npm run start .PHONY: export -export: llms +export: llms llms-full npm run export .PHONY: llms llms: + uv run python/generate_llms_txt.py + +.PHONY: llms-full +llms-full: uv run python/generate_llms_full_txt.py .PHONY: lint diff --git a/public/llms-full.txt b/public/llms-full.txt index 128908e7f..d62cdca3b 100644 --- a/public/llms-full.txt +++ b/public/llms-full.txt @@ -1,2 +1 @@ This gets autogenerated on deploy. See `make export`. - diff --git a/public/llms.txt b/public/llms.txt new file mode 100644 index 000000000..d62cdca3b --- /dev/null +++ b/public/llms.txt @@ -0,0 +1 @@ +This gets autogenerated on deploy. See `make export`. diff --git a/python/generate_content_catalog.py b/python/generate_content_catalog.py deleted file mode 100644 index 75063b488..000000000 --- a/python/generate_content_catalog.py +++ /dev/null @@ -1,84 +0,0 @@ -# /// script -# dependencies = [ -# "python-frontmatter>=1.0.0", -# ] -# /// -# -# This file is used to generate content_catalog.json, which contains -# the markdown contents for every page of Streamlit's documentation in -# the format [{url: "the_url", content: "the_content"}, ...] -# -# content_catalog.json is used in the RAG pipeline for st-assistant.streamlit.app. -# -# In the future, we may also want to use this file to generate llms.txt -# and llms-full.txt, since they serve a very similar purpose to content_catalog.json. -# -# Usage: -# uv run generate_content_catalog.py - -import json -from typing import List, Dict, Optional -import frontmatter -from pathlib import Path - - -def process_markdown_files(content_dir: Path) -> List[Dict[str, Optional[str]]]: - """Process all markdown files in the content directory and its subdirectories. - - Args: - content_dir: Path to the content directory containing markdown files. 
- - Returns: - List of dictionaries containing 'url' (from frontmatter slug) and 'content' for each markdown file. - """ - content_catalog: List[Dict[str, Optional[str]]] = [] - - # Walk through all directories and files - for file_path in content_dir.rglob("*.md"): - try: - # Read the content of the markdown file with frontmatter - post = frontmatter.load(file_path) - - # Get the URL from frontmatter slug if it exists, otherwise set to null - url = post.get("slug") - - if not url: - continue - - url = f"https://docs.streamlit.io{url}" - - # Add to catalog - content_catalog.append({"url": url, "content": post.content}) - except frontmatter.FrontmatterError as e: - print(f"Error parsing frontmatter in {file_path}: {str(e)}") - except Exception as e: - print(f"Error processing {file_path}: {str(e)}") - - return content_catalog - - -def main() -> None: - """Generate a content catalog JSON file from markdown files in the content directory.""" - # Get the content directory path (sibling to the python directory) - content_dir = Path(__file__).parent.parent / "content" - - # Process all markdown files - content_catalog = process_markdown_files(content_dir) - - # Write the catalog to a JSON file in the python directory - output_file = Path(__file__).parent / "content_catalog.json" - try: - output_file.write_text( - json.dumps(content_catalog, ensure_ascii=False, indent=2), encoding="utf-8" - ) - print( - f"Successfully generated {output_file} with {len(content_catalog)} entries" - ) - except json.JSONEncodeError as e: - print(f"Error encoding JSON for {output_file}: {str(e)}") - except Exception as e: - print(f"Error writing {output_file}: {str(e)}") - - -if __name__ == "__main__": - main() diff --git a/python/generate_llms_full_txt.py b/python/generate_llms_full_txt.py index 2cc7a64b2..080db7e16 100644 --- a/python/generate_llms_full_txt.py +++ b/python/generate_llms_full_txt.py @@ -280,8 +280,7 @@ def replace_autofunction_tags(content: str, function_info: Dict[str, Any]) 
-> st else: if function_name: print( - f"Warning: Function '{function_name}' not found in " - "streamlit.json" + f"Warning: Function '{function_name}' not found in streamlit.json" ) # If function not found, remove the tag but leave a placeholder tag.replace_with(f"[Function '{function_name}' not found]") @@ -303,7 +302,7 @@ def process_markdown_files(content_dir: Path) -> List[Dict[str, Optional[str]]]: content_dir: Path to the content directory containing markdown files. Returns: - List of dictionaries containing 'url' (from frontmatter slug) and 'content' + List of dictionaries containing 'url' (from frontmatter slug) and 'content' for each markdown file. """ content_catalog: List[Dict[str, Optional[str]]] = [] @@ -398,4 +397,3 @@ def main() -> None: if __name__ == "__main__": main() - diff --git a/python/generate_llms_txt.py b/python/generate_llms_txt.py new file mode 100644 index 000000000..c5f9644e6 --- /dev/null +++ b/python/generate_llms_txt.py @@ -0,0 +1,195 @@ +# /// script +# dependencies = [ +# "python-frontmatter>=1.0.0", +# ] +# /// +# +# This file is used to generate llms.txt, which describes the structure +# of Streamlit's documentation, with links for more info. +# +# The llms.txt file is a standardized format for describing a project's +# structure for Large Language Models (LLMs), making it easier for +# AI assistants to understand and navigate the documentation. +# +# Usage: +# uv run generate_llms_txt.py + +from typing import Dict, Any, Optional, cast +import frontmatter +from pathlib import Path + +# Separator used in menu categories to create hierarchical navigation +# e.g., "Get started / Installation" becomes ["Get started", "Installation"] +CATEGORY_SEP = " / " +
+# Header text that appears at the top of the generated llms.txt file +INITIAL_TEXT = """\ +# Streamlit documentation website + +> Streamlit is a powerful open-source Python framework that allows data +scientists and AI/ML engineers to build interactive apps (i.e.
data apps) +with only a few lines of code.""" + + +def read_menu_file(menu_file_path: Path) -> Dict[str, Any]: + """Read and parse the menu file containing the documentation structure. + + Args: + menu_file_path: Path to the menu.md file + + Returns: + Dictionary containing the parsed menu structure from the frontmatter + """ + # Read the menu.md file + with open(menu_file_path, "r", encoding="utf-8") as f: + content = f.read() + + # Parse the frontmatter + post = frontmatter.loads(content) + return cast(Dict[str, Any], post.metadata.get("site_menu", {})) + + +def get_url_to_descriptions_dict(content_dir: Path) -> Dict[str, Optional[str]]: + """Get a mapping of URLs to their descriptions from markdown files. + + Args: + content_dir: Directory containing markdown files to process + + Returns: + Dictionary mapping URLs to their descriptions (None if no description) + """ + url_to_descriptions_dict: Dict[str, Optional[str]] = {} + + # Walk through all directories and files + for file_path in content_dir.rglob("*.md"): + try: + # Read the content of the markdown file with frontmatter + post = frontmatter.load(str(file_path)) + + url = cast(Optional[str], post.get("slug")) + + if not url: + continue + + url_to_descriptions_dict[url] = cast(Optional[str], post.get("description")) + + except Exception as e: + print(f"Error processing {file_path}: {str(e)}") + + return url_to_descriptions_dict + + +def main() -> None: + """Generate the llms.txt file from the documentation structure. + + This is the main function that orchestrates the entire process: + 1. Reads the menu structure from menu.md + 2. Extracts descriptions from individual markdown files + 3. Processes menu items to create a hierarchical structure + 4. 
Generates the final llms.txt file with proper formatting + """ + # Construct paths relative to this script's location + # The content directory is a sibling to the python directory + content_dir = Path(__file__).parent.parent / "content" + menu_file_path = content_dir / "menu.md" + + try: + menu_dict = read_menu_file(menu_file_path) + except Exception as e: + print("Error reading menu file\n") + raise e + + url_to_descriptions_dict = get_url_to_descriptions_dict(content_dir) + + output = [INITIAL_TEXT] + prev_output_is_paragraph = True + + # Process each menu item from the parsed menu structure + for menu_item in menu_dict: + menu_item = cast(Dict, menu_item) + try: + # Check visibility settings for LLMs + # Some pages may be hidden from LLMs specifically using 'visible_to_llms' + if "visible_to_llms" in menu_item: + if not menu_item["visible_to_llms"]: + continue + else: + # Fall back to general 'visible' flag if 'visible_to_llms' not specified + if not menu_item.get("visible", True): + continue + + # Skip items without required fields + if "category" not in menu_item: + continue + if "url" not in menu_item: + continue + + # Parse the hierarchical category structure + # e.g., "Get started / Installation / Command line" -> ["Get started", "Installation", "Command line"] + category_list = menu_item["category"].split(CATEGORY_SEP) + url: str = menu_item["url"] + + # Try to get description from the markdown files first (more detailed) + # Fall back to description in menu if not found + description: Optional[str] = url_to_descriptions_dict.get(url, None) + if not description: + description = menu_item.get("description", None) + + # The label to display is the last part of the category hierarchy + # This assumes menu.md is ordered hierarchically + category_label: str = category_list[-1] + + # Format output based on the hierarchy level + indentation = "" + + # Level 1: Top-level sections (## headers) + # e.g., "Get started", "Develop", "Deploy" + if len(category_list) == 
1: + output.append("") # Add blank line for separation + output.append(f"## [{category_label}]({url})") + if description: + output.append("") + output.append(description) + prev_output_is_paragraph = True + + # Level 2: Subsections (### headers) + # e.g., "Installation", "Fundamentals", "First steps" + elif len(category_list) == 2: + output.append("") # Add blank line for separation + output.append(f"### [{category_label}]({url})") + if description: + output.append("") + output.append(description) + prev_output_is_paragraph = True + + # Level 3+: List items with proper indentation + # e.g., individual tutorial pages, API references + else: + if prev_output_is_paragraph: + output.append("") + + # Calculate indentation: each level beyond 3 gets 2 spaces + num_indents = len(category_list) - 3 + indentation = num_indents * " " + + # Format as markdown list item with link + output.append(f"{indentation}- [{category_label}]({url})") + if description: + output.append(f"{indentation} {description}") + prev_output_is_paragraph = False + + except Exception as e: + print(f"--------------------\nError parsing {menu_item['category']}\n") + raise e + + # Generate the final output file + # The llms.txt file is placed in the public directory for web serving + output_file = Path(__file__).parent.parent / "public" / "llms.txt" + + # Join all output lines with newlines and write to file + output_file.write_text("\n".join(output), encoding="utf-8") + print(f"Successfully generated {output_file}") + + +if __name__ == "__main__": + main()