From deaf633c43cec0a66cbfa61e900bd3b4bc7a2bdd Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Thu, 5 Jun 2025 12:26:47 +0800 Subject: [PATCH 1/4] Python Prompt interface --- packages/poml/components/document.tsx | 21 ++- packages/poml/components/tree.tsx | 8 +- packages/poml/components/webpage.tsx | 12 +- packages/poml/tests/components.test.tsx | 18 ++ pyproject.toml | 2 +- python/poml/prompt.py | 237 ++++++++++++++++++++++++ 6 files changed, 287 insertions(+), 11 deletions(-) create mode 100644 python/poml/prompt.py diff --git a/packages/poml/components/document.tsx b/packages/poml/components/document.tsx index de8e7b3b..d3170941 100644 --- a/packages/poml/components/document.tsx +++ b/packages/poml/components/document.tsx @@ -188,6 +188,7 @@ interface DocumentProps extends PropsSyntaxBase { src?: string; parser?: DocumentParser; buffer?: string | Buffer; + base64?: string; multimedia?: boolean; selectedPages?: string; } @@ -238,6 +239,7 @@ async function autoParseDocument( * * @param {string} src - The source file to read the data from. This must be provided if records is not provided. * @param {Buffer|string} buffer - Document data buffer. Recommended to use `src` instead unless you want to use a string. + * @param {string} base64 - Base64 encoded string of the document data. Mutually exclusive with `src` and `buffer`. * @param {'auto'|'pdf'|'docx'|'txt'} parser - The parser to use for reading the data. If not provided, it will be inferred from the file extension. * @param {boolean} multimedia - If true, the multimedias will be displayed. If false, the alt strings will be displayed at best effort. Default is `true`. * @param {string} selectedPages - The pages to be selected. This is only available **for PDF documents**. If not provided, all pages will be selected. @@ -255,15 +257,22 @@ async function autoParseDocument( export const Document = component('Document', { aliases: ['doc'], asynchorous: true })(( props: DocumentProps ) => { - let { buffer, parser, ...others } = props; + let { buffer, parser, base64, ...others } = props; let parsedBuffer: Buffer | undefined; - if (typeof buffer === 'string') { - parsedBuffer = Buffer.from(buffer, 'utf-8'); - if (parser === undefined || parser === 'auto') { - parser = 'txt'; + if (base64) { + if (buffer !== undefined) { + throw new Error('Either buffer or base64 should be provided, not both.'); } + parsedBuffer = Buffer.from(base64, 'base64'); } else { - parsedBuffer = buffer; + if (typeof buffer === 'string') { + parsedBuffer = Buffer.from(buffer, 'utf-8'); + if (parser === undefined || parser === 'auto') { + parser = 'txt'; + } + } else { + parsedBuffer = buffer; + } } const document = useWithCatch( autoParseDocument({ buffer: parsedBuffer, parser, ...others }), diff --git a/packages/poml/components/tree.tsx b/packages/poml/components/tree.tsx index c5be92b8..38cde166 100644 --- a/packages/poml/components/tree.tsx +++ b/packages/poml/components/tree.tsx @@ -261,8 +261,12 @@ function readDirectoryToTreeItems( const children: TreeItemData[] = []; const entries = fs.readdirSync(dirPath, { withFileTypes: true }).sort((a, b) => { // Directories first, then files - if (a.isDirectory() && !b.isDirectory()) return -1; - if (!a.isDirectory() && b.isDirectory()) return 1; + if (a.isDirectory() && !b.isDirectory()) { + return -1; + } + if (!a.isDirectory() && b.isDirectory()) { + return 1; + } return a.name.localeCompare(b.name); }); diff --git a/packages/poml/components/webpage.tsx b/packages/poml/components/webpage.tsx index b6af88d8..f05f9bc4 100644 --- a/packages/poml/components/webpage.tsx +++ b/packages/poml/components/webpage.tsx @@ -10,6 +10,7 @@ export interface WebpageProps extends PropsSyntaxBase { src?: string; url?: string; buffer?: string | Buffer; + base64?: string; extractText?: boolean; selector?: string; } @@ -102,6 +103,7 @@ async function processWebpage(props: WebpageProps): Promise * @param {string} url - The URL of the webpage to fetch and display. * @param {string} src - Local file path to an HTML file to display. * @param {string|Buffer} buffer - HTML content as string or buffer. + * @param {string} base64 - Base64 encoded HTML content. * @param {boolean} extractText - Whether to extract plain text content (true) or convert HTML to structured POML (false). Default is false. * @param {string} selector - CSS selector to extract specific content from the page (e.g., "article", ".content", "#main"). Default is "body". * @@ -126,7 +128,13 @@ async function processWebpage(props: WebpageProps): Promise export const Webpage = component('Webpage', { asynchorous: true })(( props: WebpageProps ) => { - const { src, url, buffer, extractText, selector, ...others } = props; - const content = useWithCatch(processWebpage(props), others); + let { src, url, buffer, base64, extractText, selector, ...others } = props; + if (base64) { + if (buffer !== undefined) { + throw new Error('Either buffer or base64 should be provided, not both.'); + } + buffer = Buffer.from(base64, 'base64'); + } + const content = useWithCatch(processWebpage({ ...props, buffer: buffer }), others); return {content ?? null}; }); diff --git a/packages/poml/tests/components.test.tsx b/packages/poml/tests/components.test.tsx index ae128939..cb1a0ace 100644 --- a/packages/poml/tests/components.test.tsx +++ b/packages/poml/tests/components.test.tsx @@ -50,6 +50,15 @@ describe('document', () => { /without any merged cells:\n\n\| Screen Reader \| Responses \| Share \|\n/g ); }); + + test('docx from base64', async () => { + const buffer = readFileSync(__dirname + '/assets/sampleWord.docx'); + const base64 = buffer.toString('base64'); + const result = await poml(); + expect(result[4]).toMatch( + /without any merged cells:\n\n\| Screen Reader \| Responses \| Share \|\n/g + ); + }); }); describe('message', () => { @@ -481,4 +490,13 @@ Finally, link to another page in your own Web site. expect(result).toContain('

Enter the main heading, usually the same as the title.

'); }); + + test('loading HTML from base64', async () => { + const htmlContent = readFileSync(webpagePath, 'utf-8'); + const base64Content = Buffer.from(htmlContent).toString('base64'); + const markup = ; + const result = await poml(markup); + + expect(result).toContain('

Enter the main heading, usually the same as the title.

'); + }); }); diff --git a/pyproject.toml b/pyproject.toml index 41be260e..1f9892ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "poml" version = "0.0.5" description = "Prompt Orchestration Markup Language" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.9" license = {file = "LICENSE"} dependencies = [ "nodejs-wheel" diff --git a/python/poml/prompt.py b/python/poml/prompt.py new file mode 100644 index 00000000..4848f4f9 --- /dev/null +++ b/python/poml/prompt.py @@ -0,0 +1,237 @@ +import xml.etree.ElementTree as ET +import base64 +import json +import tempfile +import warnings + +from .api import poml # Assuming this exists in your project structure + + +def _write_file_for_poml(content: str): + """Writes content to a named temporary file that is not deleted on close.""" + # The caller is responsible for managing the lifecycle of this file, including deletion. + temp_file = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False) + temp_file.write(content) + temp_file.flush() # Ensure content is written to disk + # temp_file.close() # Consider if the file should be closed here or by the caller. + return temp_file + + +class _ImplicitDualTagHandler: + """ + Handles XML tags that can be self-closing or act as context managers for nested content. + It creates an ET.Element on initialization and adds it to the current parent. + If used with 'with', it pushes the element onto the Prompt's parent stack. + """ + + def __init__(self, prompt_instance: "Prompt", tag_name: str, attrs: dict): + self.prompt = prompt_instance + self.tag_name = tag_name + + prepared_attrs = self.prompt._prepare_attrs(**attrs) + self.element = ET.Element(tag_name, prepared_attrs) + + if self.prompt.current_parent_stack: + # Append as child to the currently open element + self.prompt.current_parent_stack[-1].append(self.element) + else: + # No parent on stack, so this is a root-level element + self.prompt.root_elements.append(self.element) + + self._is_context_managed = False # True if __enter__ completes successfully + + def __enter__(self): + # This element now becomes the current parent for any nested tags or text. + self.prompt.current_parent_stack.append(self.element) + self._is_context_managed = True + return self.prompt # Return Prompt instance for chained calls like p.text() + + def __exit__(self, exc_type, exc_val, exc_tb): + if not self._is_context_managed: + # This means __enter__ did not complete successfully, or the handler + # was instantiated but not used correctly in a 'with' statement. + raise SystemError( + f"Exiting tag handler for '{self.element.tag}' that was not properly context-managed. " + "Ensure it's used in a 'with' statement and __enter__ completed." + ) + + # If __enter__ completed, self.element was pushed onto the stack. + if not self.prompt.current_parent_stack: + # This indicates a critical internal logic error. + raise SystemError( + f"Internal error: Tag stack empty while exiting context for '{self.element.tag}'. " + "_is_context_managed was True, implying a tag should be on stack." + ) + + popped_element = self.prompt.current_parent_stack.pop() + if popped_element is not self.element: + # This is a critical internal error, indicating mismatched tags or stack corruption. + self.prompt.current_parent_stack.append(popped_element) # Restore the stack to its previous state + raise SystemError( + f"XML structure error: Mismatched tag on context exit. Expected to pop '{self.element.tag}', " + f"but found '{popped_element.tag}'. This suggests an issue with nested contexts." + ) + + +class Prompt: + """ + Builds an XML structure using ElementTree, supporting context-managed tags. + """ + + def __init__(self): + self.root_elements: list[ET.Element] = [] + self.current_parent_stack: list[ET.Element] = [] # Stack of current ET.Element parents + + def _prepare_attrs(self, **attrs) -> dict[str, str]: + """Converts attribute values to strings suitable for ElementTree.""" + prepared = {} + for k, v in attrs.items(): + if v is None: # Skip None attributes + continue + key_str = str(k) # Keys are typically strings + if isinstance(v, bool): + val_str = str(v).lower() # XML often uses "true"/"false" + elif isinstance(v, bytes): + b64 = base64.b64encode(v).decode() + if key_str == "buffer": + prepared["base64"] = b64 + continue + else: + val_str = base64.b64encode(v).decode("ascii") + elif isinstance(v, (int, float, str)): + val_str = str(v) + else: + val_str = json.dumps(v) # Fallback for complex types, convert to JSON string + prepared[key_str] = val_str + return prepared + + def text(self, content: str): + """Adds text content to the currently open XML element.""" + if not self.current_parent_stack: + raise ValueError("Cannot add text: No tag is currently open. Use a 'with' block for a tag.") + + current_el = self.current_parent_stack[-1] + # ElementTree handles XML escaping for text content automatically + content_str = str(content) + + # Append text correctly for mixed content (text between child elements) + if len(current_el) > 0: # If current element has children + last_child = current_el[-1] + if last_child.tail is None: + last_child.tail = content_str + else: + last_child.tail += content_str + else: # No children yet in the current element, add to its primary text + if current_el.text is None: + current_el.text = content_str + else: + current_el.text += content_str + + def _generate_xml_string(self, pretty: bool) -> str: + """ + Serializes the built XML structure to a string. + Can optionally pretty-print the output. + """ + if self.current_parent_stack: + # This warning is for cases where rendering/dumping happens with unclosed tags. + print( + f"Warning: Generating XML with open tags: {[el.tag for el in self.current_parent_stack]}. " + "Ensure all 'with' blocks for tags are properly exited before finalizing XML." + ) + + xml_strings = [] + for root_el in self.root_elements: + if pretty: + # ET.indent modifies the element in-place (Python 3.9+) + ET.indent(root_el, space=" ", level=0) + xml_strings.append(ET.tostring(root_el, encoding="unicode", method="xml")) + else: + # Serialize compactly without extra whitespace + xml_strings.append(ET.tostring(root_el, encoding="unicode", method="xml")) + + # Join the string representations of each root-level element. + # If pretty printing and multiple roots, join with newlines for readability. + # Otherwise, join directly to form a contiguous XML stream. + joiner = "\n" if pretty and len(xml_strings) > 0 else "" # Add newline between pretty roots + return joiner.join(xml_strings) + + def render(self, chat: bool = True, context=None, stylesheet=None) -> list | dict | str: + """ + Renders the final XML. Raises error if tags are still open. + """ + if self.current_parent_stack: + raise ValueError( + f"Cannot render: Open tags remaining: {[el.tag for el in self.current_parent_stack]}. " + "Ensure all 'with' blocks for tags are properly exited." + ) + # poml likely expects a compact, single XML string. + final_xml = self._generate_xml_string(pretty=False) + return poml(final_xml, context=context, stylesheet=stylesheet, chat=chat) + + def dump_xml(self) -> str: + """ + Dumps the generated XML string, pretty-printed by default (useful for debugging). + """ + return self._generate_xml_string(pretty=True) + + def __enter__(self): + """Initializes Prompt for a new XML construction session within a 'with' block.""" + self.root_elements = [] + self.current_parent_stack = [] + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Cleans up Prompt state upon exiting a 'with' block.""" + if self.current_parent_stack and exc_type is None: + # This means the Prompt context itself exited while some _ImplicitDualTagHandler + # contexts (tags) were still notionally open. + warnings.warn( + f"Warning: Prompt context exited with open tags: {[el.tag for el in self.current_parent_stack]}. " + "This may indicate nested tag context managers were not properly closed before the Prompt context ended." + ) + + # Consistent with original behavior: clear internal state on exit. + # This means results should typically be obtained (via render/dump_xml) + # before the Prompt's 'with' block finishes if Prompt itself is a context manager. + self.root_elements.clear() + self.current_parent_stack.clear() + + # --- Tag-specific methods using the dual-use handler --- + # These methods provide a convenient API for creating specific XML tags. + + def task(self, **attrs) -> _ImplicitDualTagHandler: + """Task tag. Use with `with` for content, or call directly for self-closing.""" + return _ImplicitDualTagHandler(self, "Task", attrs) + + def document(self, **attrs) -> _ImplicitDualTagHandler: + """Document tag. Use with `with` for content, or call directly for self-closing.""" + return _ImplicitDualTagHandler(self, "Document", attrs) + + def p(self, **attrs) -> _ImplicitDualTagHandler: + """Paragraph tag. Use with `with` for content, or call directly for self-closing.""" + return _ImplicitDualTagHandler(self, "p", attrs) + + # Add any other tag from your JSON schema here, following the same pattern: + # def some_tag_name(self, **attrs) -> _ImplicitDualTagHandler: + # return _ImplicitDualTagHandler(self, "SomeTagName", attrs) + + +if __name__ == '__main__': + # Example usage of the Prompt class + with Prompt() as p: + with p.p(): + with p.task(id="task1", status="open"): + p.text("This is a task description.") + with p.p(): + p.text("This is a paragraph in the document.") + + xml_output = p.dump_xml() # Get pretty-printed XML for debugging + print(xml_output) + prompt_output = p.render() + print(prompt_output) + + #

+ # This is a task description. + #

This is a paragraph in the document.

+ #

+ # [{'speaker': 'human', 'content': '# Task\n\nThis is a task description.\n\nThis is a paragraph in the document.'}] From cc7ff6e620d47552eaaac2159fe42fe9a09a74c9 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Thu, 5 Jun 2025 17:40:15 +0800 Subject: [PATCH 2/4] taglib gen --- docs/components.md | 2 + packages/poml-vscode/lsp/parseComments.ts | 155 ++- packages/poml/assets/componentDocs.json | 14 + python/poml/_tags.py | 1277 +++++++++++++++++++++ python/poml/prompt.py | 29 +- 5 files changed, 1442 insertions(+), 35 deletions(-) create mode 100644 python/poml/_tags.py diff --git a/docs/components.md b/docs/components.md index 2e248ca5..f8479803 100644 --- a/docs/components.md +++ b/docs/components.md @@ -748,6 +748,7 @@ To display a Word document without including the real multimedia: - **src**: The source file to read the data from. This must be provided if records is not provided. - **buffer**: Buffer. Document data buffer. Recommended to use `src` instead unless you want to use a string. +- **base64**: Base64 encoded string of the document data. Mutually exclusive with `src` and `buffer`. - **parser**: Can be one of: auto, pdf, docx, txt. The parser to use for reading the data. If not provided, it will be inferred from the file extension. - **multimedia**: Boolean. If true, the multimedias will be displayed. If false, the alt strings will be displayed at best effort. Default is `true`. - **selectedPages**: The pages to be selected. This is only available **for PDF documents**. If not provided, all pages will be selected. @@ -998,6 +999,7 @@ Convert HTML to structured POML components: - **url**: The URL of the webpage to fetch and display. - **src**: Local file path to an HTML file to display. - **buffer**: Buffer. HTML content as string or buffer. +- **base64**: Base64 encoded HTML content. - **extractText**: Boolean. Whether to extract plain text content (true) or convert HTML to structured POML (false). Default is false. - **selector**: CSS selector to extract specific content from the page (e.g., "article", ".content", "#main"). Default is "body". - **syntax**: Can be one of: markdown, html, json, yaml, xml, text. The syntax of the content. diff --git a/packages/poml-vscode/lsp/parseComments.ts b/packages/poml-vscode/lsp/parseComments.ts index 5cff5fed..a1107f00 100644 --- a/packages/poml-vscode/lsp/parseComments.ts +++ b/packages/poml-vscode/lsp/parseComments.ts @@ -1,9 +1,9 @@ -import "poml"; -import { ComponentSpec, Parameter } from "poml/base"; +import 'poml'; +import { ComponentSpec, Parameter } from 'poml/base'; -import { readFileSync, readdirSync, writeFileSync } from "fs"; -import { join } from "path"; -import { formatComponentDocumentation } from "./documentFormatter"; +import { readFileSync, readdirSync, writeFile, writeFileSync } from 'fs'; +import { join } from 'path'; +import { formatComponentDocumentation } from './documentFormatter'; const basicComponents: string[] = []; const intentions: string[] = []; @@ -18,13 +18,14 @@ function tsCommentToMarkdown(comment: string): ComponentSpec { .replace(/^\/\*\*?/, '') .replace(/\*\/$/, '') .split('\n') - .map((line) => line.replace(/^\s*\*( )?/, '')) - .map((line) => line.replace(/\s+$/, '')) + .map(line => line.replace(/^\s*\*( )?/, '')) + .map(line => line.replace(/\s+$/, '')) .join('\n'); // Recognize description, @param and @example in the comment. const descriptionRegex = /([\s\S]*?)(?=@param|@example|@see|$)/; - const paramRegex = /@param\s+(\{([\S'"\|]+?)\}\s+)?(\w+)\s+-\s+([\s\S]*?)(?=@param|@example|@see|$)/g; + const paramRegex = + /@param\s+(\{([\S'"\|]+?)\}\s+)?(\w+)\s+-\s+([\s\S]*?)(?=@param|@example|@see|$)/g; const exampleRegex = /@example\s+([\s\S]*?)(?=@param|@example|@see|$)/; const seeRegex = /@see\s+([\s\S]*?)(?=@param|@example|@see|$)/g; @@ -50,7 +51,7 @@ function tsCommentToMarkdown(comment: string): ComponentSpec { fallbackType = 'string'; } else if (paramMatch[2] && paramMatch[2].includes('|')) { type = 'string'; - choices = paramMatch[2].split('|').map((choice) => choice.replace(/['"\s]/g, '').trim()); + choices = paramMatch[2].split('|').map(choice => choice.replace(/['"\s]/g, '').trim()); } else if (paramMatch[2]) { type = paramMatch[2]; } @@ -80,7 +81,7 @@ function tsCommentToMarkdown(comment: string): ComponentSpec { params, example, baseComponents - } + }; } function extractTsComments(text: string) { @@ -95,7 +96,8 @@ function extractTsComments(text: string) { function extractComponentComments(text: string) { const comments: ComponentSpec[] = []; - const commentRegex = /(\/\*\*([\s\S]*?)\*\/)\nexport const [\w]+ = component\(['"](\w+)['"](,[\S\s]*?)?\)/g; + const commentRegex = + /(\/\*\*([\s\S]*?)\*\/)\nexport const [\w]+ = component\(['"](\w+)['"](,[\S\s]*?)?\)/g; let match; while ((match = commentRegex.exec(text)) !== null) { const doc = { name: match[3], ...tsCommentToMarkdown(match[2]) }; @@ -104,7 +106,6 @@ function extractComponentComments(text: string) { return comments; } - function* walk(folderPath: string): IterableIterator { for (const entry of readdirSync(folderPath, { withFileTypes: true })) { if (entry.isFile() && (entry.name.endsWith('.tsx') || entry.name.endsWith('.ts'))) { @@ -135,7 +136,7 @@ function scanComponentDocs(folderPath: string) { } else { utilities.push(...names); } - }; + } return allComments; } @@ -159,6 +160,134 @@ function docsToMarkdown(docs: ComponentSpec[]) { return parts.join('\n\n'); } +function camelToSnake(str: string): string { + return str + .replace(/([A-Z]+)([A-Z][a-z])/g, '$1_$2') // Handles cases like "XMLFile" -> "XML_File" + .replace(/([a-z\d])([A-Z])/g, '$1_$2') // Handles "camelCase" -> "camel_Case" + .toLowerCase(); // Converts to lowercase: "XML_File" -> "xml_file" +} + +function getPythonType(jsonType: string, paramName: string): string { + const lcJsonType = jsonType.toLowerCase(); + switch (lcJsonType) { + case 'string': + return 'str'; + case 'boolean': + return 'bool'; + case 'buffer': + return 'bytes'; + case 'number': + // Heuristic for int vs float based on common parameter names + if ( + paramName.includes('max') || + paramName.includes('count') || + paramName.includes('depth') || + paramName.endsWith('Index') + ) { + return 'int'; + } + return 'float'; + case 'object': + return 'Any'; // Could be Dict[str, Any] + case 'regexp': + return 'str'; // Python uses strings for regex patterns + default: + if (jsonType.endsWith('[]')) { + // Handles array types like TreeItemData[] + return 'List[Any]'; // Generic list type + } + // For unknown or complex non-array types (e.g., a specific object schema name) + return 'Any'; + } +} + +function generatePythonMethod(tag: ComponentSpec): string { + const methodName = camelToSnake(tag.name!); + let paramsSignatureList: string[] = [' self']; + let argsDocstring = ''; + const callArgsList: string[] = [`tag_name="${tag.name}"`]; + + tag.params.forEach(param => { + const paramName = param.name; // Use original JSON name for Python parameter + const pythonType = getPythonType(param.type, paramName); + const typeHint = `Optional[${pythonType}]`; + + paramsSignatureList.push(` ${paramName}: ${typeHint} = None`); + callArgsList.push(`${paramName}=${paramName}`); + + let paramDesc = param.description.replace(/\n/g, '\n '); + if (param.defaultValue !== undefined) { + const defValStr = + typeof param.defaultValue === 'string' ? `"${param.defaultValue}"` : param.defaultValue; + paramDesc += ` Default is \`${defValStr}\`.`; + } + if (param.choices && param.choices.length > 0) { + paramDesc += ` Choices: ${param.choices.map(c => `\`${JSON.stringify(c)}\``).join(', ')}.`; + } + argsDocstring += ` ${paramName} (${typeHint}): ${paramDesc}\n`; + }); + + paramsSignatureList.push(' **kwargs: Any'); + + const paramsString = paramsSignatureList.join(',\n'); + + let docstring = `"""${tag.description.replace(/\n/g, '\n ')}\n\n`; + if (argsDocstring) { + docstring += ` Args:\n${argsDocstring}`; + } + if (tag.example) { + const exampleIndented = tag.example + .replace(/\\/g, '\\\\') // Escape backslashes for string literal + .replace(/"""/g, '\\"\\"\\"') // Escape triple quotes if any in example + .replace(/\n/g, '\n '); + docstring += `\n Example:\n ${exampleIndented}\n`; + } + docstring += ` """`; + + const methodBody = `return self._tag( + ${callArgsList.join(',\n ')}, + **kwargs, + )`; + + return ` + def ${methodName}( +${paramsString}, + ): + ${docstring} + ${methodBody} + `; +} + +function generatePythonFile(jsonData: ComponentSpec[]): string { + let pythonCode = `# This file is auto-generated from component documentation. +# Do not edit manually. Run \`npm run build-comment\` to regenerate. + +from typing import Optional, Any, Union, List, Dict +# from numbers import Number # For more specific number types if needed + +class _TagLib: + + def tag(self, tag_name: str, **kwargs: Any) -> Any: + """Helper method to create a tag with the given name and attributes. + Implemented by subclasses. + """ + raise NotImplementedError("This method should be implemented by subclasses.") +`; + + jsonData.forEach(tag => { + if (!tag.name) { + console.warn('Skipping tag with no name:', tag); + return; + } + pythonCode += generatePythonMethod(tag); + }); + + return pythonCode; +} + const allDocs = scanComponentDocs('packages/poml'); +const pythonCode = generatePythonFile(allDocs); writeFileSync('packages/poml/assets/componentDocs.json', JSON.stringify(allDocs, null, 2)); writeFileSync('docs/components.md', docsToMarkdown(allDocs)); +writeFileSync('python/poml/_tags.py', pythonCode); +console.log('Component documentation generated successfully!'); diff --git a/packages/poml/assets/componentDocs.json b/packages/poml/assets/componentDocs.json index 7086e93b..976506af 100644 --- a/packages/poml/assets/componentDocs.json +++ b/packages/poml/assets/componentDocs.json @@ -18,6 +18,13 @@ "description": "Document data buffer. Recommended to use `src` instead unless you want to use a string.", "required": false }, + { + "name": "base64", + "type": "string", + "choices": [], + "description": "Base64 encoded string of the document data. Mutually exclusive with `src` and `buffer`.", + "required": false + }, { "name": "parser", "type": "string", @@ -1106,6 +1113,13 @@ "description": "HTML content as string or buffer.", "required": false }, + { + "name": "base64", + "type": "string", + "choices": [], + "description": "Base64 encoded HTML content.", + "required": false + }, { "name": "extractText", "type": "boolean", diff --git a/python/poml/_tags.py b/python/poml/_tags.py new file mode 100644 index 00000000..19314a0b --- /dev/null +++ b/python/poml/_tags.py @@ -0,0 +1,1277 @@ +# This file is auto-generated from component documentation. +# Do not edit manually. Run `npm run build-comment` to regenerate. + +from typing import Optional, Any, Union, List, Dict +# from numbers import Number # For more specific number types if needed + +class _TagLib: + + def tag(self, tag_name: str, **kwargs: Any) -> Any: + """Helper method to create a tag with the given name and attributes. + Implemented by subclasses. + """ + raise NotImplementedError("This method should be implemented by subclasses.") + + def document( + self, + src: Optional[str] = None, + buffer: Optional[bytes] = None, + base64: Optional[str] = None, + parser: Optional[str] = None, + multimedia: Optional[bool] = None, + selectedPages: Optional[str] = None, + **kwargs: Any, + ): + """Displaying an external document like PDF, TXT or DOCX. + + Args: + src (Optional[str]): The source file to read the data from. This must be provided if records is not provided. + buffer (Optional[bytes]): Document data buffer. Recommended to use `src` instead unless you want to use a string. + base64 (Optional[str]): Base64 encoded string of the document data. Mutually exclusive with `src` and `buffer`. + parser (Optional[str]): The parser to use for reading the data. If not provided, it will be inferred from the file extension. Choices: `"auto"`, `"pdf"`, `"docx"`, `"txt"`. + multimedia (Optional[bool]): If true, the multimedias will be displayed. If false, the alt strings will be displayed at best effort. Default is `true`. Default is `"true"`. + selectedPages (Optional[str]): The pages to be selected. This is only available **for PDF documents**. If not provided, all pages will be selected. + You can use a string like `2` to specify a single page, or slice like `2:4` to specify a range of pages (2 inclusive, 4 exclusive). + The pages selected are **0-indexed**. Negative indexes like `-1` is not supported here. + + Example: + To display a Word document without including the real multimedia: + ```xml + + ``` + """ + return self._tag( + tag_name="Document", + src=src, + buffer=buffer, + base64=base64, + parser=parser, + multimedia=multimedia, + selectedPages=selectedPages, + **kwargs, + ) + + def role( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionEnding: Optional[str] = None, + **kwargs: Any, + ): + """Specifies the role you want the language model to assume when responding. + Defining a role provides the model with a perspective or context, + such as a scientist, poet, child, or any other persona you choose. + + Args: + caption (Optional[str]): The title or label for the role paragraph. Default is `Role`. Default is `"Role"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `role`. Default is `"role"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `header`. Default is `"header"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionEnding (Optional[str]): A caption can ends with a colon, a newline or simply nothing. + If not specified, it defaults to `colon` for `bold` or `plain` captionStyle, and `none` otherwise. Choices: `"colon"`, `"newline"`, `"colon-newline"`, `"none"`. + + Example: + ```xml + You are a data scientist. + ``` + """ + return self._tag( + tag_name="Role", + caption=caption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionEnding=captionEnding, + **kwargs, + ) + + def task( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionEnding: Optional[str] = None, + **kwargs: Any, + ): + """Task represents the action you want the language model to perform. + It is a directive or instruction that you want the model to follow. + Task is usually not long, but rather a concise and clear statement. + Users can also include a list of steps or instructions to complete the task. + + Args: + caption (Optional[str]): The title or label for the task paragraph. Default is `Task`. Default is `"Task"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `task`. Default is `"task"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `header`. Default is `"header"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionEnding (Optional[str]): A caption can ends with a colon, a newline or simply nothing. + If not specified, it defaults to `colon` for `bold` or `plain` captionStyle, and `none` otherwise. Choices: `"colon"`, `"newline"`, `"colon-newline"`, `"none"`. + + Example: + ```xml + Cook a recipe on how to prepare a beef dish. + ``` + + When including a list of steps: + ```xml + + Planning a schedule for a travel. + + Decide on the destination and plan the duration. + Find useful information about the destination. + Write down the schedule for each day. + + + ``` + """ + return self._tag( + tag_name="Task", + caption=caption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionEnding=captionEnding, + **kwargs, + ) + + def output_format( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionEnding: Optional[str] = None, + **kwargs: Any, + ): + """Output format deals with the format in which the model should provide the output. + It can be a specific format such as JSON, XML, or CSV, or a general format such as a story, + a diagram or steps of instructions. + Please refrain from specifying too complex formats that the model may not be able to generate, + such as a PDF file or a video. + + Args: + caption (Optional[str]): The title or label for the output format paragraph. Default is `Output Format`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `outputFormat`. Default is `"outputFormat"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `header`. Default is `"header"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionEnding (Optional[str]): A caption can ends with a colon, a newline or simply nothing. + If not specified, it defaults to `colon` for `bold` or `plain` captionStyle, and `none` otherwise. Choices: `"colon"`, `"newline"`, `"colon-newline"`, `"none"`. + + Example: + ```xml + Respond with a JSON without additional characters or punctuations. + ``` + """ + return self._tag( + tag_name="OutputFormat", + caption=caption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionEnding=captionEnding, + **kwargs, + ) + + def stepwise_instructions( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionEnding: Optional[str] = None, + **kwargs: Any, + ): + """StepwiseInstructions that elaborates the task by providing a list of steps or instructions. + Each step should be concise and clear, and the list should be easy to follow. + + Args: + caption (Optional[str]): The title or label for the stepwise instructions paragraph. Default is `Stepwise Instructions`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `stepwiseInstructions`. Default is `"stepwiseInstructions"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `header`. Default is `"header"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionEnding (Optional[str]): A caption can ends with a colon, a newline or simply nothing. + If not specified, it defaults to `colon` for `bold` or `plain` captionStyle, and `none` otherwise. Choices: `"colon"`, `"newline"`, `"colon-newline"`, `"none"`. + + Example: + ```xml + + + Interpret and rewrite user's query. + Think of a plan to solve the query. + Generate a response based on the plan. + + + ``` + """ + return self._tag( + tag_name="StepwiseInstructions", + caption=caption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionEnding=captionEnding, + **kwargs, + ) + + def hint( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionColon: Optional[bool] = None, + **kwargs: Any, + ): + """Hint can be used anywhere in the prompt where you want to provide a helpful tip or explanation. + It is usually a short and concise statement that guides the LLM in the right direction. + + Args: + caption (Optional[str]): The title or label for the hint paragraph. Default is `Hint`. Default is `"Hint"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `hint`. Default is `"hint"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `bold`. Default is `"bold"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionColon (Optional[bool]): Indicates whether to append a colon after the caption. + By default, this is true for `bold` or `plain` captionStyle, and false otherwise. + + Example: + ```xml + Alice first purchased 4 apples and then 3 more, so she has 7 apples in total. + ``` + """ + return self._tag( + tag_name="Hint", + caption=caption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionColon=captionColon, + **kwargs, + ) + + def introducer( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionEnding: Optional[str] = None, + **kwargs: Any, + ): + """Introducer is a paragraph before a long paragraph (usually a list of examples, steps, or instructions). + It serves as a context introducing what is expected to follow. + + Args: + caption (Optional[str]): The title or label for the introducer paragraph. Default is `Introducer`. Default is `"Introducer"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `introducer`. Default is `"introducer"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `hidden`. Default is `"hidden"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionEnding (Optional[str]): A caption can ends with a colon, a newline or simply nothing. + If not specified, it defaults to `colon` for `bold` or `plain` captionStyle, and `none` otherwise. Choices: `"colon"`, `"newline"`, `"colon-newline"`, `"none"`. + + Example: + ```xml + Here are some examples. + ``` + """ + return self._tag( + tag_name="Introducer", + caption=caption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionEnding=captionEnding, + **kwargs, + ) + + def example_set( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + chat: Optional[bool] = None, + introducer: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionEnding: Optional[str] = None, + **kwargs: Any, + ): + """Example set (``) is a collection of examples that are usually presented in a list. + With the example set, you can manage multiple examples under a single title and optionally an introducer, + as well as the same `chat` format. + You can also choose to use `` purely without example set. + + Args: + caption (Optional[str]): The title or label for the example set paragraph. Default is `Examples`. Default is `"Examples"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `examples`. Default is `"examples"`. + chat (Optional[bool]): Indicates whether the examples should be rendered in chat format. + By default, it's `true` for "markup" syntaxes and `false` for "serializer" syntaxes. + introducer (Optional[str]): An optional introducer text to be displayed before the examples. + For example, `Here are some examples:`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `header`. Default is `"header"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionEnding (Optional[str]): A caption can ends with a colon, a newline or simply nothing. + If not specified, it defaults to `colon` for `bold` or `plain` captionStyle, and `none` otherwise. Choices: `"colon"`, `"newline"`, `"colon-newline"`, `"none"`. + + Example: + ```xml + + + What is the capital of France? + Paris + + + What is the capital of Germany? + Berlin + + + ``` + """ + return self._tag( + tag_name="ExampleSet", + caption=caption, + captionSerialized=captionSerialized, + chat=chat, + introducer=introducer, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionEnding=captionEnding, + **kwargs, + ) + + def example( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + chat: Optional[bool] = None, + captionTextTransform: Optional[str] = None, + captionColon: Optional[bool] = None, + **kwargs: Any, + ): + """Example is useful for providing a context, helping the model to understand what kind of inputs and outputs are expected. + It can also be used to demonstrate the desired output style, clarifying the structure, tone, or level of detail in the response. + + Args: + caption (Optional[str]): The title or label for the example paragraph. Default is `Example`. Default is `"Example"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `example`. Default is `"example"`. + captionStyle (Optional[str]): Determines the style of the caption, applicable only for "markup" syntaxes. Default is `hidden`. + Options include `header`, `bold`, `plain`, or `hidden`. Default is `"hidden"`. + chat (Optional[bool]): Indicates whether the example should be rendered in chat format. + When used in a example set (``), this is inherited from the example set. + Otherwise, it defaults to `false` for "serializer" syntaxes and `true` for "markup" syntaxes. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. + Options are `upper`, `lower`, `capitalize`, or `none`. Default is `none`. Default is `"none"`. + captionColon (Optional[bool]): Indicates whether to append a colon after the caption. + By default, this is true for `bold` or `plain` captionStyle, and false otherwise. + + Example: + ```xml + + What is the capital of France? + Paris + + ``` + + ```xml + Summarize the following passage in a single sentence. + + The sun provides energy for life on Earth through processes like photosynthesis. + The sun is essential for energy and life processes on Earth. + + ``` + """ + return self._tag( + tag_name="Example", + caption=caption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + chat=chat, + captionTextTransform=captionTextTransform, + captionColon=captionColon, + **kwargs, + ) + + def example_input( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + speaker: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionColon: Optional[bool] = None, + **kwargs: Any, + ): + """ExampleInput (``) is a paragraph that represents an example input. + By default, it's spoken by a human speaker in a chat context, but you can manually specify the speaker. + + Args: + caption (Optional[str]): The title or label for the example input paragraph. Default is `Input`. Default is `"Input"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `input`. Default is `"input"`. + speaker (Optional[str]): The speaker for the example input. Default is `human` if chat context is enabled (see ``). Default is `"human"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `hidden` if chat context is enabled. Otherwise, it's `bold`. Default is `"hidden"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionColon (Optional[bool]): Indicates whether to append a colon after the caption. + By default, this is true for `bold` or `plain` captionStyle, and false otherwise. + + Example: + ```xml + What is the capital of France? + ``` + + When used with a template: + + ```xml + What is the capital of {{country}}? + ``` + """ + return self._tag( + tag_name="ExampleInput", + caption=caption, + captionSerialized=captionSerialized, + speaker=speaker, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionColon=captionColon, + **kwargs, + ) + + def example_output( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + speaker: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionColon: Optional[bool] = None, + **kwargs: Any, + ): + """ExampleOutput (``) is a paragraph that represents an example output. + By default, it's spoken by a AI speaker in a chat context, but you can manually specify the speaker. + + Args: + caption (Optional[str]): The title or label for the example output paragraph. Default is `Output`. Default is `"Output"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `output`. Default is `"output"`. + speaker (Optional[str]): The speaker for the example output. Default is `ai` if chat context is enabled (see ``). Default is `"ai"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `hidden` if chat context is enabled. Otherwise, it's `bold`. Default is `"hidden"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionColon (Optional[bool]): Indicates whether to append a colon after the caption. + By default, this is true for `bold` or `plain` captionStyle, and false otherwise. + + Example: + ```xml + The capital of France is Paris. + ``` + + When used with a template: + + ```xml + The capital of {{country}} is {{capital}}. + ``` + """ + return self._tag( + tag_name="ExampleOutput", + caption=caption, + captionSerialized=captionSerialized, + speaker=speaker, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionColon=captionColon, + **kwargs, + ) + + def question( + self, + questionCaption: Optional[str] = None, + answerCaption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionEnding: Optional[str] = None, + **kwargs: Any, + ): + """Question (``) is actually a combination of a question and a prompt for the answer. + It's usually used at the end of a prompt to ask a question. + The question is followed by a prompt for answer (e.g., `Answer:`) to guide the model to respond. + + Args: + questionCaption (Optional[str]): The title or label for the question paragraph. Default is `Question`. Default is `"Question"`. + answerCaption (Optional[str]): The title or label for the answer paragraph. Default is `Answer`. Default is `"Answer"`. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. Default is `question`. Default is `"question"`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `bold`. Default is `"bold"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionEnding (Optional[str]): A caption can ends with a colon, a newline or simply nothing. + If not specified, it defaults to `colon` for `bold` or `plain` captionStyle, and `none` otherwise. Choices: `"colon"`, `"newline"`, `"colon-newline"`, `"none"`. + + Example: + ```xml + What is the capital of France? + ``` + """ + return self._tag( + tag_name="Question", + questionCaption=questionCaption, + answerCaption=answerCaption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionEnding=captionEnding, + **kwargs, + ) + + def system_message( + self, + **kwargs: Any, + ): + """Wrap the contents in a system message. + + + Example: + ```xml + Answer concisely. + ``` + """ + return self._tag( + tag_name="SystemMessage", + **kwargs, + ) + + def human_message( + self, + **kwargs: Any, + ): + """Wrap the contents in a user message. + + + Example: + ```xml + What is the capital of France? + ``` + """ + return self._tag( + tag_name="HumanMessage", + **kwargs, + ) + + def ai_message( + self, + **kwargs: Any, + ): + """Wrap the contents in a AI message. + + + Example: + ```xml + Paris + ``` + """ + return self._tag( + tag_name="AiMessage", + **kwargs, + ) + + def message_content( + self, + content: Optional[Any] = None, + **kwargs: Any, + ): + """Display a message content. + + Args: + content (Optional[Any]): The content of the message. It can be a string, or an array of strings and multimedia content. + + Example: + ```xml + + ``` + """ + return self._tag( + tag_name="MessageContent", + content=content, + **kwargs, + ) + + def conversation( + self, + messages: Optional[Any] = None, + selectedMessages: Optional[str] = None, + **kwargs: Any, + ): + """Display a conversation between system, human and AI. + + Args: + messages (Optional[Any]): A list of message. Each message should have a `speaker` and a `content` field. + selectedMessages (Optional[str]): The messages to be selected. If not provided, all messages will be selected. + You can use a string like `2` to specify a single message, or slice like `2:4` to specify a range of messages (2 inclusive, 4 exclusive). + Or use `-6:` to select the last 6 messages. + + Example: + ```xml + + ``` + """ + return self._tag( + tag_name="Conversation", + messages=messages, + selectedMessages=selectedMessages, + **kwargs, + ) + + def table( + self, + syntax: Optional[str] = None, + records: Optional[Any] = None, + columns: Optional[Any] = None, + src: Optional[str] = None, + parser: Optional[str] = None, + selectedColumns: Optional[Any] = None, + selectedRecords: Optional[Any] = None, + maxRecords: Optional[int] = None, + maxColumns: Optional[int] = None, + **kwargs: Any, + ): + """Displaying a table with records and columns. + + Args: + syntax (Optional[str]): The output syntax of the content. Choices: `"markdown"`, `"html"`, `"json"`, `"text"`, `"csv"`, `"tsv"`, `"xml"`. + records (Optional[Any]): A list, each element is an object / dictionary / list of elements. The keys are the fields and the values are the data in cells. + columns (Optional[Any]): A list of column definitions. Each column definition is an object with keys "field", "header", and "description". + The field is the key in the record object, the header is displayed in the top row, and the description is meant to be an explanation. + Columns are optional. If not provided, the columns are inferred from the records. + src (Optional[str]): The source file to read the data from. This must be provided if records is not provided. + parser (Optional[str]): The parser to use for reading the data. If not provided, it will be inferred from the file extension. Choices: `"auto"`, `"csv"`, `"tsv"`, `"excel"`, `"json"`, `"jsonl"`. + selectedColumns (Optional[Any]): The selected columns to display. If not provided, all columns will be displayed. + It should be an array of column field names, e.g. `["name", "age"]`; or a string like `2:4` to select columns 2 (inclusive) to 4 (exclusive). + There is a special column name called `index` which is the enumeration of the records starting from 0. + You can also use a special value called `+index` to add the index column to the original table. + selectedRecords (Optional[Any]): The selected records to display. If not provided, all records will be displayed. + It should be an array of record indices, e.g. `[0, 1]`; or a string like `2:4` to select records 2 (inclusive) to 4 (exclusive). + maxRecords (Optional[int]): The maximum number of records to display. If not provided, all records will be displayed. + maxColumns (Optional[int]): The maximum number of columns to display. If not provided, all columns will be displayed. + + Example: + ```xml + + ``` + + To import an excel file, and display the first 10 records in csv syntax: + + ```xml +
+ ``` + """ + return self._tag( + tag_name="Table", + syntax=syntax, + records=records, + columns=columns, + src=src, + parser=parser, + selectedColumns=selectedColumns, + selectedRecords=selectedRecords, + maxRecords=maxRecords, + maxColumns=maxColumns, + **kwargs, + ) + + def tree( + self, + syntax: Optional[str] = None, + items: Optional[List[Any]] = None, + showContent: Optional[bool] = None, + **kwargs: Any, + ): + """Renders a tree structure in various formats. + + Args: + syntax (Optional[str]): The output syntax to use for rendering the tree Choices: `"markdown"`, `"html"`, `"json"`, `"yaml"`, `"text"`, `"xml"`. + items (Optional[List[Any]]): Array of tree items to render + showContent (Optional[bool]): Whether to show content values of tree items + + Example: + ```xml + + ``` + """ + return self._tag( + tag_name="Tree", + syntax=syntax, + items=items, + showContent=showContent, + **kwargs, + ) + + def folder( + self, + syntax: Optional[str] = None, + src: Optional[str] = None, + data: Optional[List[Any]] = None, + filter: Optional[str] = None, + maxDepth: Optional[int] = None, + showContent: Optional[bool] = None, + **kwargs: Any, + ): + """Displays a directory structure as a tree. + + Args: + syntax (Optional[str]): The output syntax of the content. Choices: `"markdown"`, `"html"`, `"json"`, `"yaml"`, `"text"`, `"xml"`. + src (Optional[str]): The source directory path to display. + data (Optional[List[Any]]): Alternative to src, directly provide tree data structure. + filter (Optional[str]): A regular expression to filter files. + The regex is applied to the folder names and file names (not the full path). + Directories are included by default unless all of their nested content is filtered out. + When filter is on, empty directories will not be shown. + maxDepth (Optional[int]): Maximum depth of directory traversal. Default is 3. + showContent (Optional[bool]): Whether to show file contents. Default is false. + + Example: + To display a directory structure with a filter for Python files: + ```xml + + ``` + """ + return self._tag( + tag_name="Folder", + syntax=syntax, + src=src, + data=data, + filter=filter, + maxDepth=maxDepth, + showContent=showContent, + **kwargs, + ) + + def captioned_paragraph( + self, + caption: Optional[str] = None, + captionSerialized: Optional[str] = None, + captionStyle: Optional[str] = None, + captionTextTransform: Optional[str] = None, + captionEnding: Optional[str] = None, + **kwargs: Any, + ): + """CaptionedParagraph (`` for short) creates a paragraph with a customized caption title. + + Args: + caption (Optional[str]): The title or label for the paragraph. Required. + captionSerialized (Optional[str]): The serialized version of the caption when using "serializer" syntaxes. + By default, it's same as `caption`. + captionStyle (Optional[str]): Determines the style of the caption, + applicable only for "markup" syntaxes. Default is `header`. Default is `"header"`. Choices: `"header"`, `"bold"`, `"plain"`, `"hidden"`. + captionTextTransform (Optional[str]): Specifies text transformation for the caption, applicable only for "markup" syntaxes. Default is `none`. Default is `"none"`. Choices: `"upper"`, `"level"`, `"capitalize"`, `"none"`. + captionEnding (Optional[str]): A caption can ends with a colon, a newline or simply nothing. + If not specified, it defaults to `colon` for `bold` or `plain` captionStyle, and `none` otherwise. Choices: `"colon"`, `"newline"`, `"colon-newline"`, `"none"`. + + Example: + ```xml + + + Do not exceed 1000 tokens. + Please use simple words. + + + ``` + """ + return self._tag( + tag_name="CaptionedParagraph", + caption=caption, + captionSerialized=captionSerialized, + captionStyle=captionStyle, + captionTextTransform=captionTextTransform, + captionEnding=captionEnding, + **kwargs, + ) + + def webpage( + self, + url: Optional[str] = None, + src: Optional[str] = None, + buffer: Optional[bytes] = None, + base64: Optional[str] = None, + extractText: Optional[bool] = None, + selector: Optional[str] = None, + **kwargs: Any, + ): + """Displays content from a webpage. + + Args: + url (Optional[str]): The URL of the webpage to fetch and display. + src (Optional[str]): Local file path to an HTML file to display. + buffer (Optional[bytes]): HTML content as string or buffer. + base64 (Optional[str]): Base64 encoded HTML content. + extractText (Optional[bool]): Whether to extract plain text content (true) or convert HTML to structured POML (false). Default is false. + selector (Optional[str]): CSS selector to extract specific content from the page (e.g., "article", ".content", "#main"). Default is "body". + + Example: + Display content from a URL: + ```xml + + ``` + + Extract only specific content using a selector: + ```xml + + ``` + + Convert HTML to structured POML components: + ```xml + + ``` + """ + return self._tag( + tag_name="Webpage", + url=url, + src=src, + buffer=buffer, + base64=base64, + extractText=extractText, + selector=selector, + **kwargs, + ) + + def text( + self, + syntax: Optional[str] = None, + className: Optional[str] = None, + speaker: Optional[str] = None, + name: Optional[str] = None, + type: Optional[str] = None, + writerOptions: Optional[Any] = None, + **kwargs: Any, + ): + """Text (``, ``) is a wrapper for any contents. + By default, it uses `markdown` syntax and writes the contents within it directly to the output. + When used with "markup" syntaxes, it renders a standalone section preceded and followed by one blank line. + It's mostly used in the root element of a prompt, but it should also work in any other places. + This component will be automatically added as a wrapping root element if it's not provided: + 1. If the first element is pure text contents, `` will be added. + 2. If the first element is a POML component, `` will be added. + + Args: + syntax (Optional[str]): The syntax of the content. Choices: `"markdown"`, `"html"`, `"json"`, `"yaml"`, `"xml"`, `"text"`. + className (Optional[str]): A class name for quickly styling the current block with stylesheets. + speaker (Optional[str]): The speaker of the content. By default, it's determined by the context and the content. Choices: `"human"`, `"ai"`, `"system"`. + name (Optional[str]): The name of the content, used in serialization. + type (Optional[str]): The type of the content, used in serialization. + writerOptions (Optional[Any]): An experimental optional JSON string to customize the format of markdown headers, JSON indents, etc. + + Example: + ```xml + + Contents of the whole prompt. + + 1. Your customized list. + 2. You don't need to know anything about POML. + + ``` + + To render the whole prompt in markdown syntax with a "human" speaker: + + ```xml + +

You are a helpful assistant.

+

What is the capital of France?

+
+ ``` + """ + return self._tag( + tag_name="Text", + syntax=syntax, + className=className, + speaker=speaker, + name=name, + type=type, + writerOptions=writerOptions, + **kwargs, + ) + + def paragraph( + self, + blankLine: Optional[bool] = None, + **kwargs: Any, + ): + """Paragraph (`

`) is a standalone section preceded by and followed by two blank lines in markup syntaxes. + It's mostly used for text contents. + + Args: + blankLine (Optional[bool]): Whether to add one more blank line (2 in total) before and after the paragraph. + + Example: + ```xml +

Contents of the paragraph.

+ ``` + """ + return self._tag( + tag_name="Paragraph", + blankLine=blankLine, + **kwargs, + ) + + def inline( + self, + syntax: Optional[str] = None, + className: Optional[str] = None, + speaker: Optional[str] = None, + writerOptions: Optional[Any] = None, + **kwargs: Any, + ): + """Inline (``) is a container for inline content. + When used with markup syntaxes, it wraps text in an inline style, without any preceding or following blank characters. + In serializer syntaxes, it's treated as a generic value. + Inline elements are not designed to be used alone (especially in serializer syntaxes). + One might notice problematic renderings (e.g., speaker not applied) when using it alone. + + Args: + syntax (Optional[str]): The syntax of the content. Choices: `"markdown"`, `"html"`, `"json"`, `"yaml"`, `"xml"`, `"text"`. + className (Optional[str]): A class name for quickly styling the current block with stylesheets. + speaker (Optional[str]): The speaker of the content. By default, it's determined by the context and the content. Choices: `"human"`, `"ai"`, `"system"`. + writerOptions (Optional[Any]): An experimental optional JSON string to customize the format of markdown headers, JSON indents, etc. + + Example: + ```xml +

I'm listening to music right now.

+ ``` + """ + return self._tag( + tag_name="Inline", + syntax=syntax, + className=className, + speaker=speaker, + writerOptions=writerOptions, + **kwargs, + ) + + def newline( + self, + newLineCount: Optional[float] = None, + **kwargs: Any, + ): + """Newline (`
`) explicitly adds a line break, primarily in markup syntaxes. + In serializer syntaxes, it's ignored. + + Args: + newLineCount (Optional[float]): The number of linebreaks to add. + + Example: + ```xml +
+ ``` + """ + return self._tag( + tag_name="Newline", + newLineCount=newLineCount, + **kwargs, + ) + + def header( + self, + **kwargs: Any, + ): + """Header (``) renders headings in markup syntaxes. + It's commonly used to highlight titles or section headings. + The header level will be automatically computed based on the context. + Use SubContent (`
`) for nested content. + + + Example: + ```xml +
Section Title
+ ``` + """ + return self._tag( + tag_name="Header", + **kwargs, + ) + + def sub_content( + self, + **kwargs: Any, + ): + """SubContent (`
`) renders nested content, often following a header. + The headers within the section will be automatically adjusted to a lower level. + + + Example: + ```xml + Section Title +
+ Sub-section Title +

Sub-section details

+
+ ``` + """ + return self._tag( + tag_name="SubContent", + **kwargs, + ) + + def bold( + self, + **kwargs: Any, + ): + """Bold (``) emphasizes text in a bold style when using markup syntaxes. + + + Example: + ```xml +

Task: Do something.

+ ``` + """ + return self._tag( + tag_name="Bold", + **kwargs, + ) + + def italic( + self, + **kwargs: Any, + ): + """Italic (``) emphasizes text in an italic style when using markup syntaxes. + + + Example: + ```xml + Your italicized text. + ``` + """ + return self._tag( + tag_name="Italic", + **kwargs, + ) + + def strikethrough( + self, + **kwargs: Any, + ): + """Strikethrough (``, ``) indicates removed or invalid text in markup syntaxes. + + + Example: + ```xml + This messages is removed. + ``` + """ + return self._tag( + tag_name="Strikethrough", + **kwargs, + ) + + def underline( + self, + **kwargs: Any, + ): + """Underline (``) draws a line beneath text in markup syntaxes. + + + Example: + ```xml + This text is underlined. + ``` + """ + return self._tag( + tag_name="Underline", + **kwargs, + ) + + def code( + self, + inline: Optional[bool] = None, + lang: Optional[str] = None, + **kwargs: Any, + ): + """Code is used to represent code snippets or inline code in markup syntaxes. + + Args: + inline (Optional[bool]): Whether to render code inline or as a block. Default is `true`. Default is `"true"`. + lang (Optional[str]): The language of the code snippet. + + Example: + ```xml + const x = 42; + ``` + + ```xml + + const x = 42; + + ``` + """ + return self._tag( + tag_name="Code", + inline=inline, + lang=lang, + **kwargs, + ) + + def list( + self, + listStyle: Optional[str] = None, + **kwargs: Any, + ): + """List (``) is a container for multiple ListItem (``) elements. + When used with markup syntaxes, a bullet or numbering is added. + + Args: + listStyle (Optional[str]): The style for the list marker, such as dash or star. Default is `dash`. Default is `"dash"`. Choices: `"star"`, `"dash"`, `"plus"`, `"decimal"`, `"latin"`. + + Example: + ```xml + + Item 1 + Item 2 + + ``` + """ + return self._tag( + tag_name="List", + listStyle=listStyle, + **kwargs, + ) + + def list_item( + self, + **kwargs: Any, + ): + """ListItem (``) is an item within a List component. + In markup mode, it is rendered with the specified bullet or numbering style. + + + Example: + ```xml + + Item 1 + Item 2 + + ``` + """ + return self._tag( + tag_name="ListItem", + **kwargs, + ) + + def object( + self, + syntax: Optional[str] = None, + data: Optional[Any] = None, + **kwargs: Any, + ): + """Object (``, ``) displays external data or object content. + When in serialize mode, it's serialized according to the given serializer. + + Args: + syntax (Optional[str]): The syntax or serializer of the content. Default is `json`. Default is `"json"`. Choices: `"markdown"`, `"html"`, `"json"`, `"yaml"`, `"xml"`. + data (Optional[Any]): The data object to render. + + Example: + ```xml + + ``` + """ + return self._tag( + tag_name="Object", + syntax=syntax, + data=data, + **kwargs, + ) + + def image( + self, + src: Optional[str] = None, + alt: Optional[str] = None, + base64: Optional[str] = None, + type: Optional[str] = None, + position: Optional[str] = None, + maxWidth: Optional[int] = None, + maxHeight: Optional[int] = None, + resize: Optional[float] = None, + syntax: Optional[str] = None, + **kwargs: Any, + ): + """Image (``) displays an image in the content. + Alternatively, it can also be shown as an alt text by specifying the `syntax` prop. + Note that syntax must be specified as `multimedia` to show the image. + + Args: + src (Optional[str]): The path to the image file. + alt (Optional[str]): The alternative text to show when the image cannot be displayed. + base64 (Optional[str]): The base64 encoded image data. It can not be specified together with `src`. + type (Optional[str]): The MIME type of the image **to be shown**. If not specified, it will be inferred from the file extension. + If specified, the image will be converted to the specified type. Can be `image/jpeg`, `image/png`, etc., or without the `image/` prefix. + position (Optional[str]): The position of the image. Default is `here`. Default is `"here"`. Choices: `"top"`, `"bottom"`, `"here"`. + maxWidth (Optional[int]): The maximum width of the image to be shown. + maxHeight (Optional[int]): The maximum height of the image to be shown. + resize (Optional[float]): The ratio to resize the image to to be shown. + syntax (Optional[str]): Only when specified as `multimedia`, the image will be shown. + Otherwise, the alt text will be shown. By default, it's `multimedia` when `alt` is not specified. Otherwise, it's undefined (inherit from parent). Choices: `"markdown"`, `"html"`, `"json"`, `"yaml"`, `"xml"`, `"multimedia"`. + + Example: + ```xml + + ``` + """ + return self._tag( + tag_name="Image", + src=src, + alt=alt, + base64=base64, + type=type, + position=position, + maxWidth=maxWidth, + maxHeight=maxHeight, + resize=resize, + syntax=syntax, + **kwargs, + ) + + def audio( + self, + src: Optional[str] = None, + base64: Optional[str] = None, + alt: Optional[str] = None, + type: Optional[str] = None, + position: Optional[str] = None, + syntax: Optional[str] = None, + **kwargs: Any, + ): + """Audio (`
``` """ - return self._tag( + return self.tag( tag_name="Table", syntax=syntax, records=records, @@ -703,7 +703,7 @@ def tree( ``` """ - return self._tag( + return self.tag( tag_name="Tree", syntax=syntax, items=items, @@ -740,7 +740,7 @@ def folder( ``` """ - return self._tag( + return self.tag( tag_name="Folder", syntax=syntax, src=src, @@ -782,7 +782,7 @@ def captioned_paragraph( ``` """ - return self._tag( + return self.tag( tag_name="CaptionedParagraph", caption=caption, captionSerialized=captionSerialized, @@ -828,7 +828,7 @@ def webpage( ``` """ - return self._tag( + return self.tag( tag_name="Webpage", url=url, src=src, @@ -884,7 +884,7 @@ def text( ``` """ - return self._tag( + return self.tag( tag_name="Text", syntax=syntax, className=className, @@ -911,7 +911,7 @@ def paragraph(

Contents of the paragraph.

``` """ - return self._tag( + return self.tag( tag_name="Paragraph", blankLine=blankLine, **kwargs, @@ -942,7 +942,7 @@ def inline(

I'm listening to music right now.

``` """ - return self._tag( + return self.tag( tag_name="Inline", syntax=syntax, className=className, @@ -967,7 +967,7 @@ def newline(
``` """ - return self._tag( + return self.tag( tag_name="Newline", newLineCount=newLineCount, **kwargs, @@ -988,7 +988,7 @@ def header(
Section Title
``` """ - return self._tag( + return self.tag( tag_name="Header", **kwargs, ) @@ -1010,7 +1010,7 @@ def sub_content( ``` """ - return self._tag( + return self.tag( tag_name="SubContent", **kwargs, ) @@ -1027,7 +1027,7 @@ def bold(

Task: Do something.

``` """ - return self._tag( + return self.tag( tag_name="Bold", **kwargs, ) @@ -1044,7 +1044,7 @@ def italic( Your italicized text. ``` """ - return self._tag( + return self.tag( tag_name="Italic", **kwargs, ) @@ -1061,7 +1061,7 @@ def strikethrough( This messages is removed. ``` """ - return self._tag( + return self.tag( tag_name="Strikethrough", **kwargs, ) @@ -1078,7 +1078,7 @@ def underline( This text is underlined. ``` """ - return self._tag( + return self.tag( tag_name="Underline", **kwargs, ) @@ -1106,7 +1106,7 @@ def code( ``` """ - return self._tag( + return self.tag( tag_name="Code", inline=inline, lang=lang, @@ -1132,7 +1132,7 @@ def list( ``` """ - return self._tag( + return self.tag( tag_name="List", listStyle=listStyle, **kwargs, @@ -1154,7 +1154,7 @@ def list_item( ``` """ - return self._tag( + return self.tag( tag_name="ListItem", **kwargs, ) @@ -1177,7 +1177,7 @@ def object( ``` """ - return self._tag( + return self.tag( tag_name="Object", syntax=syntax, data=data, @@ -1219,7 +1219,7 @@ def image( ``` """ - return self._tag( + return self.tag( tag_name="Image", src=src, alt=alt, @@ -1264,7 +1264,7 @@ def audio(