### format_document case

In [1]:
from langchain.schema import Document
from langchain.prompts import PromptTemplate

from langchain.schema.prompt_template import format_document

In [2]:
# Sample document: some page content plus a single metadata key ("page").
doc = Document(page_content="This is a joke", metadata={"page": "1"})

In [3]:
# Echo the document to inspect its repr.
doc

Document(page_content='This is a joke', metadata={'page': '1'})

In [4]:
# Template with two placeholders: {page} (a metadata key of the sample
# document) and {page_content} (the document's own content).
prompt = PromptTemplate.from_template("Page {page}: {page_content}")

In [5]:
# Render the sample document through the template.
format_document(doc, prompt)

'Page 1: This is a joke'

> format_document 同时使用了 metadata 中的 key(如 `page`)与 Document 自身的 `page_content` 字段来填充模板

### 源码分析

In [None]:
def format_document(doc: Document, prompt: BasePromptTemplate) -> str:
    """Format a document into a string based on a prompt template.

    The template may reference ``page_content`` as well as any key found in
    ``doc.metadata``. Only the variables listed in ``prompt.input_variables``
    are passed to ``prompt.format``; other metadata keys are ignored.

    Args:
        doc: Document, the page_content and metadata will be used to create
            the final string.
        prompt: BasePromptTemplate, will be used to format the page_content
            and metadata into the final string.

    Returns:
        string of the document formatted.

    Raises:
        ValueError: If the prompt requires a variable that is neither
            ``page_content`` nor a key of ``doc.metadata``.
    """
    # NOTE: metadata is unpacked last, so a metadata key literally named
    # "page_content" takes precedence over doc.page_content.
    base_info = {"page_content": doc.page_content, **doc.metadata}
    missing_metadata = set(prompt.input_variables).difference(base_info)
    if missing_metadata:
        required_metadata = [
            iv for iv in prompt.input_variables if iv != "page_content"
        ]
        # Sets have no stable iteration order; sort the missing names so the
        # error message is reproducible across runs.
        raise ValueError(
            "Document prompt requires documents to have metadata variables: "
            f"{required_metadata}. Received document with missing metadata: "
            f"{sorted(missing_metadata)}."
        )
    # Pass only what the template asks for, in the template's own order.
    document_info = {k: base_info[k] for k in prompt.input_variables}
    return prompt.format(**document_info)

> 1. 组合page_content 与 metadata

In [6]:
# Step 1: merge the page content and every metadata entry into one flat dict.
base_info = {"page_content": doc.page_content, **doc.metadata}
base_info

{'page_content': 'This is a joke', 'page': '1'}

> 2. 计算差集:找出模板需要(`prompt.input_variables`)但 `base_info` 中不存在的变量

In [7]:
# Step 2: template variables that base_info cannot supply.
# An empty set means nothing is missing.
missing_metadata = set(prompt.input_variables).difference(base_info)
missing_metadata

set()

> 3. 如果差集为空(即不缺少参数),则只保留模板需要的键,并调用 `BasePromptTemplate.format` 进行格式化,生成 prompt 字符串;否则抛出 `ValueError`

In [8]:
# Step 3: keep only the entries the template actually asks for.
document_info = {k: base_info[k] for k in prompt.input_variables}
document_info

{'page': '1', 'page_content': 'This is a joke'}

In [9]:
# Fill the template with the selected values to get the final string.
prompt.format(**document_info)

'Page 1: This is a joke'

In [11]:
# Variable names the template expects; these drive the checks above.
prompt.input_variables

['page', 'page_content']