# Chain相关源码分析

> **`prep_inputs` 和 `prep_outputs`：这两个方法负责校验并准备链的输入和输出，包括把 Memory（记忆组件，而非机器内存）中的变量合并进输入，以及把本次运行的输入输出保存到 Memory。**

```python
def prep_inputs(self, inputs: Union[Dict[str, Any], Any]) -> Dict[str, str]:
    """Validate and prepare chain inputs, including adding inputs from memory.

    Args:
        inputs: Dictionary of raw inputs, or single input if chain expects
            only one param. Should contain all inputs specified in
            `Chain.input_keys` except for inputs that will be set by the chain's
            memory.

    Returns:
        A dictionary of all inputs, including those added by the chain's memory.

    Raises:
        ValueError: If a single non-dict input is passed but the chain does not
            have exactly one input key left for the caller to fill.
    """
    # The input may be a single bare value (i.e. not a dict), e.g. a string.
    if not isinstance(inputs, dict):
        # Start from every input key the chain declares.
        _input_keys = set(self.input_keys)
        if self.memory is not None:
            # If there are multiple input keys, but some get set by memory so that
            # only one is not set, we can still figure out which key it is.
            # Dropping `self.memory.memory_variables` leaves only the keys that
            # the caller has to supply.
            _input_keys = _input_keys.difference(self.memory.memory_variables)
        # A bare value can only be mapped to a key when exactly one key remains;
        # any other count is rejected with the error below.
        if len(_input_keys) != 1:
            raise ValueError(
                f"A single string input was passed in, but this chain expects "
                f"multiple inputs ({_input_keys}). When a chain expects "
                f"multiple inputs, please call it by passing in a dictionary, "
                "eg `chain({'foo': 1, 'bar': 2})`"
            )
        # Wrap the bare value in a dict under the single remaining key.
        inputs = {list(_input_keys)[0]: inputs}
    if self.memory is not None:
        # Extra context that memory contributes for this call.
        external_context = self.memory.load_memory_variables(inputs)
        # Merge into a new dict; on a key collision the memory value wins.
        inputs = dict(inputs, **external_context)
    # Raises if any key in `self.input_keys` is still missing from `inputs`.
    self._validate_inputs(inputs)
    return inputs
```



```python
    def prep_outputs(
        self,
        inputs: Dict[str, str],
        outputs: Dict[str, str],
        return_only_outputs: bool = False,
    ) -> Dict[str, str]:
        """Validate and prepare chain outputs, and save info about this run to memory.

        Args:
            inputs: Dictionary of chain inputs, including any inputs added by chain
                memory.
            outputs: Dictionary of initial chain outputs.
            return_only_outputs: Whether to only return the chain outputs. If False,
                inputs are also added to the final outputs.

        Returns:
            A dict of the final chain outputs.
        """
        # Raises if any key in `self.output_keys` is missing from `outputs`.
        self._validate_outputs(outputs)
        if self.memory is not None:
            # Record this run's inputs and outputs in the chain's memory.
            self.memory.save_context(inputs, outputs)
        if return_only_outputs:
            return outputs
        else:
            # Merge inputs and outputs; on a key collision the output value wins.
            return {**inputs, **outputs}

```

```python
def _validate_inputs(self, inputs: Dict[str, Any]) -> None:
    """Check that all inputs are present.

    Args:
        inputs: The prepared inputs, i.e. what `prep_inputs` produces after
            merging in any keys loaded from memory.

    Raises:
        ValueError: If any key in `self.input_keys` is absent from `inputs`.
    """
    # Set difference against a dict compares by key, so only key presence is checked.
    missing_keys = set(self.input_keys).difference(inputs)
    if missing_keys:
        raise ValueError(f"Missing some input keys: {missing_keys}")

def _validate_outputs(self, outputs: Dict[str, Any]) -> None:
    """Check that all expected output keys are present.

    Args:
        outputs: Dictionary of chain outputs to check.

    Raises:
        ValueError: If any key in `self.output_keys` is absent from `outputs`.
    """
    # Only key presence is checked; the values themselves are not inspected.
    missing_keys = set(self.output_keys).difference(outputs)
    if missing_keys:
        raise ValueError(f"Missing some output keys: {missing_keys}")
```

![](./imgs/__call__.jpg)

```python
def __call__(
    self,
    inputs: Union[Dict[str, Any], Any],
    return_only_outputs: bool = False,
    callbacks: Callbacks = None,
    *,
    tags: Optional[List[str]] = None,
    metadata: Optional[Dict[str, Any]] = None,
    run_name: Optional[str] = None,
    include_run_info: bool = False,
) -> Dict[str, Any]:
    """Execute the chain.

    Args:
        inputs: Dictionary of inputs, or single input if chain expects
            only one param. Should contain all inputs specified in
            `Chain.input_keys` except for inputs that will be set by the chain's
            memory.
        return_only_outputs: Whether to return only outputs in the
            response. If True, only new keys generated by this chain will be
            returned. If False, both input keys and new keys generated by this
            chain will be returned. Defaults to False.
        callbacks: Callbacks to use for this chain run. These will be called in
            addition to callbacks passed to the chain during construction, but only
            these runtime callbacks will propagate to calls to other objects.
        tags: List of string tags to pass to all callbacks. These will be passed in
            addition to tags passed to the chain during construction, but only
            these runtime tags will propagate to calls to other objects.
        metadata: Optional metadata associated with the chain. Defaults to None
        run_name: Optional name forwarded to `on_chain_start` as `name`.
            Defaults to None.
        include_run_info: Whether to include run info in the response. Defaults
            to False.

    Returns:
        A dict of named outputs. Should contain all outputs specified in
            `Chain.output_keys`.
    """
    # Normalize and validate the raw inputs before running the chain.
    inputs = self.prep_inputs(inputs)
    # Build the callback manager from the run-time and construction-time
    # callbacks, tags and metadata.
    callback_manager = CallbackManager.configure(
        callbacks,
        self.callbacks,
        self.verbose,
        tags,
        self.tags,
        metadata,
        self.metadata,
    )
    # Only pass `run_manager` to `_call` if its signature declares that parameter.
    new_arg_supported = inspect.signature(self._call).parameters.get("run_manager")
    # Signal the start of the run; the returned run manager is used for the
    # remaining callbacks of this run.
    run_manager = callback_manager.on_chain_start(
        dumpd(self),
        inputs,
        name=run_name,
    )
    # Run the chain logic in `_call`. Any exception (BaseException, so this
    # includes KeyboardInterrupt) is reported to the callbacks and re-raised;
    # it is not swallowed here.
    try:
        outputs = (
            self._call(inputs, run_manager=run_manager)
            if new_arg_supported
            else self._call(inputs)
        )
    except BaseException as e:
        run_manager.on_chain_error(e)
        raise e

    # Signal the end of the run, passing the raw outputs of `_call`.
    run_manager.on_chain_end(outputs)

    # 1. Validate the outputs and shape the returned dict.
    final_outputs: Dict[str, Any] = self.prep_outputs(
        inputs, outputs, return_only_outputs
    )
    # 2. Optionally attach the run id under `RUN_KEY`.
    if include_run_info:
        final_outputs[RUN_KEY] = RunInfo(run_id=run_manager.run_id)
    # 3. Return the final dict.
    return final_outputs
```

> **input_keys 的开始与结束**

```python
@property
def input_keys(self) -> List[str]: 
    """Return the chain's input keys, which are the prompt's input variables.

    :meta private:
    """ 
    return self.prompt.input_variables
```

In [1]:
from langchain.prompts import PromptTemplate

In [2]:
template = """您是一个正在与人类对话的聊天机器人.

{chat_history}
人类: {human_input}
聊天机器人:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "human_input"], template=template
)

In [3]:
prompt.input_variables

['chat_history', 'human_input']

In [4]:
prompt = PromptTemplate.from_template(template)

In [5]:
prompt.input_variables

['chat_history', 'human_input']

> 我们看看 `prompt.input_variables` 是如何从模板字符串中解析出来的

In [None]:
def get_template_variables(template: str, template_format: str) -> List[str]:  
    """Extract the variable names referenced by a template string.

    Args:
        template: The template text to scan.
        template_format: Either "jinja2" or "f-string".

    Returns:
        The variable names found in the template, sorted.

    Raises:
        ValueError: If `template_format` is neither "jinja2" nor "f-string".
    """
    if template_format == "jinja2":
        input_variables = _get_jinja2_variables_from_template(template)
    elif template_format == "f-string":
        # Formatter().parse yields (literal_text, field_name, format_spec,
        # conversion) tuples; keep each non-None field_name. The set removes
        # duplicates when a variable appears more than once.
        input_variables = {
            v for _, v, _, _ in Formatter().parse(template) if v is not None
        }
    else: 
        raise ValueError(f"Unsupported template format: {template_format}")
    return sorted(input_variables)

In [6]:
from string import Formatter

In [10]:
template = """您是一个正在与人类对话的聊天机器人.

{chat_history}
人类: {human_input}
聊天机器人:"""

In [11]:
for a, b, c, d in Formatter().parse(template):
    print([a], [b], [c], [d])

['您是一个正在与人类对话的聊天机器人.\n\n'] ['chat_history'] [''] [None]
['\n人类: '] ['human_input'] [''] [None]
['\n聊天机器人:'] [None] [None] [None]


> <font color=blue size=4>这就是 input_keys的开始,以及从Memory加载的过程</font>

# LLMChain相关源码分析