# Python 知识

# 代码分析

## 类继承关系
```mermaid
classDiagram
    %% 基本类
    class IndexingError
    class IndexingBase
    class ParamIndexer
    class LocBase
    
    %% 一级继承
    class iLoc
    class Loc
    class PandasIndexer
    class ParamLoc
    
    %% 继承关系（未标出父类的类直接继承自 object）
    LocBase <|-- iLoc
    LocBase <|-- Loc
    LocBase <|-- ParamLoc
    IndexingBase <|-- PandasIndexer
    IndexingBase <|-- ParamIndexer
    ParamIndexer o-- ParamLoc: has

    PandasIndexer o-- iLoc : has
    PandasIndexer o-- Loc : has
```

## class IndexingBase
```python
IndexingBaseT = tp.TypeVar("IndexingBaseT", bound="IndexingBase")


class IndexingBase:
    """Interface for classes that support pandas-style indexing.

    The base implementation of `indexing_func` only raises; subclasses are
    expected to override it.
    """

    def indexing_func(self: IndexingBaseT, pd_indexing_func: tp.Callable, **kwargs) -> IndexingBaseT:
        """Apply `pd_indexing_func` and return an instance of the same type.

        Args:
            pd_indexing_func: Callable performing the indexing operation.
            **kwargs: Extra keyword arguments for the concrete implementation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
```

## class LocBase
```python
class LocBase:
    """Base for `[]`-style accessors that delegate to a stored indexing function."""

    def __init__(self, indexing_func: tp.Callable, **kwargs) -> None:
        # Keep both the callable and the extra keyword arguments so that
        # subclasses can forward them on every `__getitem__` call.
        self._indexing_kwargs = kwargs
        self._indexing_func = indexing_func

    @property
    def indexing_func(self) -> tp.Callable:
        """Indexing function passed at construction."""
        return self._indexing_func

    @property
    def indexing_kwargs(self) -> dict:
        """Keyword arguments passed at construction."""
        return self._indexing_kwargs

    def __getitem__(self, key: tp.Any) -> tp.Any:
        """Raise `NotImplementedError`; subclasses define the actual lookup."""
        raise NotImplementedError
```

## class iLoc(LocBase)
```python
class iLoc(LocBase):
    """Accessor that forwards `[key]` as an `.iloc[key]` lookup."""

    def __getitem__(self, key: tp.Any) -> tp.Any:
        """Call the stored indexing function with a function applying `.iloc[key]`."""

        def pd_indexing_func(obj: tp.Any) -> tp.Any:
            # Executed by the indexing function on each object it manages.
            return obj.iloc[key]

        return self.indexing_func(pd_indexing_func, **self.indexing_kwargs)
```

## class Loc(LocBase)
```python
class Loc(LocBase):
    """Accessor that forwards `[key]` as a `.loc[key]` lookup."""

    def __getitem__(self, key: tp.Any) -> tp.Any:
        """Call the stored indexing function with a function applying `.loc[key]`."""

        def pd_indexing_func(obj: tp.Any) -> tp.Any:
            # Executed by the indexing function on each object it manages.
            return obj.loc[key]

        return self.indexing_func(pd_indexing_func, **self.indexing_kwargs)
```

## class PandasIndexer(IndexingBase)
### 使用方法
`PandasIndexer` 被某类 `A` 继承：
- 类 `A` 拥有若干 `Pandas` 属性。
- 类 `A` 须具体实现 `indexing_func(self: IndexingBaseT, pd_indexing_func: tp.Callable, **kwargs)` 方法（类 `IndexingBase` 要求）来操作其 `Pandas` 属性，然后构建一个 `A` 的实例返回。

类 `A` 拥有了继承的：
- `self._iloc`：
  - `a.iloc[key]——>a._iloc.__getitem__(key)——>indexing_func(lambda x: x.iloc.__getitem__(key))`
- `self._loc`：
  - `a.loc[key]——>a._loc.__getitem__(key)——>indexing_func(lambda x: x.loc.__getitem__(key))`
- `__getitem__`：
  - `a[key]——>a.__getitem__(key)——>indexing_func(lambda x: x.__getitem__(key))`
- `xs`：
  - `a.xs(*args, **kwargs)——>indexing_func(lambda x: x.xs(*args, **kwargs))`
  
例子：
  ```python
  class Portfolio(PandasIndexer):
      """Example container of two pandas objects that supports pandas-style indexing."""

      def __init__(self, returns_df, positions_df):
          self.returns = returns_df
          self.positions = positions_df
          # No indexing kwargs are registered here, so `indexing_func`
          # is later called without extra keyword arguments.
          super().__init__()
          
      def indexing_func(self, pd_indexing_func, **kwargs):
          """Apply `pd_indexing_func` to every pandas attribute and build a new `Portfolio`.

          `**kwargs` is accepted (and ignored) to match the signature of
          `IndexingBase.indexing_func`: `PandasIndexer`, `iLoc` and `Loc`
          always call this method with `**indexing_kwargs`, so a signature
          without it breaks as soon as any indexing kwargs are registered.
          """
          return Portfolio(
              pd_indexing_func(self.returns),
              pd_indexing_func(self.positions)
          )
  
  portfolio = Portfolio(returns_df, positions_df)
  # A Portfolio can now be indexed just like a DataFrame
  recent_data = portfolio.iloc[-30:]
  ```

### 源码
```python
PandasIndexerT = tp.TypeVar("PandasIndexerT", bound="PandasIndexer")


class PandasIndexer(IndexingBase):
    """Mixin exposing `iloc`, `loc`, `xs` and `[]` on top of `indexing_func`.

    Each operation builds a function that performs the equivalent operation
    on a single object and hands it, together with the stored indexing
    kwargs, to `self.indexing_func`.
    """

    def __init__(self, **kwargs) -> None:
        # The same kwargs are shared with both accessors and kept on the
        # instance for `xs` and `__getitem__`.
        self._indexing_kwargs = kwargs
        self._iloc = iLoc(self.indexing_func, **kwargs)
        self._loc = Loc(self.indexing_func, **kwargs)

    @property
    def indexing_kwargs(self) -> dict:
        """Keyword arguments forwarded to `indexing_func` on every call."""
        return self._indexing_kwargs

    # The documentation of the two accessor properties below is taken over
    # from the accessor classes themselves.
    @property
    def iloc(self) -> iLoc:
        return self._iloc

    iloc.__doc__ = iLoc.__doc__

    @property
    def loc(self) -> Loc:
        return self._loc

    loc.__doc__ = Loc.__doc__

    def xs(self: PandasIndexerT, *args, **kwargs) -> PandasIndexerT:
        """Forward `.xs(*args, **kwargs)` through `indexing_func`."""

        def pd_indexing_func(obj: tp.Any) -> tp.Any:
            return obj.xs(*args, **kwargs)

        return self.indexing_func(pd_indexing_func, **self.indexing_kwargs)

    def __getitem__(self: PandasIndexerT, key: tp.Any) -> PandasIndexerT:
        """Forward `[key]` through `indexing_func`."""

        def pd_indexing_func(obj: tp.Any) -> tp.Any:
            return obj[key]

        return self.indexing_func(pd_indexing_func, **self.indexing_kwargs)
```

## class ParamLoc(LocBase)
### 使用方法
作为某类 `A` 的属性：
- 类 `A` 拥有若干 `Pandas` 属性

    须实现 `indexing_func(self, pd_indexing_func: tp.Callable, **kwargs)` 方法来操作这些 `Pandas` 属性，然后构建一个 `A` 的实例返回。
- 类 `A` 的 `__init__` 方法中构建 `ParamLoc` 实例 `_paramloc`，传递 `mapper`、`indexing_func`、`level_name`

类 `A` 拥有了 `self._paramloc`：`self.paramloc[key]——>self._paramloc.__getitem__(key)`
- 首先获取 `key` 在 `self._paramloc.mapper` 中的整数索引 `indices`
- 然后调用 `self._paramloc.indexing_func(pd_indexing_func, **self.indexing_kwargs)`
  - 其中 `pd_indexing_func(obj: tp.SeriesFrame)`：先用 `obj.iloc[:, indices]` 选出对应的列；当 `key` 是单个值（不是切片、列表或数组）、`self.level_name` 不为 `None` 且结果的列索引是 `MultiIndex` 时，再从列索引中删除 `self.level_name` 这一层级；最后返回新的 `obj`

### 源码
```python
class ParamLoc(LocBase):
    """Accessor that selects columns by parameter value.

    `mapper` is a Series whose values are matched against the key with
    `pd.Series.loc` semantics; the integer positions of the matching entries
    are then used to pick columns by position.
    """

    def __init__(self, mapper: tp.Series, indexing_func: tp.Callable, level_name: tp.Level = None, **kwargs) -> None:
        checks.assert_instance_of(mapper, pd.Series)

        # Object-typed mappers are compared as strings ('O' is the object dtype).
        if mapper.dtype == 'O':
            mapper = mapper.astype(str)

        self._level_name = level_name
        self._mapper = mapper

        LocBase.__init__(self, indexing_func, **kwargs)

    @property
    def mapper(self) -> tp.Series:
        """Mapper Series given at construction (cast to `str` if it had object dtype)."""
        return self._mapper

    @property
    def level_name(self) -> tp.Level:
        """Name of the column level to drop after a single-value selection, if any."""
        return self._level_name

    def get_indices(self, key: tp.Any) -> tp.Array1d:
        """Return the integer position(s) in the mapper whose value matches `key`."""
        if self.mapper.dtype == 'O':
            # The mapper holds strings, so the key must be stringified as well.
            if isinstance(key, slice):
                start = None if key.start is None else str(key.start)
                stop = None if key.stop is None else str(key.stop)
                key = slice(start, stop, key.step)
            elif isinstance(key, (list, np.ndarray)):
                key = [str(item) for item in key]
            else:
                key = str(key)

        # Positions 0..n-1 labelled by the mapper values: a label lookup on
        # this Series yields the positions of the matching entries.
        positions = pd.Series(np.arange(len(self.mapper.index)), index=self.mapper.values)
        indices = positions.loc[key]

        return indices.values if isinstance(indices, pd.Series) else indices

    def __getitem__(self, key: tp.Any) -> tp.Any:
        """Select the columns matching `key` through the stored indexing function."""
        indices = self.get_indices(key)
        selects_many = isinstance(key, (slice, list, np.ndarray))

        def pd_indexing_func(obj: tp.SeriesFrame) -> tp.MaybeSeriesFrame:
            selected = obj.iloc[:, indices]
            # The parameter level is dropped only for a single-value key,
            # when a level name is set and the result is a DataFrame with
            # MultiIndex columns.
            drop_level = (
                not selects_many
                and self.level_name is not None
                and checks.is_frame(selected)
                and isinstance(selected.columns, pd.MultiIndex)
            )
            if drop_level:
                selected.columns = index_fns.drop_levels(selected.columns, self.level_name)
            return selected

        return self.indexing_func(pd_indexing_func, **self.indexing_kwargs)
```

## def indexing_on_mapper
`mapper` 是Series：索引Index为 `ref_obj` 的列名，值为映射后的值。

`pd_indexing_func` 操作和 `ref_obj` 形状一致的DataFrame的部分列。

该函数的功能就是返回 `pd_indexing_func` 操作 `ref_obj` 后的修正的 `mapper`。


- 先将 `np.arange(len(mapper.index))` 广播到 `ref_obj`
  - 类似于
  
    |  |  |  | |
    |:---------|:-----------:|:------------:|:------------:|
    | |0 | 1| 2|
    | | 0 | 1 | 2 |
    |  | 0 | 1 | 2 |

- `pd_indexing_func` 例如操作结果

    |  |  |  |
    |:---------|:-----------:|:------------:|
    | |0 | 1|
    | | 0 | 1 |
    |  | 0 | 1 |

- 然后取结果的第一行 `[0, 1]` 作为整数位置，从 `mapper` 中选中 `mapper.iloc[[0, 1]]`

```python
def indexing_on_mapper(mapper: tp.Series, ref_obj: tp.SeriesFrame,
                       pd_indexing_func: tp.Callable) -> tp.Optional[tp.Series]:
    """Return the part of `mapper` selected by applying `pd_indexing_func` to `ref_obj`.

    A range `0..len(mapper) - 1` is broadcast to `ref_obj` and indexed with
    `pd_indexing_func`; the first row (first element for a Series) of the
    result gives the integer positions of the mapper entries to keep.

    Args:
        mapper: Series to be indexed; must be a `pd.Series`.
        ref_obj: Series or DataFrame the range is broadcast to.
        pd_indexing_func: Indexing operation applied to the broadcast range.

    Returns:
        A new Series named like `mapper`: indexed by the resulting columns
        when the indexing result is a DataFrame, or by the result's name
        when it is a Series. `None` when the result is neither.
    """
    checks.assert_instance_of(mapper, pd.Series)
    checks.assert_instance_of(ref_obj, (pd.Series, pd.DataFrame))

    # presumably yields an object shaped like `ref_obj` whose rows all hold
    # 0..len(mapper)-1 -- TODO confirm against `reshape_fns.broadcast_to`
    df_range_mapper = reshape_fns.broadcast_to(np.arange(len(mapper.index)), ref_obj)

    loced_range_mapper = pd_indexing_func(df_range_mapper)

    # Check the result type before touching `.values`: a result that is
    # neither a DataFrame nor a Series (e.g. a scalar) has no `.values`, and
    # reading it first made the final `return None` unreachable.
    if checks.is_frame(loced_range_mapper):
        new_mapper = mapper.iloc[loced_range_mapper.values[0]]
        return pd.Series(new_mapper.values, index=loced_range_mapper.columns, name=mapper.name)
    if checks.is_series(loced_range_mapper):
        new_mapper = mapper.iloc[loced_range_mapper.values[0]]
        return pd.Series([new_mapper], index=[loced_range_mapper.name], name=mapper.name)

    return None
```

## def build_param_indexer
动态生成一个类型：
- `__name__、__qualname__、__module__` 分别为参数 `class_name、class_name、module_name`
- 拥有 `property` 类型的描述符 `{param_name}_loc`，其中 `param_name` 是参数 `param_names` 中的各项：
  - 返回 `getattr(self, '_{param_name}_loc')`，即实例上的私有属性 `_{param_name}_loc`

生成的类型被某类 `A` 继承：
- 类 `A` 拥有若干 `Pandas` 属性。
- 类 `A` 须具体实现 `indexing_func(self: IndexingBaseT, pd_indexing_func: tp.Callable, **kwargs)` 方法（类 `IndexingBase` 要求）来操作其 `Pandas` 属性，然后构建一个 `A` 的实例返回。
- 继承后类 `A` 会拥有多个 `ParamLoc` 类型的 `self._{param_name}_loc` 属性
  - 于是可以通过只读属性 `self.{param_name}_loc[key]` 使用它（等价于 `self._{param_name}_loc[key]`）

```python
def build_param_indexer(param_names: tp.Sequence[str], class_name: str = 'ParamIndexer',
                        module_name: tp.Optional[str] = None) -> tp.Type[IndexingBase]:
    """Create an `IndexingBase` subclass with one `ParamLoc` accessor per parameter.

    For every name in `param_names` the returned class gets a read-only
    property `{param_name}_loc` that returns the instance attribute
    `_{param_name}_loc`, which the class's `__init__` sets to a `ParamLoc`.

    Args:
        param_names: Parameter names to generate accessors for.
        class_name: Value assigned to `__name__` and `__qualname__` of the class.
        module_name: Value assigned to `__module__`; left untouched if `None`.

    Returns:
        The newly created class.
    """

    class ParamIndexer(IndexingBase):
        def __init__(self, param_mappers: tp.Sequence[tp.Series],
                     level_names: tp.Optional[tp.LevelSequence] = None, **kwargs) -> None:
            # One mapper is required per parameter name.
            checks.assert_len_equal(param_names, param_mappers)

            for idx, name in enumerate(param_names):
                level = None if level_names is None else level_names[idx]
                accessor = ParamLoc(param_mappers[idx], self.indexing_func, level_name=level, **kwargs)
                # Stored privately as `_{name}_loc`; exposed through the
                # property attached to the class below.
                setattr(self, f'_{name}_loc', accessor)

    for param_name in param_names:
        private_attr = f'_{param_name}_loc'

        # The attribute name is bound as a default argument so that each
        # getter keeps its own value instead of the loop's last one.
        def param_loc(self, _private_attr=private_attr) -> ParamLoc:
            return getattr(self, _private_attr)

        # The property created below picks up this docstring from the getter.
        param_loc.__doc__ = f"""Access a group of columns by parameter `{param_name}` using `pd.Series.loc`.
        
        Forwards this operation to each Series/DataFrame and returns a new class instance.
        """

        setattr(ParamIndexer, param_name + '_loc', property(param_loc))

    ParamIndexer.__qualname__ = class_name
    ParamIndexer.__name__ = class_name
    if module_name is not None:
        ParamIndexer.__module__ = module_name

    return ParamIndexer
```