# 代码分析

## 类继承关系
```mermaid
classDiagram
    %% 元类层次结构
    class dict
    class symbol_dict
    class Configured
    class PandasIndexer
    class AttrResolver
    class Wrapping
    class MetaPlotsBuilderMixin {
        <<metaclass>>
    }
    class MetaStatsBuilderMixin {
        <<metaclass>>
    }
    class MetaData {
        <<metaclass>>
    }

    class StatsBuilderMixin
    class PlotsBuilderMixin
    class Data

    
    %% 继承关系
    dict <|-- symbol_dict
    Wrapping <|-- Data

    Configured <|-- Wrapping
    PandasIndexer <|-- Wrapping
    AttrResolver <|-- Wrapping

    MetaPlotsBuilderMixin <|-- PlotsBuilderMixin: metaclass
    MetaPlotsBuilderMixin <|-- MetaData: 继承
    MetaStatsBuilderMixin <|-- MetaData: 继承
    MetaStatsBuilderMixin <|-- StatsBuilderMixin: metaclass

    StatsBuilderMixin <|-- Data
    PlotsBuilderMixin <|-- Data
    MetaData <|-- Data: metaclass
```

## class symbol_dict(dict)
用于存储以金融符号为键的配置参数。例如：
```python
# 为不同符号设置不同的起始值
start_values = symbol_dict({
    'AAPL': 1000000,    # Apple stock starts with an investment of 1,000,000
    'GOOGL': 500000     # Google stock starts with an investment of 500,000
})

# Use a different time range for each symbol
start_dates = symbol_dict({
    'AAPL': '2020-01-01',
    'GOOGL': '2021-01-01'
})
```

## class MetaData(type(StatsBuilderMixin), type(PlotsBuilderMixin))
将统计构建器元类 `MetaStatsBuilderMixin` 和图表构建器元类 `MetaPlotsBuilderMixin` 整合为一个元类。
```python
class MetaData(type(StatsBuilderMixin), type(PlotsBuilderMixin)):
    """Metaclass that derives from the metaclasses of both builder mixins.

    Its bases are `type(StatsBuilderMixin)` and `type(PlotsBuilderMixin)`, so a
    class that inherits from both mixins can name a single metaclass.
    """
    pass
```

## class Data(Wrapping, StatsBuilderMixin, PlotsBuilderMixin, metaclass=MetaData)

### `__init__`
参数：
- `wrapper (ArrayWrapper)`: 数组包装器，提供统一的数组操作接口
- `data (tp.Data)`: 数据字典，以符号为键，pandas对象为值
- `tz_localize (tp.TimezoneLike, optional)`: 时区本地化参数
- `tz_convert (tp.TimezoneLike, optional)`: 时区转换参数  
- `missing_index (str)`: 索引缺失处理策略 ('nan', 'drop', 'raise')
- `missing_columns (str)`: 列缺失处理策略 ('nan', 'drop', 'raise')
- `download_kwargs (dict)`: 下载时使用的关键字参数
- `**kwargs`: 传递给父类的额外参数

初始化过程：
- 调用 `Wrapping` 父类初始化，设置数组包装器
- 调用 `StatsBuilderMixin` 初始化，启用统计功能
- 调用 `PlotsBuilderMixin` 初始化，启用图表功能
- 验证数据字典 `data` 的格式和一致性
- 存储配置参数为属性
```python
def __init__(self,
             wrapper: ArrayWrapper,
             data: tp.Data,
             tz_localize: tp.Optional[tp.TimezoneLike],
             tz_convert: tp.Optional[tp.TimezoneLike],
             missing_index: str,
             missing_columns: str,
             download_kwargs: dict,
             **kwargs) -> None:
    """Initialize the instance from a wrapper and a symbol-keyed data dictionary.

    Args:
        wrapper: Array wrapper forwarded to `Wrapping.__init__`.
        data: Dictionary of data objects keyed by symbol.
        tz_localize: Timezone setting stored on the instance.
        tz_convert: Timezone setting stored on the instance.
        missing_index: Strategy name for mismatching indexes, stored on the instance.
        missing_columns: Strategy name for mismatching columns, stored on the instance.
        download_kwargs: Keyword arguments used for downloading, stored on the instance.
        **kwargs: Additional keyword arguments forwarded to `Wrapping.__init__`.

    Raises:
        Whatever `checks.assert_instance_of` and `checks.assert_meta_equal` raise
        when `data` is not a dict or its values do not share the same metadata.
    """
    Wrapping.__init__(
        self,
        wrapper,
        data=data,
        tz_localize=tz_localize,
        tz_convert=tz_convert,
        missing_index=missing_index,
        missing_columns=missing_columns,
        download_kwargs=download_kwargs,
        **kwargs
    )
    StatsBuilderMixin.__init__(self)
    PlotsBuilderMixin.__init__(self)

    # The data argument must be a dictionary
    checks.assert_instance_of(data, dict)
    if data:
        # Look up the reference object once instead of rebuilding the key list
        # on every iteration; every value is compared against the first one
        reference = next(iter(data.values()))
        for obj in data.values():
            checks.assert_meta_equal(obj, reference)

    self._data = data
    self._tz_localize = tz_localize
    self._tz_convert = tz_convert
    self._missing_index = missing_index
    self._missing_columns = missing_columns
    self._download_kwargs = download_kwargs
```

### indexing_func
由基类 `Wrapping` 使用，可以对 `Data` 实例进行 pandas 风格的索引操作，如 `.loc`、`.iloc`、切片等。

参考 [indexing.ipynb](../base/indexing.ipynb)。
```python
def indexing_func(self: DataT, pd_indexing_func: tp.PandasIndexingFunc, **kwargs) -> DataT:
    """Apply a pandas indexing function to the wrapper and to every data object.

    Args:
        pd_indexing_func: Callable applied to `self.wrapper` and to each value
            of `self.data`.
        **kwargs: Accepted but not used by this method.

    Returns:
        The result of `self.replace` called with the indexed wrapper and data.
    """
    # Index the wrapper first, then each symbol's object with the same function
    indexed_wrapper = pd_indexing_func(self.wrapper)
    indexed_data = {}
    for symbol, obj in self.data.items():
        indexed_data[symbol] = pd_indexing_func(obj)
    return self.replace(wrapper=indexed_wrapper, data=indexed_data)
```

### align_index
统一 `data` 不同符号数据中的时间索引。

参数：
- `data (tp.Data)`: 待对齐的数据字典，键为符号，值为pandas对象
- `missing (str)`
  - `'nan'`: 将缺失的数据点设置为NaN
  - `'drop'`: 删除缺失的数据点
  - `'raise'`: 遇到不匹配时抛出异常
```python
@classmethod
def align_index(cls, data: tp.Data, missing: str = 'nan') -> tp.Data:
    """Align the index of every object in `data` to one common index.

    Args:
        data: Dictionary of pandas objects keyed by symbol.
        missing: What to do when two indexes differ:
            'nan' uses the union of the indexes, 'drop' uses their
            intersection, 'raise' raises an error.

    Returns:
        `data` itself if it holds exactly one item, otherwise a new dictionary
        with every object reindexed to the common index.

    Raises:
        ValueError: If indexes differ and `missing` is 'raise' or not recognized.
    """
    if len(data) == 1:
        return data

    common_index = None
    for obj in data.values():
        if common_index is None:
            # The first object seeds the common index
            common_index = obj.index
            continue

        shared = common_index.intersection(obj.index)
        combined = common_index.union(obj.index)
        if len(shared) == len(combined):
            # Both indexes hold the same labels, nothing to reconcile
            continue

        if missing == 'nan':
            warnings.warn("Symbols have mismatching index. "
                          "Setting missing data points to NaN.", stacklevel=2)
            common_index = combined
        elif missing == 'drop':
            warnings.warn("Symbols have mismatching index. "
                          "Dropping missing data points.", stacklevel=2)
            common_index = shared
        elif missing == 'raise':
            raise ValueError("Symbols have mismatching index")
        else:
            raise ValueError(f"missing='{missing}' is not recognized")

    return {symbol: obj.reindex(index=common_index) for symbol, obj in data.items()}
```

In [None]:
import pandas as pd
from vectorbt.data.base import Data

data = {
    'AAPL': pd.Series([100, 101, 102], index=['2023-01-01', '2023-01-02', '2023-01-03']),
    'GOOGL': pd.Series([2000, 2010], index=['2023-01-01', '2023-01-03'])  # 2023-01-02 is missing
}

# Use the 'nan' strategy
aligned = Data.align_index(data, missing='nan')
print(aligned)  # GOOGL is NaN at 2023-01-02

# Use the 'drop' strategy
aligned = Data.align_index(data, missing='drop') 
print(aligned)  # only 2023-01-01 and 2023-01-03 are kept

# Use the 'raise' strategy
try:
    aligned = Data.align_index(data, missing='raise')
except ValueError:
    print("索引不匹配，抛出异常")

### align_columns
统一 `data` 不同符号数据中的列。

参数：
- `data (tp.Data)`: 待对齐的数据字典，键为符号，值为pandas对象
- `missing (str)`
  - `'nan'`: 将缺失的数据点设置为NaN
  - `'drop'`: 删除缺失的数据点
  - `'raise'`: 遇到不匹配时抛出异常
```python
@classmethod
def align_columns(cls, data: tp.Data, missing: str = 'raise') -> tp.Data:
    """Align the columns of every object in `data` to one common set of columns.

    Series are converted to single-column frames for the comparison. If every
    input is a Series, the results are converted back to Series.

    Args:
        data: Dictionary of pandas objects keyed by symbol.
        missing: What to do when two column sets differ:
            'nan' uses the union of the columns, 'drop' uses their
            intersection, 'raise' raises an error.

    Returns:
        `data` itself if it holds exactly one item, otherwise a new dictionary
        with every object reindexed to the common columns.

    Raises:
        ValueError: If columns differ and `missing` is 'raise' or not recognized.
    """
    if len(data) == 1:
        return data

    def _as_frame(obj):
        # Series take part in the alignment as single-column frames
        return obj.to_frame() if isinstance(obj, pd.Series) else obj

    common_columns = None
    has_frame = False
    unnamed_series = False
    for obj in data.values():
        if isinstance(obj, pd.Series):
            if obj.name is None:
                unnamed_series = True
        else:
            has_frame = True
        obj_columns = _as_frame(obj).columns

        if common_columns is None:
            # The first object seeds the common columns
            common_columns = obj_columns
            continue

        shared = common_columns.intersection(obj_columns)
        combined = common_columns.union(obj_columns)
        if len(shared) == len(combined):
            continue

        if missing == 'nan':
            warnings.warn("Symbols have mismatching columns. "
                          "Setting missing data points to NaN.", stacklevel=2)
            common_columns = combined
        elif missing == 'drop':
            warnings.warn("Symbols have mismatching columns. "
                          "Dropping missing data points.", stacklevel=2)
            common_columns = shared
        elif missing == 'raise':
            raise ValueError("Symbols have mismatching columns")
        else:
            raise ValueError(f"missing='{missing}' is not recognized")

    aligned = {}
    for symbol, obj in data.items():
        frame = _as_frame(obj).reindex(columns=common_columns)
        if has_frame:
            aligned[symbol] = frame
            continue
        # Only Series were passed: return the first common column as a Series
        series = frame[common_columns[0]]
        if unnamed_series:
            # At least one input Series had no name, so the name is dropped
            series = series.rename(None)
        aligned[symbol] = series
    return aligned
```

In [None]:
import pandas as pd
from vectorbt.data.base import Data

data = {
    'AAPL': pd.DataFrame({
        'Open': [100, 101], 'High': [102, 103], 
        'Low': [99, 100], 'Close': [101, 102], 'Volume': [1000, 1100]
    }),
    'GOOGL': pd.DataFrame({
        'Close': [2000, 2010], 'Volume': [500, 600]  # only close price and volume
    })
}

# Use the 'nan' strategy
aligned = Data.align_columns(data, missing='nan')
print(aligned)  # result: the Open, High and Low columns of GOOGL are NaN

# Use the 'drop' strategy
aligned = Data.align_columns(data, missing='drop')
print(aligned)  # result: only the Close and Volume columns are kept

# Use the 'raise' strategy (the default)
try:
    aligned = Data.align_columns(data, missing='raise')
except ValueError:
    print("列结构不匹配，抛出异常")

### select_symbol_kwargs
对于字典 `kwargs` 中值为 `symbol_dict` 类型的那些项，只保留其中 `symbol` 对应的值（如果该 `symbol_dict` 中没有 `symbol`，则删除该项）；其余项原样保留。
```python
@classmethod
def select_symbol_kwargs(cls, symbol: tp.Label, kwargs: dict) -> dict:
    """Select keyword arguments belonging to `symbol`.

    Values that are instances of `symbol_dict` are replaced by their entry for
    `symbol`, or left out when they have no such entry. All other values are
    passed through unchanged.

    Args:
        symbol: Symbol to select the arguments for.
        kwargs: Keyword arguments, possibly holding `symbol_dict` values.

    Returns:
        A new dictionary with the arguments that apply to `symbol`.
    """
    selected = {}
    for name, value in kwargs.items():
        if not isinstance(value, symbol_dict):
            # Plain values apply to every symbol
            selected[name] = value
        elif symbol in value:
            selected[name] = value[symbol]
    return selected
```

In [None]:
import pandas as pd
from vectorbt.data.base import Data, symbol_dict

# 'start_date' applies to every symbol; 'period' and 'interval' are set per symbol
kwargs = {
    'start_date': '2020-01-01',
    'period': symbol_dict({
        'AAPL': '1y',
        'GOOGL': '2y'
    }),
    'interval': symbol_dict({
        'AAPL': '1d',
        'GOOGL': '1h'
    })
}

aapl_kwargs = Data.select_symbol_kwargs('AAPL', kwargs)
print(aapl_kwargs)  # {'start_date': '2020-01-01', 'period': '1y', 'interval': '1d'}

googl_kwargs = Data.select_symbol_kwargs('GOOGL', kwargs)
print(googl_kwargs)  # {'start_date': '2020-01-01', 'period': '2y', 'interval': '1h'}

msft_kwargs = Data.select_symbol_kwargs('MSFT', kwargs)
print(msft_kwargs)  # {'start_date': '2020-01-01'}  # only the common arguments remain

### from_data
根据参数：
- `data`: 以符号为键的数据字典，值为类数组对象
- `tz_localize`: `data` 的 `Index` 的时区
- `tz_convert`: 要转换到的时区
- `missing_index`: 索引缺失处理策略
- `missing_columns`: 列缺失处理策略
- `wrapper_kwargs`: 传递给ArrayWrapper的关键字参数
- `**kwargs`: 传递给 `__init__` 方法的其他关键字参数

构建一个新的 `type(调用者)` 类型的实例并返回。具体过程：
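- 对于 `data` 中的每一项，先将非 pandas 对象转换为 `Series`/`DataFrame`；如果其 `index` 是 `DatetimeIndex`：当 `tz_localize` 不为空且索引无时区信息时，将其本地化到 `tz_localize` 时区；当 `tz_convert` 不为空时，再转换到 `tz_convert` 时区；最后推断并设置索引的频率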
- 对齐 `data` 中各项的时间索引，以及列
- 构建一个新的 `type(调用者)` 类型的实例并返回。
```python
@classmethod
def from_data(cls: tp.Type[DataT],
              data: tp.Data,
              tz_localize: tp.Optional[tp.TimezoneLike] = None,
              tz_convert: tp.Optional[tp.TimezoneLike] = None,
              missing_index: tp.Optional[str] = None,
              missing_columns: tp.Optional[str] = None,
              wrapper_kwargs: tp.KwargsLike = None,
              **kwargs) -> DataT:
    """Create a new instance of `cls` from a symbol-keyed dictionary of array-like objects.

    Args:
        data: Dictionary of array-like objects keyed by symbol.
        tz_localize: Timezone used to localize tz-naive datetime indexes.
            Falls back to `settings['data']['tz_localize']` when None.
        tz_convert: Timezone that datetime indexes are converted to.
            Falls back to `settings['data']['tz_convert']` when None.
        missing_index: Strategy passed to `align_index`.
            Falls back to `settings['data']['missing_index']` when None.
        missing_columns: Strategy passed to `align_columns`.
            Falls back to `settings['data']['missing_columns']` when None.
        wrapper_kwargs: Keyword arguments passed to `ArrayWrapper.from_obj`.
        **kwargs: Keyword arguments forwarded to the constructor of `cls`.

    Returns:
        A new instance of `cls`.
    """
    from vectorbt._settings import settings

    # Fill unspecified arguments from the global defaults
    defaults = settings['data']
    tz_localize = defaults['tz_localize'] if tz_localize is None else tz_localize
    tz_convert = defaults['tz_convert'] if tz_convert is None else tz_convert
    missing_index = defaults['missing_index'] if missing_index is None else missing_index
    missing_columns = defaults['missing_columns'] if missing_columns is None else missing_columns
    wrapper_kwargs = {} if wrapper_kwargs is None else wrapper_kwargs

    # Work on a shallow copy so the passed dictionary itself is not modified
    prepared = data.copy()
    for symbol, obj in prepared.items():
        if not isinstance(obj, (pd.Series, pd.DataFrame)):
            # Convert a raw array to pandas: 1-dim becomes a Series, anything else a DataFrame
            arr = np.asarray(obj)
            obj = pd.Series(arr) if arr.ndim == 1 else pd.DataFrame(arr)

        if isinstance(obj.index, pd.DatetimeIndex):
            # Localize only tz-naive indexes; convert whenever a target timezone is given
            if tz_localize is not None and not is_tz_aware(obj.index):
                obj = obj.tz_localize(to_timezone(tz_localize))
            if tz_convert is not None:
                obj = obj.tz_convert(to_timezone(tz_convert))
            obj.index.freq = obj.index.inferred_freq
        prepared[symbol] = obj

    # Align the index first, then the columns
    prepared = cls.align_index(prepared, missing=missing_index)
    prepared = cls.align_columns(prepared, missing=missing_columns)

    # The wrapper is built from the first symbol's object
    first_symbol = list(prepared.keys())[0]
    wrapper = ArrayWrapper.from_obj(prepared[first_symbol], **wrapper_kwargs)
    return cls(
        wrapper,
        prepared,
        tz_localize=tz_localize,
        tz_convert=tz_convert,
        missing_index=missing_index,
        missing_columns=missing_columns,
        **kwargs
    )
```

In [None]:
import pandas as pd
from vectorbt.data.base import Data


# Two symbols that share the same tz-naive daily index
data_dict = {
    'AAPL': pd.Series([100, 101, 102],
                      index=pd.date_range('2023-01-01', periods=3)),
    'GOOGL': pd.Series([2000, 2010, 2020],
                       index=pd.date_range('2023-01-01', periods=3))
}
data = Data.from_data(
    data_dict,
    tz_localize='UTC',        # localize the tz-naive index to UTC
    tz_convert='US/Eastern',  # then convert it to US Eastern time
    # `Data.__init__` declares `download_kwargs` without a default and
    # `from_data` only forwards it through **kwargs, so pass it explicitly
    download_kwargs={}
)

print(data)
print(data.data['AAPL'])
print(data.data['GOOGL'])

### download_symbol
下载单个符号数据（例如 Yahoo Finance、Alpha Vantage 等）的抽象方法，必须在子类中实现具体的数据下载逻辑。
- `Args`：
  - `symbol (tp.Label)`：要下载的金融符号（如'AAPL', 'GOOGL'）
  - `**kwargs`：下载参数，具体参数取决于数据源的要求
- `Returns`：返回 `Series` 或 `DataFrame`，包含该符号的数据
```python
@classmethod
def download_symbol(cls, symbol: tp.Label, **kwargs) -> tp.SeriesFrame:
    """Download data for a single symbol.

    This implementation always raises `NotImplementedError`; a subclass is
    expected to override it with the actual download logic.

    Args:
        symbol: Symbol to download.
        **kwargs: Download arguments for this symbol.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
```

### download
使用 `download_symbol` 下载参数 `symbols` 对应的数据，然后使用 `from_data` 构建一个新的 `type(调用者)` 类型的实例并返回
```python
@classmethod
def download(cls: tp.Type[DataT],
             symbols: tp.Union[tp.Label, tp.Labels],
             tz_localize: tp.Optional[tp.TimezoneLike] = None,
             tz_convert: tp.Optional[tp.TimezoneLike] = None,
             missing_index: tp.Optional[str] = None,
             missing_columns: tp.Optional[str] = None,
             wrapper_kwargs: tp.KwargsLike = None,
             **kwargs) -> DataT:
    """Download each symbol with `download_symbol` and build an instance with `from_data`.

    Args:
        symbols: A single hashable symbol or a sequence of symbols.
        tz_localize: Forwarded to `from_data`.
        tz_convert: Forwarded to `from_data`.
        missing_index: Forwarded to `from_data`.
        missing_columns: Forwarded to `from_data`.
        wrapper_kwargs: Forwarded to `from_data`.
        **kwargs: Download arguments. They are filtered per symbol with
            `select_symbol_kwargs` and also passed to `from_data` as
            `download_kwargs`.

    Returns:
        The instance returned by `from_data`.

    Raises:
        TypeError: If `symbols` is neither hashable nor a sequence.
    """
    if checks.is_hashable(symbols):
        # A single hashable symbol is treated as a one-element list
        symbols = [symbols]
    elif not checks.is_sequence(symbols):
        raise TypeError("Symbols must be either hashable or sequence of hashable")

    downloaded = {
        symbol: cls.download_symbol(symbol, **cls.select_symbol_kwargs(symbol, kwargs))
        for symbol in symbols
    }

    return cls.from_data(
        downloaded,
        tz_localize=tz_localize,
        tz_convert=tz_convert,
        missing_index=missing_index,
        missing_columns=missing_columns,
        wrapper_kwargs=wrapper_kwargs,
        download_kwargs=kwargs
    )
```

### update_symbol
下载 `symbol` 对应符号的新数据的抽象方法，需要在子类中实现具体的数据更新逻辑。
```python
def update_symbol(self, symbol: tp.Label, **kwargs) -> tp.SeriesFrame:
    """Fetch new data for a single symbol.

    This implementation always raises `NotImplementedError`; a subclass is
    expected to override it with the actual update logic.

    Args:
        symbol: Symbol to update.
        **kwargs: Update arguments for this symbol.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
```

### update
下载 `self.data` 中各符号的新数据来更新实例 `self`。具体过程：
- 对于 `self.data` 中的每个符号 `(k: v)`
  - 从 `kwargs` 挑选出对应的下载参数，使用 `update_symbol` 下载新的数据 `new_obj`
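  - 如果 `new_obj` 不是 pandas 对象，则以 `v` 的最后一个索引值为起点构造 `RangeIndex`，并把 `new_obj` 包装成 Series/DataFrame
    - 如果 `new_obj` 的索引是 `DatetimeIndex`：当 `self.tz_localize` 不为空且索引无时区信息时，本地化到 `self.tz_localize` 时区；当 `self.tz_convert` 不为空时，再转换到 `self.tz_convert` 时区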
  - 将 `new_obj` 存到 `new_data[k]`
- 对齐 `new_data` 中各项的时间索引，以及列
- 合并旧数据和新数据，对于 `self.data` 和 `new_data` 中的每个符号
  - 确保结构一致：从 `new_data[k]` 中取出与 `self.data[k].name`（Series）或 `self.data[k].columns`（DataFrame）对应的列
  - 纵向连接旧数据和新数据，去除重复索引（保留最新的一条），重新赋给 `new_data[k]`
- 使用 `Configured.replace` 更新实例
```python
def update(self: DataT, **kwargs) -> DataT:
    """Fetch new data for every symbol and return an updated instance.

    For each symbol in `self.data`, the new data is obtained with
    `update_symbol`, converted to pandas if needed, and adjusted for timezone.
    The new objects are aligned with each other and then appended to the
    existing data. For duplicated index labels the last row is kept.

    Args:
        **kwargs: Update arguments, filtered per symbol with `select_symbol_kwargs`.

    Returns:
        The result of `self.replace` with the new data and a wrapper whose
        index is replaced by the index of the first symbol's updated object.
    """
    new_data = dict()
    for k, v in self.data.items():
        # Pick the keyword arguments that apply to this symbol
        _kwargs = self.select_symbol_kwargs(k, kwargs)
        new_obj = self.update_symbol(k, **_kwargs)

        if not isinstance(new_obj, (pd.Series, pd.DataFrame)):
            # Wrap a raw array in pandas with a range index that starts at the
            # last existing index label, so its first row overlaps that label
            new_obj = np.asarray(new_obj)
            index = pd.RangeIndex(
                start=v.index[-1],
                stop=v.index[-1] + new_obj.shape[0],
                step=1
            )
            if new_obj.ndim == 1:
                new_obj = pd.Series(new_obj, index=index)
            else:
                new_obj = pd.DataFrame(new_obj, index=index)

        if isinstance(new_obj.index, pd.DatetimeIndex):
            # Localize only tz-naive indexes; convert whenever a target timezone is set
            if self.tz_localize is not None:
                if not is_tz_aware(new_obj.index):
                    new_obj = new_obj.tz_localize(to_timezone(self.tz_localize))
            if self.tz_convert is not None:
                new_obj = new_obj.tz_convert(to_timezone(self.tz_convert))

        new_data[k] = new_obj

    # Align the new objects with each other before merging them with the old data
    new_data = self.align_index(new_data, missing=self.missing_index)
    new_data = self.align_columns(new_data, missing=self.missing_columns)

    for k, v in new_data.items():
        # Bring the new object into the same shape as the existing one
        if isinstance(self.data[k], pd.Series):
            if isinstance(v, pd.DataFrame):
                v = v[self.data[k].name]
        else:
            v = v[self.data[k].columns]
        # Append the new rows; for duplicated index labels the last row is kept
        v = pd.concat((self.data[k], v), axis=0)
        v = v[~v.index.duplicated(keep='last')]
        if isinstance(v.index, pd.DatetimeIndex):
            v.index.freq = v.index.inferred_freq
        new_data[k] = v

    # The index of the first symbol's object becomes the new wrapper index
    new_index = new_data[self.symbols[0]].index
    return self.replace(
        wrapper=self.wrapper.replace(index=new_index),
        data=new_data
    )
```

### concat
对于多符号数据 `self.data`，返回其以列名为键的字典。例如：
```python
data.data = {
    'AAPL': pd.DataFrame({
        'Open': [100, 101], 'Close': [101, 102], 'Volume': [1000, 1100]
    }),
    'GOOGL': pd.DataFrame({
        'Open': [2000, 2010], 'Close': [2010, 2020], 'Volume': [500, 600]
    })
}
concat_data = data.concat()
# Result:
# {
#     'Open': DataFrame with columns ['AAPL', 'GOOGL'],
#     'Close': DataFrame with columns ['AAPL', 'GOOGL'], 
#     'Volume': DataFrame with columns ['AAPL', 'GOOGL']
# }
```

```python
@cached_method
def concat(self, level_name: str = 'symbol') -> tp.Data:
    """Regroup the symbol-keyed data into a dictionary keyed by column name.

    The index and the columns are taken from the first symbol's object. A
    Series contributes a single column named after the Series.

    Args:
        level_name: Name given to the columns index that holds the symbols.
            Only used when there is more than one symbol.

    Returns:
        A dictionary keyed by column name. With more than one symbol each value
        is a DataFrame with one column per symbol; with a single symbol each
        value is a Series named after that symbol.
    """
    first_data = self.data[self.symbols[0]]
    index = first_data.index
    if isinstance(first_data, pd.Series):
        columns = pd.Index([first_data.name])
    else:
        columns = first_data.columns
    # Create one empty container per column, to be filled symbol by symbol below
    if len(self.symbols) > 1:
        new_data = {c: pd.DataFrame(
            index=index,
            columns=pd.Index(self.symbols, name=level_name)
        ) for c in columns}
    else:
        new_data = {c: pd.Series(
            index=index,
            name=self.symbols[0]
        ) for c in columns}
    for c in columns:
        for s in self.symbols:
            # A Series holds the only column; a DataFrame is indexed by column name
            if isinstance(self.data[s], pd.Series):
                col_data = self.data[s]
            else:
                col_data = self.data[s][c]
            if len(self.symbols) > 1:
                new_data[c].loc[:, s] = col_data
            else:
                new_data[c].loc[:] = col_data
    # The containers were created without data, so let pandas infer their dtypes
    for c in columns:
        new_data[c] = new_data[c].infer_objects()
    return new_data
```

### get
获取 `self.data` 中 `column` 对应的数据。具体逻辑：
- 只有一个符号
  - `column` 空，返回该符号的整个数据
  - `column` 不空，返回该符号的 `column` 列数据
- 否则（有多个符号），使用 `concat` 得到 `self.data` 的以列名为键的字典
  - 只有一个键值对：返回对应的值
  - 多于一个键值对
    - `column` 空
      - 将所有值构成一个元组返回
    - `column` 不空且为 `list`
      - 取出键在 `column` 中的值，构成一个元组返回
    - 否则
      - 返回 `column` 对应的值
```python
def get(self, column: tp.Optional[tp.Label] = None, **kwargs) -> tp.MaybeTuple[tp.SeriesFrame]:
    """Return the data of one or more columns.

    With a single symbol, returns that symbol's object, indexed by `column`
    when a column is given. With several symbols, the data is regrouped by
    column with `concat` first.

    Args:
        column: Column label, list of column labels, or None for all columns.
        **kwargs: Keyword arguments passed to `concat`.

    Returns:
        A single object, or a tuple of objects when several columns are
        selected. If `concat` yields only one column, that column's object is
        returned regardless of `column`.
    """
    symbols = self.symbols
    if len(symbols) == 1:
        only = self.data[symbols[0]]
        return only if column is None else only[column]

    by_column = self.concat(**kwargs)
    all_values = tuple(by_column.values())
    if len(by_column) == 1:
        return all_values[0]
    if column is None:
        return all_values
    if isinstance(column, list):
        return tuple(by_column[c] for c in column)
    return by_column[column]
```

### stats_defaults
返回的是 `merge_dicts(settings['stats_builder'], dict(settings=dict(freq=self.wrapper.freq)), settings['data']['stats'])`。
```python
@property
def stats_defaults(self) -> tp.Kwargs:
    """Defaults for building stats.

    Merges the `stats_defaults` of `StatsBuilderMixin`, evaluated for this
    instance, with `settings['data']['stats']` using `merge_dicts`.
    """
    from vectorbt._settings import settings
    data_stats_cfg = settings['data']['stats']

    # NOTE(review): presumably the later dict takes precedence; confirm against merge_dicts
    return merge_dicts(
        StatsBuilderMixin.stats_defaults.__get__(self),
        data_stats_cfg
    )
```

### plots_defaults
返回的是 `merge_dicts(settings['plots_builder'], dict(settings=dict(freq=self.wrapper.freq)), settings['data']['plots'])`。
```python
@property
def plots_defaults(self) -> tp.Kwargs:
    """Defaults for building plots.

    Merges the `plots_defaults` of `PlotsBuilderMixin`, evaluated for this
    instance, with `settings['data']['plots']` using `merge_dicts`.
    """
    from vectorbt._settings import settings
    data_plots_cfg = settings['data']['plots']

    # NOTE(review): presumably the later dict takes precedence; confirm against merge_dicts
    return merge_dicts(
        PlotsBuilderMixin.plots_defaults.__get__(self),
        data_plots_cfg
    )
```