# Python 知识

# 代码分析

## 类继承关系
```mermaid
classDiagram
    %% 类层次结构
    class Protocol["typing.Protocol"]
    class SplitterT
    class BaseSplitter
    class RangeSplitter
    class RollingSplitter
    class ExpandingSplitter
    
    %% 继承关系
    Protocol <|-- SplitterT
    BaseSplitter <|-- RangeSplitter
    BaseSplitter <|-- RollingSplitter
    BaseSplitter <|-- ExpandingSplitter
```

## def split_ranges_into_sets

### 例子

In [2]:
from vectorbt.generic.splitters import split_ranges_into_sets

In [1]:
# 示例1：简单分割，50%训练，50%测试
start_idxs = [0, 50]
end_idxs = [99, 149]
set_lens = (0.5,)  # 50%作为第一个数据集
for train_idx, test_idx in split_ranges_into_sets(start_idxs, end_idxs, set_lens):
    print(f"训练集: {train_idx}, 测试集: {test_idx}")

训练集: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49], 测试集: [50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
 98 99]
训练集: [50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
 98 99], 测试集: [100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
 136 137 138 139 140 141 142 143 144 145 146 147 148 149]


In [None]:
# 示例2：三分割，50%训练，25%验证，25%测试
start_idxs = [0, 50]
end_idxs = [99, 149]
set_lens = (0.5, 0.25)  # 50%训练，25%验证，剩余25%测试
for train_idx, valid_idx, test_idx in split_ranges_into_sets(start_idxs, end_idxs, set_lens):
    print(f"训练集: {train_idx}, 验证集: {valid_idx}, 测试集: {test_idx}")

训练集: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49], 验证集: [50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
 74], 测试集: [75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
 99]
训练集: [50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
 98 99], 验证集: [100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
 118 119 120 121 122 123 124], 测试集: [125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
 143 144 145 146 147 148 149]


In [6]:
# 示例3：固定数量分割
start_idxs = [0, 50]
end_idxs = [99, 149]
set_lens = (50, 30)  # 50个样本训练，30个样本验证，剩余测试
for train_idx, valid_idx, test_idx in split_ranges_into_sets(start_idxs, end_idxs, set_lens):
    print(f"训练集: {train_idx}, 验证集: {valid_idx}, 测试集: {test_idx}")

训练集: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49], 验证集: [50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
 74 75 76 77 78 79], 测试集: [80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]
训练集: [50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
 98 99], 验证集: [100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
 118 119 120 121 122 123 124 125 126 127 128 129], 测试集: [130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
 148 149]


In [7]:
start_idxs = [0, 50]
end_idxs = [99, 149]
# 示例4：反向分割（剩余长度的集合放在最前面，固定长度的集合靠右对齐）
set_lens = (50, 30)
left_to_right = False  # 变长训练集在前，固定测试集在后
for train_idx, valid_idx, test_idx in split_ranges_into_sets(start_idxs, end_idxs, set_lens, left_to_right):
    print(f"训练集: {train_idx}, 验证集: {valid_idx}, 测试集: {test_idx}")

训练集: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19], 验证集: [20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
 68 69], 测试集: [70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
 94 95 96 97 98 99]
训练集: [50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69], 验证集: [ 70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87
  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105
 106 107 108 109 110 111 112 113 114 115 116 117 118 119], 测试集: [120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
 138 139 140 141 142 143 144 145 146 147 148 149]


### 源码

```python
def split_ranges_into_sets(
    start_idxs: tp.ArrayLike,
    end_idxs: tp.ArrayLike,
    set_lens: tp.MaybeSequence[tp.Sequence[float]] = (),
    left_to_right: tp.MaybeSequence[bool] = True,
) -> RangesT:
    """Yield, for every inclusive index range, a tuple of consecutive index arrays.

    Each pair ``(start_idxs[i], end_idxs[i])`` describes one range whose last
    index is included. When `set_lens` is empty, the range is yielded as a
    single array. Otherwise the range is cut into ``len(set_lens) + 1``
    back-to-back sets: one per requested length plus one set that receives
    whatever is left over.

    Args:
        start_idxs: First index of each range.
        end_idxs: Last (inclusive) index of each range; must have the same
            length as `start_idxs`.
        set_lens: Requested set lengths. Either one sequence shared by all
            ranges, or a sequence of sequences with one entry per range
            (chosen when ``checks.is_sequence(set_lens[0])`` is true).
            A value strictly between 0 and 1 is a fraction of the range
            length and is rounded down; any other value is used unchanged
            as a number of elements.
        left_to_right: If true, the leftover set is placed last; otherwise
            it is placed first. May also be a sequence with one flag per range.

    Yields:
        A tuple of NumPy index arrays for each range, in order.

    Raises:
        ValueError: If a requested set resolves to length 0, or if the
            requested sets leave no element for the leftover set.
    """
    start_idxs = np.asarray(start_idxs)
    end_idxs = np.asarray(end_idxs)
    checks.assert_len_equal(start_idxs, end_idxs)

    for i, (range_start, range_stop) in enumerate(zip(start_idxs, end_idxs)):
        range_len = range_stop - range_start + 1

        # Nothing to split: the whole range is the one and only set.
        if len(set_lens) == 0:
            yield (np.arange(range_start, range_stop + 1),)
            continue

        # Per-range settings are looked up by position; shared ones are used directly.
        _set_lens = set_lens[i] if checks.is_sequence(set_lens[0]) else set_lens
        leftover_last = left_to_right[i] if checks.is_sequence(left_to_right) else left_to_right

        # Turn fractional lengths into element counts and reject empty sets.
        sizes = []
        for j, size in enumerate(_set_lens):
            if 0 < size < 1:
                size = math.floor(size * range_len)
            if size == 0:
                raise ValueError(f"Set {j} in the range {i} is empty")
            sizes.append(size)

        # The leftover set must keep at least one element.
        taken = sum(sizes)
        if not taken < range_len:
            raise ValueError(f"Range of length {range_len} too short to split into {len(_set_lens) + 1} sets")
        leftover = range_len - taken
        sizes = sizes + [leftover] if leftover_last else [leftover] + sizes

        # Walk the range once, carving out one contiguous array per set.
        pieces = []
        lo = 0
        for size in sizes:
            hi = lo + size
            pieces.append(np.arange(range_start + lo, range_start + hi))
            lo = hi

        yield tuple(pieces)
```