/
iterator.py
71 lines (61 loc) · 2.08 KB
/
iterator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
"""Place to define the Modin iterator."""
from collections.abc import Iterator
class PartitionIterator(Iterator):
    """
    Iterator on partitioned data.

    Parameters
    ----------
    df : modin.pandas.DataFrame
        The dataframe to iterate over.
    axis : {0, 1}
        Axis to iterate over.
    func : callable
        The function to get inner iterables from each partition.

    Notes
    -----
    A falsy `axis` walks the frame one row position at a time; a truthy
    `axis` walks it one column position at a time. The number of positions
    is read from `df` once, when the iterator is constructed.
    """

    def __init__(self, df, axis, func):
        self.df = df
        self.axis = axis
        # Every key is a ``(row_selector, column_selector)`` tuple that is
        # handed to ``df.iloc``: one side is an integer position, the other
        # is ``slice(None)`` so the whole row/column is selected.
        if axis:
            self.index_iter = (
                (slice(None), col_pos) for col_pos in range(len(df.columns))
            )
        else:
            self.index_iter = (
                (row_pos, slice(None)) for row_pos in range(len(df.index))
            )
        self.func = func

    def __iter__(self):
        """
        Implement iterator interface.

        Returns
        -------
        PartitionIterator
            Iterator object.
        """
        return self

    def __next__(self):
        """
        Implement iterator interface.

        Returns
        -------
        object
            Result of calling `func` on the next row or column of `df`
            selected positionally through ``df.iloc``.

        Raises
        ------
        StopIteration
            When every position along the chosen axis has been visited.
        """
        key = next(self.index_iter)
        df = self.df.iloc[key]
        return self.func(df)