-
Notifications
You must be signed in to change notification settings - Fork 20
/
pagination.py
148 lines (119 loc) · 5.27 KB
/
pagination.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import pandas as pd
from fireant.utils import alias_selector
from pypika import Order
def _get_window(limit, offset):
start = offset
end = offset + limit if None not in (offset, limit) else limit
return start, end
def _apply_sorting(orders):
    """
    Splits an iterable of orders into the arguments expected by `DataFrame.sort_values`.

    :param orders:
        A non-empty iterable of (field, pypika.Order) tuples. Each field must have an `alias` attribute.
    :return:
        A tuple `(sort_values, ascending)`. `sort_values` is a list with the result of `alias_selector` for each
        field alias. `ascending` is a tuple of booleans, True where the orientation is `Order.asc`.
    """
    pairs = []
    for field, orientation in orders:
        column = alias_selector(field.alias)
        is_ascending = orientation is Order.asc
        pairs.append((column, is_ascending))

    # Transpose [(column, flag), ...] into (columns, flags). This unpacking requires at least one order.
    columns, directions = zip(*pairs)
    return list(columns), directions
def paginate(data_frame, widgets, orders=(), limit=None, offset=None):
    """
    Paginates a result set, either row by row or grouped by the zeroth index level.

    Grouped pagination is used when the data frame has a multi-index and at least one widget has a truthy
    `group_pagination` attribute. In every other case simple row pagination is used.

    :param data_frame:
        The result set to paginate.
    :param widgets:
        An iterable of widgets that the pagination is being applied for.
    :param orders:
        An iterable of (<Dimension/Metric>, pypika.Order)
    :param limit:
        A limit of the number of data points/series
    :param offset:
        An offset of the number of data points/series
    :return:
        A paginated data frame. If the widget required grouped pagination, then there should be an upper bound of
        `limit*(n_index_level_0)`. Otherwise the data frame should have the same length as the limit. An empty
        data frame is returned unchanged.
    """
    if not len(data_frame):
        # Nothing to paginate.
        return data_frame

    start, end = _get_window(limit, offset)

    has_multi_index = isinstance(data_frame.index, pd.MultiIndex)
    if has_multi_index and any(
        getattr(widget, "group_pagination", False) for widget in widgets
    ):
        return _group_paginate(data_frame, start, end, orders)

    return _simple_paginate(data_frame, start, end, orders)
def _simple_paginate(data_frame, start=None, end=None, orders=()):
"""
Applies pagination which limits the number of rows in the data frame.
:param data_frame:
A data frame to paginate
:param start:
The index starting point to slice the data frame at
:param end:
The index ending point to slice the data frame at
:param orders:
A list of tuples that contain a slicer field definition (with an alias matching the columns of the data frame)
and a pypika.Order.
"""
if orders:
sort, ascending = _apply_sorting(orders)
data_frame = data_frame.sort_values(by=sort, ascending=ascending)
return data_frame[start:end]
def _index_isnull(data_frame):
if isinstance(data_frame.index, pd.MultiIndex):
return [
any(pd.isnull(value) for value in level) for level in list(data_frame.index)
]
return pd.isnull(data_frame.index)
def _group_paginate(data_frame, start=None, end=None, orders=()):
    """
    Applies pagination which limits the number of rows in the data frame grouped by the zeroth index level. This will
    in turn paginate the number of series in the data frame.

    The values of the index levels after the zeroth one are collected (sorted by `orders` when given), sliced with
    `start` and `end`, and then used to select rows out of each zeroth-level group. Rows with a null index value
    are appended to the end of each group.

    :param data_frame:
        A data frame to paginate. It must have a multi-index.
    :param start:
        The index starting point to slice the dimension values at
    :param end:
        The index ending point to slice the dimension values at
    :param orders:
        A list of tuples that contain a slicer field definition (with an alias matching the columns of the data
        frame) and a pypika.Order.
    :return:
        The data frame sorted by the zeroth index level, with each zeroth-level group reduced to the selected
        dimension values.
    """
    dimension_levels = data_frame.index.names[1:]
    dimension_groups = data_frame.groupby(level=dimension_levels)

    # Do not apply ordering on the 0th dimension !!!
    # This would not have any result since the X-Axis on a chart is ordered sequentially
    # NOTE(review): this compares the raw field alias with the index name, whereas sorting elsewhere in this module
    # passes aliases through alias_selector - confirm both sides use the same naming.
    orders = [order for order in orders if order[0].alias != data_frame.index.names[0]]

    if orders:
        # FIXME this should aggregate according to field definition, instead of sum
        # Need a way to interpret definitions in python code in order to do that
        aggregated_df = dimension_groups.sum()

        sort, ascending = _apply_sorting(orders)
        sorted_df = aggregated_df.sort_values(by=sort, ascending=ascending)
        sorted_dimension_values = tuple(sorted_df.index)[start:end]

    else:
        # Without orders, take the group keys in the order that groupby yields them.
        sorted_dimension_values = tuple(dimension_groups.apply(lambda g: g.name))[
            start:end
        ]

    sorted_dimension_values = (
        pd.Index(sorted_dimension_values, name=dimension_levels[0])
        if len(dimension_levels) == 1
        else pd.MultiIndex.from_tuples(sorted_dimension_values, names=dimension_levels)
    )

    def _apply_pagination(df):
        # This function applies sorting by using the sorted dimension values as an index to select values in the right
        # order out of the data frame. The index must be filtered to only values that are in this data frame, since
        # there might be missing combinations of index values.
        dfx = df.reset_index(level=0, drop=True)

        value_in_index = sorted_dimension_values.isin(dfx.index)
        index_slice = sorted_dimension_values[value_in_index].values

        # In the case of bool dimensions, convert index_slice to an array of literal `True`, because pandas `.loc`
        # handles lists of bool as a mask.
        if bool in {type(x) for x in sorted_dimension_values}:
            index_slice |= True

        # Need to include nulls so append them to the end of the sorted data frame
        isnull = _index_isnull(dfx)

        # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement and is also available in
        # older pandas versions.
        return pd.concat([dfx.loc[index_slice, :], dfx[isnull]])

    return (
        data_frame.sort_values(data_frame.index.names[0], ascending=True)
        .groupby(level=0)
        .apply(_apply_pagination)
    )