1- # pylint: disable=E1101
2-
31from datetime import datetime
42from operator import methodcaller
53
64import numpy as np
75import pytest
86
9- from pandas .compat import zip
10-
117import pandas as pd
128from pandas import DataFrame , Panel , Series
139from pandas .core .indexes .datetimes import date_range
@@ -104,20 +100,21 @@ def f(x):
104100 tm .assert_panel_equal (result , binagg )
105101
106102
@pytest.mark.parametrize('name, func', [
    ('Int64Index', tm.makeIntIndex),
    ('Index', tm.makeUnicodeIndex),
    ('Float64Index', tm.makeFloatIndex),
    ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
])
def test_fails_on_no_datetime_index(name, func):
    """Grouping by a TimeGrouper must raise TypeError for other indexes.

    Parameters
    ----------
    name : str
        Index class name expected to appear (via ``%r``) in the error.
    func : callable
        Factory called with the number of rows to build the frame's index.
    """
    size = 2
    idx = func(size)
    frame = DataFrame({'a': np.random.randn(size)}, index=idx)

    # ``match`` is applied as a regular expression to the raised message.
    expected_msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
                    "or PeriodIndex, but got an instance of %r" % name)
    with pytest.raises(TypeError, match=expected_msg):
        frame.groupby(TimeGrouper('D'))
121118
122119
123120def test_aaa_group_order ():
@@ -143,11 +140,13 @@ def test_aaa_group_order():
143140 df [4 ::5 ])
144141
145142
146- def test_aggregate_normal ():
147- # check TimeGrouper's aggregation is identical as normal groupby
143+ def test_aggregate_normal (resample_method ):
144+ """Check TimeGrouper's aggregation is identical to a normal groupby."""
148145
149- n = 20
150- data = np .random .randn (n , 4 )
146+ if resample_method == 'ohlc' :
147+ pytest .xfail (reason = 'DataError: No numeric types to aggregate' )
148+
149+ data = np .random .randn (20 , 4 )
151150 normal_df = DataFrame (data , columns = ['A' , 'B' , 'C' , 'D' ])
152151 normal_df ['key' ] = [1 , 2 , 3 , 4 , 5 ] * 4
153152
@@ -159,35 +158,11 @@ def test_aggregate_normal():
159158 normal_grouped = normal_df .groupby ('key' )
160159 dt_grouped = dt_df .groupby (TimeGrouper (key = 'key' , freq = 'D' ))
161160
162- for func in ['min' , 'max' , 'prod' , 'var' , 'std' , 'mean' ]:
163- expected = getattr (normal_grouped , func )()
164- dt_result = getattr (dt_grouped , func )()
165- expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
166- periods = 5 , name = 'key' )
167- assert_frame_equal (expected , dt_result )
168-
169- for func in ['count' , 'sum' ]:
170- expected = getattr (normal_grouped , func )()
171- expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
172- periods = 5 , name = 'key' )
173- dt_result = getattr (dt_grouped , func )()
174- assert_frame_equal (expected , dt_result )
175-
176- # GH 7453
177- for func in ['size' ]:
178- expected = getattr (normal_grouped , func )()
179- expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
180- periods = 5 , name = 'key' )
181- dt_result = getattr (dt_grouped , func )()
182- assert_series_equal (expected , dt_result )
183-
184- # GH 7453
185- for func in ['first' , 'last' ]:
186- expected = getattr (normal_grouped , func )()
187- expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
188- periods = 5 , name = 'key' )
189- dt_result = getattr (dt_grouped , func )()
190- assert_frame_equal (expected , dt_result )
161+ expected = getattr (normal_grouped , resample_method )()
162+ dt_result = getattr (dt_grouped , resample_method )()
163+ expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
164+ periods = 5 , name = 'key' )
165+ tm .assert_equal (expected , dt_result )
191166
192167 # when TimeGrouper is used, 'nth' doesn't work yet
193168
@@ -201,34 +176,23 @@ def test_aggregate_normal():
201176 """
202177
203178
@pytest.mark.parametrize('method, method_args, unit', [
    ('sum', {}, 0),
    ('sum', {'min_count': 0}, 0),
    ('sum', {'min_count': 1}, np.nan),
    ('prod', {}, 1),
    ('prod', {'min_count': 0}, 1),
    ('prod', {'min_count': 1}, np.nan),
])
def test_resample_entirly_nat_window(method, method_args, unit):
    """Check ``sum``/``prod`` over a resample bin that holds only NaN.

    The series has two zeros followed by two NaN values on four
    consecutive days; with "2d" bins the second bin contains only NaN.

    Parameters
    ----------
    method : str
        Name of the resampler method to call.
    method_args : dict
        Keyword arguments forwarded to that method.
    unit : scalar
        Value expected for the all-NaN bin.
    """
    values = [0] * 2 + [np.nan] * 2
    series = pd.Series(values, index=pd.date_range('2017', periods=4))
    aggregate = methodcaller(method, **method_args)

    result = aggregate(series.resample("2d"))

    expected_index = pd.to_datetime(['2017-01-01', '2017-01-03'])
    expected = pd.Series([0.0, unit], index=expected_index)
    tm.assert_series_equal(result, expected)
231-
232196
233197@pytest .mark .parametrize ('func, fill_value' , [
234198 ('min' , np .nan ),
@@ -302,33 +266,22 @@ def test_repr():
302266 assert result == expected
303267
304268
@pytest.mark.parametrize('method, method_args, expected_values', [
    ('sum', {}, [1, 0, 1]),
    ('sum', {'min_count': 0}, [1, 0, 1]),
    ('sum', {'min_count': 1}, [1, np.nan, 1]),
    ('sum', {'min_count': 2}, [np.nan, np.nan, np.nan]),
    ('prod', {}, [1, 1, 1]),
    ('prod', {'min_count': 0}, [1, 1, 1]),
    ('prod', {'min_count': 1}, [1, np.nan, 1]),
    ('prod', {'min_count': 2}, [np.nan, np.nan, np.nan]),
])
def test_upsample_sum(method, method_args, expected_values):
    """Check ``sum``/``prod`` when upsampling hourly data to 30 minutes.

    Parameters
    ----------
    method : str
        Name of the resampler method to call.
    method_args : dict
        Keyword arguments forwarded to that method.
    expected_values : list
        Expected values for the three 30-minute bins.
    """
    hourly_index = pd.date_range("2017", periods=2, freq="H")
    hourly = pd.Series(1, index=hourly_index)
    resampled = hourly.resample("30T")

    half_hours = pd.to_datetime(['2017-01-01T00:00:00',
                                 '2017-01-01T00:30:00',
                                 '2017-01-01T01:00:00'])

    aggregate = methodcaller(method, **method_args)
    result = aggregate(resampled)

    expected = pd.Series(expected_values, index=half_hours)
    tm.assert_series_equal(result, expected)
0 commit comments