Skip to content

Commit

Permalink
Fix stats tests and pin sphinx version (#3313)
Browse files Browse the repository at this point in the history
  • Loading branch information
hekaisheng committed Jan 6, 2023
1 parent bde43bb commit 996ce47
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 3 deletions.
2 changes: 1 addition & 1 deletion docs/requirements-doc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pytest-cov>=2.5.0
pytest-timeout>=1.2.0
cloudpickle>=1.0.0
sqlalchemy>=1.2.0
sphinx>=3.0.0
sphinx<6.0.0
pydata-sphinx-theme>=0.3.0
sphinx-intl>=0.9.9
ipython>=4.0
Expand Down
69 changes: 68 additions & 1 deletion mars/dataframe/datasource/date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import numpy as np
import pandas as pd
from pandas import Timestamp, NaT
from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import Tick
from pandas._libs.tslibs import timezones
Expand Down Expand Up @@ -51,6 +52,72 @@ def normalize_date(dt): # from pandas/_libs/tslibs/conversion.pyx
_date_range_use_inclusive = pd_release_version[:2] >= (1, 4)


# adapted from pandas.core.arrays.datetimes.generate_range
def generate_range_count(
    start=None, end=None, periods=None, offset=None
):  # pragma: no cover
    """Count the dates of an offset-stepped range without materializing them.

    The range is walked from ``start`` towards ``end`` one ``offset`` at a
    time, and only the number of visited dates is kept.

    Parameters
    ----------
    start, end
        Range bounds; each is passed through ``Timestamp``, and a ``NaT``
        result is treated as "not given".  A bound that is not given is
        derived from the other bound together with ``periods``.
    periods
        Number of steps used to derive a bound that is not given.
    offset
        Step between consecutive dates; passed through ``to_offset``.

    Returns
    -------
    int
        Number of dates visited between the (possibly adjusted) bounds.

    Raises
    ------
    ValueError
        If applying ``offset`` fails to move the current date in the
        direction indicated by the sign of ``offset.n``.
    """
    offset = to_offset(offset)

    # Timestamp(...) yields NaT for a missing bound; map that back to None.
    start = Timestamp(start)
    if start is NaT:
        start = None
    end = Timestamp(end)
    if end is NaT:
        end = None

    # Snap one bound onto the offset grid (start takes precedence).
    if start and not offset.is_on_offset(start):
        start = offset.rollforward(start)
    elif end and not offset.is_on_offset(end):
        end = offset.rollback(end)

    # A forward-stepping range whose end precedes its start is empty.
    if periods is None and end < start and offset.n >= 0:
        end, periods = None, 0

    # Fill in whichever bound is still missing from the other one.
    if end is None:
        end = start + (periods - 1) * offset
    if start is None:
        start = end - (periods - 1) * offset

    forward = offset.n >= 0
    total = 0
    current = start
    while (current <= end) if forward else (current >= end):
        total += 1

        if current == end:
            # GH#24252 avoid overflows by not performing the addition
            # in offset.apply unless we have to
            break

        # faster than current + offset
        try:
            following = offset._apply(current)
        except AttributeError:
            following = current + offset

        # Guard against an offset that would make the walk loop forever.
        if forward:
            if following <= current:
                raise ValueError(f"Offset {offset} did not increment date")
        elif following >= current:
            raise ValueError(f"Offset {offset} did not decrement date")
        current = following
    return total


class DataFrameDateRange(DataFrameOperand, DataFrameOperandMixin):
_op_type_ = OperandDef.DATE_RANGE

Expand Down Expand Up @@ -511,7 +578,7 @@ def date_range(
inclusive = "both"
else:
if periods is None:
periods = size = int((end - start) / freq + 1)
periods = size = generate_range_count(start, end, periods, freq)
else:
size = periods
if inclusive in ("left", "right"):
Expand Down
5 changes: 5 additions & 0 deletions mars/dataframe/datasource/tests/test_datasource_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,11 @@ def test_date_range_execution(setup):
expected = pd.date_range(start="1/1/2018", periods=5, freq="M")
pd.testing.assert_index_equal(result, expected)

dr = md.date_range(start="2018/01/01", end="2018/07/01", freq="M")
result = dr.execute().fetch()
expected = pd.date_range(start="2018/01/01", end="2018/07/01", freq="M")
pd.testing.assert_index_equal(result, expected)


parquet_engines = ["auto"]
if pa is not None:
Expand Down
18 changes: 17 additions & 1 deletion mars/tensor/stats/tests/test_stats_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ def test_t_test_execution(setup):
functools.partial(mt_from_stats, equal_var=False),
functools.partial(sp_from_stats, equal_var=False),
),
(ttest_1samp, sp_ttest_1samp),
]

fa_raw = np.array([16, 18, 16, 14, 12, 12])
Expand Down Expand Up @@ -233,6 +232,23 @@ def test_t_test_execution(setup):
np.testing.assert_almost_equal(expected[0], result[0])
np.testing.assert_almost_equal(expected[1], result[1])

# second param size must be 1 for ttest_1samp
fb_raw = np.array([16])
fb = tensor(fb_raw)
for alt in alternatives:
if parse_version(scipy.__version__) >= parse_version("1.6.0"):
r = ttest_1samp(fa, fb, alternative=alt)
else:
r = ttest_1samp(fa, fb)
result = r.execute().fetch()

if parse_version(scipy.__version__) >= parse_version("1.6.0"):
expected = sp_ttest_1samp(fa_raw, fb_raw, alternative=alt)
else:
expected = sp_ttest_1samp(fa_raw, fb_raw)
np.testing.assert_almost_equal(expected[0], result[0])
np.testing.assert_almost_equal(expected[1], result[1])


@pytest.mark.parametrize("chunk_size", [5, 15])
@pytest.mark.parametrize(
Expand Down

0 comments on commit 996ce47

Please sign in to comment.