From 5557e3627df67211f5b6406d07995dccb02196f9 Mon Sep 17 00:00:00 2001 From: sudhir mohanraj Date: Sat, 23 Feb 2019 14:47:46 -0500 Subject: [PATCH 1/2] ERR: doc update for ParsingError (#25414) Closes gh-22881 --- pandas/errors/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index c57d27ff03ac6a..493ee65f63c6ac 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -32,6 +32,8 @@ class UnsortedIndexError(KeyError): class ParserError(ValueError): """ Exception that is raised by an error encountered in `pd.read_csv`. + + e.g. HTML Parsing will raise this error. """ From 3855a27be4f04d15e7ba7aee12f0220c93148d3d Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 23 Feb 2019 21:43:20 +0000 Subject: [PATCH 2/2] ENH: Add in sort keyword to DatetimeIndex.union (#25110) --- doc/source/styled.xlsx | Bin 0 -> 5682 bytes doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/indexes/datetimes.py | 38 ++++- pandas/tests/indexes/datetimes/test_setops.py | 138 ++++++++++++------ 4 files changed, 123 insertions(+), 54 deletions(-) create mode 100644 doc/source/styled.xlsx diff --git a/doc/source/styled.xlsx b/doc/source/styled.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1233ff2b8692bad1cfcd54f59d26eb315cfa728c GIT binary patch literal 5682 zcmZ`-1z3}9_aEI#3Ze)|3o^QEv>+i}0uqkcMt6@=I!2e2sC0wWMu&ia0@58SjWqI~ z@B95fU%B3IyYA=Nu4})ubD!s&^E>xXQxyxF5&!_;13p(uKBi|H#zLaMQD6c9MCiMj zvo*v8$o=bDnxGNcE<@barjm9}tB>liko4Mw8eU*Jv0loFCo)o0`ul{pKoS{&O~q2K$v?!mX##}swl>kxCc&A6ncxA^jty9ve{7U(Bm4#& zy}?TKB1JDN2TM(72S*p+Ge<{mPbfq!MvbVQhgAMbWpftBu>rD>sZc^C9>>8T41HmP zgr}#k;q>)h&Ch7uEc<(xDqRJ`bkr{&B*X%`%4pPXr>hT&EZGz=N(W{h+yYwVkR>7o zV(^d;Qg4~KpNdb~yA6qY+IMP?>N}(nif*pi@v@(!7$3x#)Jc$>*I!h?=O0~S{^_s7 z3&@upw4a>l2SfHxe=QxHt$#T@nD9ikgNL-IEn=mmg_SNZ_gkc;1|b})t3Ga~&O9+} zrC_#RxYRg&182xpQ*ve-w)C>98d|}(? 
z8>b#x!*{ z-xA=>>@Ex9G>4NZrnC~-+*&fZq!9NA%u2MZQnjNAbAGdKG8fb_J+?g1414Bxe^qeJ ztWQM@$E12-W#a5v^>me<&PCZ9PsVKzS>@J*;xj+z%S8_=_m)bZ^Vn!09E#Ppop(Jt zX(pLWq__Y;CL20+(b4gQ09|dYq1Jz2d4I)c%gESeN`%sXr~IQQ_y|hWEfiE&L*ZiU zK3$U*vZYQRN@frZ@v@2!320LiA{|I3*-B7*pCN(GwRt|IptQar;&rgiZLtyd0a=$v?ZDD{_B|t`%7O4cKLpPEM@RW%RQP-(;$m3_ zL+-vuom)9k{Zp<2P}+s%Jx`Ikfze3EyVwg)nzGxo4k2v?AcC5F+mBY0M^k0-uir&$ zYm6QH&40_V$s~+6))=$*m>r9G*C7W`bxglu3&^671Om+>nMPHKr?C!!Iw;jEmETHI|6^C;J zAly0fSkt#0R2Q+(o-k?>H$Kc!t4WKW-W&8o3C`FxQ-1j)rKzug-#g1sY{(uFU|Wn) zwAGX`oOt1~v654iIH4vx)gIqW)^i6hAo&r+#LR$?PJ4 z%x5a`v9{GrMXs!a8~n5=#qv9D;WncZ-am0pGX)9n#^0gC1mCGvWDK!w+PzoK9Ftz! zPR1!KWpP`G*17gpP@U7;5OX8)?h#M*(HgP~!xF(nDf8a%gYj-d_|_xK2L_8RX=t$KSB3MY3rYnH)%Fs??XgxiZx;7HK`ZolEeoyJ4S+|hbr zCOntF>B!n@@&~VuubhBDoGQo<&EqO62>c=<+-j>T*Idl zX6rSBMEr8~&Z=MQ5UsVWA}WWK9~{;14G@ciB@*cBJYLZWo*zARUtucb(N2+Tpp|XK zg1*n15fjNyjc+6z(~5KGINj{>q+JsIg5Btsst;YC92q%ZM?I;?-;qeB>v^%C$GDvv zmO9)a7w+X_mI_Yl))b(GLQ0sq6zk0!Cib`}7p%-5zQPMGe}cKMrEAV>+>ON&bkt%OE;`*ykF63Cs{+KKK#?L zv^R&CO#K!Sxw0jK`n5{l!ENodPZiM_kH)X6c3d7{-tA|(#=)zb+f~#a%KKUv&(0& zL~-TEnOBKqWaG2vFP?*^ux5w+|SH*!!7l}d>Q zC~yUbshf_nkW)rf5aTdXJ1_6-B-(jIO`LHBK|^%-tUgos%e_QN@;l2->aZM?O|cS% zPA};UHqo}SnyYqr!?GT-T}_*QlB}VYR6NK}Tew#p?vml%(5_H&WiWp4ly6?taawE+ z%XE!`ORm5l(3vy5MMbf!Oy`u~DHFCz-S=o-cg@ zu4R<*p@3N|rA6uM%F^yBUSh96g9GB+=OXjr3hA``pCd~|qX`{ApJi61r$3R{9FYjz zjmI(NO{){iUr{HNnEAOwQqkTSv$z%6hVdu8>adM%$zTHj3O4`%n!gFe!@=3!#m3s& z)dl$1?JpK_OEGg;79vKrsl*+3-Tx8$U=&d6gF51Q)v)M33@a8wu{aI6Pj5}fsyG|U zzGaV_;^Rn=jz0sXNmXC;E5Ww&=b6i-)Qu%IlqkOPXd2VuAHz;thuFphbqgV=;eX>fuDne#?nV zWnh5Byt?i_J8Wqz^|j6EybomQdE#>mY5#22tZXQ2*TA}2HSi|Ng(r54Gy`_vcPUW@ zD-y_(=t(((^80T|`NtLoe*c0xcZX0Pw4SFP_VgY@F^Pp^=zLUy-Do9!EeC^?=Be#W zTdHqru)SnW3ABHCZh~3P(z-%^W@M43np!R2MV8rt)S~h5%x&cieL~N&D=Ql^LdpD` z77G)u1jdY`2r{!{F1osTa-v}N;YUWgt)sHa^a3d{E;)XL7$ZAoVV`I|JeZH-sZ}07 z>-{AoFj-`%z3Vo`2Lw3huF*HmT(ZXzw2Fy%(Nb|jk7cZsw!_j5!Nej}=8PhxIH%T+ zaDso}%g-^|spVAgDd@3UvM7IIt!8hxdCcM5la^0>o)2;o!PJxD2**H)#iUE*FBxE$ 
zBt#s@_j?Xe)%4p|G7scOG^#-I0`IlpV%;JbD1pQ$4H^)T*ADtpatk(M7rnJIcF7HQUkY8ldzPG3Z16-B-t0nuln;?~D$ zrZibbHELA#Q-e;45lGGR53e=|qk*~%!m+PJK_zU&Ds{WQqn~`@?gqM>dfB^@mN-)h zk&A$c3-_6rGIUW71((Gn1?9&VrOm<=6buk3av}Tv%tK5)Zj@@wLIXmJF&)8%d5emc zz&UoQuH#Y&yVppZN>0DmZNP_t^Oc9*%OumavO>kLlER*rseDcPuc;zxWEE8_-pD=I zwDL(*-Svk>FLL*%g{E!=H)5u|I!eY`b5k<)>w7>sOpNf(z|lKEi5P~k)Q`?#6d?Ak z1m0;JfixBL!WZog+MoN8HwrvZDG+^W0*+wni4HjiTQ(N=9{Fp|y)i#gP#u(5(1B?}4yCl-}iy zC*u48?Ob>y8$4<&bC4W^a%2vRrBj#Xq^)WeNk{NMice9t;Jm%${`GWU;eqE4zO|Yj zwMU#Q>a2RpY@+Ge{$#Xey(Fp6=6l}#3rK*N;a=AEMb%57rT5{*3_Vb&C#7;aha39& zbgkj!=AT_>KM8Bu1>J3`(CgP33_Z5G*gSK#w$gTWwuOORe$A~{Tfqftx_kTF(b;iB=Bo5U%<4Xi-A@ENp zpalEeh_AQk&{vKFwXd+xwaIT(uoY&Y1W1(E^D?fkUj1px#DPP-;a{ITRy26&1f#tUkflh3lR>ePbV zhT7VCz8_Qb{v5F^iF4lC4Z~&75XVK9&gCz=NO;!DKR@^STG7>A#i72vkwM|jq_E1I z(Q{z*v^M*B;`@X%JUvqyW2dI^?2kD8+I{v&SI+ctNtA26F*Xljv`R&{&DtGfzPhx| zVO_=Cu0{hJZqQ0|2pq|BmOceBr(j9hUm*AYY|qB)&Nt57>03O*PCjv9u@spx$Ww)K zx-hOow&b?>P(R%496@#jmfG%I-6P&*8?fi~4k*$0vjRV5D=OC#FxvY#pvdE|%i~{U zo?WF7Ueg^xz2Z_-C?9C75;Fs5r-SeGI}97N+2SQ!O$7y1&ihLE>@(IX9W+D2_94vr z2+H6%hC{BAh<8gtv$K)6FE`7Ky$0#n_ckgp%eVObKa_e7;u2_-Ey#(;vYj`H!}l&} ze>7`Z9a>#2B++~V13k}#RssNjCXCDKlEVr*Q9>~QfSZ3O%r7praB#5ymC(^~APp== z($FiyMZ8{EJ}c~f%SPeL5mwrrYMzDyBqXrzr!QW5w4^ufT1$$9>7*$;9a~=h34zqw zlc7ScU@gO88ix51cs!jdhs>g&UvV=tb|Scwym`uXHlJ`IZw9JA&iHmZlb zFg)dAeL@$2H*`WU#xC9`>P7)c8TTp`Ex4j&vCrzJ9xV#2G|6B+)NES$^3X5>A4BQ% zNnZ=)R{LC3RVxu?-4J(EUp}R+Bcz~Z)3~h`0s3}7U3krET+q``IWMOB%Z&7)hSp@_ z@o-5?k3PJ+Jn9FYd)uD@xk9qyxBLnSx{7H34#?lS%0I*MAB_b>7|cToXuIzvqYvH8 zeN$xGTz4t+@$-xHknhP~cQU-6fO7GG1kXA?$RtQqB+k&Ny{n`o_ABC`iuf`Ic%wO~ zDt0?^O5?$@ci+zqyNW6s>uA#OMXH$u#wx2m;(b0UZXgp`b3t*uHijG?{j!nPi^n|B zkv^h}q>&|QZ*5(*AP?+%!?_Bo-q&1N6~T)vqJDv77_U)f(n%rsL&k?eg?j zCe-SvX>{-qUvg@f+YBxsF`!KV{Z#A#24>hDL9?oskxQQ$n4HA}3!7<{mj~aEP02YA zj-j~et|%oVN)i`;&4|ZXAni0hdC7y{1R?mli$_KKX3t;U`?{8k(E6 z>wU|xwjxtXG4M{*^OU?Pc^`SPS&C$Q|K8kg>-X^?7W*>!BlMVKH_l8BwanoooPz=BFYx7yWrs*)AgJjel znmE-5)ipR#k4TWPW=YlEk0h*&kNF8BzAMJCCF{WxzES|a@}ozv1=hg7_Dy=DLCF!})Im 
zC-{FE_}5YWx|Qo>``Zc(O>6%~z1J;VXTskWDA31-Ul#t$i`Svod*W}XJX(E3L;u+d yu7j`lkKbTjv=oR2|Br5R-Olxr{B1{t{9g-HQxykI`TzhS`W=XVR++yF8SsDP;o9B+ literal 0 HcmV?d00001 diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index c0e00c7bf6f54e..83ca93bdfa7033 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -22,6 +22,7 @@ Other Enhancements - Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`) - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`) +- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) - .. _whatsnew_0250.api_breaking: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1037e2d9a3bd63..a6697e8879b08b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -460,7 +460,7 @@ def _formatter_func(self): # -------------------------------------------------------------------- # Set Operation Methods - def union(self, other): + def union(self, other, sort=None): """ Specialized union for DatetimeIndex objects. If combine overlapping ranges with the same DateOffset, will be much @@ -469,15 +469,29 @@ def union(self, other): Parameters ---------- other : DatetimeIndex or array-like + sort : bool or None, default None + Whether to sort the resulting Index. + + * None : Sort the result, except when + + 1. `self` and `other` are equal. + 2. `self` or `other` has length 0. + 3. Some values in `self` or `other` cannot be compared. + A RuntimeWarning is issued in this case. + + * False : do not sort the result + + .. 
versionadded:: 0.25.0 Returns ------- y : Index or DatetimeIndex """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) if len(other) == 0 or self.equals(other) or len(self) == 0: - return super(DatetimeIndex, self).union(other) + return super(DatetimeIndex, self).union(other, sort=sort) if not isinstance(other, DatetimeIndex): try: @@ -488,9 +502,9 @@ def union(self, other): this, other = self._maybe_utc_convert(other) if this._can_fast_union(other): - return this._fast_union(other) + return this._fast_union(other, sort=sort) else: - result = Index.union(this, other) + result = Index.union(this, other, sort=sort) if isinstance(result, DatetimeIndex): # TODO: we shouldn't be setting attributes like this; # in all the tests this equality already holds @@ -563,16 +577,28 @@ def _can_fast_union(self, other): # this will raise return False - def _fast_union(self, other): + def _fast_union(self, other, sort=None): if len(other) == 0: return self.view(type(self)) if len(self) == 0: return other.view(type(self)) - # to make our life easier, "sort" the two ranges + # Both DTIs are monotonic. 
Check if they are already + # in the "correct" order if self[0] <= other[0]: left, right = self, other + # DTIs are not in the "correct" order and we don't want + # to sort but want to remove overlaps + elif sort is False: + left, right = self, other + left_start = left[0] + loc = right.searchsorted(left_start, side='left') + right_chunk = right.values[:loc] + dates = _concat._concat_compat((left.values, right_chunk)) + return self._shallow_copy(dates) + # DTIs are not in the "correct" order and we want + # to sort else: left, right = other, self diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 19009e45ee83a4..cf1f75234ec621 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -21,83 +21,107 @@ class TestDatetimeIndexSetOps(object): 'dateutil/US/Pacific'] # TODO: moved from test_datetimelike; dedup with version below - def test_union2(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union2(self, sort): everything = tm.makeDateIndex(10) first = everything[:5] second = everything[5:] - union = first.union(second) - assert tm.equalContents(union, everything) + union = first.union(second, sort=sort) + tm.assert_index_equal(union, everything) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - result = first.union(case) - assert tm.equalContents(result, everything) + result = first.union(case, sort=sort) + tm.assert_index_equal(result, everything) @pytest.mark.parametrize("tz", tz) - def test_union(self, tz): + @pytest.mark.parametrize("sort", [None, False]) + def test_union(self, tz, sort): rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) + expected1_notsorted = pd.DatetimeIndex(list(other1) + list(rng1)) rng2 = pd.date_range('1/1/2000', 
freq='D', periods=5, tz=tz) other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) + expected2_notsorted = pd.DatetimeIndex(list(other2) + list(rng2[:3])) rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) other3 = pd.DatetimeIndex([], tz=tz) expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + expected3_notsorted = rng3 - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3)]: + for rng, other, exp, exp_notsorted in [(rng1, other1, expected1, + expected1_notsorted), + (rng2, other2, expected2, + expected2_notsorted), + (rng3, other3, expected3, + expected3_notsorted)]: - result_union = rng.union(other) - tm.assert_index_equal(result_union, expected) + result_union = rng.union(other, sort=sort) + tm.assert_index_equal(result_union, exp) - def test_union_coverage(self): + result_union = other.union(rng, sort=sort) + if sort is None: + tm.assert_index_equal(result_union, exp) + else: + tm.assert_index_equal(result_union, exp_notsorted) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_coverage(self, sort): idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) ordered = DatetimeIndex(idx.sort_values(), freq='infer') - result = ordered.union(idx) + result = ordered.union(idx, sort=sort) tm.assert_index_equal(result, ordered) - result = ordered[:0].union(ordered) + result = ordered[:0].union(ordered, sort=sort) tm.assert_index_equal(result, ordered) assert result.freq == ordered.freq - def test_union_bug_1730(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union_bug_1730(self, sort): rng_a = date_range('1/1/2012', periods=4, freq='3H') rng_b = date_range('1/1/2012', periods=4, freq='4H') - result = rng_a.union(rng_b) + result = rng_a.union(rng_b, sort=sort) exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) tm.assert_index_equal(result, exp) - def 
test_union_bug_1745(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union_bug_1745(self, sort): left = DatetimeIndex(['2012-05-11 15:19:49.695000']) right = DatetimeIndex(['2012-05-29 13:04:21.322000', '2012-05-11 15:27:24.873000', '2012-05-11 15:31:05.350000']) - result = left.union(right) - exp = DatetimeIndex(sorted(set(list(left)) | set(list(right)))) + result = left.union(right, sort=sort) + exp = DatetimeIndex(['2012-05-11 15:19:49.695000', + '2012-05-29 13:04:21.322000', + '2012-05-11 15:27:24.873000', + '2012-05-11 15:31:05.350000']) + if sort is None: + exp = exp.sort_values() tm.assert_index_equal(result, exp) - def test_union_bug_4564(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union_bug_4564(self, sort): from pandas import DateOffset left = date_range("2013-01-01", "2013-02-01") right = left + DateOffset(minutes=15) - result = left.union(right) + result = left.union(right, sort=sort) exp = DatetimeIndex(sorted(set(list(left)) | set(list(right)))) tm.assert_index_equal(result, exp) - def test_union_freq_both_none(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union_freq_both_none(self, sort): # GH11086 expected = bdate_range('20150101', periods=10) expected.freq = None - result = expected.union(expected) + result = expected.union(expected, sort=sort) tm.assert_index_equal(result, expected) assert result.freq is None @@ -112,11 +136,14 @@ def test_union_dataframe_index(self): exp = pd.date_range('1/1/1980', '1/1/2012', freq='MS') tm.assert_index_equal(df.index, exp) - def test_union_with_DatetimeIndex(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union_with_DatetimeIndex(self, sort): i1 = Int64Index(np.arange(0, 20, 2)) i2 = date_range(start='2012-01-03 00:00:00', periods=10, freq='D') - i1.union(i2) # Works - i2.union(i1) # Fails with "AttributeError: can't set attribute" + # Works + i1.union(i2, sort=sort) + # Fails with "AttributeError: can't set attribute" + 
i2.union(i1, sort=sort) # TODO: moved from test_datetimelike; de-duplicate with version below def test_intersection2(self): @@ -262,11 +289,12 @@ def test_datetimeindex_diff(self, sort): periods=98) assert len(dti1.difference(dti2, sort)) == 2 - def test_datetimeindex_union_join_empty(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_datetimeindex_union_join_empty(self, sort): dti = date_range(start='1/1/2001', end='2/1/2001', freq='D') empty = Index([]) - result = dti.union(empty) + result = dti.union(empty, sort=sort) assert isinstance(result, DatetimeIndex) assert result is result @@ -287,35 +315,40 @@ class TestBusinessDatetimeIndex(object): def setup_method(self, method): self.rng = bdate_range(START, END) - def test_union(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union(self, sort): # overlapping left = self.rng[:10] right = self.rng[5:10] - the_union = left.union(right) + the_union = left.union(right, sort=sort) assert isinstance(the_union, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] - the_union = left.union(right) + the_union = left.union(right, sort=sort) assert isinstance(the_union, Index) # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] - the_union = left.union(right) + the_union = left.union(right, sort=sort) assert isinstance(the_union, DatetimeIndex) # order does not matter - tm.assert_index_equal(right.union(left), the_union) + if sort is None: + tm.assert_index_equal(right.union(left, sort=sort), the_union) + else: + expected = pd.DatetimeIndex(list(right) + list(left)) + tm.assert_index_equal(right.union(left, sort=sort), expected) # overlapping, but different offset rng = date_range(START, END, freq=BMonthEnd()) - the_union = self.rng.union(rng) + the_union = self.rng.union(rng, sort=sort) assert isinstance(the_union, DatetimeIndex) def test_outer_join(self): @@ -350,16 +383,21 @@ def test_outer_join(self): assert isinstance(the_join, 
DatetimeIndex) assert the_join.freq is None - def test_union_not_cacheable(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union_not_cacheable(self, sort): rng = date_range('1/1/2000', periods=50, freq=Minute()) rng1 = rng[10:] rng2 = rng[:25] - the_union = rng1.union(rng2) - tm.assert_index_equal(the_union, rng) + the_union = rng1.union(rng2, sort=sort) + if sort is None: + tm.assert_index_equal(the_union, rng) + else: + expected = pd.DatetimeIndex(list(rng[10:]) + list(rng[:10])) + tm.assert_index_equal(the_union, expected) rng1 = rng[10:] rng2 = rng[15:35] - the_union = rng1.union(rng2) + the_union = rng1.union(rng2, sort=sort) expected = rng[10:] tm.assert_index_equal(the_union, expected) @@ -388,7 +426,8 @@ def test_intersection_bug(self): result = a.intersection(b) tm.assert_index_equal(result, b) - def test_month_range_union_tz_pytz(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_month_range_union_tz_pytz(self, sort): from pytz import timezone tz = timezone('US/Eastern') @@ -403,10 +442,11 @@ def test_month_range_union_tz_pytz(self): late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) - early_dr.union(late_dr) + early_dr.union(late_dr, sort=sort) @td.skip_if_windows_python_3 - def test_month_range_union_tz_dateutil(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_month_range_union_tz_dateutil(self, sort): from pandas._libs.tslibs.timezones import dateutil_gettz tz = dateutil_gettz('US/Eastern') @@ -421,7 +461,7 @@ def test_month_range_union_tz_dateutil(self): late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) - early_dr.union(late_dr) + early_dr.union(late_dr, sort=sort) class TestCustomDatetimeIndex(object): @@ -429,35 +469,37 @@ class TestCustomDatetimeIndex(object): def setup_method(self, method): self.rng = bdate_range(START, END, freq='C') - def test_union(self): + @pytest.mark.parametrize("sort", [None, False]) + def test_union(self, sort): # 
overlapping left = self.rng[:10] right = self.rng[5:10] - the_union = left.union(right) + the_union = left.union(right, sort=sort) assert isinstance(the_union, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] - the_union = left.union(right) + the_union = left.union(right, sort=sort) assert isinstance(the_union, Index) # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] - the_union = left.union(right) + the_union = left.union(right, sort=sort) assert isinstance(the_union, DatetimeIndex) # order does not matter - tm.assert_index_equal(right.union(left), the_union) + if sort is None: + tm.assert_index_equal(right.union(left, sort=sort), the_union) # overlapping, but different offset rng = date_range(START, END, freq=BMonthEnd()) - the_union = self.rng.union(rng) + the_union = self.rng.union(rng, sort=sort) assert isinstance(the_union, DatetimeIndex) def test_outer_join(self):