Ensure index used in reindex() is unique
pandas.Series.reindex allows non-unique indices to be used for
reindexing, which results in duplicated data. This should not be allowed
since it leads to unexpected downstream behaviour.
matthewgilbert committed Aug 18, 2018
1 parent fed7daa commit bda6baa
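
For reference, a minimal sketch of the pandas behaviour this commit guards
against; the series and ticker labels here are illustrative:

import pandas as pd

rets = pd.Series([0.02, -0.02], index=['CLF5', 'CLG5'])
# the target index is non-unique, so reindex silently repeats the CLF5 row
print(rets.reindex(['CLF5', 'CLF5']))
# CLF5    0.02
# CLF5    0.02
# dtype: float64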
Showing 2 changed files with 11 additions and 0 deletions.
8 changes: 8 additions & 0 deletions mapping/tests/test_util.py
@@ -766,3 +766,11 @@ def test_reindex(self):

        exp_rets = pd.Series([0.02, np.NaN], index=widx)
        assert_series_equal(exp_rets, new_rets)

        # check unique index to avoid duplicates from pd.Series.reindex
        idx = pd.MultiIndex.from_tuples([(TS('2015-01-03'), 'CLF5'),
                                         (TS('2015-01-04'), 'CLF5')])
        returns = pd.Series([0.02, -0.02], index=idx)
        widx = pd.MultiIndex.from_tuples([(TS('2015-01-03'), 'CLF5'),
                                          (TS('2015-01-03'), 'CLF5')])
        self.assertRaises(ValueError, util.reindex, returns, widx, limit=1)
3 changes: 3 additions & 0 deletions mapping/util.py
@@ -326,6 +326,9 @@ def reindex(returns, index, limit):
    to be dropped since it is not present in weights. A discussion of this
    issue is available at https://github.com/matthewgilbert/mapping/issues/9
    """
    if not index.is_unique:
        raise ValueError("'index' must be unique")

    cumulative_rets = (returns + 1).groupby(level=1).cumprod()
    # reindexing can both drop days and introduce NaNs for days not present
    cumulative_rets = cumulative_rets.reindex(index)
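
With the guard in place, the call exercised by the new test fails fast
instead of silently duplicating rows. A minimal sketch, assuming the
package is importable as mapping and that TS aliases pd.Timestamp as in
the test suite:

import pandas as pd
from pandas import Timestamp as TS
from mapping import util

idx = pd.MultiIndex.from_tuples([(TS('2015-01-03'), 'CLF5'),
                                 (TS('2015-01-04'), 'CLF5')])
returns = pd.Series([0.02, -0.02], index=idx)
widx = pd.MultiIndex.from_tuples([(TS('2015-01-03'), 'CLF5'),
                                  (TS('2015-01-03'), 'CLF5')])
util.reindex(returns, widx, limit=1)  # ValueError: 'index' must be unique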
