Ensure index used in reindex() is unique
pandas.Series.reindex allows non-unique indices to be used for
reindexing, which results in duplicated data. This should not be allowed
since it leads to unexpected downstream behaviour.
matthewgilbert committed Aug 18, 2018
1 parent fed7daa commit bda6baa
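
For reference, a minimal sketch of the pandas behaviour this commit guards
against; the series and ticker labels here are illustrative:

import pandas as pd

rets = pd.Series([0.02, -0.02], index=['CLF5', 'CLG5'])
# the target index is non-unique, so reindex silently repeats the CLF5 row
print(rets.reindex(['CLF5', 'CLF5']))
# CLF5    0.02
# CLF5    0.02
# dtype: float64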
Showing 2 changed files with 11 additions and 0 deletions.
8 changes: 8 additions & 0 deletions mapping/tests/test_util.py
@@ -766,3 +766,11 @@ def test_reindex(self):

        exp_rets = pd.Series([0.02, np.NaN], index=widx)
        assert_series_equal(exp_rets, new_rets)

        # check unique index to avoid duplicates from pd.Series.reindex
        idx = pd.MultiIndex.from_tuples([(TS('2015-01-03'), 'CLF5'),
                                         (TS('2015-01-04'), 'CLF5')])
        returns = pd.Series([0.02, -0.02], index=idx)
        widx = pd.MultiIndex.from_tuples([(TS('2015-01-03'), 'CLF5'),
                                          (TS('2015-01-03'), 'CLF5')])
        self.assertRaises(ValueError, util.reindex, returns, widx, limit=1)
3 changes: 3 additions & 0 deletions mapping/util.py
@@ -326,6 +326,9 @@ def reindex(returns, index, limit):
    to be dropped since it is not present in weights. A discussion of this
    issue is available at https://github.com/matthewgilbert/mapping/issues/9
    """
    if not index.is_unique:
        raise ValueError("'index' must be unique")

    cumulative_rets = (returns + 1).groupby(level=1).cumprod()
    # reindexing can both drop days and introduce NaNs for days not present
    cumulative_rets = cumulative_rets.reindex(index)
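
With the guard in place, the call exercised by the new test fails fast
instead of silently duplicating rows. A minimal sketch, assuming the
package is importable as mapping and that TS aliases pd.Timestamp as in
the test suite:

import pandas as pd
from pandas import Timestamp as TS
from mapping import util

idx = pd.MultiIndex.from_tuples([(TS('2015-01-03'), 'CLF5'),
                                 (TS('2015-01-04'), 'CLF5')])
returns = pd.Series([0.02, -0.02], index=idx)
widx = pd.MultiIndex.from_tuples([(TS('2015-01-03'), 'CLF5'),
                                  (TS('2015-01-03'), 'CLF5')])
util.reindex(returns, widx, limit=1)  # ValueError: 'index' must be unique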
