fix limit

tompollard · Mar 9, 2018 · 931a687
1 parent 0b0cfb9
commit 931a687
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 9 deletions.
diff --git a/setup.py b/setup.py
@@ -17,7 +17,7 @@
     # Versions should comply with PEP440. For a discussion on single-sourcing
     # the version across setup.py and the project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
-    version='0.5.0',
+    version='0.5.1',
 
     description='TableOne',
     long_description=long_description,

diff --git a/tableone.py b/tableone.py
@@ -5,7 +5,7 @@
 """
 
 __author__ = "Tom Pollard <tpollard@mit.edu>, Alistair Johnson"
-__version__ = "0.5.0"
+__version__ = "0.5.1"
 
 import pandas as pd
 from scipy import stats
@@ -483,23 +483,26 @@ def _create_tableone(self,data):
         table.reset_index().set_index(['variable','level'], inplace=True)
         if self.sort:
             # alphabetical
-            newindex = sorted(table.index.values)
+            new_index = sorted(table.index.values)
         else:
             # sort by the columns argument
-            newindex = sorted(table.index.values,key=lambda x: self.columns.index(x[0]))
-        table = table.reindex(newindex)
+            new_index = sorted(table.index.values,key=lambda x: self.columns.index(x[0]))
+        table = table.reindex(new_index)
 
         # if a limit has been set on the number of categorical variables
-        # then order the variables by frequency
+        # then re-order the variables by frequency
         if self.limit:
             levelcounts = data[self.categorical].nunique()
             levelcounts = levelcounts[levelcounts >= self.limit]
             for v,_ in levelcounts.iteritems():
                 count = data[v].value_counts().sort_values(ascending=False)
                 new_index = [(v, i) for i in count.index]
-                old_index = table.index.values.copy()
-                old_index[table.index.get_loc(v)] = new_index
-                table = table.reindex(old_index)
+                # restructure to match orig_index
+                new_index_array=np.empty((len(new_index),), dtype=object)
+                new_index_array[:]=[tuple(i) for i in new_index]
+                orig_index = table.index.values.copy()
+                orig_index[table.index.get_loc(v)] = new_index_array
+                table = table.reindex(orig_index)
 
         # inserts n row
         n_row = pd.DataFrame(columns = ['variable','level','isnull'])