Skip to content

Commit

Permalink
fix limit
Browse files (browse the repository at this point in the history)
  • Loading branch information
tompollard committed Mar 9, 2018
1 parent 0b0cfb9 commit 931a687
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='0.5.0',
version='0.5.1',

description='TableOne',
long_description=long_description,
Expand Down
19 changes: 11 additions & 8 deletions tableone.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""

__author__ = "Tom Pollard <tpollard@mit.edu>, Alistair Johnson"
__version__ = "0.5.0"
__version__ = "0.5.1"

import pandas as pd
from scipy import stats
Expand Down Expand Up @@ -483,23 +483,26 @@ def _create_tableone(self,data):
table.reset_index().set_index(['variable','level'], inplace=True)
if self.sort:
# alphabetical
newindex = sorted(table.index.values)
new_index = sorted(table.index.values)
else:
# sort by the columns argument
newindex = sorted(table.index.values,key=lambda x: self.columns.index(x[0]))
table = table.reindex(newindex)
new_index = sorted(table.index.values,key=lambda x: self.columns.index(x[0]))
table = table.reindex(new_index)

# if a limit has been set on the number of categorical variables
# then order the variables by frequency
# then re-order the variables by frequency
if self.limit:
levelcounts = data[self.categorical].nunique()
levelcounts = levelcounts[levelcounts >= self.limit]
for v,_ in levelcounts.iteritems():
count = data[v].value_counts().sort_values(ascending=False)
new_index = [(v, i) for i in count.index]
old_index = table.index.values.copy()
old_index[table.index.get_loc(v)] = new_index
table = table.reindex(old_index)
# restructure to match orig_index
new_index_array=np.empty((len(new_index),), dtype=object)
new_index_array[:]=[tuple(i) for i in new_index]
orig_index = table.index.values.copy()
orig_index[table.index.get_loc(v)] = new_index_array
table = table.reindex(orig_index)

# inserts n row
n_row = pd.DataFrame(columns = ['variable','level','isnull'])
Expand Down

0 comments on commit 931a687

Please sign in to comment.