Skip to content

Commit

Permalink
Merge pull request #1984 from martinholmer/fix-bins
Browse files Browse the repository at this point in the history
Simplify table-creation Calculator methods and related utility functions
  • Loading branch information
martinholmer committed Apr 26, 2018
2 parents f555ec7 + 1308118 commit a83f298
Show file tree
Hide file tree
Showing 26 changed files with 493 additions and 674 deletions.
115 changes: 38 additions & 77 deletions taxcalc/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,18 +422,15 @@ def diagnostic_table(self, num_years):
del diag
return pd.concat(tlist, axis=1)

def distribution_tables(self, calc,
groupby='weighted_deciles',
income_measure='expanded_income',
result_type='weighted_sum'):
"""
Get results from self and calc, sort them based on groupby using
income_measure, compute grouped statistics based on result_type,
and return tables as a pair of Pandas dataframes.
def distribution_tables(self, calc, groupby):
"""
Get results from self and calc, sort them by expanded_income into
table rows defined by groupby, compute grouped statistics, and
return tables as a pair of Pandas dataframes.
This method leaves the Calculator object(s) unchanged.
Note that the returned tables have consistent income groups (based
on the self income_measure) even though the baseline income_measure
in self and the income_measure in calc are different.
on the self expanded_income) even though the baseline expanded_income
in self and the reform expanded_income in calc are different.
Parameters
----------
Expand All @@ -442,23 +439,14 @@ def distribution_tables(self, calc,
if calc is None, the second returned table is None
groupby : String object
options for input: 'weighted_deciles', 'standard_income_bins',
'large_income_bins', 'small_income_bins';
options for input: 'weighted_deciles', 'standard_income_bins'
determines how the rows in the resulting Pandas DataFrame are grouped
income_measure : String object
options for input: 'expanded_income' or 'c00100'(AGI)
specifies statistic used to place filing units in bins or deciles
result_type : String object
options for input: 'weighted_sum' or 'weighted_avg';
determines how the table statistics are computed
Return and typical usage
------------------------
dist1, dist2 = calc1.distribution_tables(calc2)
dist1, dist2 = calc1.distribution_tables(calc2, 'weighted_deciles')
OR
dist1, _ = calc1.distribution_tables(None)
dist1, _ = calc1.distribution_tables(None, 'weighted_deciles')
(where calc1 is a baseline Calculator object
and calc2 is a reform Calculator object).
Each of the dist1 and optional dist2 is a distribution table as a
Expand All @@ -475,90 +463,69 @@ def distribution_tables(self, calc,
specified income_measure.
"""
# nested function used only by this method
def have_same_income_measure(calc1, calc2, income_measure):
def have_same_income_measure(calc1, calc2):
"""
Return true if calc1 and calc2 contain the same income_measure;
Return true if calc1 and calc2 contain the same expanded_income;
otherwise, return false. (Note that "same" means nobody's
income_measure differs by more than one cent.)
expanded_income differs by more than one cent.)
"""
im1 = calc1.array(income_measure)
im2 = calc2.array(income_measure)
im1 = calc1.array('expanded_income')
im2 = calc2.array('expanded_income')
return np.allclose(im1, im2, rtol=0.0, atol=0.01)
# main logic of method
assert calc is None or isinstance(calc, Calculator)
assert (groupby == 'weighted_deciles' or
groupby == 'standard_income_bins' or
groupby == 'large_income_bins' or
groupby == 'small_income_bins')
assert (income_measure == 'expanded_income' or
income_measure == 'c00100')
assert (result_type == 'weighted_sum' or
result_type == 'weighted_avg')
groupby == 'standard_income_bins')
if calc is not None:
assert np.allclose(self.array('s006'),
calc.array('s006')) # check rows in same order
var_dataframe = self.distribution_table_dataframe()
dt1 = create_distribution_table(var_dataframe,
groupby=groupby,
income_measure=income_measure,
result_type=result_type)
imeasure = 'expanded_income'
dt1 = create_distribution_table(var_dataframe, groupby, imeasure)
del var_dataframe
if calc is None:
dt2 = None
else:
assert calc.current_year == self.current_year
assert calc.array_len == self.array_len
if income_measure == 'expanded_income':
assert np.allclose(self.consump_benval_params(),
calc.consump_benval_params())
assert np.allclose(self.consump_benval_params(),
calc.consump_benval_params())
var_dataframe = calc.distribution_table_dataframe()
if have_same_income_measure(self, calc, income_measure):
imeasure = income_measure
if have_same_income_measure(self, calc):
imeasure = 'expanded_income'
else:
imeasure = income_measure + '_baseline'
var_dataframe[imeasure] = self.array(income_measure)
dt2 = create_distribution_table(var_dataframe,
groupby=groupby,
income_measure=imeasure,
result_type=result_type)
imeasure = 'expanded_income_baseline'
var_dataframe[imeasure] = self.array('expanded_income')
dt2 = create_distribution_table(var_dataframe, groupby, imeasure)
del var_dataframe
return dt1, dt2
return (dt1, dt2)

def difference_table(self, calc,
groupby='weighted_deciles',
income_measure='expanded_income',
tax_to_diff='combined'):
def difference_table(self, calc, groupby, tax_to_diff):
"""
Get results from self and calc, sort them based on groupby using
income_measure, and return tax-difference table as a Pandas dataframe.
Get results from self and calc, sort them by expanded_income into
table rows defined by groupby, compute grouped statistics, and
return tax-difference table as a Pandas dataframe.
This method leaves the Calculator objects unchanged.
Note that the returned tables have consistent income groups (based
on the self income_measure) even though the baseline income_measure
in self and the income_measure in calc are different.
Note that filing units are put into groupby categories using the
specified income_measure in the baseline (self) situation.
on the self expanded_income) even though the baseline expanded_income
in self and the reform expanded_income in calc are different.
Parameters
----------
calc : Calculator object
calc represents the reform while self represents the baseline
groupby : String object
options for input: 'weighted_deciles', 'standard_income_bins',
'large_income_bins', 'small_income_bins';
options for input: 'weighted_deciles', 'standard_income_bins'
determines how the rows in the resulting Pandas DataFrame are grouped
income_measure : String object
options for input: 'expanded_income' or 'c00100'(AGI)
specifies statistic used to place filing units in bins or deciles
tax_to_diff : String object
options for input: 'iitax', 'payrolltax', 'combined'
specifies which tax to difference
Returns and typical usage
-------------------------
diff = calc1.difference_table(calc2)
diff = calc1.difference_table(calc2, 'weighted_deciles', 'iitax')
(where calc1 is a baseline Calculator object
and calc2 is a reform Calculator object).
The returned diff is a difference table as a Pandas DataFrame
Expand All @@ -577,16 +544,13 @@ def difference_table(self, calc,
assert isinstance(calc, Calculator)
assert calc.current_year == self.current_year
assert calc.array_len == self.array_len
if income_measure == 'expanded_income':
assert np.allclose(self.consump_benval_params(),
calc.consump_benval_params())
assert np.allclose(self.consump_benval_params(),
calc.consump_benval_params())
self_var_dataframe = self.dataframe(DIFF_VARIABLES)
calc_var_dataframe = calc.dataframe(DIFF_VARIABLES)
diff = create_difference_table(self_var_dataframe,
calc_var_dataframe,
groupby=groupby,
income_measure=income_measure,
tax_to_diff=tax_to_diff)
groupby, tax_to_diff)
del self_var_dataframe
del calc_var_dataframe
return diff
Expand Down Expand Up @@ -1130,10 +1094,7 @@ def decile_graph(self, calc,
assert isinstance(calc, Calculator)
assert calc.current_year == self.current_year
assert calc.array_len == self.array_len
dt1, dt2 = self.distribution_tables(calc,
groupby='weighted_deciles',
income_measure='expanded_income',
result_type='weighted_sum')
dt1, dt2 = self.distribution_tables(calc, 'weighted_deciles')
# construct data for graph
data = dec_graph_data(
dt1, dt2, year=self.current_year,
Expand Down
14 changes: 7 additions & 7 deletions taxcalc/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ def set_default_vals(self, known_years=999999):
"""
if hasattr(self, '_vals'):
for name, data in self._vals.items():
intg_val = data.get('integer_value', None)
bool_val = data.get('boolean_value', None)
values = data.get('value', None)
intg_val = data.get('integer_value')
bool_val = data.get('boolean_value')
values = data.get('value')
if values:
cpi_inflated = data.get('cpi_inflated', False)
if cpi_inflated:
Expand Down Expand Up @@ -357,8 +357,8 @@ def _update(self, year_mods):
if name.endswith('_cpi'):
continue # handle elsewhere in this method
vals_indexed = self._vals[name].get('cpi_inflated', False)
intg_val = self._vals[name].get('integer_value', None)
bool_val = self._vals[name].get('boolean_value', None)
intg_val = self._vals[name].get('integer_value')
bool_val = self._vals[name].get('boolean_value')
name_plus_cpi = name + '_cpi'
if name_plus_cpi in year_mods[year].keys():
used_names.add(name_plus_cpi)
Expand Down Expand Up @@ -388,8 +388,8 @@ def _update(self, year_mods):
pvalues = [cval[year - self.start_year]]
index_rates = self._indexing_rates_for_update(name, year,
num_years_to_expand)
intg_val = self._vals[pname].get('integer_value', None)
bool_val = self._vals[pname].get('boolean_value', None)
intg_val = self._vals[pname].get('integer_value')
bool_val = self._vals[pname].get('boolean_value')
nval = self._expand_array(pvalues, intg_val, bool_val,
inflate=pindexed,
inflation_rates=index_rates,
Expand Down
28 changes: 15 additions & 13 deletions taxcalc/reforms/2017_law.out
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
AGI pexempt stdded taxinc tax-wo-credits inctax paytax ataxinc
0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0
5 60000 12995 16792 30213 3466 489 9180 54921
6 0 0 0 0 0 0 0 0
7 180000 30322 30375 119303 16514 13514 27540 152716
8 0 0 0 0 0 0 0 0
9 240000 25990 23584 190426 32344 30844 36720 190796
10 600000 18875 20375 560750 142810 142810 66067 423437
sums 1080000 88182 91126 900692 195134 187657 139507 821869
AGI pexempt stdded taxinc tax-wo-credits inctax paytax ataxinc
<$0K 0 0 0 0 0 0 0 0
=$0K 0 0 0 0 0 0 0 0
$0-10K 0 0 0 0 0 0 0 0
$10-20K 0 0 0 0 0 0 0 0
$20-30K 0 0 0 0 0 0 0 0
$30-40K 60000 12995 16792 30213 3466 489 9180 54921
$40-50K 0 0 0 0 0 0 0 0
$50-75K 180000 30322 30375 119303 16514 13514 27540 152716
$75-100K 0 0 0 0 0 0 0 0
$100-200K 240000 25990 23584 190426 32344 30844 36720 190796
$200-500K 600000 18875 20375 560750 142810 142810 66067 423437
$500-1000K 0 0 0 0 0 0 0 0
>$1000K 0 0 0 0 0 0 0 0
ALL 1080000 88182 91126 900692 195134 187657 139507 821869
28 changes: 15 additions & 13 deletions taxcalc/reforms/BrownKhanna.out
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
AGI pexempt stdded taxinc tax-wo-credits inctax paytax ataxinc
0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0
5 60000 12995 16792 30213 3466 -4405 9180 59815
6 0 0 0 0 0 0 0 0
7 180000 30322 30375 119303 16514 8698 27540 157532
8 0 0 0 0 0 0 0 0
9 240000 25990 23584 190426 32344 30844 36720 190796
10 600000 18875 20375 560750 142810 142810 66067 423437
sums 1080000 88182 91126 900692 195134 177947 139507 831580
AGI pexempt stdded taxinc tax-wo-credits inctax paytax ataxinc
<$0K 0 0 0 0 0 0 0 0
=$0K 0 0 0 0 0 0 0 0
$0-10K 0 0 0 0 0 0 0 0
$10-20K 0 0 0 0 0 0 0 0
$20-30K 0 0 0 0 0 0 0 0
$30-40K 60000 12995 16792 30213 3466 -4405 9180 59815
$40-50K 0 0 0 0 0 0 0 0
$50-75K 180000 30322 30375 119303 16514 8698 27540 157532
$75-100K 0 0 0 0 0 0 0 0
$100-200K 240000 25990 23584 190426 32344 30844 36720 190796
$200-500K 600000 18875 20375 560750 142810 142810 66067 423437
$500-1000K 0 0 0 0 0 0 0 0
>$1000K 0 0 0 0 0 0 0 0
ALL 1080000 88182 91126 900692 195134 177947 139507 831580
28 changes: 15 additions & 13 deletions taxcalc/reforms/Clinton2016.out
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
AGI pexempt stdded taxinc tax-wo-credits inctax paytax ataxinc
0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0
5 60000 12995 16792 30213 3466 -511 9180 55921
6 0 0 0 0 0 0 0 0
7 180000 30322 30375 119303 16514 11514 27540 154716
8 0 0 0 0 0 0 0 0
9 240000 25990 23584 190426 32344 29844 36720 191796
10 600000 18875 20375 560750 142810 142810 66067 423437
sums 1080000 88182 91126 900692 195134 183657 139507 825869
AGI pexempt stdded taxinc tax-wo-credits inctax paytax ataxinc
<$0K 0 0 0 0 0 0 0 0
=$0K 0 0 0 0 0 0 0 0
$0-10K 0 0 0 0 0 0 0 0
$10-20K 0 0 0 0 0 0 0 0
$20-30K 0 0 0 0 0 0 0 0
$30-40K 60000 12995 16792 30213 3466 -511 9180 55921
$40-50K 0 0 0 0 0 0 0 0
$50-75K 180000 30322 30375 119303 16514 11514 27540 154716
$75-100K 0 0 0 0 0 0 0 0
$100-200K 240000 25990 23584 190426 32344 29844 36720 191796
$200-500K 600000 18875 20375 560750 142810 142810 66067 423437
$500-1000K 0 0 0 0 0 0 0 0
>$1000K 0 0 0 0 0 0 0 0
ALL 1080000 88182 91126 900692 195134 183657 139507 825869

0 comments on commit a83f298

Please sign in to comment.