Merge pull request #1984 from martinholmer/fix-bins

Simplify table-creation Calculator methods and related utility functions
PSLmodels · Apr 26, 2018 · a83f298
2 parents f555ec7 + 1308118
commit a83f298
Show file tree

Hide file tree

Showing 26 changed files with 493 additions and 674 deletions.
diff --git a/taxcalc/calculate.py b/taxcalc/calculate.py
@@ -422,18 +422,15 @@ def diagnostic_table(self, num_years):
         del diag
         return pd.concat(tlist, axis=1)
 
-    def distribution_tables(self, calc,
-                            groupby='weighted_deciles',
-                            income_measure='expanded_income',
-                            result_type='weighted_sum'):
-        """
-        Get results from self and calc, sort them based on groupby using
-        income_measure, compute grouped statistics based on result_type,
-        and return tables as a pair of Pandas dataframes.
+    def distribution_tables(self, calc, groupby):
+        """
+        Get results from self and calc, sort them by expanded_income into
+        table rows defined by groupby, compute grouped statistics, and
+        return tables as a pair of Pandas dataframes.
         This method leaves the Calculator object(s) unchanged.
         Note that the returned tables have consistent income groups (based
-        on the self income_measure) even though the baseline income_measure
-        in self and the income_measure in calc are different.
+        on the self expanded_income) even though the baseline expanded_income
+        in self and the reform expanded_income in calc may differ.
 
         Parameters
         ----------
@@ -442,23 +439,14 @@ def distribution_tables(self, calc,
             if calc is None, the second returned table is None
 
         groupby : String object
-            options for input: 'weighted_deciles', 'standard_income_bins',
-                               'large_income_bins', 'small_income_bins';
+            options for input: 'weighted_deciles', 'standard_income_bins'
             determines how the rows in the resulting Pandas DataFrame are defined
 
-        income_measure : String object
-            options for input: 'expanded_income' or 'c00100'(AGI)
-            specifies statistic used to place filing units in bins or deciles
-
-        result_type : String object
-            options for input: 'weighted_sum' or 'weighted_avg';
-            determines how the table statistics are computed
-
         Return and typical usage
         ------------------------
-        dist1, dist2 = calc1.distribution_tables(calc2)
+        dist1, dist2 = calc1.distribution_tables(calc2, 'weighted_deciles')
         OR
-        dist1, _ = calc1.distribution_tables(None)
+        dist1, _ = calc1.distribution_tables(None, 'weighted_deciles')
         (where calc1 is a baseline Calculator object
         and calc2 is a reform Calculator object).
         Each of the dist1 and optional dist2 is a distribution table as a
@@ -475,90 +463,69 @@ def distribution_tables(self, calc,
               specified income_measure.
         """
         # nested function used only by this method
-        def have_same_income_measure(calc1, calc2, income_measure):
+        def have_same_income_measure(calc1, calc2):
             """
-            Return true if calc1 and calc2 contain the same income_measure;
+            Return true if calc1 and calc2 contain the same expanded_income;
             otherwise, return false.  (Note that "same" means nobody's
-            income_measure differs by more than one cent.)
+            expanded_income differs by more than one cent.)
             """
-            im1 = calc1.array(income_measure)
-            im2 = calc2.array(income_measure)
+            im1 = calc1.array('expanded_income')
+            im2 = calc2.array('expanded_income')
             return np.allclose(im1, im2, rtol=0.0, atol=0.01)
         # main logic of method
         assert calc is None or isinstance(calc, Calculator)
         assert (groupby == 'weighted_deciles' or
-                groupby == 'standard_income_bins' or
-                groupby == 'large_income_bins' or
-                groupby == 'small_income_bins')
-        assert (income_measure == 'expanded_income' or
-                income_measure == 'c00100')
-        assert (result_type == 'weighted_sum' or
-                result_type == 'weighted_avg')
+                groupby == 'standard_income_bins')
         if calc is not None:
             assert np.allclose(self.array('s006'),
                                calc.array('s006'))  # check rows in same order
         var_dataframe = self.distribution_table_dataframe()
-        dt1 = create_distribution_table(var_dataframe,
-                                        groupby=groupby,
-                                        income_measure=income_measure,
-                                        result_type=result_type)
+        imeasure = 'expanded_income'
+        dt1 = create_distribution_table(var_dataframe, groupby, imeasure)
         del var_dataframe
         if calc is None:
             dt2 = None
         else:
             assert calc.current_year == self.current_year
             assert calc.array_len == self.array_len
-            if income_measure == 'expanded_income':
-                assert np.allclose(self.consump_benval_params(),
-                                   calc.consump_benval_params())
+            assert np.allclose(self.consump_benval_params(),
+                               calc.consump_benval_params())
             var_dataframe = calc.distribution_table_dataframe()
-            if have_same_income_measure(self, calc, income_measure):
-                imeasure = income_measure
+            if have_same_income_measure(self, calc):
+                imeasure = 'expanded_income'
             else:
-                imeasure = income_measure + '_baseline'
-                var_dataframe[imeasure] = self.array(income_measure)
-            dt2 = create_distribution_table(var_dataframe,
-                                            groupby=groupby,
-                                            income_measure=imeasure,
-                                            result_type=result_type)
+                imeasure = 'expanded_income_baseline'
+                var_dataframe[imeasure] = self.array('expanded_income')
+            dt2 = create_distribution_table(var_dataframe, groupby, imeasure)
             del var_dataframe
-        return dt1, dt2
+        return (dt1, dt2)
 
-    def difference_table(self, calc,
-                         groupby='weighted_deciles',
-                         income_measure='expanded_income',
-                         tax_to_diff='combined'):
+    def difference_table(self, calc, groupby, tax_to_diff):
         """
-        Get results from self and calc, sort them based on groupby using
-        income_measure, and return tax-difference table as a Pandas dataframe.
+        Get results from self and calc, sort them by expanded_income into
+        table rows defined by groupby, compute grouped statistics, and
+        return tax-difference table as a Pandas dataframe.
         This method leaves the Calculator objects unchanged.
         Note that the returned tables have consistent income groups (based
-        on the self income_measure) even though the baseline income_measure
-        in self and the income_measure in calc are different.
-        Note that filing units are put into groupby categories using the
-        specified income_measure in the baseline (self) situation.
+        on the self expanded_income) even though the baseline expanded_income
+        in self and the reform expanded_income in calc may differ.
 
         Parameters
         ----------
         calc : Calculator object
             calc represents the reform while self represents the baseline
 
         groupby : String object
-            options for input: 'weighted_deciles', 'standard_income_bins',
-                               'large_income_bins', 'small_income_bins';
+            options for input: 'weighted_deciles', 'standard_income_bins'
             determines how the rows in the resulting Pandas DataFrame are defined
 
-        income_measure : String object
-            options for input: 'expanded_income' or 'c00100'(AGI)
-            specifies statistic used to place filing units in bins or deciles
-
         tax_to_diff : String object
             options for input: 'iitax', 'payrolltax', 'combined'
             specifies which tax to difference
 
         Returns and typical usage
         -------------------------
-        diff = calc1.difference_table(calc2)
+        diff = calc1.difference_table(calc2, 'weighted_deciles', 'iitax')
         (where calc1 is a baseline Calculator object
         and calc2 is a reform Calculator object).
         The returned diff is a difference table as a Pandas DataFrame
@@ -577,16 +544,13 @@ def difference_table(self, calc,
         assert isinstance(calc, Calculator)
         assert calc.current_year == self.current_year
         assert calc.array_len == self.array_len
-        if income_measure == 'expanded_income':
-            assert np.allclose(self.consump_benval_params(),
-                               calc.consump_benval_params())
+        assert np.allclose(self.consump_benval_params(),
+                           calc.consump_benval_params())
         self_var_dataframe = self.dataframe(DIFF_VARIABLES)
         calc_var_dataframe = calc.dataframe(DIFF_VARIABLES)
         diff = create_difference_table(self_var_dataframe,
                                        calc_var_dataframe,
-                                       groupby=groupby,
-                                       income_measure=income_measure,
-                                       tax_to_diff=tax_to_diff)
+                                       groupby, tax_to_diff)
         del self_var_dataframe
         del calc_var_dataframe
         return diff
@@ -1130,10 +1094,7 @@ def decile_graph(self, calc,
         assert isinstance(calc, Calculator)
         assert calc.current_year == self.current_year
         assert calc.array_len == self.array_len
-        dt1, dt2 = self.distribution_tables(calc,
-                                            groupby='weighted_deciles',
-                                            income_measure='expanded_income',
-                                            result_type='weighted_sum')
+        dt1, dt2 = self.distribution_tables(calc, 'weighted_deciles')
         # construct data for graph
         data = dec_graph_data(
             dt1, dt2, year=self.current_year,

diff --git a/taxcalc/parameters.py b/taxcalc/parameters.py
@@ -94,9 +94,9 @@ def set_default_vals(self, known_years=999999):
         """
         if hasattr(self, '_vals'):
             for name, data in self._vals.items():
-                intg_val = data.get('integer_value', None)
-                bool_val = data.get('boolean_value', None)
-                values = data.get('value', None)
+                intg_val = data.get('integer_value')
+                bool_val = data.get('boolean_value')
+                values = data.get('value')
                 if values:
                     cpi_inflated = data.get('cpi_inflated', False)
                     if cpi_inflated:
@@ -357,8 +357,8 @@ def _update(self, year_mods):
             if name.endswith('_cpi'):
                 continue  # handle elsewhere in this method
             vals_indexed = self._vals[name].get('cpi_inflated', False)
-            intg_val = self._vals[name].get('integer_value', None)
-            bool_val = self._vals[name].get('boolean_value', None)
+            intg_val = self._vals[name].get('integer_value')
+            bool_val = self._vals[name].get('boolean_value')
             name_plus_cpi = name + '_cpi'
             if name_plus_cpi in year_mods[year].keys():
                 used_names.add(name_plus_cpi)
@@ -388,8 +388,8 @@ def _update(self, year_mods):
             pvalues = [cval[year - self.start_year]]
             index_rates = self._indexing_rates_for_update(name, year,
                                                           num_years_to_expand)
-            intg_val = self._vals[pname].get('integer_value', None)
-            bool_val = self._vals[pname].get('boolean_value', None)
+            intg_val = self._vals[pname].get('integer_value')
+            bool_val = self._vals[pname].get('boolean_value')
             nval = self._expand_array(pvalues, intg_val, bool_val,
                                       inflate=pindexed,
                                       inflation_rates=index_rates,

diff --git a/taxcalc/reforms/2017_law.out b/taxcalc/reforms/2017_law.out
@@ -1,13 +1,15 @@
-         AGI  pexempt  stdded  taxinc  tax-wo-credits  inctax  paytax  ataxinc
-0          0        0       0       0               0       0       0        0
-1          0        0       0       0               0       0       0        0
-2          0        0       0       0               0       0       0        0
-3          0        0       0       0               0       0       0        0
-4          0        0       0       0               0       0       0        0
-5      60000    12995   16792   30213            3466     489    9180    54921
-6          0        0       0       0               0       0       0        0
-7     180000    30322   30375  119303           16514   13514   27540   152716
-8          0        0       0       0               0       0       0        0
-9     240000    25990   23584  190426           32344   30844   36720   190796
-10    600000    18875   20375  560750          142810  142810   66067   423437
-sums 1080000    88182   91126  900692          195134  187657  139507   821869
+               AGI  pexempt  stdded  taxinc  tax-wo-credits  inctax  paytax  ataxinc
+<$0K             0        0       0       0               0       0       0        0
+=$0K             0        0       0       0               0       0       0        0
+$0-10K           0        0       0       0               0       0       0        0
+$10-20K          0        0       0       0               0       0       0        0
+$20-30K          0        0       0       0               0       0       0        0
+$30-40K      60000    12995   16792   30213            3466     489    9180    54921
+$40-50K          0        0       0       0               0       0       0        0
+$50-75K     180000    30322   30375  119303           16514   13514   27540   152716
+$75-100K         0        0       0       0               0       0       0        0
+$100-200K   240000    25990   23584  190426           32344   30844   36720   190796
+$200-500K   600000    18875   20375  560750          142810  142810   66067   423437
+$500-1000K       0        0       0       0               0       0       0        0
+>$1000K          0        0       0       0               0       0       0        0
+ALL        1080000    88182   91126  900692          195134  187657  139507   821869
diff --git a/taxcalc/reforms/BrownKhanna.out b/taxcalc/reforms/BrownKhanna.out
@@ -1,13 +1,15 @@
-         AGI  pexempt  stdded  taxinc  tax-wo-credits  inctax  paytax  ataxinc
-0          0        0       0       0               0       0       0        0
-1          0        0       0       0               0       0       0        0
-2          0        0       0       0               0       0       0        0
-3          0        0       0       0               0       0       0        0
-4          0        0       0       0               0       0       0        0
-5      60000    12995   16792   30213            3466   -4405    9180    59815
-6          0        0       0       0               0       0       0        0
-7     180000    30322   30375  119303           16514    8698   27540   157532
-8          0        0       0       0               0       0       0        0
-9     240000    25990   23584  190426           32344   30844   36720   190796
-10    600000    18875   20375  560750          142810  142810   66067   423437
-sums 1080000    88182   91126  900692          195134  177947  139507   831580
+               AGI  pexempt  stdded  taxinc  tax-wo-credits  inctax  paytax  ataxinc
+<$0K             0        0       0       0               0       0       0        0
+=$0K             0        0       0       0               0       0       0        0
+$0-10K           0        0       0       0               0       0       0        0
+$10-20K          0        0       0       0               0       0       0        0
+$20-30K          0        0       0       0               0       0       0        0
+$30-40K      60000    12995   16792   30213            3466   -4405    9180    59815
+$40-50K          0        0       0       0               0       0       0        0
+$50-75K     180000    30322   30375  119303           16514    8698   27540   157532
+$75-100K         0        0       0       0               0       0       0        0
+$100-200K   240000    25990   23584  190426           32344   30844   36720   190796
+$200-500K   600000    18875   20375  560750          142810  142810   66067   423437
+$500-1000K       0        0       0       0               0       0       0        0
+>$1000K          0        0       0       0               0       0       0        0
+ALL        1080000    88182   91126  900692          195134  177947  139507   831580
diff --git a/taxcalc/reforms/Clinton2016.out b/taxcalc/reforms/Clinton2016.out
@@ -1,13 +1,15 @@
-         AGI  pexempt  stdded  taxinc  tax-wo-credits  inctax  paytax  ataxinc
-0          0        0       0       0               0       0       0        0
-1          0        0       0       0               0       0       0        0
-2          0        0       0       0               0       0       0        0
-3          0        0       0       0               0       0       0        0
-4          0        0       0       0               0       0       0        0
-5      60000    12995   16792   30213            3466    -511    9180    55921
-6          0        0       0       0               0       0       0        0
-7     180000    30322   30375  119303           16514   11514   27540   154716
-8          0        0       0       0               0       0       0        0
-9     240000    25990   23584  190426           32344   29844   36720   191796
-10    600000    18875   20375  560750          142810  142810   66067   423437
-sums 1080000    88182   91126  900692          195134  183657  139507   825869
+               AGI  pexempt  stdded  taxinc  tax-wo-credits  inctax  paytax  ataxinc
+<$0K             0        0       0       0               0       0       0        0
+=$0K             0        0       0       0               0       0       0        0
+$0-10K           0        0       0       0               0       0       0        0
+$10-20K          0        0       0       0               0       0       0        0
+$20-30K          0        0       0       0               0       0       0        0
+$30-40K      60000    12995   16792   30213            3466    -511    9180    55921
+$40-50K          0        0       0       0               0       0       0        0
+$50-75K     180000    30322   30375  119303           16514   11514   27540   154716
+$75-100K         0        0       0       0               0       0       0        0
+$100-200K   240000    25990   23584  190426           32344   29844   36720   191796
+$200-500K   600000    18875   20375  560750          142810  142810   66067   423437
+$500-1000K       0        0       0       0               0       0       0        0
+>$1000K          0        0       0       0               0       0       0        0
+ALL        1080000    88182   91126  900692          195134  183657  139507   825869