autopep8

milcent · Nov 3, 2020 · 6dd4983 · 6dd4983
1 parent 3783799
commit 6dd4983
Show file tree

Hide file tree

Showing 13 changed files with 248 additions and 137 deletions.
diff --git a/benford/__init__.py b/benford/__init__.py
@@ -25,4 +25,3 @@
 from .benford import *
 
 __version__ = '0.2.7'
-
diff --git a/benford/benford.py b/benford/benford.py
@@ -5,9 +5,9 @@
     mad_dict, crit_chi2, KS_crit
 from .checks import _check_digs_, _check_confidence_, _check_test_, \
     _check_num_array_, _check_high_Z_
-from .utils import  _set_N_, input_data, prepare, \
+from .utils import _set_N_, input_data, prepare, \
     subtract_sorted, prep_to_roll, mad_to_roll, mse_to_roll, \
-     get_mantissas
+    get_mantissas
 from .expected import First, Second, LastTwo, _test_
 from .viz import _get_plot_args, plot_digs, plot_sum, plot_ordered_mantissas,\
     plot_mantissa_arc_test, plot_roll_mse, plot_roll_mad
@@ -16,6 +16,7 @@
 from .stats import Z_score, chi_sq, chi_sq_2, kolmogorov_smirnov,\
     kolmogorov_smirnov_2
 
+
 class Base(DataFrame):
     """Internalizes and prepares the data for Analysis.
 
@@ -34,6 +35,7 @@ class Base(DataFrame):
     Raises:
         TypeError: if not receiving `int` or `float` as input.
     """
+
     def __init__(self, data, decimals, sign='all', sec_order=False):
 
         DataFrame.__init__(self, {'seq': data})
@@ -166,9 +168,9 @@ def show_plot(self):
         """
         x, figsize, text_x = _get_plot_args(self.digs)
         plot_digs(self, x=x, y_Exp=self.Expected, y_Found=self.Found,
-                    N=self.N, figsize=figsize, conf_Z=confs[self.confidence],
-                    text_x=text_x
-                    )
+                  N=self.N, figsize=figsize, conf_Z=confs[self.confidence],
+                  text_x=text_x
+                  )
 
     def report(self, high_Z='pos', show_plot=True):
         """Handles the report specific to the test, considering its statistics
@@ -189,13 +191,15 @@ def report(self, high_Z='pos', show_plot=True):
         if show_plot:
             self.show_plot()
 
+
 class Summ(DataFrame):
     """Gets the base object and outputs a Summation test object
 
     Args:
        base: The Base object with the data prepared for Analysis
        test: The test for which to compute the summation
     """
+
     def __init__(self, base, test):
         super(Summ, self).__init__(base.abs()
                                    .groupby(test)[['seq']]
@@ -217,7 +221,7 @@ def __init__(self, base, test):
 
     def show_plot(self):
         """Draws the Summation test plot"""
-        figsize=(2 * (self.digs ** 2 + 5), 1.5 * (self.digs ** 2 + 5))
+        figsize = (2 * (self.digs ** 2 + 5), 1.5 * (self.digs ** 2 + 5))
         plot_sum(self, figsize, self.expected)
 
     def report(self, high_diff=None, show_plot=True):
@@ -232,6 +236,7 @@ def report(self, high_diff=None, show_plot=True):
         if show_plot:
             self.show_plot()
 
+
 class Mantissas(object):
     """Computes and holds the mantissas of the logarithms of the records
 
@@ -296,6 +301,7 @@ def arc_test(self, decimals=2, grid=True, figsize=12):
         plot_mantissa_arc_test(self.data, self.stats, decimals=decimals,
                                grid=grid, figsize=figsize)
 
+
 class Benford(object):
     """Initializes a Benford Analysis object and computes the proportions for
     the digits. The test DataFrames are attributes, i.e., obj.F1D is the First
@@ -367,10 +373,11 @@ def __init__(self, data, decimals=2, sign='all', confidence=95,
                                 for col in digs_dict.values()])}
 
         if self.verbose:
-            print('\n',' Benford Object Instantiated '.center(50, '#'),'\n')
+            print('\n', ' Benford Object Instantiated '.center(50, '#'), '\n')
             print(f'Initial sample size: {len(self.chosen)}.\n')
             print(f'Test performed on {len(self.base)} registries.\n')
-            print(f'Number of discarded entries for each test:\n{self._discarded}')
+            print(
+                f'Number of discarded entries for each test:\n{self._discarded}')
 
         if mantissas:
             self.mantissas()
@@ -405,19 +412,21 @@ def update_confidence(self, new_conf, tests=None):
                 raise ValueError('tests must be a list or None.')
         for test in tests:
             try:
-                getattr(self, test).update_confidence(self.confidence, check=False)
+                getattr(self, test).update_confidence(
+                    self.confidence, check=False)
             except AttributeError:
                 if test in ['Mantissas', 'F1D_Summ', 'F2D_Summ', 'F3D_Summ']:
                     pass
                 else:
-                    print(f"{test} not in Benford instance tests - review test's name.")
+                    print(
+                        f"{test} not in Benford instance tests - review test's name.")
                     pass
 
     @property
     def all_confidences(self):
         """dict: a dictionary with a confidence level for each computed tests,
         when applicable."""
-        con_dic= {}
+        con_dic = {}
         for key in self.tests:
             try:
                 con_dic[key] = getattr(self, key).confidence
@@ -464,7 +473,7 @@ def sec_order(self):
     def summation(self):
         """Creates Summation test DataFrames from Base object"""
         for test in ['F1D', 'F2D', 'F3D']:
-            t =  f'{test}_Summ'
+            t = f'{test}_Summ'
             setattr(self, t, Summ(self.base, test))
             self.tests.append(t)
 
@@ -568,7 +577,6 @@ def mantissas(self, report=True, plot=True, figsize=(15, 8)):
         if plot:
             plot_ordered_mantissas(self.Mant, figsize=figsize)
 
-
     def first_digits(self, digs, confidence=None, high_Z='pos',
                      limit_N=None, MAD=False, MSE=False, chi_square=False,
                      KS=False, show_plot=True, simple=False, ret_df=False):
@@ -623,10 +631,10 @@ def first_digits(self, digs, confidence=None, high_Z='pos',
             self.verbose = False
             show_plot = False
             df = prepare(temp[digs_dict[digs]], digs, limit_N=limit_N,
-                        simple=True, confidence=None)
+                         simple=True, confidence=None)
         else:
             N, df = prepare(temp[digs_dict[digs]], digs, limit_N=limit_N,
-                           simple=False, confidence=confidence)
+                            simple=False, confidence=confidence)
 
         if self.verbose:
             print(f"\nTest performed on {len(temp)} registries.\n"
@@ -648,18 +656,18 @@ def first_digits(self, digs, confidence=None, high_Z='pos',
         # Chi-square statistic
         if chi_square:
             self.chi_square = chi_sq(df, ddf=len(df) - 1,
-                                           confidence=confidence,
-                                           verbose=self.verbose)
+                                     confidence=confidence,
+                                     verbose=self.verbose)
         # KS test
         if KS:
             self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),
-                           verbose=self.verbose)
+                                         verbose=self.verbose)
 
         # Plotting the expected frequencies (line) against found ones (bars)
         if show_plot:
             plot_digs(df, x=x, y_Exp=df.Expected, y_Found=df.Found, N=N,
-                       figsize=(2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)),
-                       conf_Z=confs[confidence])
+                      figsize=(2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)),
+                      conf_Z=confs[confidence])
         if ret_df:
             return df
 
@@ -708,10 +716,10 @@ def second_digit(self, confidence=None, high_Z='pos',
             self.verbose = False
             show_plot = False
             df = prepare(temp['SD'], 22, limit_N=limit_N, simple=True,
-                        confidence=None)
+                         confidence=None)
         else:
             N, df = prepare(temp['SD'], 22, limit_N=limit_N, simple=False,
-                           confidence=confidence)
+                            confidence=confidence)
 
         if self.verbose:
             print(f"\nTest performed on {len(temp)} registries.\nDiscarded "
@@ -731,16 +739,16 @@ def second_digit(self, confidence=None, high_Z='pos',
         # Chi-square statistic
         if chi_square:
             self.chi_square = chi_sq(df, ddf=9, confidence=confidence,
-                                           verbose=self.verbose)
+                                     verbose=self.verbose)
         # KS test
         if KS:
             self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),
-                           verbose=self.verbose)
+                                         verbose=self.verbose)
 
         # Plotting the expected frequencies (line) against found ones (bars)
         if show_plot:
             plot_digs(df, x=arange(0, 10), y_Exp=df.Expected,
-                       y_Found=df.Found, N=N, figsize=(10, 6), conf_Z=conf)
+                      y_Found=df.Found, N=N, figsize=(10, 6), conf_Z=conf)
         if ret_df:
             return df
 
@@ -785,10 +793,10 @@ def last_two_digits(self, confidence=None, high_Z='pos',
             self.verbose = False
             show_plot = False
             df = prepare(temp['L2D'], -2, limit_N=limit_N, simple=True,
-                        confidence=None)
+                         confidence=None)
         else:
             N, df = prepare(temp['L2D'], -2, limit_N=limit_N, simple=False,
-                           confidence=confidence)
+                            confidence=confidence)
 
         if self.verbose:
             print(f"\nTest performed on {len(temp)} registries.\n\nDiscarded "
@@ -808,17 +816,17 @@ def last_two_digits(self, confidence=None, high_Z='pos',
         # Chi-square statistic
         if chi_square:
             self.chi_square = chi_sq(df, ddf=99, confidence=confidence,
-                                           verbose=self.verbose)
+                                     verbose=self.verbose)
         # KS test
         if KS:
             self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),
-                           verbose=self.verbose)
+                                         verbose=self.verbose)
 
         # Plotting expected frequencies (line) versus found ones (bars)
         if show_plot:
             plot_digs(df, x=arange(0, 100), y_Exp=df.Expected,
-                       y_Found=df.Found, N=N, figsize=(15, 5),
-                       conf_Z=conf, text_x=True)
+                      y_Found=df.Found, N=N, figsize=(15, 5),
+                      conf_Z=conf, text_x=True)
         if ret_df:
             return df
 
@@ -863,7 +871,7 @@ def summation(self, digs=2, top=20, show_plot=True,
 
         if show_plot:
             plot_sum(df, figsize=(
-                       2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)), li=li)
+                2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)), li=li)
 
         if ret_df:
             return df
@@ -1015,8 +1023,8 @@ def __init__(self, data, test, window, decimals=2, sign='all'):
         Exp, ind = prep_to_roll(start, self.test)
 
         self.roll_series = start[digs_dict[test]].rolling(
-                                window=window).apply(mad_to_roll,
-                                    args=(Exp, ind), raw=False)
+            window=window).apply(mad_to_roll,
+                                 args=(Exp, ind), raw=False)
         self.roll_series.dropna(inplace=True)
 
     def show_plot(self, figsize=(15, 8)):
@@ -1058,8 +1066,8 @@ def __init__(self, data, test, window, decimals=2, sign='all'):
         Exp, ind = prep_to_roll(start, test)
 
         self.roll_series = start[digs_dict[test]].rolling(
-                                window=window).apply(mse_to_roll,
-                                    args=(Exp, ind), raw=False)
+            window=window).apply(mse_to_roll,
+                                 args=(Exp, ind), raw=False)
         self.roll_series.dropna(inplace=True)
 
     def show_plot(self, figsize=(15, 8)):
@@ -1071,7 +1079,6 @@ def show_plot(self, figsize=(15, 8)):
         plot_roll_mse(self.roll_series, figsize=figsize)
 
 
-
 def first_digits(data, digs, decimals=2, sign='all', verbose=True,
                  confidence=None, high_Z='pos', limit_N=None,
                  MAD=False, MSE=False, chi_square=False, KS=False,

diff --git a/benford/checks.py b/benford/checks.py
@@ -2,6 +2,7 @@
 from numpy import array, ndarray
 from .constants import digs_dict, rev_digs, confs
 
+
 def _check_digs_(digs):
     """Checks the possible values for the digs parameter of the
     First Digits tests
@@ -31,16 +32,20 @@ def _check_test_(test):
                          f'values are\n {list(digs_dict.keys())} for ints and'
                          f'\n {list(rev_digs.keys())} for strings.')
 
+
 def _check_decimals_(decimals):
     """"""
     if isinstance(decimals, int):
         if (decimals < 0):
-            raise ValueError("Parameter -decimals- must be an int >= 0, or 'infer'.")
+            raise ValueError(
+                "Parameter -decimals- must be an int >= 0, or 'infer'.")
     else:
         if decimals != 'infer':
-            raise ValueError("Parameter -decimals- must be an int >= 0, or 'infer'.")
+            raise ValueError(
+                "Parameter -decimals- must be an int >= 0, or 'infer'.")
     return decimals
 
+
 def _check_sign_(sign):
     """"""
     if sign not in ['all', 'pos', 'neg']:
@@ -56,6 +61,7 @@ def _check_confidence_(confidence):
                          f"following:\n {list(confs.keys())}")
     return confidence
 
+
 def _check_high_Z_(high_Z):
     """"""
     if not high_Z in ['pos', 'all']:
@@ -64,11 +70,12 @@ def _check_high_Z_(high_Z):
                              "'all' or an int.")
     return high_Z
 
+
 def _check_num_array_(data):
     """"""
     if (not isinstance(data, ndarray)) & (not isinstance(data, Series)):
         print('\n`data` not a numpy NDarray nor a pandas Series.'
-                ' Trying to convert...')
+              ' Trying to convert...')
         try:
             data = array(data)
         except:

diff --git a/benford/constants.py b/benford/constants.py
@@ -60,7 +60,7 @@
                    99.99999: 1137.082, None: None}
              }
 
-# Critical Kolmogorov-Smirnov values according to the confidence levels 
+# Critical Kolmogorov-Smirnov values according to the confidence levels
 # These values are yet to be divided by the square root of the sample size
 KS_crit = {80: 1.075, 85: 1.139, 90: 1.125, 95: 1.36, 99: 1.63,
            99.9: 1.95, 99.99: 2.23, 99.999: 2.47,

diff --git a/benford/expected.py b/benford/expected.py
@@ -37,6 +37,7 @@ class Second(DataFrame):
         plot: option to plot a bar chart of the Expected proportions.
             Defaults to True.
     """
+
     def __init__(self, plot=True):
         a = arange(10, 100)
         Expe = log10(1 + (1. / a))
@@ -58,20 +59,22 @@ class LastTwo(DataFrame):
         plot: option to plot a bar chart of the Expected proportions.
             Defaults to True.
     """
+
     def __init__(self, num=False, plot=True):
         exp = array([1 / 99.] * 100)
         DataFrame.__init__(self, {'Expected': exp,
-                              'Last_2_Dig': _lt_(num=num)})
+                                  'Last_2_Dig': _lt_(num=num)})
         self.set_index('Last_2_Dig', inplace=True)
         if plot:
             plot_expected(self, -2)
 
+
 def _test_(digs):
     """Chooses the Expected class to be used in a test
 
     Args:
         digs: the int corresponding to the Expected class to be instantiated
-    
+
     Returns:
         the Expected instance for the proper test to be performed
     """
@@ -89,7 +92,7 @@ def _lt_(num=False):
     Args:
         num: returns numeric (ints) values. Defaults to False,
             which returns strings.
-    
+
     Returns:
         Array of ints or str, in any case representing all 100 possible
             combinations of last two digits
@@ -99,5 +102,5 @@ def _lt_(num=False):
     else:
         n = arange(0, 100).astype(str)
         n[:10] = array(['00', '01', '02', '03', '04', '05',
-                           '06', '07', '08', '09'])
-    return n
+                        '06', '07', '08', '09'])
+    return n