Skip to content

Commit

Permalink
autopep8
Browse files Browse the repository at this point in the history
  • Loading branch information
milcent committed Nov 3, 2020
1 parent 3783799 commit 6dd4983
Show file tree
Hide file tree
Showing 13 changed files with 248 additions and 137 deletions.
1 change: 0 additions & 1 deletion benford/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,3 @@
from .benford import *

__version__ = '0.2.7'

81 changes: 44 additions & 37 deletions benford/benford.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
mad_dict, crit_chi2, KS_crit
from .checks import _check_digs_, _check_confidence_, _check_test_, \
_check_num_array_, _check_high_Z_
from .utils import _set_N_, input_data, prepare, \
from .utils import _set_N_, input_data, prepare, \
subtract_sorted, prep_to_roll, mad_to_roll, mse_to_roll, \
get_mantissas
get_mantissas
from .expected import First, Second, LastTwo, _test_
from .viz import _get_plot_args, plot_digs, plot_sum, plot_ordered_mantissas,\
plot_mantissa_arc_test, plot_roll_mse, plot_roll_mad
Expand All @@ -16,6 +16,7 @@
from .stats import Z_score, chi_sq, chi_sq_2, kolmogorov_smirnov,\
kolmogorov_smirnov_2


class Base(DataFrame):
"""Internalizes and prepares the data for Analysis.
Expand All @@ -34,6 +35,7 @@ class Base(DataFrame):
Raises:
TypeError: if not receiving `int` or `float` as input.
"""

def __init__(self, data, decimals, sign='all', sec_order=False):

DataFrame.__init__(self, {'seq': data})
Expand Down Expand Up @@ -166,9 +168,9 @@ def show_plot(self):
"""
x, figsize, text_x = _get_plot_args(self.digs)
plot_digs(self, x=x, y_Exp=self.Expected, y_Found=self.Found,
N=self.N, figsize=figsize, conf_Z=confs[self.confidence],
text_x=text_x
)
N=self.N, figsize=figsize, conf_Z=confs[self.confidence],
text_x=text_x
)

def report(self, high_Z='pos', show_plot=True):
"""Handles the report specific to the test, considering its statistics
Expand All @@ -189,13 +191,15 @@ def report(self, high_Z='pos', show_plot=True):
if show_plot:
self.show_plot()


class Summ(DataFrame):
"""Gets the base object and outputs a Summation test object
Args:
base: The Base object with the data prepared for Analysis
test: The test for which to compute the summation
"""

def __init__(self, base, test):
super(Summ, self).__init__(base.abs()
.groupby(test)[['seq']]
Expand All @@ -217,7 +221,7 @@ def __init__(self, base, test):

def show_plot(self):
    """Draw the Summation test plot.

    The figure size is derived from ``self.digs``: the width is
    ``2 * (digs ** 2 + 5)`` and the height is ``1.5 * (digs ** 2 + 5)``.
    The object itself and ``self.expected`` are handed to ``plot_sum``.
    """
    # Both figure dimensions share the same digs-dependent base length.
    base_len = self.digs ** 2 + 5
    plot_sum(self, (2 * base_len, 1.5 * base_len), self.expected)

def report(self, high_diff=None, show_plot=True):
Expand All @@ -232,6 +236,7 @@ def report(self, high_diff=None, show_plot=True):
if show_plot:
self.show_plot()


class Mantissas(object):
"""Computes and holds the mantissas of the logarithms of the records
Expand Down Expand Up @@ -296,6 +301,7 @@ def arc_test(self, decimals=2, grid=True, figsize=12):
plot_mantissa_arc_test(self.data, self.stats, decimals=decimals,
grid=grid, figsize=figsize)


class Benford(object):
"""Initializes a Benford Analysis object and computes the proportions for
the digits. The test DataFrames are attributes, i.e., obj.F1D is the First
Expand Down Expand Up @@ -367,10 +373,11 @@ def __init__(self, data, decimals=2, sign='all', confidence=95,
for col in digs_dict.values()])}

if self.verbose:
print('\n',' Benford Object Instantiated '.center(50, '#'),'\n')
print('\n', ' Benford Object Instantiated '.center(50, '#'), '\n')
print(f'Initial sample size: {len(self.chosen)}.\n')
print(f'Test performed on {len(self.base)} registries.\n')
print(f'Number of discarded entries for each test:\n{self._discarded}')
print(
f'Number of discarded entries for each test:\n{self._discarded}')

if mantissas:
self.mantissas()
Expand Down Expand Up @@ -405,19 +412,21 @@ def update_confidence(self, new_conf, tests=None):
raise ValueError('tests must be a list or None.')
for test in tests:
try:
getattr(self, test).update_confidence(self.confidence, check=False)
getattr(self, test).update_confidence(
self.confidence, check=False)
except AttributeError:
if test in ['Mantissas', 'F1D_Summ', 'F2D_Summ', 'F3D_Summ']:
pass
else:
print(f"{test} not in Benford instance tests - review test's name.")
print(
f"{test} not in Benford instance tests - review test's name.")
pass

@property
def all_confidences(self):
"""dict: a dictionary with a confidence level for each computed tests,
when applicable."""
con_dic= {}
con_dic = {}
for key in self.tests:
try:
con_dic[key] = getattr(self, key).confidence
Expand Down Expand Up @@ -464,7 +473,7 @@ def sec_order(self):
def summation(self):
"""Creates Summation test DataFrames from Base object"""
for test in ['F1D', 'F2D', 'F3D']:
t = f'{test}_Summ'
t = f'{test}_Summ'
setattr(self, t, Summ(self.base, test))
self.tests.append(t)

Expand Down Expand Up @@ -568,7 +577,6 @@ def mantissas(self, report=True, plot=True, figsize=(15, 8)):
if plot:
plot_ordered_mantissas(self.Mant, figsize=figsize)


def first_digits(self, digs, confidence=None, high_Z='pos',
limit_N=None, MAD=False, MSE=False, chi_square=False,
KS=False, show_plot=True, simple=False, ret_df=False):
Expand Down Expand Up @@ -623,10 +631,10 @@ def first_digits(self, digs, confidence=None, high_Z='pos',
self.verbose = False
show_plot = False
df = prepare(temp[digs_dict[digs]], digs, limit_N=limit_N,
simple=True, confidence=None)
simple=True, confidence=None)
else:
N, df = prepare(temp[digs_dict[digs]], digs, limit_N=limit_N,
simple=False, confidence=confidence)
simple=False, confidence=confidence)

if self.verbose:
print(f"\nTest performed on {len(temp)} registries.\n"
Expand All @@ -648,18 +656,18 @@ def first_digits(self, digs, confidence=None, high_Z='pos',
# Chi-square statistic
if chi_square:
self.chi_square = chi_sq(df, ddf=len(df) - 1,
confidence=confidence,
verbose=self.verbose)
confidence=confidence,
verbose=self.verbose)
# KS test
if KS:
self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),
verbose=self.verbose)
verbose=self.verbose)

# Plotting the expected frequencies (line) against the found ones (bars)
if show_plot:
plot_digs(df, x=x, y_Exp=df.Expected, y_Found=df.Found, N=N,
figsize=(2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)),
conf_Z=confs[confidence])
figsize=(2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)),
conf_Z=confs[confidence])
if ret_df:
return df

Expand Down Expand Up @@ -708,10 +716,10 @@ def second_digit(self, confidence=None, high_Z='pos',
self.verbose = False
show_plot = False
df = prepare(temp['SD'], 22, limit_N=limit_N, simple=True,
confidence=None)
confidence=None)
else:
N, df = prepare(temp['SD'], 22, limit_N=limit_N, simple=False,
confidence=confidence)
confidence=confidence)

if self.verbose:
print(f"\nTest performed on {len(temp)} registries.\nDiscarded "
Expand All @@ -731,16 +739,16 @@ def second_digit(self, confidence=None, high_Z='pos',
# Chi-square statistic
if chi_square:
self.chi_square = chi_sq(df, ddf=9, confidence=confidence,
verbose=self.verbose)
verbose=self.verbose)
# KS test
if KS:
self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),
verbose=self.verbose)
verbose=self.verbose)

# Plotting the expected frequencies (line) against the found ones (bars)
if show_plot:
plot_digs(df, x=arange(0, 10), y_Exp=df.Expected,
y_Found=df.Found, N=N, figsize=(10, 6), conf_Z=conf)
y_Found=df.Found, N=N, figsize=(10, 6), conf_Z=conf)
if ret_df:
return df

Expand Down Expand Up @@ -785,10 +793,10 @@ def last_two_digits(self, confidence=None, high_Z='pos',
self.verbose = False
show_plot = False
df = prepare(temp['L2D'], -2, limit_N=limit_N, simple=True,
confidence=None)
confidence=None)
else:
N, df = prepare(temp['L2D'], -2, limit_N=limit_N, simple=False,
confidence=confidence)
confidence=confidence)

if self.verbose:
print(f"\nTest performed on {len(temp)} registries.\n\nDiscarded "
Expand All @@ -808,17 +816,17 @@ def last_two_digits(self, confidence=None, high_Z='pos',
# Chi-square statistic
if chi_square:
self.chi_square = chi_sq(df, ddf=99, confidence=confidence,
verbose=self.verbose)
verbose=self.verbose)
# KS test
if KS:
self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),
verbose=self.verbose)
verbose=self.verbose)

# Plotting expected frequencies (line) versus found ones (bars)
if show_plot:
plot_digs(df, x=arange(0, 100), y_Exp=df.Expected,
y_Found=df.Found, N=N, figsize=(15, 5),
conf_Z=conf, text_x=True)
y_Found=df.Found, N=N, figsize=(15, 5),
conf_Z=conf, text_x=True)
if ret_df:
return df

Expand Down Expand Up @@ -863,7 +871,7 @@ def summation(self, digs=2, top=20, show_plot=True,

if show_plot:
plot_sum(df, figsize=(
2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)), li=li)
2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)), li=li)

if ret_df:
return df
Expand Down Expand Up @@ -1015,8 +1023,8 @@ def __init__(self, data, test, window, decimals=2, sign='all'):
Exp, ind = prep_to_roll(start, self.test)

self.roll_series = start[digs_dict[test]].rolling(
window=window).apply(mad_to_roll,
args=(Exp, ind), raw=False)
window=window).apply(mad_to_roll,
args=(Exp, ind), raw=False)
self.roll_series.dropna(inplace=True)

def show_plot(self, figsize=(15, 8)):
Expand Down Expand Up @@ -1058,8 +1066,8 @@ def __init__(self, data, test, window, decimals=2, sign='all'):
Exp, ind = prep_to_roll(start, test)

self.roll_series = start[digs_dict[test]].rolling(
window=window).apply(mse_to_roll,
args=(Exp, ind), raw=False)
window=window).apply(mse_to_roll,
args=(Exp, ind), raw=False)
self.roll_series.dropna(inplace=True)

def show_plot(self, figsize=(15, 8)):
Expand All @@ -1071,7 +1079,6 @@ def show_plot(self, figsize=(15, 8)):
plot_roll_mse(self.roll_series, figsize=figsize)



def first_digits(data, digs, decimals=2, sign='all', verbose=True,
confidence=None, high_Z='pos', limit_N=None,
MAD=False, MSE=False, chi_square=False, KS=False,
Expand Down
13 changes: 10 additions & 3 deletions benford/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from numpy import array, ndarray
from .constants import digs_dict, rev_digs, confs


def _check_digs_(digs):
"""Checks the possible values for the digs parameter of the
First Digits tests
Expand Down Expand Up @@ -31,16 +32,20 @@ def _check_test_(test):
f'values are\n {list(digs_dict.keys())} for ints and'
f'\n {list(rev_digs.keys())} for strings.')


def _check_decimals_(decimals):
""""""
if isinstance(decimals, int):
if (decimals < 0):
raise ValueError("Parameter -decimals- must be an int >= 0, or 'infer'.")
raise ValueError(
"Parameter -decimals- must be an int >= 0, or 'infer'.")
else:
if decimals != 'infer':
raise ValueError("Parameter -decimals- must be an int >= 0, or 'infer'.")
raise ValueError(
"Parameter -decimals- must be an int >= 0, or 'infer'.")
return decimals


def _check_sign_(sign):
""""""
if sign not in ['all', 'pos', 'neg']:
Expand All @@ -56,6 +61,7 @@ def _check_confidence_(confidence):
f"following:\n {list(confs.keys())}")
return confidence


def _check_high_Z_(high_Z):
""""""
if not high_Z in ['pos', 'all']:
Expand All @@ -64,11 +70,12 @@ def _check_high_Z_(high_Z):
"'all' or an int.")
return high_Z


def _check_num_array_(data):
""""""
if (not isinstance(data, ndarray)) & (not isinstance(data, Series)):
print('\n`data` not a numpy NDarray nor a pandas Series.'
' Trying to convert...')
' Trying to convert...')
try:
data = array(data)
except:
Expand Down
2 changes: 1 addition & 1 deletion benford/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
99.99999: 1137.082, None: None}
}

# Critical Kolmogorov-Smirnov values according to the confidence levels
# Critical Kolmogorov-Smirnov values according to the confidence levels
# These values are yet to be divided by the square root of the sample size
KS_crit = {80: 1.075, 85: 1.139, 90: 1.125, 95: 1.36, 99: 1.63,
99.9: 1.95, 99.99: 2.23, 99.999: 2.47,
Expand Down
13 changes: 8 additions & 5 deletions benford/expected.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Second(DataFrame):
plot: option to plot a bar chart of the Expected proportions.
Defaults to True.
"""

def __init__(self, plot=True):
a = arange(10, 100)
Expe = log10(1 + (1. / a))
Expand All @@ -58,20 +59,22 @@ class LastTwo(DataFrame):
plot: option to plot a bar chart of the Expected proportions.
Defaults to True.
"""

def __init__(self, num=False, plot=True):
    """Build the Last Two Digits expected-proportions DataFrame.

    Args:
        num: forwarded to ``_lt_`` to choose numeric (int) labels instead
            of string labels for the index. Defaults to False.
        plot: option to plot a bar chart of the Expected proportions.
            Defaults to True.
    """
    # There are 100 possible last-two-digit combinations (00-99), all
    # equally likely, so each expected proportion is 1/100. Dividing by 99
    # made the expected distribution sum to 100/99 instead of 1.
    n_combinations = 100
    exp = array([1. / n_combinations] * n_combinations)
    DataFrame.__init__(self, {'Expected': exp,
                              'Last_2_Dig': _lt_(num=num)})
    self.set_index('Last_2_Dig', inplace=True)
    if plot:
        plot_expected(self, -2)


def _test_(digs):
"""Chooses the Expected class to be used in a test
Args:
digs: the int corresponding to the Expected class to be instantiated
Returns:
the Expected instance for the proper test to be performed
"""
Expand All @@ -89,7 +92,7 @@ def _lt_(num=False):
Args:
num: returns numeric (ints) values. Defaults to False,
which returns strings.
Returns:
Array of ints or str, in any case representing all 100 possible
combinations of last two digits
Expand All @@ -99,5 +102,5 @@ def _lt_(num=False):
else:
n = arange(0, 100).astype(str)
n[:10] = array(['00', '01', '02', '03', '04', '05',
'06', '07', '08', '09'])
return n
'06', '07', '08', '09'])
return n

0 comments on commit 6dd4983

Please sign in to comment.