Skip to content

Commit

Permalink
Merge branch 'hotfix/mad_verbose_arg' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
milcent committed Mar 14, 2020
2 parents f58aa37 + 8dca432 commit fc40c61
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 105 deletions.
101 changes: 50 additions & 51 deletions benford/benford.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@
get_mantissas
from .expected import First, Second, LastTwo, _test_
from .viz import _get_plot_args, plot_digs, plot_sum, plot_ordered_mantissas,\
plot_mantissa_arc_test
plot_mantissa_arc_test, plot_roll_mse, plot_roll_mad
from .reports import _inform_, _report_mad_, _report_summ_, _report_KS_,\
_report_Z_, _report_chi2_, _report_test_, _deprecate_inform_,\
_report_mantissa_
from .stats import Z_score, chi_square, chi_square_2, KS, KS_2, \
mad, mse
from .stats import Z_score, chi_square, chi_square_2, KS, KS_2

class Base(DataFrame):
"""Internalizes and prepares the data for Analysis.
Expand Down Expand Up @@ -127,6 +126,7 @@ def __init__(self, base, digs, confidence, limit_N=None, sec_order=False):
self.chi_square = chi_square_2(self)
self.KS = KS_2(self)
self.MAD = self.AbsDif.mean()
self.MSE = (self.AbsDif ** 2).mean()
self.confidence = confidence
self.digs = digs
self.sec_order = sec_order
Expand Down Expand Up @@ -207,6 +207,7 @@ def __init__(self, base, test):
self.index = self.index.astype(int)
#: Mean Absolute Deviation for the test
self.MAD = self.AbsDif.mean()
self.MSE = (self.AbsDif ** 2).mean()
#: Confidence level to consider when setting some critical values
self.confidence = None
# (int): numerical representation of the test at hand
Expand Down Expand Up @@ -636,11 +637,13 @@ def first_digits(self, digs, confidence=None, high_Z='pos',

# Mean absolute difference
if MAD:
self.MAD = mad(df, test=digs, verbose=self.verbose)
self.MAD = df.AbsDif.mean()
if self.verbose:
_report_mad_(digs, self.MAD)

# Mean Square Error
if MSE:
self.MSE = mse(df, verbose=self.verbose)
self.MSE = (df.AbsDif ** 2).mean()

# Chi-square statistic
if chi_square:
Expand Down Expand Up @@ -697,8 +700,8 @@ def second_digit(self, confidence=None, high_Z='pos',

conf = confs[confidence]

temp = self.loc[self.ZN >= 10]
temp['SD'] = (temp.ZN // 10**((log10(temp.ZN)).astype(
temp = self.loc[self.ZN >= 10, :]
temp['SD'] = (temp.ZN // 10 ** ((log10(temp.ZN)).astype(
int) - 1)) % 10

if simple:
Expand All @@ -718,11 +721,12 @@ def second_digit(self, confidence=None, high_Z='pos',

# Mean absolute difference
if MAD:
self.MAD = mad(df, test=22, verbose=self.verbose)

self.MAD = df.AbsDif.mean()
if self.verbose:
_report_mad_(digs, self.MAD)
# Mean Square Error
if MSE:
self.MSE = mse(df, verbose=self.verbose)
self.MSE = (df.AbsDif ** 2).mean()

# Chi-square statistic
if chi_square:
Expand Down Expand Up @@ -794,11 +798,12 @@ def last_two_digits(self, confidence=None, high_Z='pos',

# Mean absolute difference
if MAD:
self.MAD = mad(df, test=-2, verbose=self.verbose)

self.MAD = df.AbsDif.mean()
if self.verbose:
_report_mad_(-2, self.MAD)
# Mean Square Error
if MSE:
self.MSE = mse(df, verbose=self.verbose)
self.MSE = (df.AbsDif ** 2).mean()

# Chi-square statistic
if chi_square:
Expand Down Expand Up @@ -977,7 +982,7 @@ def arc_test(self, grid=True, figsize=12):
plot_mantissa_arc_test(self, stats['gravity_center'], figsize=figsize)


class Roll_mad(Series):
class Roll_mad(object):
"""Applies the MAD to sequential subsets of the Series, returning another
Series.
Expand All @@ -1000,37 +1005,29 @@ class Roll_mad(Series):

def __init__(self, data, test, window, decimals=2, sign='all'):

test = _check_test_(test)
#: the test (F1D, SD, F2D...) used for the MAD calculation and critical values
self.test = _check_test_(test)

if not isinstance(data, Source):
start = Source(data, sign=sign, decimals=decimals, verbose=False)

Exp, ind = prep_to_roll(start, test)
Exp, ind = prep_to_roll(start, self.test)

Series.__init__(self, start[digs_dict[test]].rolling(
window=window).apply(mad_to_roll, args=(Exp, ind), raw=False))

self.dropna(inplace=True)
#: the test (F1D, SD, F2D...) used for the MAD calculation and critical values
self.test = test
self.roll_series = start[digs_dict[test]].rolling(
window=window).apply(mad_to_roll,
args=(Exp, ind), raw=False)
self.roll_series.dropna(inplace=True)

def show_plot(self, figsize=(15, 8)):
"""Shows the rolling MAD plot
Args:
figsize: the figure dimensions.
"""
fig, ax = plt.subplots(figsize=figsize)
ax.set_facecolor(colors['b'])
ax.plot(self, color=colors['m'])
if self.test != -2:
plt.axhline(y=mad_dict[self.test][0], color=colors['af'], linewidth=3)
plt.axhline(y=mad_dict[self.test][1], color=colors['h2'], linewidth=3)
plt.axhline(y=mad_dict[self.test][2], color=colors['s'], linewidth=3)
plt.show(block=False)
plot_roll_mad(self, figsize=figsize)


class Roll_mse(Series):
class Roll_mse(object):
"""Applies the MSE to sequential subsets of the Series, returning another
Series.
Expand Down Expand Up @@ -1059,21 +1056,19 @@ def __init__(self, data, test, window, decimals=2, sign='all'):

Exp, ind = prep_to_roll(start, test)

Series.__init__(self, start[digs_dict[test]].rolling(
window=window).apply(mse_to_roll, args=(Exp, ind), raw=False))

self.dropna(inplace=True)
self.roll_series = start[digs_dict[test]].rolling(
window=window).apply(mse_to_roll,
args=(Exp, ind), raw=False)
self.roll_series.dropna(inplace=True)

def show_plot(self, figsize=(15, 8)):
"""Shows the rolling MSE plot
Args:
figsize: the figure dimensions.
"""
fig, ax = plt.subplots(figsize=figsize)
ax.set_facecolor(colors['b'])
ax.plot(self, color=colors['m'])
plt.show(block=False)
plot_roll_mse(self.roll_series, figsize=figsize)



def first_digits(data, digs, decimals=2, sign='all', verbose=True,
Expand Down Expand Up @@ -1338,7 +1333,7 @@ def summation(data, digs=2, decimals=2, sign='all', top=20, verbose=True,
return data


def mad(data, test, decimals=2, sign='all'):
def mad(data, test, decimals=2, sign='all', verbose=False):
"""Calculates the Mean Absolute Deviation of the Series
Args:
Expand All @@ -1356,8 +1351,9 @@ def mad(data, test, decimals=2, sign='all'):
Returns:
float: the Mean Absolute Deviation of the Series
"""
_check_test_(test)
start = Source(data.values, sign=sign, decimals=decimals, verbose=False)
data = _check_num_array_(data)
test = _check_test_(test)
start = Source(data, sign=sign, decimals=decimals, verbose=verbose)
if test in [1, 2, 3]:
start.first_digits(digs=test, MAD=True, MSE=True, simple=True)
elif test == 22:
Expand All @@ -1367,7 +1363,7 @@ def mad(data, test, decimals=2, sign='all'):
return start.MAD


def mse(data, test, decimals=2, sign='all'):
def mse(data, test, decimals=2, sign='all', verbose=False):
"""Calculates the Mean Squared Error of the Series
Args:
Expand All @@ -1385,8 +1381,9 @@ def mse(data, test, decimals=2, sign='all'):
Returns:
float: the Mean Squared Error of the Series
"""
data = _check_num_array_(data)
test = _check_test_(test)
start = Source(data, sign=sign, decimals=decimals, verbose=False)
start = Source(data, sign=sign, decimals=decimals, verbose=verbose)
if test in [1, 2, 3]:
start.first_digits(digs=test, MAD=False, MSE=True, simple=True)
elif test == 22:
Expand All @@ -1396,7 +1393,7 @@ def mse(data, test, decimals=2, sign='all'):
return start.MSE


def mad_summ(data, test, decimals=2, sign='all'):
def mad_summ(data, test, decimals=2, sign='all', verbose=False):
"""Calculate the Mean Absolute Deviation of the Summation Test
Args:
Expand All @@ -1415,9 +1412,10 @@ def mad_summ(data, test, decimals=2, sign='all'):
Returns:
float: the Mean Absolute Deviation of the Summation Test
"""
_check_digs_(test)
data = _check_num_array_(data)
test = _check_digs_(test)

start = Source(data, sign=sign, decimals=decimals, verbose=False)
start = Source(data, sign=sign, decimals=decimals, verbose=verbose)
temp = start.loc[start.ZN >= 10 ** (test - 1)]
temp[digs_dict[test]] = (temp.ZN // 10 ** ((log10(temp.ZN).astype(
int)) - (test - 1))).astype(
Expand Down Expand Up @@ -1450,11 +1448,11 @@ def rolling_mad(data, test, window, decimals=2, sign='all', show_plot=False):
Returns:
Series with sequentially computed MADs.
"""
test = _check_test_(test)
data = _check_num_array_(data)
r_mad = Roll_mad(data, test, window, decimals, sign)
if show_plot:
r_mad.show_plot(test)
return r_mad
r_mad.show_plot()
return r_mad.roll_series


def rolling_mse(data, test, window, decimals=2, sign='all', show_plot=False):
Expand All @@ -1479,10 +1477,11 @@ def rolling_mse(data, test, window, decimals=2, sign='all', show_plot=False):
Returns:
Series with sequentially computed MSEs.
"""
data = _check_num_array_(data)
r_mse = Roll_mse(data, test, window, decimals, sign)
if show_plot:
r_mse.show_plot()
return r_mse
return r_mse.roll_series


def duplicates(data, top_Rep=20, verbose=True, inform=None):
Expand Down
49 changes: 0 additions & 49 deletions benford/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,52 +110,3 @@ def KS_2(frame):
ks_frame = frame.sort_index()[['Found', 'Expected']].cumsum()
# finding the supremum - the largest cumul dist difference
return ((ks_frame.Found - ks_frame.Expected).abs()).max()


def mad(frame, test, verbose=True):
"""Computes the Mean Absolute Deviation (MAD) between the found and the
expected proportions.
The MAD is the mean of the frame's ``AbsDif`` column.
Args:
frame: DataFrame with the Absolute Deviations already calculated
(read from its ``AbsDif`` column).
test: Test to compute the MAD from (F1D, SD, F2D...). It is used as a
key into ``digs_dict`` and ``mad_dict`` when reporting.
verbose: prints the MAD result and compares to limit values of
conformity. Defaults to True.
Returns:
The Mean of the Absolute Deviations between the found and expected
proportions.
"""
mad = frame.AbsDif.mean()

if verbose:
print(f"\nThe Mean Absolute Deviation is {mad}")

# NOTE(review): the nesting is not visible in this view; per the docstring
# the limit values are presumably reported only when verbose - confirm.
# Test -2 gets no conformity limits printed.
if test != -2:
print(f"For the {mad_dict[digs_dict[test]]}:\n\
- 0.0000 to {mad_dict[test][0]}: Close Conformity\n\
- {mad_dict[test][0]} to {mad_dict[test][1]}: Acceptable Conformity\n\
- {mad_dict[test][1]} to {mad_dict[test][2]}: Marginally Acceptable Conformity\n\
- Above {mad_dict[test][2]}: Nonconformity")
# The else branch is a deliberate no-op.
else:
pass
return mad


def mse(frame, verbose=True):
    """Computes the test's Mean Square Error.

    Args:
        frame: DataFrame with the already computed Absolute Deviations
            between the found and expected proportions, read from its
            ``AbsDif`` column.
        verbose: Prints the MSE. Defaults to True.

    Returns:
        Mean of the squared differences between the found and the expected
        proportions.
    """
    # A distinct local name avoids rebinding the function's own name
    # inside its body.
    mean_sq_error = (frame.AbsDif ** 2).mean()

    if verbose:
        print(f"\nMean Square Error = {mean_sq_error}")

    return mean_sq_error
6 changes: 2 additions & 4 deletions benford/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,7 @@ def prep_to_roll(start, test):
def mad_to_roll(arr, Exp, ind):
"""Mean Absolute Deviation used in the rolling function
"""
prop = Series(arr)
prop = prop.value_counts(normalize=True).sort_index()
prop = arr.value_counts(normalize=True).sort_index()

if len(prop) < len(Exp):
prop = prop.reindex(ind).fillna(0)
Expand All @@ -141,8 +140,7 @@ def mad_to_roll(arr, Exp, ind):
def mse_to_roll(arr, Exp, ind):
"""Mean Squared Error used in the rolling function
"""
prop = Series(arr)
temp = prop.value_counts(normalize=True).sort_index()
temp = arr.value_counts(normalize=True).sort_index()

if len(temp) < len(Exp):
temp = temp.reindex(ind).fillna(0)
Expand Down
28 changes: 27 additions & 1 deletion benford/viz.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from numpy import array, arange, maximum, sqrt, ones
import matplotlib.pyplot as plt
from matplotlib.text import Annotation
from .constants import colors
from .constants import colors, mad_dict


def plot_expected(df, digs):
Expand Down Expand Up @@ -194,3 +194,29 @@ def plot_mantissa_arc_test(df, gravity_center, grid=True, figsize=12):
ax.legend(loc = 'lower left')
ax.set_title("Mantissas Arc Test")
plt.show(block=False);

def plot_roll_mse(roll_series, figsize):
    """Draw the rolling Mean Squared Error as a line chart.

    Args:
        roll_series: the rolling MSE values to draw; handed directly to
            the Axes ``plot`` call.
        figsize: the figure dimensions, forwarded to ``plt.subplots``.
    """
    _, axes = plt.subplots(figsize=figsize)
    axes.set_facecolor(colors['b'])
    axes.plot(roll_series, color=colors['m'])
    # Show without blocking, so control returns to the caller.
    plt.show(block=False)

def plot_roll_mad(roll_mad, figsize):
    """Shows the rolling MAD plot, with threshold lines when applicable.

    Args:
        roll_mad: object exposing ``roll_series`` (the rolling MAD values
            to draw) and ``test`` (the key used to look up the three
            threshold values in ``mad_dict``).
        figsize: the figure dimensions, forwarded to ``plt.subplots``.
    """
    _, ax = plt.subplots(figsize=figsize)
    ax.set_facecolor(colors['b'])
    ax.plot(roll_mad.roll_series, color=colors['m'])
    # No threshold lines are drawn for test -2.
    if roll_mad.test != -2:
        thresholds = mad_dict[roll_mad.test]
        # Draw on the Axes created above rather than on pyplot's implicit
        # "current axes", so the lines always land on this figure.
        for position, color_key in enumerate(('af', 'h2', 's')):
            ax.axhline(y=thresholds[position], color=colors[color_key],
                       linewidth=3)
    plt.show(block=False)

0 comments on commit fc40c61

Please sign in to comment.