Skip to content

Commit

Permalink
Merge tag 'bht_dist_kl_diverg_attrs' into develop
Browse files Browse the repository at this point in the history
v0.4.1
  • Loading branch information
milcent committed Apr 28, 2021
2 parents 0c59af2 + fce55ee commit 3bdaf46
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 24 deletions.
63 changes: 41 additions & 22 deletions benford/benford.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .reports import _inform_, _report_mad_, _report_test_, _deprecate_inform_,\
_report_mantissa_
from .stats import Z_score, chi_sq, chi_sq_2, kolmogorov_smirnov,\
kolmogorov_smirnov_2, _bhattacharyya_distance_, \
kolmogorov_smirnov_2, _bhattacharyya_distance_, _bhattacharyya_coefficient,\
_kullback_leibler_divergence_


Expand Down Expand Up @@ -130,9 +130,11 @@ def __init__(self, base, digs, confidence, limit_N=None, sec_order=False):
self.KS = kolmogorov_smirnov_2(self)
self.MAD = self.AbsDif.mean()
self.MSE = (self.AbsDif ** 2).mean()
self._bhattacharyya_distance_ = _bhattacharyya_distance_(
self.bhattacharyya_coefficient = _bhattacharyya_coefficient(
self.Found.values, self.Expected.values)
self._kullback_leibler_divergence_ = _kullback_leibler_divergence_(
self.bhattacharyya_distance = _bhattacharyya_distance_(
self.Found.values, self.Expected.values)
self.kullback_leibler_divergence = _kullback_leibler_divergence_(
self.Found.values, self.Expected.values)
self.confidence = confidence
self.digs = digs
Expand Down Expand Up @@ -667,7 +669,8 @@ def mantissas(self, report=True, show_plot=True, figsize=(15, 8),
def first_digits(self, digs, confidence=None, high_Z='pos',
limit_N=None, MAD=False, MSE=False, chi_square=False,
KS=False, show_plot=True, save_plot=None, save_plot_kwargs=None,
simple=False, bhat_dist=False, kl_diverg=False, ret_df=False):
simple=False, bhat_coeff = False, bhat_dist=False,
kl_diverg=False, ret_df=False):
"""Performs the Benford First Digits test with the series of
numbers provided, and populates the mapping dict for future
selection of the original series.
Expand All @@ -693,8 +696,11 @@ def first_digits(self, digs, confidence=None, high_Z='pos',
found and the expected distributions; defaults to False.
MSE (bool): calculates the Mean Square Error of the sample; defaults to
False.
bhat_dist (bool): calculates the Bhattacharyya Distance between
found and the expected (Benford) digits distribution; defaults
bhat_coeff (bool): computes the Bhattacharyya Coefficient between
the found and the expected (Benford) digits distribution; defaults
to False
bhat_dist (bool): calculates the Bhattacharyya Distance between
the found and the expected (Benford) digits distribution; defaults
to False
kl_diverg (bool): calculates the Kullback-Leibler Divergence between
the found and the expected (Benford) digits distribution;
Expand Down Expand Up @@ -764,15 +770,17 @@ def first_digits(self, digs, confidence=None, high_Z='pos',
self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),
verbose=self.verbose)

if bhat_coeff:
self.bhat_coeff = _bhattacharyya_coefficient(
df.Found.values, df.Expected.values)

if bhat_dist:
self.bhat_dist = _bhattacharyya_distance_(
df.Found.values, df.Expected.values
)
df.Found.values, df.Expected.values)

if kl_diverg:
self.kl_diverg = _kullback_leibler_divergence_(
df.Found.values, df.Expected.values
)
df.Found.values, df.Expected.values)

# Plotting the expected frequencies (line) against the found ones (bars)
if show_plot:
Expand All @@ -785,9 +793,8 @@ def first_digits(self, digs, confidence=None, high_Z='pos',

def second_digit(self, confidence=None, high_Z='pos',
limit_N=None, MAD=False, MSE=False, chi_square=False,
KS=False, bhat_dist=False, kl_diverg=False,
show_plot=True, save_plot=None,
save_plot_kwargs=None,
KS=False, bhat_coeff=False, bhat_dist=False, kl_diverg=False,
show_plot=True, save_plot=None, save_plot_kwargs=None,
simple=False, ret_df=False):
"""Performs the Benford Second Digit test with the series of
numbers provided.
Expand All @@ -811,8 +818,11 @@ def second_digit(self, confidence=None, high_Z='pos',
the Z scores if the sample is too big. Defaults to None.
MSE (bool): calculates the Mean Square Error of the sample; defaults to
False.
bhat_dist (bool): calculates the Bhattacharyya Distance between
found and the expected (Benford) digits distribution; defaults
bhat_coeff (bool): computes the Bhattacharyya Coefficient between
the found and the expected (Benford) digits distribution; defaults
to False
bhat_dist (bool): calculates the Bhattacharyya Distance between
the found and the expected (Benford) digits distribution; defaults
to False
kl_diverg (bool): calculates the Kullback-Leibler Divergence between
the found and the expected (Benford) digits distribution;
Expand Down Expand Up @@ -871,7 +881,11 @@ def second_digit(self, confidence=None, high_Z='pos',
# KS test
if KS:
self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),

verbose=self.verbose)
if bhat_coeff:
self.bhat_coeff = _bhattacharyya_coefficient(
df.Found.values, df.Expected.values)

if bhat_dist:
self.bhat_dist = _bhattacharyya_distance_(
Expand All @@ -893,7 +907,7 @@ def second_digit(self, confidence=None, high_Z='pos',

def last_two_digits(self, confidence=None, high_Z='pos',
limit_N=None, MAD=False, MSE=False, chi_square=False,
KS=False, bhat_dist=False, kl_diverg=False,
KS=False, bhat_coeff=False, bhat_dist=False, kl_diverg=False,
show_plot=True, save_plot=None, save_plot_kwargs=None,
simple=False, ret_df=False):
"""Performs the Benford Last Two Digits test with the series of
Expand All @@ -918,8 +932,11 @@ def last_two_digits(self, confidence=None, high_Z='pos',
the Z scores if the sample is too big. Defaults to None.
MSE (bool): calculates the Mean Square Error of the sample; defaults to
False.
bhat_dist (bool): calculates the Bhattacharyya Distance between
found and the expected (Benford) digits distribution; defaults
bhat_coeff (bool): computes the Bhattacharyya Coefficient between
the found and the expected (Benford) digits distribution; defaults
to False
bhat_dist (bool): calculates the Bhattacharyya Distance between
the found and the expected (Benford) digits distribution; defaults
to False
kl_diverg (bool): calculates the Kullback-Leibler Divergence between
the found and the expected (Benford) digits distribution;
Expand Down Expand Up @@ -976,15 +993,17 @@ def last_two_digits(self, confidence=None, high_Z='pos',
self.KS = kolmogorov_smirnov(df, confidence=confidence, N=len(temp),
verbose=self.verbose)

if bhat_coeff:
self.bhat_coeff = _bhattacharyya_coefficient(
df.Found.values, df.Expected.values)

if bhat_dist:
self.bhat_dist = _bhattacharyya_distance_(
df.Found.values, df.Expected.values
)
df.Found.values, df.Expected.values)

if kl_diverg:
self.kl_diverg = _kullback_leibler_divergence_(
df.Found.values, df.Expected.values
)
df.Found.values, df.Expected.values)

# Plotting expected frequencies (line) versus found ones (bars)
if show_plot:
Expand Down
11 changes: 9 additions & 2 deletions benford/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ def _report_summ_(test, high_diff):
print(test.sort_values('AbsDif', ascending=False))


def _report_bhattac_coeff_(bhattac_coeff):
    """Reports the Bhattacharyya Coefficient between the found and the
    expected (Benford) digit distributions.

    Args:
        bhattac_coeff (float): Bhattacharyya Coefficient to report.
    """
    # ".6f" (six decimal places) makes the intent of the original "6f"
    # (minimum field width 6) explicit; the printed output is identical,
    # since the default precision of the "f" type is already 6.
    print(f"Bhattacharyya Coefficient: {bhattac_coeff:.6f}\n")


def _report_bhattac_dist_(bhattac_dist):
"""
"""
Expand All @@ -120,8 +126,9 @@ def _report_test_(test, high=None, crit_vals=None):
print('\n', f' {test.name} '.center(50, '#'), '\n')
if not 'Summation' in test.name:
_report_mad_(test.digs, test.MAD)
_report_bhattac_dist_(test._bhattacharyya_distance_)
_report_kl_diverg_(test._kullback_leibler_divergence_)
_report_bhattac_coeff_(test.bhattacharyya_coefficient)
_report_bhattac_dist_(test.bhattacharyya_distance)
_report_kl_diverg_(test.kullback_leibler_divergence)
if test.confidence is not None:
print(f"For confidence level {test.confidence}%: ")
_report_KS_(test.KS, crit_vals['KS'])
Expand Down

0 comments on commit 3bdaf46

Please sign in to comment.