Skip to content

Commit

Permalink
Merge branch 'feature/non_sample_size_tests' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
milcent committed Apr 14, 2021
2 parents 99a0f9b + 5175508 commit c9022ca
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 2 deletions.
6 changes: 5 additions & 1 deletion benford/benford.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .reports import _inform_, _report_mad_, _report_test_, _deprecate_inform_,\
_report_mantissa_
from .stats import Z_score, chi_sq, chi_sq_2, kolmogorov_smirnov,\
kolmogorov_smirnov_2
kolmogorov_smirnov_2, bhattacharyya_distance, kullback_leibler_divergence


class Base(DataFrame):
Expand Down Expand Up @@ -129,6 +129,10 @@ def __init__(self, base, digs, confidence, limit_N=None, sec_order=False):
self.KS = kolmogorov_smirnov_2(self)
self.MAD = self.AbsDif.mean()
self.MSE = (self.AbsDif ** 2).mean()
self.bhattacharyya_distance = bhattacharyya_distance(
self.Found.values, self.Expected.values)
self.kullback_leibler_divergence = kullback_leibler_divergence(
self.Found.values, self.Expected.values)
self.confidence = confidence
self.digs = digs
self.sec_order = sec_order
Expand Down
14 changes: 14 additions & 0 deletions benford/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,18 @@ def _report_summ_(test, high_diff):
print(test.sort_values('AbsDif', ascending=False))


def _report_bhattac_dist_(bhattac_dist):
    """Print the Bhattacharyya Distance with six decimal places.

    Args:
        bhattac_dist (float): The Bhattacharyya Distance to be reported.
    """
    # ".6f" fixes the precision; the previous "6f" spec only set a minimum
    # field width, which left-padded non-finite values ("   inf", "   nan").
    print(f"The Bhattacharyya Distance is: {bhattac_dist:.6f}\n")


def _report_kl_diverg_(kl_diverg):
    """Print the Kullback-Leibler Divergence with six decimal places.

    Args:
        kl_diverg (float): The Kullback-Leibler Divergence to be reported.
    """
    # ".6f" fixes the precision; the previous "6f" spec only set a minimum
    # field width, which left-padded non-finite values ("   inf", "   nan").
    print(f"The Kullback-Leibler Divergence is: {kl_diverg:.6f}\n")


def _report_test_(test, high=None, crit_vals=None):
"""Main report function. Receives the Args: to report with, initiates
the process, and calls the right reporting helper function(s), depending
Expand All @@ -108,6 +120,8 @@ def _report_test_(test, high=None, crit_vals=None):
print('\n', f' {test.name} '.center(50, '#'), '\n')
if not 'Summation' in test.name:
_report_mad_(test.digs, test.MAD)
_report_bhattac_dist_(test.bhattacharyya_distance)
_report_kl_diverg_(test.kullback_leibler_divergence)
if test.confidence is not None:
print(f"For confidence level {test.confidence}%: ")
_report_KS_(test.KS, crit_vals['KS'])
Expand Down
49 changes: 48 additions & 1 deletion benford/stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from numpy import sqrt
from numpy import sqrt, log, where
from .constants import crit_chi2, KS_crit, mad_dict, digs_dict


Expand Down Expand Up @@ -159,3 +159,50 @@ def mse(frame, verbose=True):
print(f"\nMean Square Error = {mse}")

return mse

def _bhattacharyya_coefficient(dist_1, dist_2):
    """Return the Bhattacharyya Coefficient of two probability distributions.

    The coefficient is the sum of the element-wise square roots of the
    products of both distributions. It is later used to compute the
    Bhattacharyya Distance.

    Args:
        dist_1 (np.array): The newly gathered distribution, to be compared
            with an older / established distribution.
        dist_2 (np.array): The older / established distribution with which
            the new one will be compared.

    Returns:
        bhat_coef (float)
    """
    elementwise_product = dist_1 * dist_2
    geometric_terms = sqrt(elementwise_product)
    return geometric_terms.sum()


def bhattacharyya_distance(dist_1, dist_2):
    """Return the Bhattacharyya Distance between two probability
    distributions.

    The distance is the negated natural logarithm of the Bhattacharyya
    Coefficient of the two distributions.

    Args:
        dist_1 (np.array): The newly gathered distribution, to be compared
            with an older / established distribution.
        dist_2 (np.array): The older / established distribution with which
            the new one will be compared.

    Returns:
        bhat_dist (float)
    """
    coefficient = _bhattacharyya_coefficient(dist_1, dist_2)
    return -log(coefficient)


def kullback_leibler_divergence(dist_1, dist_2):
    """Compute the Kullback-Leibler Divergence between two probability
    distributions.

    Evaluates ``sum(dist_1 * log(dist_1 / dist_2))`` using the natural
    logarithm. Entries where ``dist_1`` is zero contribute exactly zero to
    the sum.

    Args:
        dist_1 (np.array): The newly gathered distribution, to be compared
            with an older / established distribution.
        dist_2 (np.array): The older / established distribution with which
            the new one will be compared.

    Returns:
        kulb_leib_diverg (float): The divergence. It is ``inf`` when
            ``dist_2`` is zero at a position where ``dist_1`` is not.
    """
    observed = dist_1 != 0
    # Calling ``log(..., where=mask)`` without ``out=`` leaves the masked-out
    # slots uninitialised, so multiplying them by zero could still yield NaN.
    # Substituting a ratio of 1 there makes log() return exactly 0 and also
    # avoids evaluating 0 / 0.
    numerator = where(observed, dist_1, 1.0)
    denominator = where(observed, dist_2, 1.0)
    return (log(numerator / denominator) * dist_1).sum()

0 comments on commit c9022ca

Please sign in to comment.