Skip to content

Commit

Permalink
Fixes in Google python docstring + sphinx conf.py + apidoc + make html
Browse files Browse the repository at this point in the history
  • Loading branch information
milcent committed Jan 30, 2020
1 parent b5485ba commit 0c502cf
Show file tree
Hide file tree
Showing 27 changed files with 6,498 additions and 608 deletions.
137 changes: 49 additions & 88 deletions benford/benford.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
All logarithms are in base 10: "log10"
Copyright (C) 2014 Marcel Milcent
Author: Marcel Milcent
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -121,6 +121,17 @@ class Test(DataFrame):
plotting and to limit the top deviations to show.
limit_N: sets a limit to N as the sample size for the calculation of
the Z scores if the sample is too big. Defaults to None.
Attributes:
N: Number of records in the sample to consider in computations
ddf: Degrees of Freedom to look up for the critical chi-square value
chi_square: Chi-square statistic for the given test
KS: Kolmogorov-Smirnov statistic for the given test
MAD: Mean Absolute Deviation for the given test
confidence: Confidence level to consider when setting some critical values
digs (int): numerical representation of the test at hand. 1: F1D; 2: F2D;
3: F3D; 22: SD; -2: L2D.
sec_order (bool): True if the test is a Second Order one
"""

def __init__(self, base, digs, confidence, limit_N=None, sec_order=False):
Expand All @@ -134,22 +145,14 @@ def __init__(self, base, digs, confidence, limit_N=None, sec_order=False):
# create column with absolute differences
self['Dif'] = self.Found - self.Expected
self['AbsDif'] = self.Dif.abs()
#: Number of records in the sample to consider in computations
self.N = _set_N_(len(base), limit_N)
self['Z_score'] = Z_score(self, self.N)
#: Degrees of Freedom to look up for the critical chi-square value
self.ddf = len(self) - 1
#: Chi-square statistic for the given test
self.chi_square = chi_square_2(self)
#: Kolmogorov-Smirnov statistic for the given test
self.KS = KS_2(self)
#: Mean Absolute Deviation for the given test
self.MAD = self.AbsDif.mean()
#: Confidence level to consider when setting some critical values
self.confidence = confidence
# (int): numerical representation of the test at hand
self.digs = digs
# (bool): True if the test is a Secnd Order one
self.sec_order = sec_order

if sec_order:
Expand All @@ -175,7 +178,7 @@ def update_confidence(self, new_conf, check=True):
@property
def critical_values(self):
"""dict: a dictionary with the critical values for the test at hand,
according to the current confidence level."""
according to the current confidence level."""
return {'Z': confs[self.confidence],
'KS': KS_crit[self.confidence] / (self.N ** 0.5),
'chi2': crit_chi2[self.ddf][self.confidence],
Expand Down Expand Up @@ -890,7 +893,7 @@ def duplicates(self, top_Rep=20, inform=None):
Args:
verbose: tells how many duplicated entries were found and prints the
top numbers according to the top_Rep parameter. Defaluts to True.
top numbers according to the top_Rep argument. Defaults to True.
top_Rep: int or None. Chooses how many duplicated entries will be
shown with the top repetitions. Defaults to 20. If None, returns
all the ordered repetitions.
Expand All @@ -904,7 +907,7 @@ def duplicates(self, top_Rep=20, inform=None):
ValueError: if the `top_Rep` arg is not int or None.
"""
if top_Rep is not None and not isinstance(top_Rep, int):
raise ValueError('The top_Rep parameter must be an int or None.')
raise ValueError('The top_Rep argument must be an int or None.')

dup = self[['Seq']][self.Seq.duplicated(keep=False)]
dup_count = dup.groupby(self.Seq).count()
Expand All @@ -925,14 +928,17 @@ def duplicates(self, top_Rep=20, inform=None):


class Mantissas(object):
'''
"""
Returns a Series with the data mantissas,
Parameters
----------
data: sequence to compute mantissas from, numpy 1D array, pandas
Series of pandas DataFrame column.
'''
Args:
data: sequence to compute mantissas from, numpy 1D array, pandas
Series of pandas DataFrame column.
Attributes:
data (DataFrame): holds the computed mantissas and, if the arc_test
is also called, the respective X and Y coordinates for the plot.
stats (dict): holds the relevant statistics about the data mantissas.
"""

def __init__(self, data):

Expand All @@ -947,14 +953,12 @@ def __init__(self, data):
'Kurt': self.data.Mantissa.kurt()}

def report(self, show_plot=True):
'''
Displays the Mantissas stats.
Paranmeters:
-----------
show_plot: shows the ordered mantissas plot and the Arc Test plot.
Defaults to True.
'''
"""Displays the Mantissas stats.
Args:
show_plot: shows the ordered mantissas plot and the Arc Test plot.
Defaults to True.
"""
print("\n", ' Mantissas Test '.center(52, '#'))
print(f"\nThe Mantissas MEAN is {self.stats['Mean']:.6f}."
"\tRef: 0.5")
Expand All @@ -969,75 +973,32 @@ def report(self, show_plot=True):
self.arc_test()

def show_plot(self, figsize=(12, 12)):
'''
plots the ordered mantissas and a line with the expected
inclination. Defaults to True.
Parameters
----------
figsize -> tuple that sets the figure size
'''
ld = len(self.data)
x = arange(1, ld + 1)
n = ones(ld) / ld
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(111)
ax.plot(x, self.data.Mantissa.sort_values(), linestyle='--',
color=colors['s'], linewidth=3, label='Mantissas')
ax.plot(x, n.cumsum(), color=colors['m'],
linewidth=2, label='Expected')
plt.ylim((0, 1.))
plt.xlim((1, ld + 1))
ax.set_facecolor(colors['b'])
ax.set_title("Ordered Mantissas")
plt.legend(loc='upper left')
plt.show(block=False);
"""Plots the ordered mantissas and compares them to the expected, straight
line that should be formed in a Benford-compliant set.
def arc_test(self, decimals=2, grid=True, figsize=12):
'''
Args:
figsize: tuple that sets the figure size.
"""
plot_ordered_mantissas(self.data.Mantissa, figsize=figsize)

def arc_test(self, grid=True, figsize=12):
    """Compute the Arc Test coordinates and draw the Arc Test plot.

    Adds the "mant_x" and "mant_y" columns to the Mantissas's DataFrame,
    holding the cosine and sine of 2 * pi * mantissa, stores their means
    in ``self.stats['gravity_center']`` and plots the result.

    Args:
        grid: show grid of the plot. Defaults to True.
        figsize: size of the figure to be displayed. Since it is a square,
            there is no need to provide a tuple, like is usually the case
            with matplotlib.
    """
    # The coordinates are cached: compute them only on the first call.
    if self.stats.get('gravity_center') is None:
        self.data['mant_x'] = cos(2 * pi * self.data.Mantissa)
        self.data['mant_y'] = sin(2 * pi * self.data.Mantissa)
        self.stats['gravity_center'] = (self.data.mant_x.mean(),
                                        self.data.mant_y.mean())
    # The plotting helper reads df.mant_x / df.mant_y, so it must receive the
    # DataFrame (not the Mantissas object); `stats` only exists as an
    # instance attribute, so a bare `stats` would raise NameError.
    plot_mantissa_arc_test(self.data, self.stats['gravity_center'],
                           grid=grid, figsize=figsize)


class Roll_mad(Series):
Expand Down Expand Up @@ -1081,7 +1042,7 @@ def show_plot(self, figsize=(15, 8)):
"""Shows the rolling MAD plot
Args:
figsize: the figure dimensions .
figsize: the figure dimensions.
"""
fig, ax = plt.subplots(figsize=figsize)
ax.set_facecolor(colors['b'])
Expand Down Expand Up @@ -1556,7 +1517,7 @@ def duplicates(data, top_Rep=20, verbose=True, inform=None):
data: sequence to take the duplicates from. pandas Series or
numpy Ndarray.
verbose: tells how many duplicated entries were found and prints the
top numbers according to the top_Rep parameter. Defaluts to True.
top numbers according to the top_Rep argument. Defaults to True.
top_Rep: chooses how many duplicated entries will be
shown with the top repetitions. int or None. Defaults to 20.
If None, returns all the ordered repetitions.
Expand All @@ -1570,7 +1531,7 @@ def duplicates(data, top_Rep=20, verbose=True, inform=None):
verbose = _deprecate_inform_(verbose, inform)

if top_Rep is not None and not isinstance(top_Rep, int):
raise ValueError('The top_Rep parameter must be an int or None.')
raise ValueError('The top_Rep argument must be an int or None.')

if not isinstance(data, Series):
try:
Expand Down
2 changes: 1 addition & 1 deletion benford/expected.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class First(DataFrame):
"""Holds the expected probabilities of the First, First Two, or
First Three digits according to Benford's distribution.
First Three digits according to Benford's distribution.
Args:
digs: 1, 2 or 3 - tells which of the first digits to consider:
Expand Down
6 changes: 2 additions & 4 deletions benford/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ def chi_square(frame, ddf, confidence, verbose=True):
Returns:
The computed Chi square statistic and the critical chi square
(according) to the degrees of freedom and confidence level,
for comparison
None if confidence is None
for comparison. None if confidence is None
"""
if confidence is None:
print('\nChi-square test needs confidence other than None.')
Expand Down Expand Up @@ -152,8 +151,7 @@ def mse(frame, verbose=True):
verbose: Prints the MSE. Defaults to True.
Returns:
Mean of the squared differences between the found and the expected
proportions.
Mean of the squared differences between the found and the expected proportions.
"""
mse = (frame.AbsDif ** 2).mean()

Expand Down
2 changes: 1 addition & 1 deletion benford/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _set_N_(len_df, limit_N):


def get_mantissas(arr):
"""Computes the mantissas, the non-integer part of the log of a number.
"""Computes the mantissas, the non-integer part of the log of a number.
Args:
arr: array of integers or floats
Expand Down
31 changes: 21 additions & 10 deletions benford/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,12 @@ def plot_sum(df, figsize, li, text_x=False):
plt.show(block=False)

def plot_ordered_mantissas(col, figsize=(12, 12)):
"""
"""Plots the ordered mantissas and compares them to the expected, straight
line that should be formed in a Benford-compliant set.
Args:
col: column of mantissas to plot
figsize: sets the dimensions of the plot figure
col (Series): column of mantissas to plot.
figsize (tuple): sets the dimensions of the plot figure.
"""
ld = len(col)
x = arange(1, ld + 1)
Expand All @@ -160,21 +161,31 @@ def plot_ordered_mantissas(col, figsize=(12, 12)):
plt.legend(loc='upper left')
plt.show(block=False);

def plot_mantissa_arc_test(df, stats, decimals=2, grid=True, figsize=12):
""""""
def plot_mantissa_arc_test(df, gravity_center, grid=True, figsize=12):
"""Draws the Mantissa Arc Test after computing X and Y circular coordinates
for every mantissa and the center of gravity for the set
Args:
df (DataFrame): pandas DataFrame with the mantissas and the X and Y
coordinates.
gravity_center (tuple): coordinates for plotting the gravity center.
grid (bool): show grid. Defaults to True.
figsize (int): figure dimensions. No need to be a tuple, since the
figure is a square.
"""
fig = plt.figure(figsize=(figsize,figsize))
ax = plt.subplot()
ax.set_facecolor(colors['b'])
ax.scatter(df.mant_x, df.mant_y, label= "ARC TEST",
color=colors['m'])
ax.scatter(stats['gravity_center'][0], stats['gravity_center'][1],
ax.scatter(gravity_center[0], gravity_center[1],
color=colors['s'])
text_annotation = Annotation(
" Gravity Center: "
f"x({round(stats['gravity_center'][0], decimals)}),"
f" y({round(stats['gravity_center'][1], decimals)})",
xy=(stats['gravity_center'][0] - 0.65,
stats['gravity_center'][1] - 0.1),
f"x({round(gravity_center[0], 3)}),"
f" y({round(gravity_center[1], 3)})",
xy=(gravity_center[0] - 0.65,
gravity_center[1] - 0.1),
xycoords='data')
ax.add_artist(text_annotation)
ax.grid(True, which='both')
Expand Down
Binary file modified docs/build/doctrees/benford.doctree
Binary file not shown.
Binary file modified docs/build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/build/doctrees/index.doctree
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/build/html/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 5f4e07e2ec5552602c9236d1467abd22
config: f32db999374507860c3ba14b0a3086ce
tags: 645f666f9bcd5a90fca523b33c5a78b7

0 comments on commit 0c502cf

Please sign in to comment.