Fixes in Google python docstring + sphinx conf.py + apidoc + make html

milcent · Jan 30, 2020 · 0c502cf · 0c502cf
1 parent b5485ba
commit 0c502cf
Show file tree

Hide file tree

Showing 27 changed files with 6,498 additions and 608 deletions.
diff --git a/benford/benford.py b/benford/benford.py
@@ -6,7 +6,7 @@
 
 All logarithms ar in base 10: "log10"
 
-Copyright (C) 2014  Marcel Milcent
+Author:  Marcel Milcent
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -121,6 +121,17 @@ class Test(DataFrame):
             plotting and to limit the top deviations to show.
         limit_N: sets a limit to N as the sample size for the calculation of
                 the Z scores if the sample is too big. Defaults to None.
+    
+    Attributes:
+        N: Number of records in the sample to consider in computations
+        ddf: Degrees of Freedom to look up for the critical chi-square value
+        chi_square: Chi-square statistic for the given test
+        KS: Kolmogorov-Smirnov statistic for the given test
+        MAD: Mean Absolute Deviation for the given test
+        confidence: Confidence level to consider when setting some critical values
+        digs (int): numerical representation of the test at hand. 1: F1D; 2: F2D;
+            3: F3D; 22: SD; -2: L2D.
+        sec_order (bool): True if the test is a Second Order one 
     """
 
     def __init__(self, base, digs, confidence, limit_N=None, sec_order=False):
@@ -134,22 +145,14 @@ def __init__(self, base, digs, confidence, limit_N=None, sec_order=False):
         # create column with absolute differences
         self['Dif'] = self.Found - self.Expected
         self['AbsDif'] = self.Dif.abs()
-        #: Number of records in the sample to consider in computations
         self.N = _set_N_(len(base), limit_N)
         self['Z_score'] = Z_score(self, self.N)
-        #: Degrees of Freedom to look up for the critical chi-square value
         self.ddf = len(self) - 1
-        #: Chi-square statistic for the given test
         self.chi_square = chi_square_2(self)
-        #: Kolmogorov-Smirnov statistic for the given test
         self.KS = KS_2(self)
-        #: Mean Absolute Deviation for the given test
         self.MAD = self.AbsDif.mean()
-        #: Confidence level to consider when setting some critical values
         self.confidence = confidence
-        # (int): numerical representation of the test at hand 
         self.digs = digs
-        # (bool): True if the test is a Secnd Order one 
         self.sec_order = sec_order
 
         if sec_order:
@@ -175,7 +178,7 @@ def update_confidence(self, new_conf, check=True):
     @property
     def critical_values(self):
         """dict: a dictionary with the critical values for the test at hand,
-        according to the current confidence level."""
+            according to the current confidence level."""
         return {'Z': confs[self.confidence],
                 'KS': KS_crit[self.confidence] / (self.N ** 0.5),
                 'chi2': crit_chi2[self.ddf][self.confidence],
@@ -890,7 +893,7 @@ def duplicates(self, top_Rep=20, inform=None):
 
         Args:
             verbose: tells how many duplicated entries were found and prints the
-                top numbers according to the top_Rep parameter. Defaluts to True.
+                top numbers according to the top_Rep argument. Defaults to True.
             top_Rep: int or None. Chooses how many duplicated entries will be
                 shown withe the top repititions. Defaluts to 20. If None, returns
                 al the ordered repetitions.
@@ -904,7 +907,7 @@ def duplicates(self, top_Rep=20, inform=None):
             ValueError: if the `top_Rep` arg is not int or None.
         """
         if top_Rep is not None and not isinstance(top_Rep, int):
-            raise ValueError('The top_Rep parameter must be an int or None.')
+            raise ValueError('The top_Rep argument must be an int or None.')
 
         dup = self[['Seq']][self.Seq.duplicated(keep=False)]
         dup_count = dup.groupby(self.Seq).count()
@@ -925,14 +928,17 @@ def duplicates(self, top_Rep=20, inform=None):
 
 
 class Mantissas(object):
-    '''
+    """
     Returns a Series with the data mantissas,
 
-    Parameters
-    ----------
-    data: sequence to compute mantissas from, numpy 1D array, pandas
-        Series of pandas DataFrame column.
-    '''
+    Args:
+        data: sequence to compute mantissas from, numpy 1D array, pandas
+            Series or pandas DataFrame column.
+    Attributes:
+        data (DataFrame): holds the computed mantissas and, if the arc_test
+            is also called, the respective X and Y coordinates for the plot.
+        stats (dict): holds the relevant statistics about the data mantissas.
+    """
 
     def __init__(self, data):
 
@@ -947,14 +953,12 @@ def __init__(self, data):
                       'Kurt': self.data.Mantissa.kurt()}
 
     def report(self, show_plot=True):
-        '''
-        Displays the Mantissas stats.
-
-        Paranmeters:
-        -----------
-        show_plot: shows the ordered mantissas plot and the Arc Test plot.
-            Defaults to True.
-        '''
+        """Displays the Mantissas stats.
+
+        Args:
+            show_plot: shows the ordered mantissas plot and the Arc Test plot.
+                Defaults to True.
+        """
         print("\n", '  Mantissas Test  '.center(52, '#'))
         print(f"\nThe Mantissas MEAN is      {self.stats['Mean']:.6f}."
               "\tRef: 0.5")
@@ -969,75 +973,32 @@ def report(self, show_plot=True):
             self.arc_test()
 
     def show_plot(self, figsize=(12, 12)):
-        '''
-        plots the ordered mantissas and a line with the expected
-                inclination. Defaults to True.
-
-        Parameters
-        ----------
-
-        figsize -> tuple that sets the figure size
-        '''
-        ld = len(self.data)
-        x = arange(1, ld + 1)
-        n = ones(ld) / ld
-        fig = plt.figure(figsize=figsize)
-        ax = fig.add_subplot(111)
-        ax.plot(x, self.data.Mantissa.sort_values(), linestyle='--',
-                color=colors['s'], linewidth=3, label='Mantissas')
-        ax.plot(x, n.cumsum(), color=colors['m'],
-                linewidth=2, label='Expected')
-        plt.ylim((0, 1.))
-        plt.xlim((1, ld + 1))
-        ax.set_facecolor(colors['b'])
-        ax.set_title("Ordered Mantissas")
-        plt.legend(loc='upper left')
-        plt.show(block=False);
+        """Plots the ordered mantissas and compares them to the expected, straight
+        line that should be formed in a Benford-compliant set.
 
-    def arc_test(self, decimals=2, grid=True, figsize=12):
-        '''
+        Args:
+            figsize: tuple that sets the figure size.
+        """
+        plot_ordered_mantissas(self.data.Mantissa, figsize=figsize)
+
+    def arc_test(self, grid=True, figsize=12):
+        """
         Add two columns to Mantissas's DataFrame equal to their "X" and "Y"
         coordinates, plots its to a scatter plot and calculates the gravity
         center of the circle.
 
-        Parameters
-        ----------
-
-        decimals -> number of decimal places for displaying the gravity center.
-            Defaults to 2.
-        
-        grid -> show grid of the plot. Defaluts to True.
-        
-        figsize -> size of the figure to be displayed. Since it is a square,
-            there is no need to provide a tuple, like is usually the case with
-            matplotlib.
-        '''
+        The coordinates and the gravity center are computed only once and
+        cached in ``self.data`` / ``self.stats``; later calls just re-plot.
+
+        Args:
+            grid: show grid of the plot. Defaults to True.
+            figsize: size of the figure to be displayed. Since it is a square,
+                there is no need to provide a tuple, like is usually the case with
+                matplotlib.
+        """
         if self.stats.get('gravity_center') is None:
             self.data['mant_x'] = cos(2 * pi * self.data.Mantissa)
             self.data['mant_y'] = sin(2 * pi * self.data.Mantissa)
             self.stats['gravity_center'] = (self.data.mant_x.mean(),
                                             self.data.mant_y.mean())
-        fig = plt.figure(figsize=(figsize,figsize))
-        ax = plt.subplot()
-        ax.set_facecolor(colors['b'])
-        ax.scatter(self.data.mant_x, self.data.mant_y, label= "ARC TEST",
-                   color=colors['m'])
-        ax.scatter(self.stats['gravity_center'][0], self.stats['gravity_center'][1],
-                   color=colors['s']) 
-        text_annotation = Annotation(
-                    "  Gravity Center: "
-                    f"x({round(self.stats['gravity_center'][0], decimals)}),"
-                    f" y({round(self.stats['gravity_center'][1], decimals)})", 
-                    xy=(self.stats['gravity_center'][0] - 0.65,
-                        self.stats['gravity_center'][1] - 0.1),
-                    xycoords='data')
-        ax.add_artist(text_annotation)
-        ax.grid(True, which='both')
-        ax.axhline(y=0, color='k')
-        ax.axvline(x=0, color='k')
-        ax.legend(loc = 'lower left')
-        ax.set_title("Mantissas Arc Test")
-        plt.show(block=False);
+        # The plotting helper reads mant_x / mant_y from the DataFrame it is
+        # given, so hand it self.data (not the Mantissas wrapper) together
+        # with the cached gravity center, and forward the caller's grid flag.
+        plot_mantissa_arc_test(self.data, self.stats['gravity_center'],
+                               grid=grid, figsize=figsize)
 
 
 class Roll_mad(Series):
@@ -1081,7 +1042,7 @@ def show_plot(self, figsize=(15, 8)):
         """Shows the rolling MAD plot
         
         Args:
-            figsize: the figure dimensions .
+            figsize: the figure dimensions.
         """
         fig, ax = plt.subplots(figsize=figsize)
         ax.set_facecolor(colors['b'])
@@ -1556,7 +1517,7 @@ def duplicates(data, top_Rep=20, verbose=True, inform=None):
         data: sequence to take the duplicates from. pandas Series or
             numpy Ndarray.
         verbose: tells how many duplicated entries were found and prints the
-            top numbers according to the top_Rep parameter. Defaluts to True.
+            top numbers according to the top_Rep argument. Defaults to True.
         top_Rep: chooses how many duplicated entries will be
             shown withe the top repititions. int or None. Defaluts to 20.
             If None, returns al the ordered repetitions.
@@ -1570,7 +1531,7 @@ def duplicates(data, top_Rep=20, verbose=True, inform=None):
     verbose = _deprecate_inform_(verbose, inform)
 
     if top_Rep is not None and not isinstance(top_Rep, int):
-        raise ValueError('The top_Rep parameter must be an int or None.')
+        raise ValueError('The top_Rep argument must be an int or None.')
 
     if not isinstance(data, Series):
         try:

diff --git a/benford/expected.py b/benford/expected.py
@@ -6,7 +6,7 @@
 
 class First(DataFrame):
     """Holds the expected probabilities of the First, First Two, or
-     First Three digits according to Benford's distribution.
+    First Three digits according to Benford's distribution.
 
     Args:
         digs: 1, 2 or 3 - tells which of the first digits to consider:

diff --git a/benford/stats.py b/benford/stats.py
@@ -32,8 +32,7 @@ def chi_square(frame, ddf, confidence, verbose=True):
     Returns:
         The computed Chi square statistic and the critical chi square
             (according) to the degrees of freedom and confidence level,
-            for comparison
-        None if confidence is None
+            for comparison. None if confidence is None
     """
     if confidence is None:
         print('\nChi-square test needs confidence other than None.')
@@ -152,8 +151,7 @@ def mse(frame, verbose=True):
         verbose: Prints the MSE. Defaults to True.
     
     Returns:
-        Mean of the squared differences between the found and the expected
-            proportions.
+        Mean of the squared differences between the found and the expected proportions.
     """
     mse = (frame.AbsDif ** 2).mean()
 

diff --git a/benford/utils.py b/benford/utils.py
@@ -19,7 +19,7 @@ def _set_N_(len_df, limit_N):
 
 
 def get_mantissas(arr):
-    """Computes the  mantissas, the non-integer part of the log of a number.
+    """Computes the mantissas, the non-integer part of the log of a number.
     
     Args:
         arr: array of integers or floats

diff --git a/benford/viz.py b/benford/viz.py
@@ -138,11 +138,12 @@ def plot_sum(df, figsize, li, text_x=False):
     plt.show(block=False)
 
 def plot_ordered_mantissas(col, figsize=(12, 12)):
-    """
+    """Plots the ordered mantissas and compares them to the expected, straight
+        line that should be formed in a Benford-compliant set.
     
     Args:
-        col: column of mantissas to plot
-        figsize: sets the dimensions of the plot figure
+        col (Series): column of mantissas to plot.
+        figsize (tuple): sets the dimensions of the plot figure.
     """
     ld = len(col)
     x = arange(1, ld + 1)
@@ -160,21 +161,31 @@ def plot_ordered_mantissas(col, figsize=(12, 12)):
     plt.legend(loc='upper left')
     plt.show(block=False);
 
-def plot_mantissa_arc_test(df, stats, decimals=2, grid=True, figsize=12):
-    """"""
+def plot_mantissa_arc_test(df, gravity_center, grid=True, figsize=12):
+    """Draws the Mantissa Arc Test after computing X and Y circular coordinates
+    for every mantissa and the center of gravity for the set
+    
+    Args:
+        df (DataFrame): pandas DataFrame with the mantissas and the X and Y
+            coordinates.
+        gravity_center (tuple): coordinates for plotting the gravity center.
+        grid (bool): show grid. Defaults to True.
+        figsize (int): figure dimensions. No need to be a tuple, since the
+            figure is a square.
+    """
     fig = plt.figure(figsize=(figsize,figsize))
     ax = plt.subplot()
     ax.set_facecolor(colors['b'])
     ax.scatter(df.mant_x, df.mant_y, label= "ARC TEST",
                 color=colors['m'])
-    ax.scatter(stats['gravity_center'][0], stats['gravity_center'][1],
+    ax.scatter(gravity_center[0], gravity_center[1],
                 color=colors['s']) 
     text_annotation = Annotation(
                 "  Gravity Center: "
-                f"x({round(stats['gravity_center'][0], decimals)}),"
-                f" y({round(stats['gravity_center'][1], decimals)})", 
-                xy=(stats['gravity_center'][0] - 0.65,
-                    stats['gravity_center'][1] - 0.1),
+                f"x({round(gravity_center[0], 3)}),"
+                f" y({round(gravity_center[1], 3)})", 
+                xy=(gravity_center[0] - 0.65,
+                    gravity_center[1] - 0.1),
                 xycoords='data')
     ax.add_artist(text_annotation)
     ax.grid(True, which='both')

diff --git a/docs/build/doctrees/benford.doctree b/docs/build/doctrees/benford.doctree
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree
diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 5f4e07e2ec5552602c9236d1467abd22
+config: f32db999374507860c3ba14b0a3086ce
 tags: 645f666f9bcd5a90fca523b33c5a78b7