Updated documentation for multiple methods

researchpy · Mar 10, 2022 · fde503e · fde503e
1 parent b605202
commit fde503e
Show file tree

Hide file tree

Showing 5 changed files with 79 additions and 120 deletions.
diff --git a/source/anova_documentation.rst b/source/anova_documentation.rst
@@ -188,3 +188,9 @@ If it's of interest, one can also access the underlying regression table.
 .. raw:: html
 
   <table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>systolic</th>      <th>Coef.</th>      <th>Std. Err.</th>      <th>t</th>      <th>p-value</th>      <th>95% Conf. Interval</th>    </tr>  </thead>  <tbody>    <tr>      <td>Intercept</td>      <td>29.3333</td>      <td>4.2905</td>      <td>6.8367</td>      <td>0.0000</td>      <td>[20.6969, 37.9697]</td>    </tr>    <tr>      <td>drug</td>      <td></td>      <td></td>      <td></td>      <td></td>      <td></td>    </tr>    <tr>      <td>1</td>      <td>(reference)</td>      <td></td>      <td></td>      <td></td>      <td></td>    </tr>    <tr>      <td>2</td>      <td>-1.3333</td>      <td>6.3639</td>      <td>-0.2095</td>      <td>0.8350</td>      <td>[-14.1432, 11.4765]</td>    </tr>    <tr>      <td>3</td>      <td>-13.0000</td>      <td>7.4314</td>      <td>-1.7493</td>      <td>0.0869</td>      <td>[-27.9587, 1.9587]</td>    </tr>    <tr>      <td>4</td>      <td>-15.7333</td>      <td>6.3639</td>      <td>-2.4723</td>      <td>0.0172</td>      <td>[-28.5432, -2.9235]</td>    </tr>    <tr>      <td>disease</td>      <td></td>      <td></td>      <td></td>      <td></td>      <td></td>    </tr>    <tr>      <td>1</td>      <td>(reference)</td>      <td></td>      <td></td>      <td></td>      <td></td>    </tr>    <tr>      <td>2</td>      <td>-1.0833</td>      <td>6.7839</td>      <td>-0.1597</td>      <td>0.8738</td>      <td>[-14.7387, 12.572]</td>    </tr>    <tr>      <td>3</td>      <td>-8.9333</td>      <td>6.3639</td>      <td>-1.4038</td>      <td>0.1671</td>      <td>[-21.7432, 3.8765]</td>    </tr>    <tr>      <td>drug:disease</td>      <td></td>      <td></td>      <td></td>      <td></td>      <td></td>    </tr>    <tr>      <td>2:2</td>      <td>6.5833</td>      <td>9.7839</td>      <td>0.6729</td>      <td>0.5044</td>      <td>[-13.1107, 26.2774]</td>    </tr>    <tr>      <td>2:3</td>      <td>-0.9000</td>      <td>8.9999</td>      <td>-0.1000</td>  
    <td>0.9208</td>      <td>[-19.0159, 17.2159]</td>    </tr>    <tr>      <td>3:2</td>      <td>-10.8500</td>      <td>10.2435</td>      <td>-1.0592</td>      <td>0.2950</td>      <td>[-31.4692, 9.7692]</td>    </tr>    <tr>      <td>3:3</td>      <td>1.1000</td>      <td>10.2435</td>      <td>0.1074</td>      <td>0.9150</td>      <td>[-19.5192, 21.7192]</td>    </tr>    <tr>      <td>4:2</td>      <td>0.3167</td>      <td>9.3017</td>      <td>0.0340</td>      <td>0.9730</td>      <td>[-18.4066, 19.04]</td>    </tr>    <tr>      <td>4:3</td>      <td>9.5333</td>      <td>9.2022</td>      <td>1.0360</td>      <td>0.3056</td>      <td>[-8.9897, 28.0564]</td>    </tr>  </tbody></table>
+
+
+References
+==========
+.. bibliography:: refs.bib
+  :list: bullet
diff --git a/source/difference_test_documentation.rst b/source/difference_test_documentation.rst
@@ -72,16 +72,16 @@ Arguments
 .. note::
     This can be a one step, or two step process.
 
-    **One step**
-    .. code:: python
+**One step**
+
+.. code:: python
 
-        difference_test("DV ~ IV", data).conduct()
+    difference_test("DV ~ IV", data).conduct()
 
-    **Two step**
-    .. code:: python
+**Two step**
+
+.. code:: python
 
-        model = difference_test("DV ~ IV", data)
-        model.conduct()
+    model = difference_test("DV ~ IV", data)
+    model.conduct()
 
 
 
@@ -212,7 +212,10 @@ correlation coefficient r :cite:`Kerby2012` for the Wilcoxon ranked-sign test.
 
 
 Examples
---------
+========
+
+Loading Packages and Data
+-------------------------
 First let's create an example data set to work through the examples. This will be done using
 numpy (to create fake data) and pandas (to hold the data in a data frame).
 
@@ -267,16 +270,19 @@ will be shown below.
 Now the data is in the correct structure.
 
 
+Independent Samples t-test
+--------------------------
+
 .. code:: python
 
     # Independent t-test
 
     # If you don't store the 2 returned DataFrames, it outputs as a tuple and
     # is displayed
     rp.difference_test("StressReactivity ~ C(Exercise)",
-                    data = df2,
-                    equal_variances = True,
-                    independent_samples = True).conduct(effect_size = "all")
+                       data = df2,
+                       equal_variances = True,
+                       independent_samples = True).conduct(effect_size = "all")
 
 .. parsed-literal::
 
@@ -302,9 +308,9 @@ Now the data is in the correct structure.
 
     # Otherwise you can store them as objects
     summary, results = rp.difference_test("StressReactivity ~ C(Exercise)",
-                                       data = df2,
-                                       equal_variances = True,
-                                       independent_samples = True).conduct(effect_size = "all")
+                                          data = df2,
+                                          equal_variances = True,
+                                          independent_samples = True).conduct(effect_size = "all")
 
     summary
 
@@ -339,13 +345,16 @@ Now the data is in the correct structure.
 
 
 
+Paired Samples t-test
+---------------------
+
 .. code:: python
 
     # Paired samples t-test
     summary, results = rp.difference_test("StressReactivity ~ C(Exercise)",
-                                       data = df2,
-                                       equal_variances = True,
-                                       independent_samples = False).conduct(effect_size = "all")
+                                          data = df2,
+                                          equal_variances = True,
+                                          independent_samples = False).conduct(effect_size = "all")
 
     summary
 
@@ -378,6 +387,10 @@ Now the data is in the correct structure.
     10          Point-Biserial r   0.105763
 
 
+Welch's t-test
+--------------
+One can request either the Satterthwaite (default) or Welch degrees of freedom; to
+calculate the degrees of freedom using Welch's formula, set ``welch_dof = "welch"``.
 
 .. code:: python
 
@@ -420,47 +433,27 @@ Now the data is in the correct structure.
 
 
 
+Wilcoxon Signed-Rank Test
+-------------------------
+
 .. code:: python
 
     # Wilcoxon signed-rank test
-    summary, results = rp.difference_test("StressReactivity ~ C(Exercise)",
-                                       data = df2,
-                                       equal_variances = False,
-                                       independent_samples = False).conduct(effect_size = "r")
+    desc, var_adj, res = difference_test("StressReactivity ~ C(Exercise)", df2, independent_samples=False, equal_variances = False).conduct()
 
-    summary
+    print(desc, var_adj, res, sep = "\n"*2)
 
-.. parsed-literal::
-
-      Name    N  Mean Variance       SD        SE  95% Conf.  Interval
-    0   No  100  4.59  7.55747  2.74909  0.274909   4.044522  5.135478
-    1  Yes  100  4.16  9.81253   3.1325  0.313250   3.538445  4.781555
-
-.. code:: python
-
-    results
+.. raw:: html
 
-.. parsed-literal::
-
-      Wilcoxon signed-rank test   Results
-    0                (No = Yes)
-    1                       W =    1849.5
-    2       Two sided p-value =  0.333755
-    3          Point-Biserial r  0.366238
-
-
-.. code:: python
+    <table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>sign</th>      <th>obs</th>      <th>sum ranks</th>      <th>expected</th>    </tr>  </thead>  <tbody>    <tr>      <td>positive</td>      <td>40</td>      <td>2,298.0000</td>      <td>2,502.5000</td>    </tr>    <tr>      <td>negative</td>      <td>51</td>      <td>2,707.0000</td>      <td>2,502.5000</td>    </tr>    <tr>      <td>zero</td>      <td>9</td>      <td>45.0000</td>      <td>45.0000</td>    </tr>    <tr>      <td>all</td>      <td>100</td>      <td>5,050.0000</td>      <td>5,050.0000</td>    </tr>  </tbody></table>
 
-    # Exporting descriptive table (summary) and result table (results) to same
-    # csv file
-    summary.to_csv("C:\\Users\\...\\test.csv", index= False)
-    results.to_csv("C:\\Users\\...\\test.csv", index= False, mode= 'a')
+    <table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>unadjusted variance</th>      <th>adjustment for ties</th>      <th>adjustment for zeros</th>      <th>adjusted variance</th>    </tr>  </thead>  <tbody>    <tr>      <td>84,587.5000</td>      <td>-375.1250</td>      <td>-71.2500</td>      <td>84,141.1250</td>    </tr>  </tbody></table>
 
+    <table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>z</th>      <th>w</th>      <th>pval</th>    </tr>  </thead>  <tbody>    <tr>      <td>-0.7050</td>      <td>2,298.0000</td>      <td>0.4808</td>    </tr>  </tbody></table>
 
 
 
 
 References
-----------
+==========
 .. bibliography:: refs.bib
    :list: bullet
diff --git a/source/signrank_documentation.rst b/source/signrank_documentation.rst
@@ -21,15 +21,15 @@ Input
 -----
 **signrank(formula_like = None, data = {}, group1 = None, group2 = None, zero_method = "pratt", correction = False, mode = "auto")**
 
-  * **formula_like** : A valid formula which will parse the data into a design matrix.
+  * **formula_like** : A valid `formula <https://patsy.readthedocs.io/en/latest/formulas.html>`_ which will parse the data into a design matrix.
   * **data** : The dataframe which contains the data to be analyzed; required if using *formula_like*.
   * **group1** : The array like object which contains data for the paired-sample.
   * **group2** : The array like object which contains data for the paired-sample.
-  * **zero_method** : How to handle the zero-differences in the ranking process. Available options are (from :cite:`scipy_wilcoxon`):
+  * **zero_method** : How to handle the zero-differences in the ranking process. Available options are (see `scipy.stats.wilcoxon <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html#scipy.stats.wilcoxon>`_):
     * *"pratt"* : Includes zero-differences in the ranking process, but drops the ranks of the zeros (default).
     * *"wilcox"* : Discards all zero-differences.
-  * **correction** : Boolean value indicating if the continuity correction should be applied; see :cite:`scipy_wilcoxon` for more information.
-  * **mode** : Method to calculate the p-value, see :cite:`scipy_wilcoxon` for more information. Options are:
+  * **correction** : Boolean value indicating if the continuity correction should be applied; see `scipy.stats.wilcoxon <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html#scipy.stats.wilcoxon>`_ for more information.
+  * **mode** : Method to calculate the p-value, see `scipy.stats.wilcoxon <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html#scipy.stats.wilcoxon>`_ for more information. Options are:
     * *"auto"* : Use the exact distribution if there are no more than 25 observations and no ties, otherwise a normal approximation will be used (default).
     * *"exact"* : Use the exact distribution, can be used if there are no more than 25 observations and no ties.
     * *"approx"* : Use a normal approximation.
@@ -87,13 +87,13 @@ called 'fuel'.
 .. code:: python
 
  import researchpy as rp
-  import pandas as pd
-  # Used to load example data #
-  import statsmodels.datasets
+ import pandas as pd
+ # Used to load example data #
+ import statsmodels.datasets
 
-  fuel = statsmodels.datasets.webuse('fuel')
-  fuel["id"] = range(1, fuel.shape[0] + 1)
-  fuel.info()
+ fuel = statsmodels.datasets.webuse('fuel')
+ fuel["id"] = range(1, fuel.shape[0] + 1)
+ fuel.info()
 
 .. raw:: html
 
@@ -110,7 +110,7 @@ the data ready for this demonstration section the transformation will be conduct
                        value_vars = ["mpg1", "mpg2"],
                        var_name = "mpg")
 
-   fuel2.head()
+  fuel2.head()
 
 .. raw:: html
 
@@ -127,7 +127,7 @@ it makes the output look cleaner.
 
   desc, var_adj, res = signrank(group1 = fuel.mpg1, group2 = fuel.mpg2).conduct()
 
-   print(desc, var_adj, res, sep = "\n"*2)
+  print(desc, var_adj, res, sep = "\n"*2)
 
 .. raw:: html
 
@@ -165,7 +165,7 @@ Signrank using Long Structured Datasets
 
     desc, var_adj, res = signrank("value ~ C(mpg)", fuel2).conduct()
 
-     print(desc, var_adj, res, sep = "\n"*2)
+    print(desc, var_adj, res, sep = "\n"*2)
 
 .. raw:: html
 
@@ -174,3 +174,11 @@ Signrank using Long Structured Datasets
        <table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>unadjusted variance</th>      <th>adjustment for ties</th>      <th>adjustment for zeros</th>      <th>adjusted variance</th>    </tr>  </thead>  <tbody>    <tr>      <td>162.5000</td>      <td>-1.6250</td>      <td>-0.2500</td>      <td>160.6250</td>    </tr>  </tbody></table>
 
        <table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>z</th>      <th>w</th>      <th>pval</th>    </tr>  </thead>  <tbody>    <tr>      <td>-1.9726</td>      <td>13.5000</td>      <td>0.0485</td>    </tr>  </tbody></table>
+
+
+
+
+References
+==========
+.. bibliography:: refs.bib
+  :list: bullet
diff --git a/source/summarize_documentation.rst b/source/summarize_documentation.rst
@@ -56,12 +56,12 @@ called 'auto'.
 .. code:: python
 
  import researchpy as rp
-  import pandas as pd
-  # Used to load example data #
-  import statsmodels.datasets
+ import pandas as pd
+ # Used to load example data #
+ import statsmodels.datasets
 
-  auto = statsmodels.datasets.webuse('auto')
-  auto.info()
+ auto = statsmodels.datasets.webuse('auto')
+ auto.info()
 
 
 .. parsed-literal::

diff --git a/source/ttest_documentation.rst b/source/ttest_documentation.rst
@@ -193,6 +193,9 @@ calculated as:
 
 Examples
 ========
+
+Loading Packages and Data
+-------------------------
 .. code:: python
 
     import numpy, pandas, researchpy
@@ -636,72 +639,21 @@ Wilcoxon Signed-Rank Test
 .. code:: python
 
     # Wilcoxon signed-rank test
-    researchpy.ttest(df['healthy'], df['non-healthy'],
-                     equal_variances= False, paired= True)
+    desc, res = researchpy.ttest(df['healthy'], df['non-healthy'],
+                                 equal_variances= False, paired= True)
 
 .. raw:: html
 
-    <div>
-    <table border="1" class="dataframe">
-      <thead>
-        <tr style="text-align: right;">
-          <th></th>
-          <th>Wilcoxon signed-rank test</th>
-          <th>results</th>
-        </tr>
-      </thead>
-      <tbody>
-        <tr>
-          <th>0</th>
-          <td>Mean for healthy =</td>
-          <td>4.5900</td>
-        </tr>
-        <tr>
-          <th>1</th>
-          <td>Mean for non-healthy =</td>
-          <td>4.1600</td>
-        </tr>
-        <tr>
-          <th>2</th>
-          <td>T value =</td>
-          <td>1849.5000</td>
-        </tr>
-        <tr>
-          <th>3</th>
-          <td>Z value =</td>
-          <td>-0.9638</td>
-        </tr>
-        <tr>
-          <th>4</th>
-          <td>Two sided p value =</td>
-          <td>0.3347</td>
-        </tr>
-        <tr>
-          <th>5</th>
-          <td>r =</td>
-          <td>-0.0681</td>
-        </tr>
-      </tbody>
-    </table>
-    </div>
-
-
+    <table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>sign</th>      <th>obs</th>      <th>sum ranks</th>      <th>expected</th>    </tr>  </thead>  <tbody>    <tr>      <td>positive</td>      <td>52</td>      <td>2,804.5000</td>      <td>2,502.5000</td>    </tr>    <tr>      <td>negative</td>      <td>39</td>      <td>2,200.5000</td>      <td>2,502.5000</td>    </tr>    <tr>      <td>zero</td>      <td>9</td>      <td>45.0000</td>      <td>45.0000</td>    </tr>    <tr>      <td>all</td>      <td>100</td>      <td>5,050.0000</td>      <td>5,050.0000</td>    </tr>  </tbody></table>
 
-.. code:: python
-
-    # Exporting descriptive table (des) and result table (res) to same
-    # csv file
-    des, res = researchpy.ttest(df['healthy'], df['non-healthy'])
-
-    des.to_csv("C:\\Users\\...\\test.csv", index= False)
-    res.to_csv("C:\\Users\\...\\test.csv", index= False, mode= 'a')
+    <table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>Wilcoxon signed-rank test</th>      <th>results</th>    </tr>  </thead>  <tbody>    <tr>      <td>Mean for healthy =</td>      <td>4.5900</td>    </tr>    <tr>      <td>Mean for non-healthy =</td>      <td>4.1600</td>    </tr>    <tr>      <td>W value =</td>      <td>2,200.5000</td>    </tr>    <tr>      <td>Z value =</td>      <td>1.0411</td>    </tr>    <tr>      <td>p value =</td>      <td>0.2978</td>    </tr>    <tr>      <td>Rank-Biserial r =</td>      <td>0.1196</td>    </tr>    <tr>      <td>Pearson r =</td>      <td>0.1041</td>    </tr>  </tbody></table>
 
 
 
 
 
 References
-----------
+==========
 .. bibliography:: refs.bib
    :cited:
    :list: bullet