From 2b6ed725b825e6e43fa8c827c0aebb050d80ee3e Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Fri, 30 Jun 2023 16:58:48 -0400
Subject: [PATCH 01/53] init autodiff implementation

---
 delicatessen/derivative.py | 319 +++++++++++++++++++++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 delicatessen/derivative.py

diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py
new file mode 100644
index 0000000..1d14758
--- /dev/null
+++ b/delicatessen/derivative.py
@@ -0,0 +1,319 @@
+import numpy as np
+
+
+def auto_differentiation(xk, f):
+    """Forward mode automatic differentiation. Automatic differentiation offers a way to compute the derivative exactly,
+    rather than numerically approximated (unlike the central difference method). Automatic differentiation iteratively
+    applies the chain rule into order to evaluate the derivative.
+
+    Note
+    ----
+    This functionality is only intended for use behind the scenes in ``delicatessen``. I wrote this functionality to
+    avoid additional dependencies.
+
+    Parameters
+    ----------
+    xk: ndarray, list, shape (n, )
+        Point(s) or coordinate vector to evaluate the gradient at.
+    f: callable
+        Function to estimate the gradient of.
+
+    Returns
+    -------
+    ndarray
+
+    Examples
+    --------
+
+    References
+    ----------
+
+    """
+    def f_deny_bool(function, x):
+        """This internal function re-evaluates the input function with an additional operator. Namely zero is added
+        to the function. This causes no difference in the primal or tangent pair. The purpose of this function is to
+        work around a specific behavior of ``np.where(...)``.
+
+        The function ``np.where`` uses a weird call order for the standard Python operators. It calls ``__bool__`` as
+        the final operation. This means that ``PrimalTangentPairs`` can only return a ``bool`` for ``np.where`` calls.
+        Consider the following example
+
+        .. code::
+
+            np.where(x[0] >= 1, x[1]**2, 0)
+
+
+        If ``x[0] >= 1`` then we should find the derivative of ``x[1]**2``. For ``np.where``, ``x[1]**2`` is evaluated
+        before ``x[0] >= 1``. Because the boolean piece is the last part evaluated, an empty array would be returned.
+
+        By adding zero, we ensure that the final operation applied is an addition, and thus a float is returned
+        after the boolean is evaluated. Essentially, we deny the call to ``__bool__`` from being the last operation.
+        """
+        eval = function(x)                # Short-name for function
+        eval_no_bool_end = []             # Empty list for storage
+        for e in eval:                    # For each evaluation in the function
+            eval_no_bool_end.append(e+0)  # ... adding zero in a loop
+        return eval_no_bool_end           # Return the evaluation with the final addition
+
+    # Meta-information about function and inputs
+    xshape = len(xk)                                                 # The number of inputs into the function
+
+    # Set up Dual objects for evaluating gradient of function
+    pairs_for_gradient = []                                          # Storage for the pairs to provide function
+    for i in range(xshape):                                          # For each of the inputs
+        partial = np.zeros_like(xk)                                  # ... generate array of that size of all zeroes
+        partial[i] = 1                                               # ... replace 0 with 1 for current index
+        pairs_for_gradient.append(PrimalTangentPairs(xk[i], partial))  # ... then store as a primal,tangent pair
+    x_to_eval = np.asarray(pairs_for_gradient)                       # Convert from list to NumPy array
+
+    # Evaluating the function for the primal,tangent pairs
+    evaluated_pair = f_deny_bool(function=f,                         # Evaluate the primal,tangent pair with function
+                                 x=x_to_eval)                        # ... at the given values
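+
+    # For intuition (an illustrative sketch, with hypothetical inputs): suppose f(x) = [x[0]*x[1], x[0]**2]
+    # and xk = [1., 2.]. The pairs passed in are PrimalTangentPairs(1., [1., 0.]) and
+    # PrimalTangentPairs(2., [0., 1.]), so the first output evaluates to a pair with primal 2.0 and
+    # tangent [2., 1.], i.e., the function value alongside the corresponding gradient row.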
+
+    # Processing function output into gradient value or matrix
+    evaluated_gradient = []                                        # List storage for the computed gradient
+    for pair in evaluated_pair:                                    # For each evaluated primal,tangent pair
+        if isinstance(pair, PrimalTangentPairs):                   # ... if that pair is the key type
+            evaluated_gradient.append(pair.tangent)                # ... then give back the derivative
+        else:                                                      # ... otherwise row has no xk operations
+            evaluated_gradient.append([0 for j in range(xshape)])  # ... so derivative is always zero
+
+    # Return evaluated gradient as a NumPy array
+    if len(evaluated_gradient) == 1:                               # If only consists of 1 item
+        return np.asarray(evaluated_gradient[0])                   # ... return that item as NumPy array
+    else:                                                          # Otherwise
+        return np.asarray(evaluated_gradient)                      # ... return it as an array
+
+
+class PrimalTangentPairs:
+    """Unique class object for automatic differentiation. This process divides the inputs into 'primal' and 'tangent'
+    pairs. Operations on the pairs are then recursively called.
+
+
+    Parameters
+    ----------
+    primal :
+        B
+    tangent :
+        A
+    """
+    def __init__(self, primal, tangent):
+        # Processing of the inputs
+        if isinstance(primal, PrimalTangentPairs):    # If given a PrimalTangentPair input
+            self.primal = primal.primal               # ... extract the primal element from input
+        else:                                         # Else
+            self.primal = primal                      # ... directly save as new primal
+        self.tangent = tangent                        # Store the tangent

+    # Basic operators
+
+    def __str__(self):
+        # Conversion to string just in case it gets called somehow
+        return f"PrimalTangentPairs({self.primal}, {self.tangent})"
+
+    def __bool__(self):
+        # To get np.where working properly, I need to have the internal boolean function for this class return
+        # only the primal part. This is a bool object type, which is what is expected and has np.where operate as
+        # expected. This only seems to be called for np.where and not the other operators (they work directly).
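+        # As a sketch of the behavior described above: np.where(x[0] >= 1, x[1]**2, 0) evaluates
+        # x[0] >= 1 into a PrimalTangentPairs and then calls bool() on it last, so this method is
+        # what ultimately decides which branch np.where reports back.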
+ return self.primal + + # Equality operators + + def __le__(self, other): + # Less than or equal to operator + if isinstance(other, PrimalTangentPairs): # + comparison = (self.primal <= other.primal) # + else: # + comparison = (self.primal <= other) # + return PrimalTangentPairs(comparison, 0) # + + def __lt__(self, other): + # Less than operator + if isinstance(other, PrimalTangentPairs): + comparison = (self.primal < other.primal) + else: + comparison = (self.primal < other) + return PrimalTangentPairs(comparison, 0) + + def __ge__(self, other): + # Greater than or equal to operator + if isinstance(other, PrimalTangentPairs): + comparison = (self.primal >= other.primal) + else: + comparison = (self.primal >= other) + return PrimalTangentPairs(comparison, 0) + + def __gt__(self, other): + # Greater than operator + if isinstance(other, PrimalTangentPairs): + comparison = (self.primal > other.primal) + else: + comparison = (self.primal > other) + return PrimalTangentPairs(comparison, 0) + + def __eq__(self, other): + # Equality operator + if isinstance(other, PrimalTangentPairs): + comparison = (self.primal == other.primal) + else: + comparison = (self.primal == other) + return PrimalTangentPairs(comparison, 0) + + def __ne__(self, other): + # Inequality operator + if isinstance(other, PrimalTangentPairs): + comparison = (self.primal != other.primal) + else: + comparison = (self.primal != other) + return PrimalTangentPairs(comparison, 0) + + # Elementary mathematical operations + + def __neg__(self): + # Negation operator + return PrimalTangentPairs(-self.primal, + -self.tangent) + + def __add__(self, other): + # Addition + if isinstance(other, PrimalTangentPairs): + return PrimalTangentPairs(self.primal + other.primal, + self.tangent + other.tangent) + else: + return PrimalTangentPairs(self.primal + other, + self.tangent) + + def __radd__(self, other): + # Addition, reversed + return self.__add__(other) + + def __sub__(self, other): + # Subtraction + if isinstance(other, PrimalTangentPairs): + return PrimalTangentPairs(self.primal - other.primal, + self.tangent - other.tangent) + else: + return PrimalTangentPairs(self.primal - other, + self.tangent) + + def __rsub__(self, other): + # Subtraction, reversed + return PrimalTangentPairs(other - self.primal, + -1 * self.tangent) + + def __mul__(self, other): + # Multiplication + if isinstance(other, PrimalTangentPairs): + return PrimalTangentPairs(self.primal * other.primal, + self.tangent * other.primal + self.primal * other.tangent) + else: + return PrimalTangentPairs(self.primal * other, + self.tangent * other) + + def __rmul__(self, other): + # Multiplication, reversed + return PrimalTangentPairs(self.primal * other, + self.tangent * other) + + def __truediv__(self, other): + # Division + if isinstance(other, PrimalTangentPairs): + return PrimalTangentPairs(self.primal / other.primal, + (self.tangent * other.primal - self.primal * other.tangent) / (other.primal ** 2)) + else: + return PrimalTangentPairs(self.primal / other, + self.tangent / other) + + def __rtruediv__(self, other): + # Division, reversed + return PrimalTangentPairs(other / self.primal, + (0 * self.primal - self.tangent * other) / (self.primal ** 2)) + + def __pow__(self, other): + # Power + if isinstance(other, PrimalTangentPairs): + fgx = other.primal * self.primal ** (other.primal - 1) * self.tangent + gx = self.primal ** other.primal * np.log(self.primal) * other.tangent + return PrimalTangentPairs(self.primal ** other.primal, + fgx + gx) + else: + return 
PrimalTangentPairs(self.primal ** other, + other * self.primal ** (other - 1) * self.tangent) + + def __rpow__(self, other): + # Power, reversed + return PrimalTangentPairs(other ** self.primal, + other ** self.primal * np.log(other)) + + # Operators that sometimes have undefined derivatives + + def __abs__(self): + # Absolute value + if self.primal > 0: + return PrimalTangentPairs(1 * self.primal, self.tangent) + elif self.primal < 0: + return PrimalTangentPairs(-1 * self.primal, -1 * self.tangent) + else: + return PrimalTangentPairs(0 * self.primal, np.nan * self.tangent) + + def __floor__(self): + # Floor + if self.primal % 1 == 0: + return PrimalTangentPairs(np.floor(self.primal), np.nan * self.tangent) + else: + return PrimalTangentPairs(np.floor(self.primal), 0 * self.tangent) + + def __ceil__(self): + # Ceiling + if self.primal % 1 == 0: + return PrimalTangentPairs(np.ceil(self.primal), np.nan * self.tangent) + else: + return PrimalTangentPairs(np.ceil(self.primal), 0 * self.tangent) + + # NumPy logarithm and exponent operators + + def exp(self): + # Exponentiate + return PrimalTangentPairs(np.exp(self.primal), + np.exp(self.primal) * self.tangent) + + def log(self): + # Logarithm + return PrimalTangentPairs(np.log(self.primal), + self.tangent / self.primal) + + # NumPy trigonometry operators + + def sin(self): + # Sine function + return PrimalTangentPairs(np.sin(self.primal), + np.cos(self.primal) * self.tangent) + + def cos(self): + # Cosine function + return PrimalTangentPairs(np.cos(self.primal), + -np.sin(self.primal) * self.tangent) + + def tan(self): + # Tangent function + return PrimalTangentPairs(np.tan(self.primal), + self.tangent / np.cos(self.primal)) + + def sinh(self): + # Hyperbolic sine function + return PrimalTangentPairs(np.sinh(self.primal), + np.cosh(self.primal) * self.tangent) + + def cosh(self): + # Hyperbolic cosine function + return PrimalTangentPairs(np.cosh(self.primal), + np.sinh(self.primal) * self.tangent) + + def tanh(self): + # Hyperbolic tangent function + return PrimalTangentPairs(np.tanh(self.primal), + (1 / (np.cosh(self.primal) ** 2)) * self.tangent) + + # TODO add arc trig functions + From 07daa67aa5a6351f1e878001d01de5013bc6591b Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:52:54 -0400 Subject: [PATCH 02/53] adding missing trig functions --- delicatessen/derivative.py | 55 ++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index 1d14758..b86ad82 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -279,10 +279,27 @@ def exp(self): np.exp(self.primal) * self.tangent) def log(self): - # Logarithm + # Logarithm base-e return PrimalTangentPairs(np.log(self.primal), self.tangent / self.primal) + def log2(self): + # Logarithm base-2 + pair = self.log() + return PrimalTangentPairs(pair.primal / np.log(2), + pair.tangent / np.log(2)) + + def log10(self): + # Logarithm base-10 + pair = self.log() + return PrimalTangentPairs(pair.primal / np.log(10), + pair.tangent / np.log(10)) + + def sqrt(self): + # Square root + return PrimalTangentPairs(np.sqrt(self.primal), + self.tangent / (2 * np.sqrt(self.primal))) + # NumPy trigonometry operators def sin(self): @@ -298,7 +315,22 @@ def cos(self): def tan(self): # Tangent function return PrimalTangentPairs(np.tan(self.primal), - self.tangent / np.cos(self.primal)) + self.tangent / np.cos(self.primal)**2) + + def 
arcsin(self): + # Inverse sine function + return PrimalTangentPairs(np.arcsin(self.primal), + self.tangent / np.sqrt(1 - self.primal**2)) + + def arccos(self): + # Inverse cosine function + return PrimalTangentPairs(np.arccos(self.primal), + -1 * self.tangent / np.sqrt(1 - self.primal**2)) + + def arctan(self): + # Inverse tangent function + return PrimalTangentPairs(np.arctan(self.primal), + self.tangent / (1 + self.primal**2)) def sinh(self): # Hyperbolic sine function @@ -313,7 +345,20 @@ def cosh(self): def tanh(self): # Hyperbolic tangent function return PrimalTangentPairs(np.tanh(self.primal), - (1 / (np.cosh(self.primal) ** 2)) * self.tangent) - - # TODO add arc trig functions + self.tangent / (np.cosh(self.primal) ** 2)) + + def arcsinh(self): + # Inverse hyperbolic sine function + return PrimalTangentPairs(np.arcsinh(self.primal), + self.tangent / np.sqrt(self.primal**2 + 1)) + + def arccosh(self): + # Inverse hyperbolic cosine function + return PrimalTangentPairs(np.arccosh(self.primal), + self.tangent / np.sqrt(self.primal**2 - 1)) + + def arctanh(self): + # Inverse hyperbolic tangent function + return PrimalTangentPairs(np.arctanh(self.primal), + self.tangent / (1 - self.primal**2)) From 6f5d8ee7af24e6a33a58428cf148a4cb087515aa Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:53:16 -0400 Subject: [PATCH 03/53] initial test suite for autodiff --- tests/test_autodiff_deriv.py | 263 +++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 tests/test_autodiff_deriv.py diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py new file mode 100644 index 0000000..3f2a54d --- /dev/null +++ b/tests/test_autodiff_deriv.py @@ -0,0 +1,263 @@ +import pytest +import numpy as np +import numpy.testing as npt +from scipy.optimize import approx_fprime + +from delicatessen.utilities import inverse_logit, identity +from delicatessen.derivative import auto_differentiation + + +class TestEstimatingEquationsBase: + + def test_compare_elementary_operators(self): + # Defining the functions to check + def f(x): + return [10, + 10 - 5 + 32*5 - 6**2, + 5 + x[0] + x[1] - x[2] - x[3], + -32 + x[0]*x[2] + x[1]*x[3], + x[1]**2 + x[0] + x[3] - 30, + -32 + x[0]**x[2] + x[1]**x[3], + (x[0] + x[1])**(x[2] + x[3]) + 6, + 5*x[1]**2 + (x[2]**2)*5, + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + + def test_compare_equality1_operators(self): + # Defining the functions to check + def f(x): + return [(x[0] <= 0.1)*x[0] + x[1]**x[2], + (x[0] < 0.1)*x[0] + x[1]**x[2], + (x[0] >= 0.1) * x[0] + x[1] ** x[2], + (x[0] > 0.1) * x[0] + x[1] ** x[2], + (x[0] >= 5.0)*x[0] + x[1]**x[2], + (x[0] > 5.0)*x[0] + x[1]**x[2], + (x[0] <= 5.0)*x[0] + x[1]**x[2], + (x[0] < 5.0)*x[0] + x[1]**x[2], + (x[0] <= 5.1)*(x[0] <= 7.0)*(x[0] ** 2.5)*(x[0] + 3)**0.5 + 27*x[0]**3, + (x[0] < 5.1) * (x[0] + x[1] ** 2) ** 3, + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + + def test_compare_equality2_operators(self): + # Defining the functions to check + def f(x): + return [(x[0] == 0.5) * 
(x[0] + x[1]**2), + (x[0] != 0.5) * (x[0] + x[1]**2), + (x[0] == 5.5) * (x[0] + x[1]**2), + (x[0] != 5.5) * (x[0] + x[1]**2), + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # Truth by-hand (approx_fprime breaks here due to shifts) + dx_true = np.array([[1., 3.8, 0., 0., ], + [0., 0., 0., 0., ], + [0., 0., 0., 0., ], + [1., 3.8, 0., 0., ]]) + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + + # Checking + npt.assert_allclose(dx_true, dx_exact, atol=1e-9) + + def test_compare_numpy_operators(self): + # Defining the functions to check + def f(x): + return [np.sum(x), + np.prod(x), + np.dot(np.array([1, -2, 3]), x[0:3].T), + np.power(x[1], 0) + np.power(x[2], 2), + -np.power(x[0], 2) + 3*np.power(x[3], 2), + np.sqrt(x[1]), + + np.abs(x[0] + x[2]), + np.abs(x[0] + (x[1] + x[0])**2), + np.sign(x[2]), + np.sign(x[2])*x[0] + x[1]**x[2], + + 5 + np.exp(1) + x[1] - x[2], + np.exp(x[0] - x[1]) + 1.5*x[2]**3, + 5 + np.exp(x[0] + x[1]) + x[2] + 5 * x[3], + + np.log(x[0] + x[1]) + x[3]**1.5, + np.log2(x[1]) + np.log10(x[3]) - x[2]**2, + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + + def test_compare_numpy_trig(self): + # Defining the functions to check + def f(x): + return [np.sin(x[0]) + x[0]**2, + np.cos(x[1]**2) + x[2], + np.tan(x[2])**2 - 5*x[0]*x[1]**2, + np.arcsin(x[0]) - 2*np.arccos(x[1]) + np.arctan(0.1*x[2]) + x[3]**3, + np.arctan(x[0] + np.arcsin(0.5*x[1] + 0.1*x[2])), + np.sinh(1.1*x[0]) + np.cosh(0.5*x[1]) + np.tanh(0.9*x[2] + x[3]), + np.arcsinh(x[0]) - 2*np.arccosh(x[1]) + np.arctanh(0.1*x[2]) + x[3]**3, + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + + def test_compare_numpy_comparisons(self): + # Defining the functions to check + def f(x): + return [np.where(x[0] <= 0.1, 1, 0), + np.where(x[0] <= 5.1, 1, 0), + np.where(x[0] <= 5.1, 1, 0) * x[1], + np.where(x[0] <= 0.1, 1, 0) * x[0], + np.where(x[0] <= 5.1, 1, 0) * (x[0] ** 2.5), + np.where(x[0] <= 5.1, x[0] ** 2.5, 0), + np.where(x[0] < 5.1, 1, 0)*(x[3]**1.5), + np.where(x[0] < 5.1, x[3] ** 1.5, 0), + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + + def test_compare_numpy_truncate(self): + # Defining the functions to check + def f(x): + return [np.floor(x[0]), + 1 + np.floor(x[0] + x[1]), + np.floor(x[3] - 0.1), + np.ceil(x[1]), + np.clip(x[0], -10, 10), + np.clip(x[0], 8, 10), + np.clip(x[0], -10, -8), + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + + def test_numpy_hard_comparisons(self): + # Defining the functions to check + def f(x): + return [x[1] ** 2 + 1 * np.where(x[2] == -2.3, 1, 0) * (x[0] + x[3]), # Equality causes bad approx + x[1] ** 2 + 1 * np.where(x[2] 
== -2.3, x[0] + x[3], 0), # Equality causes bad approx + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # True Derivatives + dx_true = np.array([[1., 3.8, 0., 1.], + [1., 3.8, 0., 1.], + ]) + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + + # Checking + npt.assert_allclose(dx_true, dx_exact, atol=1e-5) + + def test_compare_deli_utilities(self): + # Defining the functions to check + def f(x): + return [inverse_logit(-0.75 + 0.5*x[0] - 0.78*x[1] + 1.45*x[2] + 1.2*x[3]), + identity(-0.75 + 0.5*x[0] - 0.78*x[1] + 1.45*x[2]) + 1.2*x[3], ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 3] + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-7) + + def test_large_gradient(self): + # Function to evaluate (with 500 parameters) + def f(x): + x_init = 1 + storage = [] + for xi in x: + y = np.sin(x_init - xi**2 - 0.01*xi) + storage.append(y) + x_init = xi + return storage + + # Points to Evaluate at + xinput = np.linspace(-10, 10, 500) + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-4) + + def test_undefined_derivs(self): + # Defining the functions to check + def f(x): + return [np.floor(x[3]) + x[0], # Floor is not defined if no remain + np.ceil(x[3]) + x[1], # Ceil is not defined if no remain + np.abs(x[0] - 1/x[3]) + x[2], # Absolute value not defined at 0 + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # True Derivatives + dx_true = np.array([[np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan], + ]) + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + + # Checking + npt.assert_allclose(dx_true, dx_exact, atol=1e-5) From 3b96939497dfa6a51a45c5cd73436337f45b81d8 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:53:47 -0400 Subject: [PATCH 04/53] adding in deriv method --- delicatessen/mestimation.py | 44 +++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/delicatessen/mestimation.py b/delicatessen/mestimation.py index 1436fb4..22b2d6e 100644 --- a/delicatessen/mestimation.py +++ b/delicatessen/mestimation.py @@ -4,6 +4,8 @@ from scipy.optimize import approx_fprime from scipy.stats import norm +from delicatessen.derivative import auto_differentiation + class MEstimator: r"""M-Estimator for stacked estimating equations. @@ -172,7 +174,7 @@ def __init__(self, stacked_equations, init=None, subset=None): self.variance = None # Covariance matrix for theta values (calculated later) self.asymptotic_variance = None # Asymptotic covariance matrix for theta values (calculated later) - def estimate(self, solver='newton', maxiter=1000, tolerance=1e-9, dx=1e-9, order=3, allow_pinv=True): + def estimate(self, solver='newton', maxiter=1000, tolerance=1e-9, deriv_method='approx', dx=1e-9, allow_pinv=True): """Function to carry out the point and variance estimation of theta. After this procedure, the point estimates (in ``theta``) and the covariance matrix (in ``variance``) can be extracted. 
@@ -193,13 +195,14 @@ def estimate(self, solver='newton', maxiter=1000, tolerance=1e-9, dx=1e-9, order tolerance : float, optional Maximum tolerance for errors in the root finding. This argument is passed ``scipy.optimize`` via the ``tol`` parameter. Default is 1e-9. + deriv_method : str, optional + Method to compute the derivative of the estimating equations for the bread matrix. Options include numerical + approximation with the central difference method (``'approx'``) and forward-mode automatic differentiation + (``'exact'``). Default is numerical approximation. dx : float, optional Spacing to use to numerically approximate the partial derivatives of the bread matrix. Default is 1e-9. It is generally not recommended to have a large ``dx``, since some large values can poorly approximate derivatives. - order : int, optional - Number of points to use to numerically approximate the partial derivative (must be an odd number). Default - is 3. allow_pinv : bool, optional The default is ``True`` which uses ``numpy.linalg.pinv`` to find the inverse (or pseudo-inverse if matrix is non-invertible) for the bread. This default option is more robust to the possible matrices. If you want @@ -290,6 +293,7 @@ def estimate(self, solver='newton', maxiter=1000, tolerance=1e-9, dx=1e-9, order # STEP 2: calculating Variance # STEP 2.1: baking the Bread self.bread = self._bread_matrix_(theta=self.theta, # Provide theta-hat + method=deriv_method, # Method to use dx=dx) / self.n_obs # Derivative approximation value # STEP 2.2: slicing the meat @@ -508,7 +512,7 @@ def _solve_coefficients_(stacked_equations, init, method, maxiter, tolerance): # Return optimized theta array return psi - def _bread_matrix_(self, theta, dx): + def _bread_matrix_(self, theta, method, dx): """Evaluate the bread matrix by taking all partial derivatives of the thetas in the estimating equation. Parameters @@ -525,14 +529,26 @@ def _bread_matrix_(self, theta, dx): # Check how many values of theta there is val_range = len(theta) - # Calculating the bread - if val_range == 1: # When only a single theta is present - d = derivative(self._mestimation_answer_no_subset_, # ... approximate the derivative - theta, dx=dx) # ... at the solved theta (input) - bread_matrix = np.array([[d, ], ]) # ... return as 1-by-1 array object for inversion - else: # Otherwise approximate the partial derivatives - bread_matrix = approx_fprime(xk=theta, # ... use built-in jacobian functionality of SciPy - f=self._mestimation_answer_no_subset_, - epsilon=dx) # ... order option removed in v1.0 since not in SciPy + # Calculating the bread via numerical approximation + if method.lower() == "approx": + if val_range == 1: # When only a single theta is present + d = derivative(self._mestimation_answer_no_subset_, # ... approximate the derivative + theta, dx=dx) # ... at the solved theta (input) + bread_matrix = np.array([[d, ], ]) # ... return as 1-by-1 array object for inversion + else: # Otherwise approximate the partial derivatives + bread_matrix = approx_fprime(xk=theta, # ... use built-in jacobian functionality of SciPy + f=self._mestimation_answer_no_subset_, + epsilon=dx) # ... 
order option removed in v1.0 + + # Calculating the bread via automatic differentiation + elif method.lower() == "exact": + bread_matrix = auto_differentiation(xk=theta, + f=self._mestimation_answer_no_subset_) + + # Error for invalid derivative option + else: + raise ValueError("Input for deriv_method was " + str(method) + + ", but only 'approx' and 'exact' are available.") + # Return bread (multiplied by negative 1 as in Stefanski & Boos) return -1 * bread_matrix From 4cf8ba13faf63fbf6bf4eff57be7803016bd8185 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:54:01 -0400 Subject: [PATCH 05/53] fixing tests that had hold over dx option --- tests/test_MEstimation_examples.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tests/test_MEstimation_examples.py b/tests/test_MEstimation_examples.py index 19cdceb..500eb03 100644 --- a/tests/test_MEstimation_examples.py +++ b/tests/test_MEstimation_examples.py @@ -164,8 +164,7 @@ def psi_quantile(theta): estr = MEstimator(psi_quantile, init=[0., 0., 0., ]) estr.estimate(solver='hybr', tolerance=1e-4, - dx=1, - order=9) + dx=1) # Verify using built-in equations mest25 = MEstimator(lambda theta: ee_percentile(theta=theta, y=y, q=0.25), init=[-0.1, ]) @@ -173,16 +172,13 @@ def psi_quantile(theta): mest75 = MEstimator(lambda theta: ee_percentile(theta=theta, y=y, q=0.75), init=[0.1, ]) mest25.estimate(solver='hybr', tolerance=1e-3, - dx=1, - order=15) + dx=1) mest50.estimate(solver='hybr', tolerance=1e-3, - dx=1, - order=15) + dx=1) mest75.estimate(solver='hybr', tolerance=1e-3, - dx=1, - order=15) + dx=1) assert estr.theta[0] - mest25.theta[0] < 1e-2 assert estr.theta[1] - mest50.theta[0] < 1e-2 @@ -206,8 +202,7 @@ def psi_deviation(theta): estr = MEstimator(psi_deviation, init=[0., 0., ]) estr.estimate(solver='hybr', tolerance=1e-3, - dx=1, - order=9) + dx=1) assert estr.theta[0] - md < 0.1 assert estr.theta[1] - np.median(y) < 0.1 @@ -216,7 +211,7 @@ def psi_deviation(theta): mest = MEstimator(lambda theta: ee_positive_mean_deviation(theta, y), init=[0., 0., ]) mest.estimate(solver='hybr', tolerance=1e-3, - dx=1, order=9) + dx=1) assert estr.theta[0] - mest.theta[0] < 0.1 assert estr.theta[1] - mest.theta[1] < 0.1 From e1045e6709c782f23adc3dc9fcfcec6b07a5d11d Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:54:38 -0400 Subject: [PATCH 06/53] fixing autodiff test class name --- tests/test_autodiff_deriv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 3f2a54d..432f174 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -7,7 +7,7 @@ from delicatessen.derivative import auto_differentiation -class TestEstimatingEquationsBase: +class TestAutoDifferentiation: def test_compare_elementary_operators(self): # Defining the functions to check From 5604214e02e4738381b338fadc11d6b87985fb6c Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 15:32:27 -0400 Subject: [PATCH 07/53] fix autodiff to handle 1param estimating functions --- tests/test_autodiff_deriv.py | 60 ++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 432f174..41018fc 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -5,6 +5,8 @@ from 
delicatessen.utilities import inverse_logit, identity from delicatessen.derivative import auto_differentiation +from delicatessen import MEstimator +from delicatessen.estimating_equations import ee_mean, ee_mean_variance class TestAutoDifferentiation: @@ -32,6 +34,21 @@ def f(x): # Checking npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + def test_single_evaluation(self): + def f(x): + return -32 + 4*x - 10*x**2 + + # Points to Evaluate at + xinput = [2.754, ] + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + + def test_compare_equality1_operators(self): # Defining the functions to check def f(x): @@ -261,3 +278,46 @@ def f(x): # Checking npt.assert_allclose(dx_true, dx_exact, atol=1e-5) + + +class TestSandwichAutoDiff: + + def test_exact_bread_mean(self): + # Data set + y = np.array([5, 1, 2, 4, 2, 4, 5, 7, 11, 1, 6, 3, 4, 6]) + + def psi(theta): + return y - theta + + mestr = MEstimator(psi, init=[0, ]) + mestr.estimate(deriv_method='exact') + + # Checking variance estimates + npt.assert_allclose(mestr.asymptotic_variance, + np.var(y, ddof=0), + atol=1e-6) + + def test_exact_bread_mean_var(self): + # Data set + y = np.array([5, 1, 2, 4, 2, 4, 5, 7, 11, 1, 6, 3, 4, 6]) + + def psi(theta): + return ee_mean_variance(theta=theta, y=y) + + mestr = MEstimator(psi, init=[0, 1, ]) + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + mestr.estimate(deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-5) From 24a02196b7021bbebf6cdcf65e2b64a1497a0b8f Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 15:32:45 -0400 Subject: [PATCH 08/53] fixing autodiff to handle 1param efs --- delicatessen/derivative.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index b86ad82..3646dbb 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -50,10 +50,16 @@ def f_deny_bool(function, x): By adding zero, we ensure that the final operation applied is an addition, and thus an float is returned after the boolean is evaluated. Essentially, we deny the call to ``__bool__`` being the last operation. """ - eval = function(x) # Short-name for function - eval_no_bool_end = [] # Empty list for storage - for e in eval: # For each evaluation in the function - eval_no_bool_end.append(e+0) # ... adding zero in a loop + evalued = function(x) # Short-name for evaluated function + + # Handling whether a single input (non-tuple like SciPy) is provided + if isinstance(evalued, PrimalTangentPairs): # If not a tuple, gives back the Pair object + eval_no_bool_end = evalued + 0 # ... then just add zero in this case + else: # Otherwise, + eval_no_bool_end = [] # ... empty list for storage + for e in evalued: # ... for each evaluation in the function + eval_no_bool_end.append(e+0) # ... 
adding zero in a loop + return eval_no_bool_end # Return the evaluation with the final addition # Meta-information about function and inputs @@ -73,11 +79,14 @@ def f_deny_bool(function, x): # Processing function output into gradient value or matrix evaluated_gradient = [] # List storage for the computed gradient - for pair in evaluated_pair: # For each evaluated primal,tangent pair - if isinstance(pair, PrimalTangentPairs): # ... if that pair is the key type - evaluated_gradient.append(pair.tangent) # ... then give back the derivative - else: # ... otherwise row has no xk operations - evaluated_gradient.append([0 for j in range(xshape)]) # ... so derivative is always zero + if isinstance(evaluated_pair, PrimalTangentPairs): # Handle case where only single input + evaluated_gradient.append([evaluated_pair.tangent, ]) # ... evaluate tangent and put list in list + else: # In all other cases + for pair in evaluated_pair: # ... for each evaluated primal, tangent pair + if isinstance(pair, PrimalTangentPairs): # ... if that pair is the key type + evaluated_gradient.append(pair.tangent) # ... then give back the derivative + else: # ... otherwise row has no xk operations + evaluated_gradient.append([0 for j in range(xshape)]) # ... so derivative is always zero # Return evaluated gradient as a NumPy array if len(evaluated_gradient) == 1: # If only consists of 1 item From c5ad43315a190f0d8ff34ac2b58e57bb771d81b0 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 15:38:39 -0400 Subject: [PATCH 09/53] autodiff tests for selected ee --- tests/test_autodiff_deriv.py | 47 +++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 41018fc..097c259 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -1,12 +1,16 @@ import pytest import numpy as np import numpy.testing as npt +import pandas as pd +from scipy.stats import logistic from scipy.optimize import approx_fprime from delicatessen.utilities import inverse_logit, identity from delicatessen.derivative import auto_differentiation from delicatessen import MEstimator -from delicatessen.estimating_equations import ee_mean, ee_mean_variance +from delicatessen.estimating_equations import ee_mean_variance, ee_regression + +np.random.seed(20230704) class TestAutoDifferentiation: @@ -292,6 +296,11 @@ def psi(theta): mestr = MEstimator(psi, init=[0, ]) mestr.estimate(deriv_method='exact') + # Checking bread estimates + npt.assert_allclose(mestr.bread, + [[1]], + atol=1e-6) + # Checking variance estimates npt.assert_allclose(mestr.asymptotic_variance, np.var(y, ddof=0), @@ -321,3 +330,39 @@ def psi(theta): npt.assert_allclose(var_approx, var_exact, atol=1e-5) + + def test_exact_bread_logit_reg(self): + n = 500 + data = pd.DataFrame() + data['X'] = np.random.normal(size=n) + data['Z'] = np.random.normal(size=n) + data['Y'] = np.random.binomial(n=1, p=logistic.cdf(0.5 + 2*data['X'] - 1*data['Z']), size=n) + data['C'] = 1 + data['w'] = np.random.uniform(1, 10, size=n) + + def psi_regression(theta): + return ee_regression(theta, + X=data[['C', 'X', 'Z']], y=data['Y'], + model='logistic', weights=data['w']) + + mestr = MEstimator(psi_regression, init=[0., 0., 0.]) + + # Auto-differentation + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(deriv_method='approx') + bread_approx = 
mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-6) From 1a08edb8c15e32375341f0852af9d6dc58456885 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Tue, 4 Jul 2023 15:47:22 -0400 Subject: [PATCH 10/53] better init for wlogit to prevent test fails --- tests/test_estimating_equations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_estimating_equations.py b/tests/test_estimating_equations.py index 9cb4ba8..7a2b594 100644 --- a/tests/test_estimating_equations.py +++ b/tests/test_estimating_equations.py @@ -599,8 +599,8 @@ def psi_regression(theta): X=data[['C', 'X', 'Z']], y=data['Y'], model='logistic', weights=data['w']) - mestimator = MEstimator(psi_regression, init=[0., 0., 0.]) - mestimator.estimate() + mestimator = MEstimator(psi_regression, init=[0., 2., -1.]) + mestimator.estimate(solver='lm') # Comparing to statsmodels GLM (with robust covariance) glm = smf.glm("Y ~ X + Z", data, freq_weights=data['w'], From 338017695d9e3fcd31af9e6a974a5c6d1cdcb47a Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:31:13 -0400 Subject: [PATCH 11/53] fixing autodiff test to have better stability --- tests/test_autodiff_deriv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 097c259..843ccd3 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -345,7 +345,7 @@ def psi_regression(theta): X=data[['C', 'X', 'Z']], y=data['Y'], model='logistic', weights=data['w']) - mestr = MEstimator(psi_regression, init=[0., 0., 0.]) + mestr = MEstimator(psi_regression, init=[0., 2., -1.]) # Auto-differentation mestr.estimate(deriv_method='exact') From 5f7b15cc1dd9ac502ff844abcabacba7d198687f Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:45:25 -0400 Subject: [PATCH 12/53] fixing what Pairs __bool__ returns --- delicatessen/derivative.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index 3646dbb..5f55d07 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -125,7 +125,7 @@ def __bool__(self): # To get np.where working properly, I need to have the internal boolean function for this class return # only the primal part. This is a bool object type, which is what is expected and has np.where operate as # expected. This only seems to be called for np.where and not the other operators (they work directly). 
- return self.primal + return bool(self.primal) # Equality operators From b25eaedacef070865c7f07ebf1ff43545e3b8745 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:48:18 -0400 Subject: [PATCH 13/53] adding more autodiff ee tests --- tests/test_autodiff_deriv.py | 120 +++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 5 deletions(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 843ccd3..d9d5862 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -8,7 +8,7 @@ from delicatessen.utilities import inverse_logit, identity from delicatessen.derivative import auto_differentiation from delicatessen import MEstimator -from delicatessen.estimating_equations import ee_mean_variance, ee_regression +from delicatessen.estimating_equations import ee_mean_variance, ee_regression, ee_ridge_regression np.random.seed(20230704) @@ -141,7 +141,7 @@ def f(x): return [np.sin(x[0]) + x[0]**2, np.cos(x[1]**2) + x[2], np.tan(x[2])**2 - 5*x[0]*x[1]**2, - np.arcsin(x[0]) - 2*np.arccos(x[1]) + np.arctan(0.1*x[2]) + x[3]**3, + np.arcsin(x[0]) - 2*np.arccos(0.1*x[1]) + np.arctan(0.1*x[2]) + x[3]**3, np.arctan(x[0] + np.arcsin(0.5*x[1] + 0.1*x[2])), np.sinh(1.1*x[0]) + np.cosh(0.5*x[1]) + np.tanh(0.9*x[2] + x[3]), np.arcsinh(x[0]) - 2*np.arccosh(x[1]) + np.arctanh(0.1*x[2]) + x[3]**3, @@ -304,7 +304,7 @@ def psi(theta): # Checking variance estimates npt.assert_allclose(mestr.asymptotic_variance, np.var(y, ddof=0), - atol=1e-6) + atol=1e-7) def test_exact_bread_mean_var(self): # Data set @@ -331,6 +331,42 @@ def psi(theta): var_exact, atol=1e-5) + def test_exact_bread_linear_reg(self): + n = 500 + data = pd.DataFrame() + data['X'] = np.random.normal(size=n) + data['Z'] = np.random.normal(size=n) + data['Y'] = 0.5 + 2*data['X'] - 1*data['Z'] + np.random.normal(size=n) + data['C'] = 1 + data['w'] = np.random.uniform(1, 10, size=n) + + def psi(theta): + return ee_regression(theta, + X=data[['C', 'X', 'Z']], y=data['Y'], + model='linear', weights=data['w']) + + mestr = MEstimator(psi, init=[0., 2., -1.]) + + # Auto-differentation + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-6) + def test_exact_bread_logit_reg(self): n = 500 data = pd.DataFrame() @@ -340,12 +376,49 @@ def test_exact_bread_logit_reg(self): data['C'] = 1 data['w'] = np.random.uniform(1, 10, size=n) - def psi_regression(theta): + def psi(theta): return ee_regression(theta, X=data[['C', 'X', 'Z']], y=data['Y'], model='logistic', weights=data['w']) - mestr = MEstimator(psi_regression, init=[0., 2., -1.]) + mestr = MEstimator(psi, init=[0., 2., -1.]) + + # Auto-differentation + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-7) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-7) + + def test_exact_bread_linear_ridge(self): + n = 500 + data = pd.DataFrame() + 
data['X'] = np.random.normal(size=n) + data['Z'] = np.random.normal(size=n) + data['Y'] = 0.5 + 2*data['X'] - 1*data['Z'] + np.random.normal(size=n) + data['C'] = 1 + data['w'] = np.random.uniform(1, 10, size=n) + + def psi(theta): + return ee_ridge_regression(theta, + X=data[['C', 'X', 'Z']], y=data['Y'], + penalty=[0, 1, 2], + model='linear', weights=data['w']) + + mestr = MEstimator(psi, init=[0., 2., -1.]) # Auto-differentation mestr.estimate(deriv_method='exact') @@ -366,3 +439,40 @@ def psi_regression(theta): npt.assert_allclose(var_approx, var_exact, atol=1e-6) + + def test_exact_bread_logit_ridge(self): + n = 500 + data = pd.DataFrame() + data['X'] = np.random.normal(size=n) + data['Z'] = np.random.normal(size=n) + data['Y'] = np.random.binomial(n=1, p=logistic.cdf(0.5 + 2*data['X'] - 1*data['Z']), size=n) + data['C'] = 1 + data['w'] = np.random.uniform(1, 10, size=n) + + def psi(theta): + return ee_ridge_regression(theta, + X=data[['C', 'X', 'Z']], y=data['Y'], + penalty=[0, 1, 2], + model='logistic', weights=data['w']) + + mestr = MEstimator(psi, init=[0., 2., -1.]) + + # Auto-differentation + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-7) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-7) From 6194465cceeb46430fe331da34b5c654e269bc3d Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Thu, 6 Jul 2023 12:38:51 -0400 Subject: [PATCH 14/53] adding test for robust mean, since uses different funcs --- tests/test_autodiff_deriv.py | 118 ++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index d9d5862..331897a 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -8,7 +8,7 @@ from delicatessen.utilities import inverse_logit, identity from delicatessen.derivative import auto_differentiation from delicatessen import MEstimator -from delicatessen.estimating_equations import ee_mean_variance, ee_regression, ee_ridge_regression +from delicatessen.estimating_equations import ee_mean_variance, ee_mean_robust, ee_regression, ee_ridge_regression np.random.seed(20230704) @@ -331,6 +331,122 @@ def psi(theta): var_exact, atol=1e-5) + def test_exact_bread_robust_mean(self): + n = 1000 + y = np.random.standard_cauchy(size=n) + + # Huber + def psi(theta): + return ee_mean_robust(theta, + y=y, + k=3, loss='huber') + + mestr = MEstimator(psi, init=[0.]) + + # Auto-differentation + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-6) + + # Andrew + def psi(theta): + return ee_mean_robust(theta, + y=y, + k=1.339, loss='andrew') + + mestr = MEstimator(psi, init=[0.]) + + # Auto-differentation + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(deriv_method='approx') + 
bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-6) + + # Tukey + def psi(theta): + return ee_mean_robust(theta, + y=y, + k=4.685, loss='tukey') + + mestr = MEstimator(psi, init=[0.]) + + # Auto-differentation + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-6) + + # Hampel + def psi(theta): + return ee_mean_robust(theta, + y=y, + k=8, loss='hampel', lower=2, upper=4) + + mestr = MEstimator(psi, init=[0.]) + + # Auto-differentation + mestr.estimate(deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-6) + def test_exact_bread_linear_reg(self): n = 500 data = pd.DataFrame() From c17019fa973e2f16c8ea1645fe7b1c1ac415e8c9 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Thu, 6 Jul 2023 18:24:37 -0400 Subject: [PATCH 15/53] adjusting N for robust mean --- tests/test_autodiff_deriv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 331897a..bd9f16d 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -332,7 +332,7 @@ def psi(theta): atol=1e-5) def test_exact_bread_robust_mean(self): - n = 1000 + n = 500 y = np.random.standard_cauchy(size=n) # Huber From fd36acd4a111a98643864df7b1bdaefaeb2f6ffc Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Thu, 6 Jul 2023 18:51:21 -0400 Subject: [PATCH 16/53] improved documentation for derivative --- delicatessen/derivative.py | 82 ++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 16 deletions(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index 5f55d07..b52bafc 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -2,15 +2,20 @@ def auto_differentiation(xk, f): - """Forward mode automatic differentiation. Automatic differentiation offers a way to compute the derivative exactly, - rather than numerically approximated (unlike the central difference method). Automatic differentiation iteratively - applies the chain rule into order to evaluate the derivative. + """Forward-mode automatic differentiation. Automatic differentiation offers a way to compute the exact derivative, + rather than numerically approximated (i.e., the central difference method). Automatic differentiation iteratively + applies the chain rule through recursive calls to evaluate the derivative. Note ---- This functionality is only intended for use behind the scenes in ``delicatessen``. I wrote this functionality to avoid additional dependencies. 
+    This is accomplished by the ``PrimalTangentPairs`` class, which is a special data type in ``delicatessen`` that
+    stores pairs of the original evaluation and the corresponding derivative for a variety of different mathematical
+    operations. This is what allows for the exact derivative calculation. The ``auto_differentiation`` function is
+    a wrapper that accesses and uses this class object as intended for derivative computations.
+
     Parameters
     ----------
     xk: ndarray, list, shape (n, )
         Point(s) or coordinate vector to evaluate the gradient at.
     f: callable
         Function to estimate the gradient of.
 
     Returns
     -------
-    ndarray
+    ndarray :
+        Corresponding array of the pairwise derivatives for all different input x values.
 
     Examples
     --------
+    Loading the necessary functions
+
+    >>> import numpy as np
+    >>> from delicatessen.derivative import auto_differentiation
+
+    To illustrate use, we will compute the derivative of the following function
+
+    .. math::
+
+        f(x) = x^2 - x + \sin(x + \sqrt{x})
+
+    >>> def f(x):
+    >>>     return x**2 - x + np.sin(x + np.sqrt(x))
+
+    If you work out the derivative by-hand, you will end up with the following
+
+    .. math::
+
+        2x - 1 + \left( \frac{1}{2 \sqrt{x}} + 1 \right) \cos(x + \sqrt{x})
+
+    Instead, we can use automatic differentiation to evaluate the derivative at a specific point. Here, we will
+    evaluate the derivative at :math:`x=1`
+
+    >>> dy = auto_differentiation(xk=[1, ], f=f)
+
+    which returns :math:`dy=0.3757795`. This is the same as if you plugged in :math:`x=1` into the previous equation.
+
+    Note
+    ----
+    If a derivative is not defined, then the function will return a ``NaN``.
+
+
+    The derivative of a function with multiple inputs and multiple outputs can also be evaluated. Consider the following
+    example with three inputs and two outputs
+
+    >>> def f(x):
+    >>>     return [x[0]**2 - x, np.sin(np.sqrt(x[1]) + x[2]) + x[2]*(x[1]**2)]
+
+    >>> dy = auto_differentiation(xk=[0.7, 1.2, -0.9], f=f)
+
+    which will return a 2-by-3 array of all the x-y pair derivatives. Here, the rows correspond to the output and the
+    columns correspond to the inputs.
 
     References
     ----------
-
+    Baydin AG, Pearlmutter BA, Radul AA, & Siskind JM. (2018). Automatic differentiation in machine learning: a survey.
+    *Journal of Machine Learning Research*, 18, 1-43.
     """
+    # Meta-information about function and inputs
+    xshape = len(xk)                                                 # The number of inputs into the function
+
+    # Set up Dual objects for evaluating gradient of function
+    pairs_for_gradient = []                                          # Storage for the pairs to provide function
+    for i in range(xshape):                                          # For each of the inputs
+        partial = np.zeros_like(xk)                                  # ... generate array of that size of all zeroes
+        partial[i] = 1                                               # ... replace 0 with 1 for current index
+        pairs_for_gradient.append(PrimalTangentPairs(xk[i], partial))  # ... then store as a primal,tangent pair
+    x_to_eval = np.asarray(pairs_for_gradient)                       # Convert from list to NumPy array
+
+    # Internal function to handle some specific exceptions when computing derivatives
     def f_deny_bool(function, x):
         """This internal function re-evaluates the input function with an additional operator. Namely zero is added
         to the function. This causes no difference in the primal or tangent pair. 
The purpose of this function is to @@ -62,17 +123,6 @@ def f_deny_bool(function, x): return eval_no_bool_end # Return the evaluation with the final addition - # Meta-information about function and inputs - xshape = len(xk) # The number of inputs into the function - - # Set up Dual objects for evaluating gradient of function - pairs_for_gradient = [] # Storage for the pairs to provide function - for i in range(xshape): # For each of the inputs - partial = np.zeros_like(xk) # ... generate array of that size of all zeroes - partial[i] = 1 # ... replace 0 with 1 for current index - pairs_for_gradient.append(PrimalTangentPairs(xk[i], partial)) # ... then store as a primal,tangent pair - x_to_eval = np.asarray(pairs_for_gradient) # Convert from list to NumPy array - # Evaluating the function for the primal,tangent pairs evaluated_pair = f_deny_bool(function=f, # Evaluate the primal,tangent pair with function x=x_to_eval) # ... at the given values From 4b8a0a35a5816b74f5e5bb314a4a13f321a4923e Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Thu, 6 Jul 2023 19:35:20 -0400 Subject: [PATCH 17/53] adding comments and fixing rpow to return array --- delicatessen/derivative.py | 217 +++++++++++++++++++------------------ 1 file changed, 113 insertions(+), 104 deletions(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index b52bafc..99f3607 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -66,7 +66,7 @@ def auto_differentiation(xk, f): example with three inputs and two outputs >>> def f(x): - >>> return [x[0]**2 - x, np.sin(np.sqrt(x[1]) + x[2]) + x[2]*(x[1]**2)] + >>> return [x[0]**2 - x[1], np.sin(np.sqrt(x[1]) + x[2]) + x[2]*(x[1]**2)] >>> dy = auto_differentiation(xk=[0.7, 1.2, -0.9], f=f) @@ -81,7 +81,7 @@ def auto_differentiation(xk, f): # Meta-information about function and inputs xshape = len(xk) # The number of inputs into the function - # Set up Dual objects for evaluating gradient of function + # Set up Pair objects for evaluating gradient of function pairs_for_gradient = [] # Storage for the pairs to provide function for i in range(xshape): # For each of the inputs partial = np.zeros_like(xk) # ... generate array of that size of all zeroes @@ -136,7 +136,8 @@ def f_deny_bool(function, x): if isinstance(pair, PrimalTangentPairs): # ... if that pair is the key type evaluated_gradient.append(pair.tangent) # ... then give back the derivative else: # ... otherwise row has no xk operations - evaluated_gradient.append([0 for j in range(xshape)]) # ... so derivative is always zero + empty_array = np.array([0 for j in range(xshape)]) # ... create array of all zeroes + evaluated_gradient.append(empty_array) # ... so derivative is always zero # Return evaluated gradient as a NumPy array if len(evaluated_gradient) == 1: # If only consists of 1 item @@ -146,26 +147,34 @@ def f_deny_bool(function, x): class PrimalTangentPairs: - """Unique class object for automatic differentiation. This process divides the inputs into 'primal' and 'tangent' - pairs. Operations on the pairs are then recurvsively called. + """Unique class object for automatic differentiation. This class divides the inputs into 'primal' and 'tangent' + pairs. The derivative is computed via operations on the pairs, which are then recurvsively called by this class. + This process allows use to successively apply the chain rule. + Note + ---- + This class only handles how the data is setup (i.e., it does not automatically compute a derivative. 
The function + for the derivative computation then takes this as input. + + + This data class is only meant to interact with ``auto_differentiation`` and should not be used elsewhere. Parameters ---------- primal : - B + The x values for which the derivative computation is desired. Must be the same length as ``tangent``. tangent : - A + Indicator for the location at which the derivatives is desired. Must be the same length as ``primal``. """ def __init__(self, primal, tangent): - # Processing of the inputs + # Processing of the inputs into the class, both initial and recursive calls if isinstance(primal, PrimalTangentPairs): # If given a PrimalTangentPair input self.primal = primal.primal # ... extract the primal element from input else: # Else self.primal = primal # ... directly save as new primal self.tangent = tangent # Store the tangent - # Basic operators + # Basic operators for the class object def __str__(self): # Conversion to string just to in case it gets called somehow @@ -177,187 +186,187 @@ def __bool__(self): # expected. This only seems to be called for np.where and not the other operators (they work directly). return bool(self.primal) - # Equality operators + # Equality operators for the class def __le__(self, other): # Less than or equal to operator - if isinstance(other, PrimalTangentPairs): # - comparison = (self.primal <= other.primal) # - else: # - comparison = (self.primal <= other) # - return PrimalTangentPairs(comparison, 0) # + if isinstance(other, PrimalTangentPairs): # If the input is a PrimalTangentPairs + comparison = (self.primal <= other.primal) # ... compare the primals + else: # Otherwise + comparison = (self.primal <= other) # ... directly compare primal with other + return PrimalTangentPairs(comparison, 0) # Return evaluated equality def __lt__(self, other): # Less than operator - if isinstance(other, PrimalTangentPairs): - comparison = (self.primal < other.primal) - else: - comparison = (self.primal < other) - return PrimalTangentPairs(comparison, 0) + if isinstance(other, PrimalTangentPairs): # If the input is a PrimalTangentPairs + comparison = (self.primal < other.primal) # ... compare the primals + else: # Otherwise + comparison = (self.primal < other) # ... directly compare primal with other + return PrimalTangentPairs(comparison, 0) # Return evaluated equality def __ge__(self, other): # Greater than or equal to operator - if isinstance(other, PrimalTangentPairs): - comparison = (self.primal >= other.primal) - else: - comparison = (self.primal >= other) - return PrimalTangentPairs(comparison, 0) + if isinstance(other, PrimalTangentPairs): # If the input is a PrimalTangentPairs + comparison = (self.primal >= other.primal) # ... compare the primals + else: # Otherwise + comparison = (self.primal >= other) # ... directly compare primal with other + return PrimalTangentPairs(comparison, 0) # Return evaluated equality def __gt__(self, other): # Greater than operator - if isinstance(other, PrimalTangentPairs): - comparison = (self.primal > other.primal) - else: - comparison = (self.primal > other) - return PrimalTangentPairs(comparison, 0) + if isinstance(other, PrimalTangentPairs): # If the input is a PrimalTangentPairs + comparison = (self.primal > other.primal) # ... compare the primals + else: # Otherwise + comparison = (self.primal > other) # ... 
directly compare primal with other
+        return PrimalTangentPairs(comparison, 0)         # Return evaluated equality
 
     def __eq__(self, other):
         # Equality operator
-        if isinstance(other, PrimalTangentPairs):
-            comparison = (self.primal == other.primal)
-        else:
-            comparison = (self.primal == other)
-        return PrimalTangentPairs(comparison, 0)
+        if isinstance(other, PrimalTangentPairs):        # If the input is a PrimalTangentPairs
+            comparison = (self.primal == other.primal)   # ... compare the primals
+        else:                                            # Otherwise
+            comparison = (self.primal == other)          # ... directly compare primal with other
+        return PrimalTangentPairs(comparison, 0)         # Return evaluated equality
 
     def __ne__(self, other):
         # Inequality operator
-        if isinstance(other, PrimalTangentPairs):
-            comparison = (self.primal != other.primal)
-        else:
-            comparison = (self.primal != other)
-        return PrimalTangentPairs(comparison, 0)
+        if isinstance(other, PrimalTangentPairs):        # If the input is a PrimalTangentPairs
+            comparison = (self.primal != other.primal)   # ... compare the primals
+        else:                                            # Otherwise
+            comparison = (self.primal != other)          # ... directly compare primal with other
+        return PrimalTangentPairs(comparison, 0)         # Return evaluated equality
 
     # Elementary mathematical operations
 
     def __neg__(self):
         # Negation operator
-        return PrimalTangentPairs(-self.primal,
-                                  -self.tangent)
+        return PrimalTangentPairs(-self.primal,          # Negation is simply negation
+                                  -self.tangent)         # ... and same for derivative
 
     def __add__(self, other):
         # Addition
-        if isinstance(other, PrimalTangentPairs):
-            return PrimalTangentPairs(self.primal + other.primal,
-                                      self.tangent + other.tangent)
-        else:
-            return PrimalTangentPairs(self.primal + other,
-                                      self.tangent)
+        if isinstance(other, PrimalTangentPairs):                    # When adding PrimalTangentPairs
+            return PrimalTangentPairs(self.primal + other.primal,    # ... add primals together
+                                      self.tangent + other.tangent)  # ... and add tangents together
+        else:                                                        # Otherwise
+            return PrimalTangentPairs(self.primal + other,           # ... add the primal and other
+                                      self.tangent)                  # ... tangent does not change by constants
 
     def __radd__(self, other):
         # Addition, reversed
-        return self.__add__(other)
+        return self.__add__(other)                       # Reversed addition (order matters for process)
 
     def __sub__(self, other):
         # Subtraction
-        if isinstance(other, PrimalTangentPairs):
-            return PrimalTangentPairs(self.primal - other.primal,
-                                      self.tangent - other.tangent)
-        else:
-            return PrimalTangentPairs(self.primal - other,
-                                      self.tangent)
+        if isinstance(other, PrimalTangentPairs):                    # When subtracting PrimalTangentPairs
+            return PrimalTangentPairs(self.primal - other.primal,    # ... minus primals together
+                                      self.tangent - other.tangent)  # ... and minus tangents together
+        else:                                                        # Otherwise
+            return PrimalTangentPairs(self.primal - other,           # ... minus primal and other
+                                      self.tangent)                  # ... tangent does not change by constants
 
     def __rsub__(self, other):
         # Subtraction, reversed
-        return PrimalTangentPairs(other - self.primal,
-                                  -1 * self.tangent)
+        return PrimalTangentPairs(other - self.primal,   # Subtracting in reverse order (lead constant)
+                                  -1 * self.tangent)     # ... simply multiply tangent by -1
 
     def __mul__(self, other):
         # Multiplication
-        if isinstance(other, PrimalTangentPairs):
-            return PrimalTangentPairs(self.primal * other.primal,
+        if isinstance(other, PrimalTangentPairs):                    # When multiplying PrimalTangentPairs
+            return PrimalTangentPairs(self.primal * other.primal,    # ...
multiply primals
                                       self.tangent * other.primal + self.primal * other.tangent)
-        else:
-            return PrimalTangentPairs(self.primal * other,
-                                      self.tangent * other)
+        else:                                                        # Otherwise
+            return PrimalTangentPairs(self.primal * other,           # ... multiply primal and constant
+                                      self.tangent * other)          # ... multiply tangent and constant
 
     def __rmul__(self, other):
         # Multiplication, reversed
-        return PrimalTangentPairs(self.primal * other,
-                                  self.tangent * other)
+        return PrimalTangentPairs(self.primal * other,   # Reversed order can evaluate the same
+                                  self.tangent * other)  # ... for primal and tangent pairs
 
     def __truediv__(self, other):
         # Division
-        if isinstance(other, PrimalTangentPairs):
-            return PrimalTangentPairs(self.primal / other.primal,
+        if isinstance(other, PrimalTangentPairs):                    # When dividing PrimalTangentPairs
+            return PrimalTangentPairs(self.primal / other.primal,    # ... divide primals
                                       (self.tangent * other.primal - self.primal * other.tangent) / (other.primal ** 2))
-        else:
-            return PrimalTangentPairs(self.primal / other,
-                                      self.tangent / other)
+        else:                                                        # Otherwise
+            return PrimalTangentPairs(self.primal / other,           # ... divide primal and constant
+                                      self.tangent / other)          # ... divide tangent and constant
 
     def __rtruediv__(self, other):
         # Division, reversed
-        return PrimalTangentPairs(other / self.primal,
+        return PrimalTangentPairs(other / self.primal,               # Reversed form of division
                                   (0 * self.primal - self.tangent * other) / (self.primal ** 2))
 
     def __pow__(self, other):
         # Power
-        if isinstance(other, PrimalTangentPairs):
-            fgx = other.primal * self.primal ** (other.primal - 1) * self.tangent
-            gx = self.primal ** other.primal * np.log(self.primal) * other.tangent
-            return PrimalTangentPairs(self.primal ** other.primal,
-                                      fgx + gx)
+        if isinstance(other, PrimalTangentPairs):                                # For PrimalTangentPairs to powers
+            fgx = other.primal * self.primal ** (other.primal - 1) * self.tangent  # ... initial piece of rule
+            gx = self.primal ** other.primal * np.log(self.primal) * other.tangent  # ... secondary piece of rule
+            return PrimalTangentPairs(self.primal ** other.primal,               # ... raise primal to primal
+                                      fgx + gx)                                  # ... apply the power rule
         else:
-            return PrimalTangentPairs(self.primal ** other,
+            return PrimalTangentPairs(self.primal ** other,                      # Otherwise compute directly
                                       other * self.primal ** (other - 1) * self.tangent)
 
     def __rpow__(self, other):
         # Power, reversed
-        return PrimalTangentPairs(other ** self.primal,
-                                  other ** self.primal * np.log(other))
+        return PrimalTangentPairs(other ** self.primal,                          # Constant to primal
+                                  self.tangent * other ** self.primal * np.log(other))  # ... exponential rule
 
     # Operators that sometimes have undefined derivatives
 
     def __abs__(self):
         # Absolute value
-        if self.primal > 0:
-            return PrimalTangentPairs(1 * self.primal, self.tangent)
-        elif self.primal < 0:
-            return PrimalTangentPairs(-1 * self.primal, -1 * self.tangent)
-        else:
-            return PrimalTangentPairs(0 * self.primal, np.nan * self.tangent)
+        if self.primal > 0:                                               # Absolute value for positive
+            return PrimalTangentPairs(1 * self.primal, self.tangent)      # ... primal, tangent
+        elif self.primal < 0:                                             # Absolute value for negative
+            return PrimalTangentPairs(-1 * self.primal, -1 * self.tangent)  # ... -primal, -tangent
+        else:                                                             # Absolute value at zero
+            return PrimalTangentPairs(0 * self.primal, np.nan * self.tangent)  # ...
zero, undefined def __floor__(self): # Floor - if self.primal % 1 == 0: - return PrimalTangentPairs(np.floor(self.primal), np.nan * self.tangent) - else: - return PrimalTangentPairs(np.floor(self.primal), 0 * self.tangent) + if self.primal % 1 == 0: # Floor with integers + return PrimalTangentPairs(np.floor(self.primal), np.nan * self.tangent) # ... floor, undefined + else: # Floor with remainder + return PrimalTangentPairs(np.floor(self.primal), 0 * self.tangent) # ... floor, 0 def __ceil__(self): # Ceiling - if self.primal % 1 == 0: - return PrimalTangentPairs(np.ceil(self.primal), np.nan * self.tangent) - else: - return PrimalTangentPairs(np.ceil(self.primal), 0 * self.tangent) + if self.primal % 1 == 0: # Ceil with integers + return PrimalTangentPairs(np.ceil(self.primal), np.nan * self.tangent) # ... ceil, undefined + else: # Ceil with remainder + return PrimalTangentPairs(np.ceil(self.primal), 0 * self.tangent) # ... ceil, 0 # NumPy logarithm and exponent operators def exp(self): # Exponentiate - return PrimalTangentPairs(np.exp(self.primal), - np.exp(self.primal) * self.tangent) + return PrimalTangentPairs(np.exp(self.primal), # Exponentiate primal + np.exp(self.primal) * self.tangent) # ... exponential rule def log(self): # Logarithm base-e - return PrimalTangentPairs(np.log(self.primal), - self.tangent / self.primal) + return PrimalTangentPairs(np.log(self.primal), # Natural logarithm primal + self.tangent / self.primal) # ... logarithm rule def log2(self): # Logarithm base-2 - pair = self.log() - return PrimalTangentPairs(pair.primal / np.log(2), - pair.tangent / np.log(2)) + pair = self.log() # Compute natural log + return PrimalTangentPairs(pair.primal / np.log(2), # Apply logarithm rule for primal + pair.tangent / np.log(2)) # ... and logarithm rule for tangent def log10(self): # Logarithm base-10 - pair = self.log() - return PrimalTangentPairs(pair.primal / np.log(10), - pair.tangent / np.log(10)) + pair = self.log() # Compute natural log + return PrimalTangentPairs(pair.primal / np.log(10), # Apply logarithm rule for primal + pair.tangent / np.log(10)) # ... and logarithm rule for tangent def sqrt(self): # Square root - return PrimalTangentPairs(np.sqrt(self.primal), - self.tangent / (2 * np.sqrt(self.primal))) + return PrimalTangentPairs(np.sqrt(self.primal), # Compute square root of primal + self.tangent / (2 * np.sqrt(self.primal))) # ... power rule for tangent # NumPy trigonometry operators From af09939155f81ef842f8a662082a7545799891f8 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Thu, 6 Jul 2023 19:38:13 -0400 Subject: [PATCH 18/53] fixing backslash in docs --- delicatessen/derivative.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index 99f3607..82e1c34 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -2,7 +2,7 @@ def auto_differentiation(xk, f): - """Forward-mode automatic differentiation. Automatic differentiation offers a way to compute the exact derivative, + r"""Forward-mode automatic differentiation. Automatic differentiation offers a way to compute the exact derivative, rather than numerically approximated (i.e., the central difference method). Automatic differentiation iteratively applies the chain rule through recursive calls to evaluate the derivative. 
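For intuition about the chain-rule recursion that ``PrimalTangentPairs`` implements, the forward-mode idea can be sketched with a minimal dual-number class. This is an illustrative sketch only: the names ``Dual`` and ``d_sin`` are not part of ``delicatessen``, and the real implementation above additionally handles comparisons, NumPy arrays, and ``np.where``.

import math


class Dual:
    """A primal (value) and tangent (derivative) carried through every operation."""
    def __init__(self, primal, tangent):
        self.primal = primal
        self.tangent = tangent

    def __add__(self, other):
        other = other if isinstance(other, Dual) else Dual(other, 0.0)
        return Dual(self.primal + other.primal,
                    self.tangent + other.tangent)        # sum rule

    __radd__ = __add__

    def __mul__(self, other):
        other = other if isinstance(other, Dual) else Dual(other, 0.0)
        return Dual(self.primal * other.primal,          # multiply primals
                    self.tangent * other.primal + self.primal * other.tangent)  # product rule

    __rmul__ = __mul__


def d_sin(x):
    # chain rule: (sin u)' = cos(u) * u'
    return Dual(math.sin(x.primal), math.cos(x.primal) * x.tangent)


x = Dual(2.0, 1.0)          # seeding the tangent with 1 mirrors partial[i] = 1 above
y = 3 * x * d_sin(x)        # f(x) = 3x sin(x)
print(y.primal, y.tangent)  # f(2) and f'(2) = 3 sin(2) + 6 cos(2)

Seeding one coordinate's tangent at 1 and the rest at 0, as ``auto_differentiation`` does, yields one column of the Jacobian per function evaluation.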
From 05779b8ad51aded706701dd83ed99bf4b4daefca Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 6 Jul 2023 19:38:27 -0400
Subject: [PATCH 19/53] adding better tests for power rule of autodiff

---
 tests/test_autodiff_deriv.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py
index bd9f16d..9f57222 100644
--- a/tests/test_autodiff_deriv.py
+++ b/tests/test_autodiff_deriv.py
@@ -26,6 +26,10 @@ def f(x):
                 -32 + x[0]**x[2] + x[1]**x[3],
                 (x[0] + x[1])**(x[2] + x[3]) + 6,
                 5*x[1]**2 + (x[2]**2)*5,
+                x[1] / 10 + (10/x[2])**2,
+                x[0]**x[1],
+                0.9**x[2],
+                (x[3] + 0.9)**(x[1] * x[0] - 0.1),
                 ]
 
         # Points to Evaluate at
@@ -52,7 +56,6 @@ def f(x):
         # Checking
         npt.assert_allclose(dx_approx, dx_exact, atol=1e-5)
 
-
     def test_compare_equality1_operators(self):
         # Defining the functions to check
         def f(x):

From a0314beeb0eb4c85526608fac14bde669f5575ab Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Fri, 7 Jul 2023 07:33:10 -0400
Subject: [PATCH 20/53] adding docs for autodiff

---
 delicatessen/derivative.py                        | 16 ++++++++++------
 docs/Reference/Utilities.rst                      | 16 ++++++++++++++--
 ...icatessen.derivative.auto_differentiation.rst  |  6 ++++++
 3 files changed, 30 insertions(+), 8 deletions(-)
 create mode 100644 docs/Reference/generated/delicatessen.derivative.auto_differentiation.rst

diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py
index 82e1c34..2a8cba1 100644
--- a/delicatessen/derivative.py
+++ b/delicatessen/derivative.py
@@ -8,8 +8,9 @@ def auto_differentiation(xk, f):
 
     Note
     ----
-    This functionality is only intended for use behind the scenes in ``delicatessen``. I wrote this functionality to
-    avoid additional dependencies.
+    This functionality is only intended for use behind the scenes in ``delicatessen``. Automatic differentiation is
+    implemented from scratch to avoid additional dependencies.
+
 
     This is accomplished by the ``PrimalTangentPairs`` class, which is a special data type in ``delicatessen`` that
     stores pairs of the original evaluation and the corresponding derivative for a variety of different mathematical
@@ -30,7 +31,7 @@ def auto_differentiation(xk, f):
 
     Examples
    --------
-    Loading necessary functions and building a generic data set for estimation of the mean
+    Loading necessary functions
 
     >>> import numpy as np
     >>> from delicatessen.derivative import auto_differentiation
@@ -55,7 +56,7 @@ def auto_differentiation(xk, f):
 
     >>> dy = auto_differentiation(xk=[1, ], f=f)
 
-    which returns :math:`dy=0.3757795`. This is the same as if you plugged in :math:`x=1` into the previous equation.
+    which returns ``0.3757795``. This is the same as if you plugged in :math:`x=1` into the previous equation.
 
     Note
     ----
@@ -70,13 +71,16 @@ def auto_differentiation(xk, f):
 
     >>> dy = auto_differentiation(xk=[0.7, 1.2, -0.9], f=f)
 
-    which will return a 2-by-3 array of all the x-y pair derivatives. Here, the rows correspond to the output and the
-    columns correspond to the inputs.
+    which will return a 2-by-3 array of all the x-y pair derivatives at the given values. Here, the rows correspond to
+    the output and the columns correspond to the inputs.
 
     References
     ----------
     Baydin AG, Pearlmutter BA, Radul AA, & Siskind JM. (2018). Automatic differentiation in machine learning: a
     survey. *Journal of Machine Learning Research*, 18, 1-43.
+
+    Rall LB & Corliss GF. (1996). An introduction to automatic differentiation.
Computational Differentiation:
+    Techniques, Applications, and Tools, 89, 1-18.
     """
     # Meta-information about function and inputs
     xshape = len(xk)                                                 # The number of inputs into the function
diff --git a/docs/Reference/Utilities.rst b/docs/Reference/Utilities.rst
index 06cf59b..e7c06bc 100644
--- a/docs/Reference/Utilities.rst
+++ b/docs/Reference/Utilities.rst
@@ -1,7 +1,8 @@
 Utilities
 =====================
-For manipulation of output or inputs, there are several basic utility functionalities for transformation of variables
-or predicted parameters. These are used internally by the estimating equations but are also made available to users.
+For manipulation of output or inputs, there are several basic utility functionalities for transformation of variables,
+predicted parameters, or computations. Some are used internally by the estimating equations but are also made
+available to users.
 
 Data transformations
 ---------------------
@@ -25,3 +26,14 @@ Design matrices
    :toctree: generated/
 
    additive_design_matrix
+
+Differentiation
+---------------------
+
+.. currentmodule:: delicatessen.derivative
+
+.. autosummary::
+   :toctree: generated/
+
+   auto_differentiation
+
diff --git a/docs/Reference/generated/delicatessen.derivative.auto_differentiation.rst b/docs/Reference/generated/delicatessen.derivative.auto_differentiation.rst
new file mode 100644
index 0000000..f59d78d
--- /dev/null
+++ b/docs/Reference/generated/delicatessen.derivative.auto_differentiation.rst
@@ -0,0 +1,6 @@
+delicatessen.derivative.auto\_differentiation
+=============================================
+
+.. currentmodule:: delicatessen.derivative
+
+.. autofunction:: auto_differentiation
\ No newline at end of file

From 266e5a995038006b786c61abd47996e9c52dbbfc Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Fri, 7 Jul 2023 07:33:36 -0400
Subject: [PATCH 21/53] tweaking some MEstimator docs

---
 delicatessen/mestimation.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/delicatessen/mestimation.py b/delicatessen/mestimation.py
index 22b2d6e..24336ab 100644
--- a/delicatessen/mestimation.py
+++ b/delicatessen/mestimation.py
@@ -16,11 +16,11 @@ class MEstimator:
 
     .. math::
 
-        \sum_{i=1}^n \psi(Z_i, \hat{\theta}) = 0
+        \sum_{i=1}^n \psi(O_i, \hat{\theta}) = 0
 
     where :math:`\psi` is the :math:`v`-dimensional vector of estimating equation(s), :math:`\hat{\theta}` is the
-    :math:`v`-dimensional parameter vector, and :math:`O_i` is the observed data (independent but not necessarily
-    identically distributed).
+    :math:`v`-dimensional parameter vector, and :math:`O_i` is the observed data (where units are independent but not
+    necessarily identically distributed).
 
     Note
     ----
@@ -35,17 +35,17 @@ class MEstimator:
 
     .. math::
 
-        B_n(Z, \hat{\theta})^{-1} F_n(Z, \hat{\theta}) \left\{B_n(Z, \hat{\theta}^{-1})\right\}^T
+        B_n(O, \hat{\theta})^{-1} F_n(O, \hat{\theta}) \left\{B_n(O, \hat{\theta})^{-1}\right\}^T
 
     where :math:`B` is the 'bread' and :math:`F` is the 'filling'
 
     .. math::
 
-        B_n(Z, \hat{\theta}) = n^{-1} \sum_{i=1}^{n} - \psi'(Z_i, \hat{\theta})
+        B_n(O, \hat{\theta}) = n^{-1} \sum_{i=1}^{n} - \psi'(O_i, \hat{\theta})
 
     .. math::
 
-        F_n(Z, \hat{\theta}) = n^{-1} \sum_{i=1}^{n} \psi(Z_i, \hat{\theta}) \psi(Z_i, \hat{\theta})^T
+        F_n(O, \hat{\theta}) = n^{-1} \sum_{i=1}^{n} \psi(O_i, \hat{\theta}) \psi(O_i, \hat{\theta})^T
 
     The partial derivatives for the bread are calculated using numerical approximation methods. Inverting the bread
     is done via NumPy's ``linalg.pinv``.
For the filling, the dot product is taken at :math:`\hat{\theta}`.

From 584e958573190510ddb6a0b4cd0166084a83d854 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Fri, 7 Jul 2023 07:58:46 -0400
Subject: [PATCH 22/53] more documentation updates

---
 delicatessen/derivative.py  |   2 +-
 delicatessen/mestimation.py | 128 ++++++++++++++++++------------------
 2 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py
index 2a8cba1..95f50ca 100644
--- a/delicatessen/derivative.py
+++ b/delicatessen/derivative.py
@@ -26,7 +26,7 @@ def auto_differentiation(xk, f):
 
     Returns
     -------
-    ndarray :
+    numpy.array :
         Corresponding array of the pairwise derivatives for all different input x values.
 
     Examples
diff --git a/delicatessen/mestimation.py b/delicatessen/mestimation.py
index 24336ab..cc705e0 100644
--- a/delicatessen/mestimation.py
+++ b/delicatessen/mestimation.py
@@ -11,8 +11,8 @@ class MEstimator:
     r"""M-Estimator for stacked estimating equations.
 
     M-Estimation, or loosely referred to as estimating equations, is a general approach to point and variance
-    estimation that consists of defining an estimator as the solution to an estimating equation (but does not require
-    the derivative of a log-likelihood function). M-estimators satisify the following
+    estimation that consists of defining an estimator as the solution to an estimating equation. M-estimators
+    satisfy the following
 
     .. math::
 
         \sum_{i=1}^n \psi(O_i, \hat{\theta}) = 0
 
@@ -25,7 +25,7 @@ class MEstimator:
     Note
     ----
     M-Estimation is advantageous in both theoretical and applied research. M-estimation simplifies proofs of
-    consistency and asymptotic normality of estimator sunder a large-sample approximation framework. In application,
+    consistency and asymptotic normality of estimators under a large-sample approximation framework. In application,
     M-estimators simplify estimation of the variance of parameters and automate the delta-method.
 
 
@@ -47,13 +47,14 @@ class MEstimator:
 
         F_n(O, \hat{\theta}) = n^{-1} \sum_{i=1}^{n} \psi(O_i, \hat{\theta}) \psi(O_i, \hat{\theta})^T
 
-    The partial derivatives for the bread are calculated using numerical approximation methods. Inverting the bread is
-    done via NumPy's ``linalg.pinv``. For the filling, the dot product is taken at :math:`\hat{\theta}`.
+    The partial derivatives for the bread are calculated using either numerical approximation (i.e., central difference
+    method) or forward-mode automatic differentiation. Inverting the bread is done via NumPy's ``linalg.pinv``. For
+    the filling, the dot product is taken at :math:`\hat{\theta}`.
 
     Note
     ----
-    A hard part (that must be done by the user) is to specify the stacked estimating equations. Be sure to check
-    the provided examples for the format. Pre-built estimating equations for common problems are made available.
+    A hard part, that must be done by the user, is to specify the estimating equations. Be sure to check the provided
+    examples for the expected format. Pre-built estimating equations for common problems are also made available.
 
     After completion of these steps, point and variance estimates are stored. These can be extracted from
 
@@ -61,8 +62,8 @@ class MEstimator:
 
     Note
     ----
-    For complex regression problems, the root-finding algorithms are not as robust relative to other approaches based
-    on higher-order derivatives. 'Pre-washed' values can be fed forward as the initial values.
+ For complex regression problems, the root-finding algorithms are not as robust relative to maximization approaches. + A solution for difficult problems is to 'pre-wash' the initial values. Parameters ---------- @@ -72,10 +73,10 @@ class MEstimator: init : list, set, array Initial values for the root-finding algorithm. subset : list, set, array, None, optional - Optional argument to conduct the root-finding procedure on a subset of parameters in the stacked estimating - equations. The input list is used to location index the parameter array via ``np.take()``. The subset list will + Optional argument to conduct the root-finding procedure on a subset of parameters in the estimating equations. + The input list is used to location index the parameter array via ``np.take()``. The subset list will only affect the root-finding procedure (i.e., the sandwich variance estimator ignores the subset argument). - Default is ``None``, which runs the root-finding procedure for all parameters in the stacked equations. + Default is ``None``, which runs the root-finding procedure for all parameters in the estimating equations. Note ---- @@ -133,16 +134,16 @@ class MEstimator: Note that ``len(init)`` should be equal to b. So in this case, two initial values are provided. - ``MEstimator`` can also be run with a user-provided root-finding algorithm. To specify a custom - root-finder, a function must be created by the user that consists of two keyword arguments (``stacked_equations``, - ``init``) and must return only the optimized values. The following is an example with SciPy's Levenberg-Marquardt - algorithm in ``root``. + ``MEstimator`` can also be run with a user-provided root-finding algorithm. To specify a custom root-finder, a + function must be created by the user that consists of two keyword arguments (``stacked_equations``, ``init``) and + must return only the optimized values. The following is an example with SciPy's Levenberg-Marquardt algorithm + implemented in ``root``. >>> def custom_solver(stacked_equations, init): >>> options = {"maxiter": 1000} >>> opt = root(stacked_equations, - >>> x0=np.asarray(init), method='lm', tol=1e-9, - >>> options=options) + >>> x0=np.asarray(init), + >>> method='lm', tol=1e-9, options=options) >>> return opt.x The provided custom root-finder can then be implemented like the following (continuing with the estimating equation @@ -161,10 +162,10 @@ class MEstimator: def __init__(self, stacked_equations, init=None, subset=None): self.stacked_equations = stacked_equations # User-input stacked estimating equations self.init = init # User-input initial starting values for solving estimating eqs - if subset is None: - self._subset_ = subset - else: - self._subset_ = sorted(subset) + if subset is None: # Handling subset of parameters + self._subset_ = subset # ... when None, set as None + else: # Otherwise + self._subset_ = sorted(subset) # ... ensure it is sorted # Storage for results from the M-Estimation procedure self.n_obs = None # Number of unique observations (calculated later) @@ -184,30 +185,29 @@ def estimate(self, solver='newton', maxiter=1000, tolerance=1e-9, deriv_method=' Method to use for the root-finding procedure. Default is the secant method (``scipy.optimize.newton``). Other built-in option is the Levenberg-Marquardt algorithm (``scipy.optimize.root(method='lm')``), and a modification of the Powell hybrid method (``scipy.optimize.root(method='hybr')``). 
Finally, any generic - root-finding algorithm can be used via a user-provided callable object (function). The function should + root-finding algorithm can be used via a user-provided callable object. The function must consist of two keyword arguments: ``stacked_equations``, and ``init``. Additionally, the function should - return only the optimized values. Please review the example in the documentation for how to provide a - custom root-finding algorithm. + return only the optimized values. Please review the provided example in the documentation for how to + implement a custom root-finding algorithm. maxiter : int, optional Maximum iterations to consider for the root finding procedure. Default is 1000 iterations. For complex estimating equations (without preceding optimization), this value may need to be increased. This argument - is not used for user-specified solvers + is not used for user-specified solvers. tolerance : float, optional Maximum tolerance for errors in the root finding. This argument is passed ``scipy.optimize`` via the - ``tol`` parameter. Default is 1e-9. + ``tol`` parameter. This argument is not used for user-specified solvers. Default is 1e-9. deriv_method : str, optional Method to compute the derivative of the estimating equations for the bread matrix. Options include numerical - approximation with the central difference method (``'approx'``) and forward-mode automatic differentiation - (``'exact'``). Default is numerical approximation. + approximation via the central difference method (``'approx'``) and forward-mode automatic differentiation + (``'exact'``). Default is ``'approx'``. dx : float, optional - Spacing to use to numerically approximate the partial derivatives of the bread matrix. Default is 1e-9. It - is generally not recommended to have a large ``dx``, since some large values can poorly approximate - derivatives. + Spacing to use to numerically approximate the partial derivatives of the bread matrix. Here, a small value + for ``dx`` should be used, since some large values can result in poor approximations. This argument is only + used when ``deriv_method='approx'``. Default is 1e-9. allow_pinv : bool, optional - The default is ``True`` which uses ``numpy.linalg.pinv`` to find the inverse (or pseudo-inverse if matrix is - non-invertible) for the bread. This default option is more robust to the possible matrices. If you want - to use ``numpy.linalg.inv`` instead (which does not support pseudo-inverse), set this parameter to - ``False``. + Whether to allow for the pseudo-inverse (via ``numpy.linalg.pinv``) if the bread matrix is determined to be + non-invertible. If you want to disallow the pseudo-inverse (i.e., use ``numpy.linalg.inv``), set this + argument to ``False``. Default is ``True``, which is more robust to the possible bread matrices. Returns ------- @@ -318,7 +318,7 @@ def confidence_intervals(self, alpha=0.05): .. math:: - \hat{\theta} \pm Z_{\alpha / 2} \times \widehat{SE}(\hat{\theta}) + \hat{\theta} \pm z_{\alpha / 2} \times \widehat{SE}(\hat{\theta}) Note ---- @@ -327,8 +327,8 @@ def confidence_intervals(self, alpha=0.05): Parameters ---------- alpha : float, optional - The :math:`\alpha` level for the corresponding confidence intervals. Default is 0.05, which calculate the - 95% confidence intervals. Notice that :math:`0<\alpha<1`. + The :math:`0 < \alpha < 1` level for the corresponding confidence intervals. Default is 0.05, which + corresponds to 95% confidence intervals. 
Returns ------- @@ -352,9 +352,9 @@ def confidence_intervals(self, alpha=0.05): return np.asarray([lower_ci, upper_ci]).T def _mestimation_answer_(self, theta): - """Internal function to evaluate the sum of the estimating equations. The summation must be internally evaluated - since the bread requires calculation of the sum of the derivatives. This function is used by the root-finding - procedure (since we need the subset applied). + """Internal function to evaluate the sum of the estimating equations. The summation is internally evaluated + since access to the estimating functions is needed for the sandwich variance computations. This function is + used by the root-finding procedure (since we need the subset applied). Parameters ---------- @@ -380,9 +380,9 @@ def _mestimation_answer_(self, theta): subset=self._subset_) # ... with specified subset def _mestimation_answer_no_subset_(self, theta): - """Internal function to evaluate the sum of the estimating equations. The summation must be internally evaluated - since the bread requires calculation of the sum of the derivatives. This function is used by the bread matrix - approximation procedure (since the subset should be ignored for the bread). + """Internal function to evaluate the sum of the estimating equations. The summation is internally evaluated + since access to the estimating functions is needed for the sandwich variance computations. This function is + used by the bread matrix computation procedure (since subset is ignored for the bread). Parameters ---------- @@ -400,22 +400,23 @@ def _mestimation_answer_no_subset_(self, theta): @staticmethod def _mestimator_sum_(stacked_equations, subset): - """Function to evaluate the sum of the M-estimator over the i units. This static method takes an optional - argument. + """Function to evaluate the sum of the M-estimator over the :math:`n` units. Note ---- Added in v1.0 to replace the inner functionality of ``_mestimation_answer_`` for the new ``approx_fprime`` but - still support ``subset`` (without having to flip the subset flag + still support ``subset`` (without having to flip the subset flag). Parameters ---------- - stacked_equations - subset + stacked_equations : + Estimating equations to evaluate + subset : + Whether to consider a subset of parameters Returns ------- - + numpy.array """ # IF stacked_equation returns 1 value, only return that 1 value if len(stacked_equations.shape) == 1: # Checking length @@ -526,28 +527,27 @@ def _bread_matrix_(self, theta, method, dx): ------- numpy.array """ - # Check how many values of theta there is - val_range = len(theta) + val_range = len(theta) # Check how many values of theta there is + est_eq = self._mestimation_answer_no_subset_ # Estimating equations to compute derivative of - # Calculating the bread via numerical approximation - if method.lower() == "approx": + # Compute the derivative + if method.lower() == "approx": # Numerical approximation method if val_range == 1: # When only a single theta is present - d = derivative(self._mestimation_answer_no_subset_, # ... approximate the derivative + d = derivative(est_eq, # ... approximate the derivative theta, dx=dx) # ... at the solved theta (input) bread_matrix = np.array([[d, ], ]) # ... return as 1-by-1 array object for inversion else: # Otherwise approximate the partial derivatives bread_matrix = approx_fprime(xk=theta, # ... use built-in jacobian functionality of SciPy - f=self._mestimation_answer_no_subset_, + f=est_eq, # ... with not-subset estimating equations epsilon=dx) # ... 
order option removed in v1.0 - # Calculating the bread via automatic differentiation - elif method.lower() == "exact": - bread_matrix = auto_differentiation(xk=theta, - f=self._mestimation_answer_no_subset_) + elif method.lower() == "exact": # Automatic Differentiation + bread_matrix = auto_differentiation(xk=theta, # Compute the exact derivative at theta + f=est_eq) # ... for the given estimating equations - # Error for invalid derivative option - else: - raise ValueError("Input for deriv_method was " + str(method) + else: # Error for unsupported option + raise ValueError("The input for deriv_method was " + + str(method) + ", but only 'approx' and 'exact' are available.") # Return bread (multiplied by negative 1 as in Stefanski & Boos) From 1a10220f373d1151f45f0d36335f15baabc529ea Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Fri, 7 Jul 2023 08:56:48 -0400 Subject: [PATCH 23/53] adding GAM test, changing solver for reg --- tests/test_autodiff_deriv.py | 60 ++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 9f57222..4f28081 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -8,7 +8,8 @@ from delicatessen.utilities import inverse_logit, identity from delicatessen.derivative import auto_differentiation from delicatessen import MEstimator -from delicatessen.estimating_equations import ee_mean_variance, ee_mean_robust, ee_regression, ee_ridge_regression +from delicatessen.estimating_equations import (ee_mean_variance, ee_mean_robust, ee_regression, ee_ridge_regression, + ee_additive_regression) np.random.seed(20230704) @@ -467,12 +468,12 @@ def psi(theta): mestr = MEstimator(psi, init=[0., 2., -1.]) # Auto-differentation - mestr.estimate(deriv_method='exact') + mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method - mestr.estimate(deriv_method='approx') + mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance @@ -503,12 +504,12 @@ def psi(theta): mestr = MEstimator(psi, init=[0., 2., -1.]) # Auto-differentation - mestr.estimate(deriv_method='exact') + mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method - mestr.estimate(deriv_method='approx') + mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance @@ -540,12 +541,12 @@ def psi(theta): mestr = MEstimator(psi, init=[0., 2., -1.]) # Auto-differentation - mestr.estimate(deriv_method='exact') + mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method - mestr.estimate(deriv_method='approx') + mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance @@ -577,12 +578,12 @@ def psi(theta): mestr = MEstimator(psi, init=[0., 2., -1.]) # Auto-differentation - mestr.estimate(deriv_method='exact') + mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method - mestr.estimate(deriv_method='approx') + mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance @@ -595,3 +596,44 @@ def psi(theta): npt.assert_allclose(var_approx, var_exact, atol=1e-7) + + def 
test_exact_bread_logit_gam(self): + n = 1000 + data = pd.DataFrame() + data['X'] = np.random.normal(size=n) + data['Z'] = np.random.normal(size=n) + data['Y'] = np.random.binomial(n=1, p=logistic.cdf(0.5 + 2 * data['X'] - 0.001*data['X']**2 - 1 * data['Z']), + size=n) + data['C'] = 1 + Xvals = np.asarray(data[['C', 'X', 'Z']]) + yvals = np.asarray(data['Y']) + spec = [None, {"knots": [-1, 0, 1], "penalty": 3}, {"knots": [-2, -1, 0, 1, 2], "penalty": 5}] + + def psi_regression(theta): + return ee_additive_regression(theta, + X=Xvals, y=yvals, + model='logistic', + specifications=spec) + + mestr = MEstimator(psi_regression, init=[0., 2., 0., 0., 1., 0., 0., 0., 0.]) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-6) + From 63b050c1e081b50266f06d9e8d3813fdfa13ea2f Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:06:46 -0400 Subject: [PATCH 24/53] adding traceback for github test --- tests/test_autodiff_deriv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 4f28081..294bf95 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -627,6 +627,9 @@ def psi_regression(theta): bread_approx = mestr.bread var_approx = mestr.variance + print() + print(np.max(var_approx - var_exact)) + # Checking bread estimates npt.assert_allclose(bread_approx, bread_exact, From ec1a6309e1a6605117b7ccccb8e2a408a4f02fda Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Fri, 7 Jul 2023 09:10:25 -0400 Subject: [PATCH 25/53] removing traceback --- tests/test_autodiff_deriv.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 294bf95..5a6f974 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -627,9 +627,6 @@ def psi_regression(theta): bread_approx = mestr.bread var_approx = mestr.variance - print() - print(np.max(var_approx - var_exact)) - # Checking bread estimates npt.assert_allclose(bread_approx, bread_exact, @@ -638,5 +635,5 @@ def psi_regression(theta): # Checking variance estimates npt.assert_allclose(var_approx, var_exact, - atol=1e-6) + atol=1e-5) From ce8b9bc65cefccc3e6a514846a2e1729b609c5fd Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Sun, 3 Sep 2023 09:43:11 -0400 Subject: [PATCH 26/53] Autodiff polygamma support --- delicatessen/derivative.py | 5 ++++ delicatessen/utilities.py | 48 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index 95f50ca..a44892a 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -1,4 +1,5 @@ import numpy as np +import scipy as sp def auto_differentiation(xk, f): @@ -434,3 +435,7 @@ def arctanh(self): return PrimalTangentPairs(np.arctanh(self.primal), self.tangent / (1 - self.primal**2)) + def polygamma(self, n): + # Polygamma function + return PrimalTangentPairs(sp.special.polygamma(n, self.primal), + 
self.tangent * sp.special.polygamma(n+1, self.primal))
diff --git a/delicatessen/utilities.py b/delicatessen/utilities.py
index 475c5a5..0c68e8b 100644
--- a/delicatessen/utilities.py
+++ b/delicatessen/utilities.py
@@ -1,6 +1,54 @@
 import warnings
 import numpy as np
+import scipy as sp
 from scipy.stats import norm
+from delicatessen.derivative import PrimalTangentPairs as PTPair
+
+
+def digamma(x):
+    """Digamma function. This is a wrapper function of ``scipy.special.digamma`` meant to enable automatic
+    differentiation with ``delicatessen``. When the input is a ``PrimalTangentPairs`` object, an internal function
+    that implements the digamma function is called. Otherwise, ``scipy.special.digamma`` is called for the input
+    object.
+
+    Parameters
+    ----------
+    x : int, float, ndarray
+        Real valued input
+
+    Returns
+    -------
+    Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will
+    return ``ndarray``).
+    """
+    if isinstance(x, PTPair):
+        return x.polygamma(n=0)
+    else:
+        return sp.special.polygamma(n=0, x=x)
+
+
+def polygamma(n, x):
+    """Polygamma functions. This is a wrapper function of ``scipy.special.polygamma`` meant to enable automatic
+    differentiation with ``delicatessen``. When the input is a ``PrimalTangentPairs`` object, an internal function
+    that implements the polygamma function is called. Otherwise, ``scipy.special.polygamma`` is called for the input
+    object.
+
+    Parameters
+    ----------
+    n : int
+        Order of the derivative of the digamma function
+    x : int, float, ndarray
+        Real valued input
+
+    Returns
+    -------
+    Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will
+    return ``ndarray``).
+    """
+    if isinstance(x, PTPair):
+        return x.polygamma(n=n)
+    else:
+        return sp.special.polygamma(n=n, x=x)
 
 
 def logit(prob):

From dba6864b7eeefcbf16445b55488ace4417e6a95e Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Sun, 3 Sep 2023 09:44:31 -0400
Subject: [PATCH 27/53] tests for autodiff polygamma

---
 tests/test_autodiff_deriv.py | 81 ++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 4 deletions(-)

diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py
index 5a6f974..0ae5aa2 100644
--- a/tests/test_autodiff_deriv.py
+++ b/tests/test_autodiff_deriv.py
@@ -2,13 +2,15 @@
 import numpy as np
 import numpy.testing as npt
 import pandas as pd
+import scipy as sp
 from scipy.stats import logistic
 from scipy.optimize import approx_fprime
 
-from delicatessen.utilities import inverse_logit, identity
 from delicatessen.derivative import auto_differentiation
+from delicatessen.utilities import inverse_logit, identity, digamma, polygamma
 from delicatessen import MEstimator
-from delicatessen.estimating_equations import (ee_mean_variance, ee_mean_robust, ee_regression, ee_ridge_regression,
+from delicatessen.estimating_equations import (ee_mean_variance, ee_mean_robust,
+                                               ee_regression, ee_glm, ee_ridge_regression,
                                                ee_additive_regression)
 
 np.random.seed(20230704)
@@ -227,6 +229,46 @@ def f(x):
         # Checking
         npt.assert_allclose(dx_true, dx_exact, atol=1e-5)
 
+    def test_scipy_special(self):
+        def f(x):
+            return [polygamma(n=1, x=x[0]),
+                    polygamma(n=1, x=x[1]),
+                    polygamma(n=1, x=x[2]),
+                    polygamma(n=1, x=x[3]),
+                    ]
+
+        # Points to Evaluate at
+        xinput = [0.5, 1.9, -2.3, 2]
+
+        # True Derivatives
+        dx_true = sp.special.polygamma(n=2, x=xinput)
+
+        # Evaluating the derivatives at the points
+        dx_exact = 
auto_differentiation(xinput, f) + + # Checking + npt.assert_allclose(dx_true, np.diag(dx_exact), atol=1e-5) + + def test_scipy_special_numapprox(self): + def f(x): + return [polygamma(n=1, x=x[0]), + polygamma(n=2, x=x[1]) + x[1]**2, + polygamma(n=3, x=x[2]*x[3] + x[1]), + polygamma(n=4, x=np.log(x[3] + x[1]) + x[0]**2) - x[3], + ] + + # Points to Evaluate at + xinput = [0.5, 1.9, -2.3, 2] + + # True Derivatives + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + def test_compare_deli_utilities(self): # Defining the functions to check def f(x): @@ -615,14 +657,14 @@ def psi_regression(theta): model='logistic', specifications=spec) - mestr = MEstimator(psi_regression, init=[0., 2., 0., 0., 1., 0., 0., 0., 0.]) - # Auto-differentation + mestr = MEstimator(psi_regression, init=[0., 2., 0., 0., 1., 0., 0., 0., 0.]) mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method + mestr = MEstimator(psi_regression, init=[0., 2., 0., 0., 1., 0., 0., 0., 0.]) mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance @@ -637,3 +679,34 @@ def psi_regression(theta): var_exact, atol=1e-5) + def test_exact_bread_glm_loggamma(self): + d = pd.DataFrame() + d['X'] = np.log([5, 10, 15, 20, 30, 40, 60, 80, 100]) + d['Y'] = [118, 58, 42, 35, 27, 25, 21, 19, 18] + d['I'] = 1 + + def psi(theta): + return ee_glm(theta, X=d[['I', 'X']], y=d['Y'], + distribution='gamma', link='log') + + # Auto-differentation + mestr = MEstimator(psi, init=[0., 0., 1.]) + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr = MEstimator(psi, init=[0., 0., 1.]) + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=5e-5) From 3dee31afe4935da63a3873b21ab0f6d8a6fbdf08 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Sun, 3 Sep 2023 16:45:56 -0400 Subject: [PATCH 28/53] defining autodiff transpose operation, adding tests --- delicatessen/derivative.py | 18 ++++- tests/test_autodiff_deriv.py | 127 ++++++++++++++++++++++++++++++++++- 2 files changed, 141 insertions(+), 4 deletions(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index a44892a..3febf6d 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -119,7 +119,7 @@ def f_deny_bool(function, x): evalued = function(x) # Short-name for evaluated function # Handling whether a single input (non-tuple like SciPy) is provided - if isinstance(evalued, PrimalTangentPairs): # If not a tuple, gives back the Pair object + if isinstance(evalued, PrimalTangentPairs): # If not a tuple, then gives back the Pair object eval_no_bool_end = evalued + 0 # ... then just add zero in this case else: # Otherwise, eval_no_bool_end = [] # ... empty list for storage @@ -179,6 +179,18 @@ def __init__(self, primal, tangent): self.primal = primal # ... 
directly save as new primal self.tangent = tangent # Store the tangent + # Defining properties + + def transpose(self): + storage = [] + for p, t in zip(self.primal, self.tangent): + storage.append(PrimalTangentPairs(p, t)) + return np.array(storage) + + @property + def T(self): + return self.transpose() + # Basic operators for the class object def __str__(self): @@ -279,7 +291,7 @@ def __mul__(self, other): # Multiplication if isinstance(other, PrimalTangentPairs): # When multiplying PrimalTangentPairs return PrimalTangentPairs(self.primal * other.primal, # ... multiply primals - self.tangent * other.primal + self.primal * other.tangent) + self.tangent*other.primal + self.primal*other.tangent) else: # Otherwise return PrimalTangentPairs(self.primal * other, # ... multiply primal and constant self.tangent * other) # ... multiply primal and constant @@ -435,6 +447,8 @@ def arctanh(self): return PrimalTangentPairs(np.arctanh(self.primal), self.tangent / (1 - self.primal**2)) + # SciPy special operators + def polygamma(self, n): # Polygamma function return PrimalTangentPairs(sp.special.polygamma(n, self.primal), diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 0ae5aa2..2b56d7d 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -11,7 +11,8 @@ from delicatessen import MEstimator from delicatessen.estimating_equations import (ee_mean_variance, ee_mean_robust, ee_regression, ee_glm, ee_ridge_regression, - ee_additive_regression) + ee_additive_regression, + ee_gformula, ee_gestimation_snmm) np.random.seed(20230704) @@ -229,6 +230,34 @@ def f(x): # Checking npt.assert_allclose(dx_true, dx_exact, atol=1e-5) + def test_numpy_transpose(self): + d = pd.DataFrame() + d['X'] = np.log([5, 10, 15, 20, 25]) + d['Y'] = [118, 58, 5, 30, 58] + d['I'] = 1 + y = np.asarray(d['Y'])[:, None] + X = np.asarray(d[['I', 'X']]) + + def psi(theta): + beta, alpha = theta[:-1], np.exp(theta[-1]) + beta = np.asarray(beta)[:, None] + pred_y = np.exp(np.dot(X, beta)) + ee_beta = ((y - pred_y) * X).T + + # This is the simplest example of the issue + ee_alpha = (alpha * y).T + return np.vstack([ee_beta, ee_alpha]) + + def internal_sum(theta): + return np.sum(psi(theta), axis=1) + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation([5.503, -0.6019, 0.0166], internal_sum) + dx_approx = approx_fprime([5.503, -0.6019, 0.0166], internal_sum, epsilon=1e-9) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-4) + def test_scipy_special(self): def f(x): return [polygamma(n=1, x=x[0]), @@ -260,7 +289,7 @@ def f(x): # Points to Evaluate at xinput = [0.5, 1.9, -2.3, 2] - # True Derivatives + # Approximate Derivatives dx_approx = approx_fprime(xinput, f, epsilon=1e-9) # Evaluating the derivatives at the points @@ -710,3 +739,97 @@ def psi(theta): npt.assert_allclose(var_approx, var_exact, atol=5e-5) + + def test_gcomputation(self): + d = pd.DataFrame() + d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] + d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1] + d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 
5, 5] + d['I'] = 1 + d['A1'] = 1 + d['A0'] = 0 + + # M-estimator + def psi(theta): + return ee_gformula(theta=theta, + y=d['Y'], + X=d[['I', 'A', 'V', 'W']], + X1=d[['I', 'A1', 'V', 'W']], + X0=d[['I', 'A0', 'V', 'W']]) + + # Auto-differentation + mestr = MEstimator(psi, init=[0., ] * 7) + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr = MEstimator(psi, init=[0., ] * 7) + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=5e-5) + + def test_gestimation_linear_snm(self): + d = pd.DataFrame() + d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] + d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1] + d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5] + d['I'] = 1 + + # M-estimator + def psi(theta): + return ee_gestimation_snmm(theta=theta, + y=d['Y'], A=d['A'], + W=d[['I', 'V', 'W']], + V=d[['I', 'V']], + model='linear') + + # Auto-differentation + mestr = MEstimator(psi, init=[0., ] * 5) + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr = MEstimator(psi, init=[0., ] * 5) + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=5e-5) From abfd36bdab079baecf2b3e5261a5426ee511b8e4 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Sun, 3 Sep 2023 17:03:18 -0400 Subject: [PATCH 29/53] making the transpose operator handle another edge case --- delicatessen/derivative.py | 32 ++++++++++++++++++++------------ tests/test_autodiff_deriv.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index 3febf6d..9efb971 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -179,18 +179,6 @@ def __init__(self, primal, tangent): self.primal = primal # ... directly save as new primal self.tangent = tangent # Store the tangent - # Defining properties - - def transpose(self): - storage = [] - for p, t in zip(self.primal, self.tangent): - storage.append(PrimalTangentPairs(p, t)) - return np.array(storage) - - @property - def T(self): - return self.transpose() - # Basic operators for the class object def __str__(self): @@ -203,6 +191,26 @@ def __bool__(self): # expected. This only seems to be called for np.where and not the other operators (they work directly). 
return bool(self.primal)
 
+    def transpose(self):
+        # Transpose operator is special
+        storage = []                                         # Create empty storage for the output
+
+        if self.tangent.ndim == 1:                           # When tangent is of 1 dimension
+            for p in self.primal:                            # ... then only the primals are a stack
+                storage.append(PrimalTangentPairs(p,         # ... so we reshape the primal stack
+                                                  self.tangent))  # ... and keep all the corresponding tangents
+        else:                                                # Otherwise, tangent is also 2 dimensions
+            for p, t in zip(self.primal, self.tangent):      # ... so both the primal and tangents
+                storage.append(PrimalTangentPairs(p, t))     # ... need to be re-paired
+
+        # Return the transposed array of the PrimalTangentPairs
+        return np.array(storage)
+
+    @property
+    def T(self):
+        # Giving the transpose() property the .T short-cut, like NumPy has (to be compatible with NumPy code)
+        return self.transpose()
+
     # Equality operators for the class
 
     def __le__(self, other):
diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py
index 2b56d7d..dece3a6 100644
--- a/tests/test_autodiff_deriv.py
+++ b/tests/test_autodiff_deriv.py
@@ -258,6 +258,34 @@ def internal_sum(theta):
         # Checking
         npt.assert_allclose(dx_approx, dx_exact, atol=1e-4)
 
+    def test_some_other_issue(self):
+        d = pd.DataFrame()
+        d['X'] = np.log([5, 10, 15, 20, 25])
+        d['Y'] = [118, 58, 5, 30, 58]
+        d['I'] = 1
+        y = np.asarray(d['Y'])[:, None]
+        X = np.asarray(d[['I', 'X']])
+
+        def psi(theta):
+            beta, alpha = theta[:-1], np.exp(theta[-1])
+            beta = np.asarray(beta)[:, None]
+            pred_y = np.exp(np.dot(X, beta))
+            ee_beta = ((y - pred_y) * X).T
+
+            # Another problematic transpose example
+            ee_alpha = (alpha - y).T
+            return np.vstack([ee_beta, ee_alpha])
+
+        def internal_sum(theta):
+            return np.sum(psi(theta), axis=1)
+
+        # Evaluating the derivatives at the points
+        dx_exact = auto_differentiation([5.503, -0.6019, 0.0166], internal_sum)
+        dx_approx = approx_fprime([5.503, -0.6019, 0.0166], internal_sum, epsilon=1e-9)
+
+        # Checking
+        npt.assert_allclose(dx_approx, dx_exact, atol=1e-4)
+
     def test_scipy_special(self):
         def f(x):
             return [polygamma(n=1, x=x[0]),

From 187e7f624fd2827650d99b9d7d1a403c785ccca5 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Mon, 4 Sep 2023 18:19:25 -0400
Subject: [PATCH 30/53] notes on deriv issue I found

---
 delicatessen/derivative.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py
index 9efb971..d6bc729 100644
--- a/delicatessen/derivative.py
+++ b/delicatessen/derivative.py
@@ -1,3 +1,4 @@
+import numpy
 import numpy as np
 import scipy as sp
 
@@ -171,11 +172,33 @@ class PrimalTangentPairs:
     tangent :
         Indicator for the location at which the derivatives is desired. Must be the same length as ``primal``.
     """
+    # After 3 days of debugging, this line is extremely important. Without it, the `test_numpy_operator_override`
+    # test will fail. This issue that this single line solves is that it lowers the priority of the NumPy operators,
+    # so that my __radd__ (or other reversed operators) will take precedence over numpy.ndarray.__add__
+    # This is extremely important when trying to mix operators together that start with a numpy.ndarray, as otherwise
+    # everything will default to a element-wise operation. However, we want it to stay as arrays up to that point.
+    # __array_priority__ = 2
+    # __array_ufunc__ = None
+    # Okay, neither of these are the solutions I want.
My solution needs to be preventing arrays making it into .primal
+    # in the first place...
+    # The issue remains that sometimes the operation order gives a NumPy array as the second object. This then defaults
+    # to an array operation. While setting the priority above gets rid of this issue, it opens a new issue.
+    # Specifically, the np.ones(...) trick then breaks when used in things like ee_gformula. So, setting the priority
+    # fixes the issue but introduces a new one.
+    # The current solution is to raise an error any time we see a ndarray sneak into the primal object. I have not
+    # figured out how to process this to avoid the objects stacking themselves. Right now, this can be coded around
+    # though via ensuring that the ndarray comes first in more complicated operations. This is because ndarray goes
+    # to use an element-wise procedure which gets us out of the whole array issue.
+
     def __init__(self, primal, tangent):
         # Processing of the inputs into the class, both initial and recursive calls
         if isinstance(primal, PrimalTangentPairs):       # If given a PrimalTangentPair input
+            if isinstance(primal.primal, np.ndarray):
+                raise ValueError("... order of operations issue... I am getting an array as an input")
             self.primal = primal.primal                  # ... extract the primal element from input
         else:                                            # Else
+            if isinstance(primal, np.ndarray):
+                raise ValueError("... order of operations issue... I am getting an array as an input")
             self.primal = primal                         # ... directly save as new primal
         self.tangent = tangent                           # Store the tangent
 
     def polygamma(self, n):
         # Polygamma function
-        return PrimalTangentPairs(sp.special.polygamma(n, self.primal),
+        return PrimalTangentPairs(float(sp.special.polygamma(n, self.primal)),
                                   self.tangent * sp.special.polygamma(n+1, self.primal))

From da1cc0e3fcae1c6bf7a2253b27036cd6be999e05 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Mon, 4 Sep 2023 18:19:42 -0400
Subject: [PATCH 31/53] tests to demo, lead works tail doesnt

---
 tests/test_autodiff_deriv.py | 63 +++++++++++++++++++++++++++++-------
 1 file changed, 52 insertions(+), 11 deletions(-)

diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py
index dece3a6..5d3ad05 100644
--- a/tests/test_autodiff_deriv.py
+++ b/tests/test_autodiff_deriv.py
@@ -258,33 +258,74 @@ def internal_sum(theta):
         # Checking
         npt.assert_allclose(dx_approx, dx_exact, atol=1e-4)
 
-    def test_some_other_issue(self):
+    def test_numpy_operator_lead(self):
         d = pd.DataFrame()
-        d['X'] = np.log([5, 10, 15, 20, 25])
-        d['Y'] = [118, 58, 5, 30, 58]
+        d['X'] = [1, 2, 3]
+        d['Y'] = [2, 4, 6]
         d['I'] = 1
         y = np.asarray(d['Y'])[:, None]
-        X = np.asarray(d[['I', 'X']])
+        X = np.asarray(d[['X', ]])
 
         def psi(theta):
             beta, alpha = theta[:-1], np.exp(theta[-1])
             beta = np.asarray(beta)[:, None]
-            pred_y = np.exp(np.dot(X, beta))
+            pred_y = np.dot(X, beta)
             ee_beta = ((y - pred_y) * X).T
 
-            # Another problematic transpose example
-            ee_alpha = (alpha - y).T
-            return np.vstack([ee_beta, ee_alpha])
+            # Order of operations issues that can happen
+            ee_alpha1 = ((y + alpha) * alpha).T
+            ee_alpha2 = ((y - alpha) * alpha).T
+            ee_alpha3 = ((-y + alpha) * alpha).T
+            ee_alpha4 = ((y * alpha) + alpha).T
+            ee_alpha5 = ((y / alpha) + alpha).T
+
+            return np.vstack([ee_beta,
+                              ee_alpha1, ee_alpha2, ee_alpha3, ee_alpha4, ee_alpha5
+                              ])
 
         def internal_sum(theta):
             return np.sum(psi(theta), axis=1)
 
         # Evaluating the derivatives at the points
-        dx_exact = auto_differentiation([5.503, -0.6019, 0.0166],
internal_sum) - dx_approx = approx_fprime([5.503, -0.6019, 0.0166], internal_sum, epsilon=1e-9) + dx_approx = approx_fprime([1.503, 0.2], internal_sum, epsilon=1e-9) + dx_exact = auto_differentiation([1.503, 0.2], internal_sum) # Checking - npt.assert_allclose(dx_approx, dx_exact, atol=1e-4) + npt.assert_allclose(dx_approx, dx_exact, rtol=1e-4) + + def test_numpy_operator_tail(self): + d = pd.DataFrame() + d['X'] = [1, 2, 3] + d['Y'] = [2, 4, 6] + d['I'] = 1 + y = np.asarray(d['Y'])[:, None] + X = np.asarray(d[['X', ]]) + + def psi(theta): + alpha = np.exp(theta[0]) + # ee_alpha1 = (np.asarray(d['Y']) + alpha)*alpha + ee_alpha2 = (np.asarray(d['Y']) * alpha) + alpha + + ee_alpha1 = alpha * (np.asarray(d['Y']) + alpha) + # ee_alpha2 = alpha + (np.asarray(d['Y']) * alpha) + # TODO okay, a potential fix is to always reverse the operation order + + # Attempted solutions + # ee_alpha1 = np.zeros(d['Y'].shape)*alpha + alpha*(np.asarray(d['Y']) + alpha) over-write does not work + # ee_alpha1 = (alpha ** (np.asarray(d['Y']) + alpha))**np.ones(d['Y'].shape) over-write power does not work + + return np.vstack([ee_alpha1, + ee_alpha2]) + + def internal_sum(theta): + return np.sum(psi(theta), axis=1) + + # Evaluating the derivatives at the points + dx_approx = approx_fprime([0.2, ], internal_sum, epsilon=1e-9) + dx_exact = auto_differentiation([0.2, ], internal_sum) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, rtol=1e-4) def test_scipy_special(self): def f(x): From 7d2cc7afca1bb6914be44648fcedabeaf350333c Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:29:18 -0400 Subject: [PATCH 32/53] updating autodiff to handle np.ndarray tailing --- delicatessen/derivative.py | 41 ++++++++------ tests/test_autodiff_deriv.py | 106 ++++++++++++++++++++++++++++------- 2 files changed, 110 insertions(+), 37 deletions(-) diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py index d6bc729..f115d25 100644 --- a/delicatessen/derivative.py +++ b/delicatessen/derivative.py @@ -172,23 +172,17 @@ class PrimalTangentPairs: tangent : Indicator for the location at which the derivatives is desired. Must be the same length as ``primal``. """ - # After 3 days of debugging, this line is extremely important. Without it, the `test_numpy_operator_override` - # test will fail. This issue that this single line solves is that it lowers the priority of the NumPy operators, - # so that my __radd__ (or other reversed operators) will take precedence over numpy.ndarray.__add__ - # This is extremely important when trying to mix operators together that start with a numpy.ndarray, as otherwise - # everything will default to a element-wise operation. However, we want it to stay as arrays up to that point. + # Some internal notes on handling ndarray's in this class (because there are some tricky issues). First, we do + # not want to override the priority of the NumPy operators (in fact we rely on it to take precedence). This is + # extremely important when trying to mix operators together that start with a numpy.ndarray, as we want + # everything to go to element-wise operations. So, we must not do something like the following # __array_priority__ = 2 - # __array_ufunc__ = None - # Okay, neither of these are the solutions I want. My solution needs to be preventing arrays making it into .primal - # in the first place... - # The issue remains that sometimes the operation order gives a NumPy array as the second object. 
This then defaults
-    # to an array operation. While setting the priority above gets rid of this issue, it opens a new issue.
-    # Specifically, the np.ones(...) trick then breaks when used in things like ee_gformula. So, setting the priority
-    # fixes the issue but introduces a new one.
-    # The current solution is to raise an error any time we see a ndarray sneak into the primal object. I have not
-    # figured out how to process this to avoid the objects stacking themselves. Right now, this can be coded around
-    # though via ensuring that the ndarray comes first in more complicated operations. This is because ndarray goes
-    # to use an element-wise procedure which gets us out of the whole array issue.
+    # Instead, we have modified all the operations that include `other` to check if the input is a np.ndarray. If it
+    # is (i.e., the np.ndarray is the second object in x [] y) then we manually reverse the operations so that the
+    # np.ndarray leads (i.e., y [] x). This gets NumPy to decompose everything correctly to element-wise operations.
+    # The downside is that this introduces floating point error for powers, when a pair is raised to a np.ndarray
+    # power and everything is an integer. This floating point error was determined to be acceptable to get everything
+    # working for the automatic differentiation.

     def __init__(self, primal, tangent):
         # Processing of the inputs into the class, both initial and recursive calls
         if isinstance(primal, PrimalTangentPairs):           # If given a PrimalTangentPair input
-            if isinstance(primal.primal, np.ndarray):
-                raise ValueError("... order of operations issue... I am getting an array an input")
             self.primal = primal.primal                      # ... extract the primal element from input
         else:                                                # Else
-            if isinstance(primal, np.ndarray):
-                raise ValueError("... order of operations issue... I am getting an array an input")
             self.primal = primal                             # ... directly save as new primal
         self.tangent = tangent                               # Store the tangent

@@ -296,6 +290,8 @@ def __add__(self, other):
         if isinstance(other, PrimalTangentPairs):            # When adding PrimalTangentPairs
             return PrimalTangentPairs(self.primal + other.primal,    # ... add primals together
                                       self.tangent + other.tangent)  # ... and add tangents together
+        elif isinstance(other, np.ndarray):
+            return other + self
         else:                                                # Otherwise
             return PrimalTangentPairs(self.primal + other,   # ... add the primal and other
                                       self.tangent)          # ... tangent does not change by constants
@@ -309,6 +305,8 @@ def __sub__(self, other):
         if isinstance(other, PrimalTangentPairs):            # When subtracting PrimalTangentPairs
             return PrimalTangentPairs(self.primal - other.primal,    # ... minus primals together
                                       self.tangent - other.tangent)  # ... and minus tangents together
+        elif isinstance(other, np.ndarray):
+            return -1*other + self
         else:                                                # Otherwise
             return PrimalTangentPairs(self.primal - other,   # ... minus primal and other
                                       self.tangent)          # ... tangent does not change by constants
@@ -323,6 +321,8 @@ def __mul__(self, other):
         if isinstance(other, PrimalTangentPairs):            # When multiplying PrimalTangentPairs
             return PrimalTangentPairs(self.primal * other.primal,    # ... multiply primals
                                       self.tangent*other.primal + self.primal*other.tangent)
+        elif isinstance(other, np.ndarray):                  # When operating with a np.ndarray
+            return other * self                              # ... reverse the order to make operations work
         else:                                                # Otherwise
             return PrimalTangentPairs(self.primal * other,   # ... multiply primal and constant
                                       self.tangent * other)  # ... multiply primal and constant
@@ -337,6 +337,8 @@ def __truediv__(self, other):
         if isinstance(other, PrimalTangentPairs):            # When dividing PrimalTangentPairs
             return PrimalTangentPairs(self.primal / other.primal,    # ... divide primals
                                       (self.tangent * other.primal - self.primal * other.tangent) / (other.primal ** 2))
+        elif isinstance(other, np.ndarray):                  # When operating with a np.ndarray
+            return (1/other) * self                          # ... reverse the order to make operations work
         else:                                                # Otherwise
             return PrimalTangentPairs(self.primal / other,   # ... divide primal and constant
                                       self.tangent / other)  # ...
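A quick numerical check of the identities these reversed operations rely on, and of the exp-log route that ``__pow__`` uses below (an illustrative sketch with plain floats standing in for the pairs):

    import numpy as np

    x = 1.7                                            # scalar, like a pair's primal
    a = np.array([2.0, 3.0, 4.0])

    assert np.allclose(x - a, -1*a + x)                # __sub__ reversal
    assert np.allclose(x / a, (1/a) * x)               # __truediv__ reversal
    assert np.allclose(x**a, np.exp(a * np.log(x)))    # powers via exp-log, for x > 0

    xn = -1.7                                          # negative base, integer powers
    signs = np.where(a % 2 == 1, -1.0, 1.0)
    assert np.allclose(xn**a, signs * np.exp(a * np.log(-xn)))

The exp-log route is the source of the small floating point error mentioned in the notes above.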
divide tangent and constant @@ -353,6 +355,13 @@ def __pow__(self, other): gx = self.primal ** other.primal * np.log(self.primal) * other.tangent # ... secondary piece of rule return PrimalTangentPairs(self.primal ** other.primal, # ... raise primal to primal fgx + gx) # ... apply the power rule + elif isinstance(other, np.ndarray): + if self.primal >= 0: + return np.exp(other * np.log(self)) + else: + return np.where(other % 2 == 1, + -np.exp(other * np.log(-self)), + np.exp(other * np.log(-self))) else: return PrimalTangentPairs(self.primal ** other, # Otherwise compute directly other * self.primal ** (other - 1) * self.tangent) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 5d3ad05..933a740 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -275,12 +275,14 @@ def psi(theta): # Order of operations issues that can happen ee_alpha1 = ((y + alpha) * alpha).T ee_alpha2 = ((y - alpha) * alpha).T - ee_alpha3 = ((-y + alpha) * alpha).T + ee_alpha3 = ((y + alpha) / alpha).T ee_alpha4 = ((y * alpha) + alpha).T ee_alpha5 = ((y / alpha) + alpha).T + ee_alpha6 = ((y ** alpha) + alpha).T return np.vstack([ee_beta, - ee_alpha1, ee_alpha2, ee_alpha3, ee_alpha4, ee_alpha5 + ee_alpha1, ee_alpha2, ee_alpha3, + ee_alpha4, ee_alpha5, ee_alpha6 ]) def internal_sum(theta): @@ -291,41 +293,43 @@ def internal_sum(theta): dx_exact = auto_differentiation([1.503, 0.2], internal_sum) # Checking - npt.assert_allclose(dx_approx, dx_exact, rtol=1e-4) + npt.assert_allclose(dx_approx, dx_exact, rtol=1e-6) def test_numpy_operator_tail(self): d = pd.DataFrame() - d['X'] = [1, 2, 3] - d['Y'] = [2, 4, 6] + d['Y'] = [2, -4, -3] d['I'] = 1 y = np.asarray(d['Y'])[:, None] - X = np.asarray(d[['X', ]]) def psi(theta): - alpha = np.exp(theta[0]) - # ee_alpha1 = (np.asarray(d['Y']) + alpha)*alpha - ee_alpha2 = (np.asarray(d['Y']) * alpha) + alpha - - ee_alpha1 = alpha * (np.asarray(d['Y']) + alpha) - # ee_alpha2 = alpha + (np.asarray(d['Y']) * alpha) - # TODO okay, a potential fix is to always reverse the operation order + alpha = np.log(theta[0]) + ee_alpha1 = (alpha + (y * alpha)).T + ee_alpha2 = (alpha - (y * alpha)).T + ee_alpha3 = (alpha * (y + alpha)).T + ee_alpha4 = (alpha / (y + alpha)).T + ee_alpha5 = (alpha ** y).T - # Attempted solutions - # ee_alpha1 = np.zeros(d['Y'].shape)*alpha + alpha*(np.asarray(d['Y']) + alpha) over-write does not work - # ee_alpha1 = (alpha ** (np.asarray(d['Y']) + alpha))**np.ones(d['Y'].shape) over-write power does not work - - return np.vstack([ee_alpha1, - ee_alpha2]) + return np.vstack([ee_alpha1, ee_alpha2, + ee_alpha3, ee_alpha4, + ee_alpha5]) def internal_sum(theta): return np.sum(psi(theta), axis=1) - # Evaluating the derivatives at the points + # Evaluating the derivatives at the point less than zero dx_approx = approx_fprime([0.2, ], internal_sum, epsilon=1e-9) dx_exact = auto_differentiation([0.2, ], internal_sum) # Checking - npt.assert_allclose(dx_approx, dx_exact, rtol=1e-4) + npt.assert_allclose(dx_approx, dx_exact, rtol=1e-6) + + # Evaluating the derivatives at the point greater than zero + # We do both, since it matters for the __pow__ check + dx_approx = approx_fprime([1.2, ], internal_sum, epsilon=1e-9) + dx_exact = auto_differentiation([1.2, ], internal_sum) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, rtol=1e-6) def test_scipy_special(self): def f(x): @@ -367,6 +371,32 @@ def f(x): # Checking npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + def test_scipy_special_arrays(self): + # TODO this fails 
currently + d = pd.DataFrame() + d['Y'] = [2, 4, 6] + y = np.asarray(d['Y'])[:, None] + + def f(x): + alpha = np.exp(x[0]) + ee_alpha1 = (y + polygamma(n=1, x=alpha)).T + ee_alpha2 = (polygamma(n=2, x=y*alpha)).T + stack = np.vstack([ee_alpha1, + ee_alpha2]) + return np.sum(stack, axis=1) + + # Points to Evaluate at + xinput = [0.5, ] + + # Approximate Derivatives + dx_approx = approx_fprime(xinput, f, epsilon=1e-9) + + # Evaluating the derivatives at the points + dx_exact = auto_differentiation(xinput, f) + + # Checking + npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) + def test_compare_deli_utilities(self): # Defining the functions to check def f(x): @@ -809,6 +839,40 @@ def psi(theta): var_exact, atol=5e-5) + def test_exact_bread_glm_lognb(self): + # TODO this still fails due to the issue with the polygamma operator for arrays + d = pd.DataFrame() + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [0, 0, 0, 0, 0, 15, 15, 25, 25, 45, 0, 0, 0, 0, 15, 15, 15, 25, 25, 35] + d['I'] = 1 + + def psi(theta): + return ee_glm(theta, X=d[['I', 'X']], y=d['Y'], + distribution='nb', link='log') + + # Auto-differentation + mestr = MEstimator(psi, init=[0., 0., 1.]) + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr = MEstimator(psi, init=[0., 0., 1.]) + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=5e-5) + def test_gcomputation(self): d = pd.DataFrame() d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, From 657556511893b1e5356dd91ca417c88393ce0421 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:42:40 -0400 Subject: [PATCH 33/53] getting polygamma working with autodiff revisions --- delicatessen/utilities.py | 33 ++++++++++++++++++--------------- tests/test_autodiff_deriv.py | 2 -- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/delicatessen/utilities.py b/delicatessen/utilities.py index 0c68e8b..f294e80 100644 --- a/delicatessen/utilities.py +++ b/delicatessen/utilities.py @@ -5,14 +5,16 @@ from delicatessen.derivative import PrimalTangentPairs as PTPair -def digamma(x): - """Digamma function. This is a wrapper function of ``scipy.special.digamma`` meant to enable automatic +def polygamma(n, x): + """Polygamma functions. This is a wrapper function of ``scipy.special.polygamma`` meant to enable automatic differentation with ``delicatessen``. When the input is a ``PrimalTangentPairs`` object, then an internal function - that implements the digamma function is called. Otherwise, ``scipy.special.digamma`` is called for the input + that implements the polygamma function is called. Otherwise, ``scipy.special.polygamma`` is called for the input object. Parameters ---------- + n : int + Order of the derivative of the digamma function x : int, float, ndarray Real valued input @@ -21,22 +23,26 @@ def digamma(x): Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will return ``ndarray). 
""" - if isinstance(x, PTPair): - return x.polygamma(n=0) + if isinstance(x, np.ndarray): + storage = [] + for xi in x: + pgnxi = polygamma(n=n, x=xi) + storage.append(pgnxi) + return np.array(storage) + elif isinstance(x, PTPair): + return x.polygamma(n=n) else: - return sp.special.polygamma(n=0, x=x) + return sp.special.polygamma(n=n, x=x) -def polygamma(n, x): - """Polygamma functions. This is a wrapper function of ``scipy.special.polygamma`` meant to enable automatic +def digamma(x): + """Digamma function. This is a wrapper function of ``scipy.special.digamma`` meant to enable automatic differentation with ``delicatessen``. When the input is a ``PrimalTangentPairs`` object, then an internal function - that implements the polygamma function is called. Otherwise, ``scipy.special.polygamma`` is called for the input + that implements the digamma function is called. Otherwise, ``scipy.special.digamma`` is called for the input object. Parameters ---------- - n : int - Order of the derivative of the digamma function x : int, float, ndarray Real valued input @@ -45,10 +51,7 @@ def polygamma(n, x): Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will return ``ndarray). """ - if isinstance(x, PTPair): - return x.polygamma(n=n) - else: - return sp.special.polygamma(n=n, x=x) + return polygamma(n=0, x=x) def logit(prob): diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 933a740..81d3d7d 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -372,7 +372,6 @@ def f(x): npt.assert_allclose(dx_approx, dx_exact, atol=1e-5) def test_scipy_special_arrays(self): - # TODO this fails currently d = pd.DataFrame() d['Y'] = [2, 4, 6] y = np.asarray(d['Y'])[:, None] @@ -840,7 +839,6 @@ def psi(theta): atol=5e-5) def test_exact_bread_glm_lognb(self): - # TODO this still fails due to the issue with the polygamma operator for arrays d = pd.DataFrame() d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] From 0ebe8fda47198edfd3674ff24ca9dfbc17e1bea9 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:42:54 -0400 Subject: [PATCH 34/53] importing custom special functions for regression --- delicatessen/estimating_equations/regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delicatessen/estimating_equations/regression.py b/delicatessen/estimating_equations/regression.py index c8e77bf..88c9d06 100644 --- a/delicatessen/estimating_equations/regression.py +++ b/delicatessen/estimating_equations/regression.py @@ -1,7 +1,7 @@ import warnings import numpy as np from scipy.stats import norm, cauchy -from scipy.special import digamma, polygamma +from delicatessen.utilities import digamma, polygamma from delicatessen.utilities import (logit, inverse_logit, identity, robust_loss_functions, From 0a2c48c54af8c9ac445187d692e6d823b89a42cb Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:58:57 -0400 Subject: [PATCH 35/53] polygamma and digamma test functions --- delicatessen/utilities.py | 4 ++-- tests/test_utilities.py | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/delicatessen/utilities.py b/delicatessen/utilities.py index f294e80..a619df1 100644 --- a/delicatessen/utilities.py +++ b/delicatessen/utilities.py @@ 
-35,7 +35,7 @@ def polygamma(n, x): return sp.special.polygamma(n=n, x=x) -def digamma(x): +def digamma(z): """Digamma function. This is a wrapper function of ``scipy.special.digamma`` meant to enable automatic differentation with ``delicatessen``. When the input is a ``PrimalTangentPairs`` object, then an internal function that implements the digamma function is called. Otherwise, ``scipy.special.digamma`` is called for the input @@ -51,7 +51,7 @@ def digamma(x): Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will return ``ndarray). """ - return polygamma(n=0, x=x) + return polygamma(n=0, x=z) def logit(prob): diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 4a0c3b2..a295582 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -1,5 +1,6 @@ import pytest import numpy as np +import scipy as sp import numpy.testing as npt import pandas as pd import statsmodels.api as sm @@ -9,6 +10,7 @@ from delicatessen import MEstimator from delicatessen.estimating_equations import ee_regression from delicatessen.utilities import (identity, inverse_logit, logit, + polygamma, digamma, robust_loss_functions, regression_predictions, spline, @@ -82,6 +84,45 @@ def test_logit_array(self): npt.assert_allclose(logit(prbs), odds) + def test_polygamma(self): + """Checks the polygamma wrapper function + """ + y = np.array([-1, 0, 2, 3, 12, -58101, 5091244]) + + # Single input check + npt.assert_allclose(polygamma(n=1, x=y[0]), + sp.special.polygamma(n=1, x=y[0])) + npt.assert_allclose(polygamma(n=3, x=y[-1]), + sp.special.polygamma(n=3, x=y[-1])) + + # Multiple input check + npt.assert_allclose(polygamma(n=2, x=y), + sp.special.polygamma(n=2, x=y)) + npt.assert_allclose(polygamma(n=4, x=y), + sp.special.polygamma(n=4, x=y)) + + # Multiple input check into 2D array + y = y[:, None] + npt.assert_allclose(polygamma(n=1, x=y), + sp.special.polygamma(n=1, x=y)) + npt.assert_allclose(polygamma(n=4, x=y), + sp.special.polygamma(n=4, x=y)) + + def test_digamma(self): + """Checks the digamma wrapper function + """ + y = np.array([-1, 0, 2, 3, 12, -58101, 5091244]) + + # Single input check + v2 = sp.special.digamma(y[0]) + npt.assert_allclose(digamma(y[0]), v2) + v2 = sp.special.digamma(y[-1]) + npt.assert_allclose(digamma(y[-1]), v2) + + # Multiple input check + v2 = sp.special.digamma(y) + npt.assert_allclose(digamma(z=y), v2) + def test_inverse_logit_array(self): """Checks the inverse inverse logit transformation of an array """ From 6bfc5082a2214e44e9bfd2561d9b97ec47f832bd Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Thu, 7 Sep 2023 12:12:58 -0400 Subject: [PATCH 36/53] updating default maxiter for the root-finder --- delicatessen/mestimation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/delicatessen/mestimation.py b/delicatessen/mestimation.py index 0123d03..7ac9fc1 100644 --- a/delicatessen/mestimation.py +++ b/delicatessen/mestimation.py @@ -175,7 +175,7 @@ def __init__(self, stacked_equations, init=None, subset=None): self.variance = None # Covariance matrix for theta values (calculated later) self.asymptotic_variance = None # Asymptotic covariance matrix for theta values (calculated later) - def estimate(self, solver='newton', maxiter=1000, tolerance=1e-9, deriv_method='approx', dx=1e-9, allow_pinv=True): + def estimate(self, solver='newton', maxiter=5000, tolerance=1e-9, deriv_method='approx', dx=1e-9, allow_pinv=True): """Function to carry out 
the point and variance estimation of theta. After this procedure, the point
         estimates (in ``theta``) and the covariance matrix (in ``variance``) can be extracted.

@@ -190,7 +190,7 @@ def estimate(self, solver='newton', maxiter=1000, tolerance=1e-9, deriv_method='
             return only the optimized values. Please review the provided example in the documentation for how to
             implement a custom root-finding algorithm.
         maxiter : int, optional
-            Maximum iterations to consider for the root finding procedure. Default is 1000 iterations. For complex
+            Maximum iterations to consider for the root finding procedure. Default is 5000 iterations. For complex
             estimating equations (without preceding optimization), this value may need to be increased. This argument
             is not used for user-specified solvers.
         tolerance : float, optional

From c3fdc154ae236019c97824bfb0691f6d2f617c96 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 12:37:05 -0400
Subject: [PATCH 37/53] updating default root-finder

---
 delicatessen/mestimation.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/delicatessen/mestimation.py b/delicatessen/mestimation.py
index 7ac9fc1..94f50fc 100644
--- a/delicatessen/mestimation.py
+++ b/delicatessen/mestimation.py
@@ -175,20 +175,20 @@ def __init__(self, stacked_equations, init=None, subset=None):
         self.variance = None                  # Covariance matrix for theta values (calculated later)
         self.asymptotic_variance = None       # Asymptotic covariance matrix for theta values (calculated later)

-    def estimate(self, solver='newton', maxiter=5000, tolerance=1e-9, deriv_method='approx', dx=1e-9, allow_pinv=True):
+    def estimate(self, solver='lm', maxiter=5000, tolerance=1e-9, deriv_method='approx', dx=1e-9, allow_pinv=True):
         """Function to carry out the point and variance estimation of theta. After this procedure, the point
         estimates (in ``theta``) and the covariance matrix (in ``variance``) can be extracted.

         Parameters
         ----------
         solver : str, function, callable, optional
-            Method to use for the root-finding procedure. Default is the secant method (``scipy.optimize.newton``).
-            Other built-in option is the Levenberg-Marquardt algorithm (``scipy.optimize.root(method='lm')``), and
-            a modification of the Powell hybrid method (``scipy.optimize.root(method='hybr')``). Finally, any generic
-            root-finding algorithm can be used via a user-provided callable object. The function must
-            consist of two keyword arguments: ``stacked_equations``, and ``init``. Additionally, the function should
-            return only the optimized values. Please review the provided example in the documentation for how to
-            implement a custom root-finding algorithm.
+            Method to use for the root-finding procedure. Default is the Levenberg-Marquardt algorithm
+            (``scipy.optimize.root(method='lm')``). Other built-in options are the secant method
+            (``scipy.optimize.newton``) and a modification of the Powell hybrid method
+            (``scipy.optimize.root(method='hybr')``). Finally, any generic root-finding algorithm can be used via a
+            user-provided callable object. The function must consist of two keyword arguments: ``stacked_equations``,
+            and ``init``. Additionally, the function should return only the optimized values. Please review the
+            provided example in the documentation for how to implement a custom root-finding algorithm.
         maxiter : int, optional
             Maximum iterations to consider for the root finding procedure. Default is 5000 iterations.
For complex
             estimating equations (without preceding optimization), this value may need to be increased. This argument

From 022d2e74a0ad7f78282a92b7d58bffc721ec42d4 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 12:37:19 -0400
Subject: [PATCH 38/53] Adding a Basics section to the docs

---
 docs/Basics.rst | 54 +++++++++++++++++++++++++++++++++++++++++++++++++
 docs/index.rst  |  1 +
 2 files changed, 55 insertions(+)
 create mode 100644 docs/Basics.rst

diff --git a/docs/Basics.rst b/docs/Basics.rst
new file mode 100644
index 0000000..fcd2dd0
--- /dev/null
+++ b/docs/Basics.rst
@@ -0,0 +1,54 @@
+Basics
+=====================================
+
+Here, the basics of M-estimation are reviewed. An M-estimator, :math:`\hat{\theta}`, is defined as the solution to
+the estimating equation
+
+.. math::
+
+    \sum_{i=1}^{n} \psi(O_i, \hat{\theta}) = 0
+
+
+where :math:`\psi` is a known :math:`v \times 1`-dimensional estimating function, :math:`O_i` indicates the observed
+unit :math:`i \in \{1,...,n\}`, and the parameters are the vector :math:`\theta = (\theta_1, \theta_2, ..., \theta_v)`.
+Note that :math:`v` is finite-dimensional and the number of parameters matches the dimension of the estimating
+functions.
+
+Point Estimation
+-------------------------------
+To implement the point estimation of :math:`\theta`, we use a *root-finding* algorithm. Root-finding algorithms are
+procedures for finding the zeroes (i.e., roots) of an equation. This is accomplished in ``delicatessen`` by using
+SciPy's root-finding algorithms.
+
+Variance Estimation
+-------------------------------
+To estimate the variance for :math:`\theta`, the M-estimator uses the empirical sandwich variance estimator:
+
+.. math::
+
+    V_n(O,\hat{\theta}) = B_n(O,\hat{\theta})^{-1} F_n(O,\hat{\theta}) \left(B_n(O,\hat{\theta})^{-1}\right)^T
+
+where the 'bread' is
+
+.. math::
+
+    B_n(O,\hat{\theta}) = n^{-1} \sum_{i=1}^n - \psi'(O_i, \hat{\theta})
+
+where :math:`\psi'` indicates the partial derivative, and the 'filling' is
+
+.. math::
+
+    F_n(O, \hat{\theta}) = n^{-1} \sum_{i=1}^n \psi(O_i, \hat{\theta}) \psi(O_i, \hat{\theta})^T
+
+The sandwich variance requires finding the derivative of the estimating functions and some matrix algebra. Again, we
+can get the computer to complete all these calculations for us. For the derivative, ``delicatessen`` offers two
+options. First, the derivatives can be numerically approximated using the central difference method. This is done
+using SciPy's ``approx_fprime`` functionality. As of ``v2.0``, the derivatives can also be computed using forward-mode
+automatic differentiation. This approach provides the exact derivative (as opposed to an approximation). This is
+implemented by-hand in ``delicatessen`` via operator overloading. Finally, we use forward-mode because backward-mode
+offers no time advantage here, as the Jacobian is square.
+
+After computing the derivatives, the filling is computed via a dot product. The bread is then inverted using NumPy.
+Finally, the bread and filling matrices are combined via dot products.
+
+This introduction has all been a little abstract. In the following Examples section, we will see how M-estimators can
+be used to address specific estimation problems.
diff --git a/docs/index.rst b/docs/index.rst
index ea01c17..95b8259 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -79,6 +79,7 @@ Contents:

 .. toctree::
   :maxdepth: 3

+  Basics.rst
   Examples.rst
   Built-in Equations
   Custom Equations
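As a concrete sketch of the pieces described in Basics.rst above, consider the sample mean, where :math:`\psi(O_i, \theta) = O_i - \theta` and the bread is simply 1 (illustrative code only, not part of the patch):

.. code::

    import numpy as np

    y = np.array([1., 3., 2., 5., 4.])
    n = y.shape[0]
    theta = np.mean(y)                   # root of the estimating equation

    psi = (y - theta)[:, None]           # n-by-1 evaluated estimating functions
    bread = np.array([[1.]])             # -psi'(O_i, theta) = 1 for every i
    filling = (psi.T @ psi) / n          # dot-product 'filling'
    b_inv = np.linalg.inv(bread)
    asymptotic_var = b_inv @ filling @ b_inv.T
    variance = asymptotic_var / n        # variance for the estimated mean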
From 85b413fb0017e570560c3be2bd698b0c1354442c Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 14:20:08 -0400
Subject: [PATCH 39/53] updating np.nan recommendations for autodiff

---
 docs/Custom Equations.rst | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/docs/Custom Equations.rst b/docs/Custom Equations.rst
index 29c199d..1d4562d 100644
--- a/docs/Custom Equations.rst
+++ b/docs/Custom Equations.rst
@@ -164,13 +164,13 @@ Handling ``np.nan``
 -------------------------------------

 Sometimes, ``np.nan`` will be necessary to include in your data set. However, ``delicatessen`` does not naturally
-handle ``np.nan``. In fact, ``delicatessen`` will return an error when there are ``np.nan``'s present (this is by
+handle ``np.nan``. In fact, ``delicatessen`` will return an error when there are ``np.nan``'s detected (this is by
 design). The following discusses how ``np.nan`` can be handled appropriately in the estimating equations.

 In the first case, we will consider handling ``np.nan`` with a built-in estimating equation. When trying to fit a
-regression model where there are ``np.nan``'s present, the estimating equation missing values must be manually set to
-zero. This can be done via the ``numpy.nan_to_num`` function. Below is an example using the built-in logistic
-regression estimating equations:
+regression model where there are ``np.nan``'s present, the missing values will be set to a placeholder value and their
+contributions will be manually removed using an indicator function for missingness. Below is an example using the
+built-in logistic regression estimating equations:

 .. code::

@@ -187,25 +187,29 @@ regression estimating equations:
     X = np.asarray(d[['C', 'X']])
     y = np.asarray(d['y'])
+    y_no_nan = np.asarray(d['y'].fillna(0.5))
+    r = np.where(d['y'].isna(), 0, 1)

     def psi(theta):
-        # Estimating logistic model values
+        # Estimating logistic model values with filled-in y's
         a_model = ee_logistic_regression(theta,
-                                         X=X, y=y)
-        # Setting
-        a_model = np.nan_to_num(a_model, copy=False, nan=0.)
+                                         X=X, y=y_no_nan)
+        # Setting contributions with missing to zero manually
+        a_model = a_model * r
         return a_model

     mest = MEstimator(psi, init=[0, 0, ])
     mest.estimate()

-If the ``numpy.nan_to_num`` function had not been included, the optimized points would have been ``nan``.
+If the contributions of the observations with missing ``y``'s had not been zeroed out, the optimized points would have
+been ``nan``. Alternatively, had ``numpy.nan_to_num`` been used with ``deriv_method='exact'``, this would have also
+resulted in an error (``numpy.nan_to_num`` is okay to use with ``deriv_method='approx'``).

 As a second example, we will consider estimating the mean with missing data and correcting for informative
 missingness by inverse probability weighting. To reduce random error, this example uses 10,000 observations. Here, we
 must set
-nan's to be zero's prior to subtracting off the mean. This is shown below:
+``nan``'s to be zero's prior to subtracting off the mean. This is shown below:

 .. code::

@@ -269,6 +273,9 @@ Here is a list of common mistakes, most of which I have done myself.
    bread and filling), so do not sum over n in ``psi``!
 4. The ``theta`` values and ``b`` *must* be in the same order. If ``theta[0]`` is the mean, the 1st row of the
    returned array better be the estimating function for that mean!
+5. Using ``np.nan_to_num`` with automatic differentiation. This will result in the bread matrix having ``nan`` values.
+6. Trying to use a SciPy function with automatic differentiation (only some functionalities are supported; please open
+   an issue on GitHub if you have one you would like to see added).

 If you still have trouble, please open an issue at
 `pzivich/Delicatessen `_. This will help me to add other common
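As a compact sketch of the indicator approach recommended above (illustrative data, mirroring the logistic example in the commit):

.. code::

    import numpy as np

    y = np.array([1., 2., np.nan, 4., np.nan])
    r = np.where(np.isnan(y), 0, 1)          # 1 if observed, 0 if missing
    y_filled = np.where(r == 1, y, 0.0)      # placeholder, removed below

    def psi(theta):
        # multiplying by r zeroes the contributions of missing observations,
        # so the placeholder never reaches the root-finder or the derivatives
        return r * (y_filled - theta[0])

Solving :math:`\sum_i \psi = 0` here returns the mean among the observed values; the inverse probability weighting in the second example is what corrects that mean for informative missingness.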
From c2307318bc3336997cf18e1f580cecf437498832 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 14:50:22 -0400
Subject: [PATCH 40/53] enabling probit and cauchy glm with autodiff

---
 .../estimating_equations/regression.py | 18 +++---
 delicatessen/utilities.py              | 60 ++++++++++++++++++-
 2 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/delicatessen/estimating_equations/regression.py b/delicatessen/estimating_equations/regression.py
index 88c9d06..f675335 100644
--- a/delicatessen/estimating_equations/regression.py
+++ b/delicatessen/estimating_equations/regression.py
@@ -1,11 +1,11 @@
 import warnings
 import numpy as np
 from scipy.stats import norm, cauchy
-from delicatessen.utilities import digamma, polygamma
 from delicatessen.utilities import (logit, inverse_logit, identity,
                                     robust_loss_functions,
-                                    additive_design_matrix)
+                                    additive_design_matrix,
+                                    digamma, polygamma, standard_normal_cdf, standard_normal_pdf)
 from delicatessen.estimating_equations.processing import generate_weights

 #################################################################
@@ -76,7 +76,7 @@ def ee_regression(theta, X, y, model, weights=None, offset=None):
     >>> data['Z'] = np.random.normal(size=n)
     >>> data['Y1'] = 0.5 + 2*data['X'] - 1*data['Z'] + np.random.normal(loc=0, size=n)
     >>> data['Y2'] = np.random.binomial(n=1, p=logistic.cdf(0.5 + 2*data['X'] - 1*data['Z']), size=n)
-    >>> data['Y3'] = np.random.poisson(np.exp(lam=0.5 + 2*data['X'] - 1*data['Z']), size=n)
+    >>> data['Y3'] = np.random.poisson(lam=np.exp(0.5 + 2*data['X'] - 1*data['Z']), size=n)
     >>> data['C'] = 1

     Note that ``C`` here is set to all 1's. This will be the intercept in the regression.
@@ -1351,11 +1351,15 @@ def _inverse_link_(betax, link):
         py = 1 - np.exp(-1*np.exp(betax))             # Inverse link
         dpy = np.exp(betax - np.exp(betax))           # Derivative of inverse link
     elif link == 'probit':
-        py = norm.cdf(betax)                          # Inverse link
-        dpy = norm.pdf(betax)                         # Derivative of inverse link
+        # py = norm.cdf(betax)                        # Inverse link
+        # dpy = norm.pdf(betax)                       # Derivative of inverse link
+        py = standard_normal_cdf(x=betax)             # Inverse link
+        dpy = standard_normal_pdf(x=betax)            # Derivative of inverse link
     elif link in ['cauchit', 'cauchy']:
-        py = cauchy.cdf(betax)                        # Inverse link
-        dpy = cauchy.pdf(betax)                       # Derivative of inverse link
+        # py = cauchy.cdf(betax)                      # Inverse link
+        # dpy = cauchy.pdf(betax)                     # Derivative of inverse link
+        py = (1/np.pi)*np.arctan(betax) + 0.5         # Inverse link
+        dpy = 1 / (np.pi*(1 + betax**2))              # Derivative of inverse link (by-hand)
     elif link in ['square_root', 'sqrt']:
         py = betax**2                                 # Inverse link
         dpy = 2 * betax                               # Derivative of inverse link
diff --git a/delicatessen/utilities.py b/delicatessen/utilities.py
index a619df1..5a9e48f 100644
--- a/delicatessen/utilities.py
+++ b/delicatessen/utilities.py
@@ -21,7 +21,7 @@ def polygamma(n, x):
     Returns
     -------
     Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will
-    return ``ndarray).
+    return ``ndarray``).
     """
     if isinstance(x, np.ndarray):
         storage = []
         for xi in x:
             pgnxi = polygamma(n=n, x=xi)
             storage.append(pgnxi)
         return np.array(storage)
     elif isinstance(x, PTPair):
         return x.polygamma(n=n)
     else:
         return sp.special.polygamma(n=n, x=x)
@@ -49,7 +49,7 @@ def digamma(z):
     Returns
     -------
     Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will
-    return ``ndarray).
+    return ``ndarray``).
     """
     return polygamma(n=0, x=z)
@@ -104,6 +104,62 @@ def identity(value):
     return value


+def standard_normal_cdf(x):
+    """Cumulative distribution function for the standard normal distribution. This is a wrapper function of
+    ``scipy.stats.norm.cdf`` meant to enable automatic differentiation with ``delicatessen``. When the input is a
+    ``PrimalTangentPairs`` object, then an internal function that implements the CDF function is called. Otherwise,
+    ``scipy.stats.norm.cdf`` is called for the input object.
+
+    Parameters
+    ----------
+    x : int, float, ndarray
+        Real valued input
+
+    Returns
+    -------
+    Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will
+    return ``ndarray``).
+    """
+    if isinstance(x, np.ndarray):
+        storage = []
+        for xi in x:
+            normxi = standard_normal_cdf(x=xi)
+            storage.append(normxi)
+        return np.array(storage)
+    elif isinstance(x, PTPair):
+        return x.normal_cdf()
+    else:
+        return norm.cdf(x=x)
+
+
+def standard_normal_pdf(x):
+    """Probability density function for the standard normal distribution. This is a wrapper function of
+    ``scipy.stats.norm.pdf`` meant to enable automatic differentiation with ``delicatessen``. When the input is a
+    ``PrimalTangentPairs`` object, then an internal function that implements the PDF function is called. Otherwise,
+    ``scipy.stats.norm.pdf`` is called for the input object.
+
+    Parameters
+    ----------
+    x : int, float, ndarray
+        Real valued input
+
+    Returns
+    -------
+    Return type depends on the input type (``PrimalTangentPairs`` will return ``PrimalTangentPairs``, otherwise will
+    return ``ndarray``).
+    """
+    if isinstance(x, np.ndarray):
+        storage = []
+        for xi in x:
+            normxi = standard_normal_pdf(x=xi)
+            storage.append(normxi)
+        return np.array(storage)
+    elif isinstance(x, PTPair):
+        return x.normal_pdf()
+    else:
+        return norm.pdf(x=x)
+
+
 def robust_loss_functions(residual, loss, k, a=None, b=None):
     r"""Loss functions for robust mean and robust regression estimating equations. This function is called internally
     for ``ee_mean_robust`` and ``ee_robust_regression``. This function can also be loaded, so users can easily adapt
From 5825927e4c92f3540e8861ffe9e0d7f81e5c0eff Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 14:50:56 -0400
Subject: [PATCH 41/53] removing unneeded hyperparameter call for glm

---
 tests/test_estimating_equations.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_estimating_equations.py b/tests/test_estimating_equations.py
index 98da116..08101c7 100644
--- a/tests/test_estimating_equations.py
+++ b/tests/test_estimating_equations.py
@@ -1472,8 +1472,7 @@ def test_glm_invnormal_identity(self):

         def psi(theta):
             return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'],
-                          distribution='inverse_normal', link='identity',
-                          hyperparameter=1.5)
+                          distribution='inverse_normal', link='identity')

         mestr = MEstimator(psi, init=[2., 0., 0.])
         mestr.estimate(solver='lm', maxiter=5000)

From 9f03497f48b011421b7c71bbdd516367c1c14bd1 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 14:51:15 -0400
Subject: [PATCH 42/53] updating derivative class to handle norm

---
 delicatessen/derivative.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py
index f115d25..106283f 100644
--- a/delicatessen/derivative.py
+++ b/delicatessen/derivative.py
@@ -1,6 +1,7 @@
 import numpy
 import numpy as np
 import scipy as sp
+from scipy.stats import norm


 def auto_differentiation(xk, f):
@@ -493,3 +494,14 @@ def polygamma(self, n):
         # Polygamma function
         return PrimalTangentPairs(float(sp.special.polygamma(n, self.primal)),
                                   self.tangent * sp.special.polygamma(n+1, self.primal))
+
+    def normal_cdf(self):
+        # Cumulative Distribution Function (CDF) of the Normal Distribution
+        return PrimalTangentPairs(float(norm.cdf(self.primal)),
+                                  self.tangent * norm.pdf(self.primal))
+
+    def normal_pdf(self):
+        # Probability Density Function (PDF) of the Normal Distribution
+        dx_pdf = -(np.exp(-self.primal**2 / 2) * self.primal) / np.sqrt(2 * np.pi)
+        return PrimalTangentPairs(float(norm.pdf(self.primal)),
+                                  self.tangent * dx_pdf)

From c3c67a491d2db431e64dfe5ae11e2dd7d3bcdb9e Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 16:25:26 -0400
Subject: [PATCH 43/53] a note to self about the power rule

---
 delicatessen/derivative.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/delicatessen/derivative.py b/delicatessen/derivative.py
index 106283f..c08c3bc 100644
--- a/delicatessen/derivative.py
+++ b/delicatessen/derivative.py
@@ -352,6 +352,9 @@ def __rtruediv__(self, other):
     def __pow__(self, other):
         # Power
         if isinstance(other, PrimalTangentPairs):             # For PrimalTangentPairs to powers
+            # I'm not sure about allowing this option, so it is disallowed for the time being
+            # if self.primal == 0:
+            #     return PrimalTangentPairs(0, self.tangent*0)
             fgx = other.primal * self.primal ** (other.primal - 1) * self.tangent    # ... initial piece of rule
             gx = self.primal ** other.primal * np.log(self.primal) * other.tangent   # ... secondary piece of rule
             return PrimalTangentPairs(self.primal ** other.primal,                   # ...
raise primal to primal From c83e215b881a5f1c627082322709726876673151 Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Thu, 7 Sep 2023 16:25:53 -0400 Subject: [PATCH 44/53] fixing docs for dose-response 4pl --- delicatessen/estimating_equations/dose_response.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/delicatessen/estimating_equations/dose_response.py b/delicatessen/estimating_equations/dose_response.py index adbe224..04e8536 100644 --- a/delicatessen/estimating_equations/dose_response.py +++ b/delicatessen/estimating_equations/dose_response.py @@ -85,6 +85,7 @@ def ee_4p_logistic(theta, X, y): To summarize the recommendations, be sure to examine your data (e.g., scatterplot). This will help to determine the initial starting values for the root-finding procedure. Otherwise, you may come across a convergence error. + >>> estr = MEstimator(psi, init=[np.min(resp_data), >>> (np.max(resp_data)+np.min(resp_data)) / 2, >>> (np.max(resp_data)+np.min(resp_data)) / 2, @@ -120,18 +121,18 @@ def ee_4p_logistic(theta, X, y): # Generalized 4PL model function for y-hat fx = theta[0] + (theta[3] - theta[0]) / (1 + rho) - # Using a special implementatin of natural log here + # Using a special implementation of natural log here nested_log = np.log(X / theta[1], # ... to avoid dose=0 issues only take log where=0 < X) # ... where dose>0 (otherwise puts zero in place) # Calculate the derivatives for the gradient - deriv = np.array((1 - 1/(1 + rho), # Gradient for lower limit - (theta[3]-theta[0])*theta[2]/theta[1]*rho/(1+rho)**2, # Gradient for steepness - (theta[3] - theta[0]) * nested_log * rho / (1 + rho)**2, # Gradient for ED50 - 1 / (1 + rho)), ) # Gradient for upper limit + deriv = np.array((1 - 1/(1 + rho), # Gradient for lower limit + (theta[3] - theta[0])*theta[2]/theta[1]*rho/(1 + rho)**2, # Gradient for steepness + (theta[3] - theta[0])*nested_log*rho/(1 + rho)**2, # Gradient for ED50 + 1 / (1 + rho)), ) # Gradient for upper limit # Compute gradient and return for each i - return -2*(y-fx)*deriv + return -2*(y - fx)*deriv def ee_3p_logistic(theta, X, y, lower): From 7ae636dd2ecaad6aeec7432a4db463ef4fb5674e Mon Sep 17 00:00:00 2001 From: Paul Zivich <32672909+pzivich@users.noreply.github.com> Date: Thu, 7 Sep 2023 16:26:11 -0400 Subject: [PATCH 45/53] adding autodiff tests for all built-in EE --- tests/test_autodiff_deriv.py | 931 ++++++++++++++++++++++++++++------- 1 file changed, 747 insertions(+), 184 deletions(-) diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py index 81d3d7d..4014885 100644 --- a/tests/test_autodiff_deriv.py +++ b/tests/test_autodiff_deriv.py @@ -7,12 +7,16 @@ from scipy.optimize import approx_fprime from delicatessen.derivative import auto_differentiation -from delicatessen.utilities import inverse_logit, identity, digamma, polygamma +from delicatessen.utilities import inverse_logit, identity, polygamma, standard_normal_cdf, standard_normal_pdf from delicatessen import MEstimator +from delicatessen.data import load_inderjit from delicatessen.estimating_equations import (ee_mean_variance, ee_mean_robust, - ee_regression, ee_glm, ee_ridge_regression, + ee_regression, ee_glm, ee_robust_regression, ee_ridge_regression, ee_additive_regression, - ee_gformula, ee_gestimation_snmm) + ee_weibull_model, ee_aft_weibull, + ee_4p_logistic, ee_effective_dose_delta, + ee_gformula, ee_ipw, ee_ipw_msm, ee_aipw, ee_gestimation_snmm, + ee_mean_sensitivity_analysis) np.random.seed(20230704) @@ 
-297,7 +301,7 @@ def internal_sum(theta): def test_numpy_operator_tail(self): d = pd.DataFrame() - d['Y'] = [2, -4, -3] + d['Y'] = [2, -4, -3, 0] d['I'] = 1 y = np.asarray(d['Y'])[:, None] @@ -308,10 +312,11 @@ def psi(theta): ee_alpha3 = (alpha * (y + alpha)).T ee_alpha4 = (alpha / (y + alpha)).T ee_alpha5 = (alpha ** y).T + ee_alpha6 = np.where(alpha + (alpha*y) > 0, alpha*y, alpha**2 + y).T return np.vstack([ee_alpha1, ee_alpha2, ee_alpha3, ee_alpha4, - ee_alpha5]) + ee_alpha5, ee_alpha6]) def internal_sum(theta): return np.sum(psi(theta), axis=1) @@ -349,7 +354,7 @@ def f(x): dx_exact = auto_differentiation(xinput, f) # Checking - npt.assert_allclose(dx_true, np.diag(dx_exact), atol=1e-5) + npt.assert_allclose(dx_true, np.diag(dx_exact), atol=1e-7) def test_scipy_special_numapprox(self): def f(x): @@ -357,6 +362,8 @@ def f(x): polygamma(n=2, x=x[1]) + x[1]**2, polygamma(n=3, x=x[2]*x[3] + x[1]), polygamma(n=4, x=np.log(x[3] + x[1]) + x[0]**2) - x[3], + standard_normal_cdf(x=x[1]), + standard_normal_pdf(x=x[2]), ] # Points to Evaluate at @@ -459,6 +466,8 @@ def f(x): class TestSandwichAutoDiff: + # Basics + def test_exact_bread_mean(self): # Data set y = np.array([5, 1, 2, 4, 2, 4, 5, 7, 11, 1, 6, 3, 4, 6]) @@ -495,14 +504,10 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-6) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-5) + npt.assert_allclose(var_approx, var_exact, atol=1e-5) def test_exact_bread_robust_mean(self): n = 500 @@ -527,14 +532,10 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-6) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-6) + npt.assert_allclose(var_approx, var_exact, atol=1e-6) # Andrew def psi(theta): @@ -555,14 +556,10 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-6) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-6) + npt.assert_allclose(var_approx, var_exact, atol=1e-6) # Tukey def psi(theta): @@ -583,14 +580,10 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-6) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-6) + npt.assert_allclose(var_approx, var_exact, atol=1e-6) # Hampel def psi(theta): @@ -611,14 +604,12 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-6) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-6) + npt.assert_allclose(var_approx, var_exact, atol=1e-6) + + # Regression def test_exact_bread_linear_reg(self): n = 500 @@ -647,14 +638,10 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-6) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-6) + 
npt.assert_allclose(var_approx, var_exact, atol=1e-6) def test_exact_bread_logit_reg(self): n = 500 @@ -683,29 +670,24 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-7) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-7) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-7) + npt.assert_allclose(var_approx, var_exact, atol=1e-7) - def test_exact_bread_linear_ridge(self): + def test_exact_bread_poisson_reg(self): n = 500 data = pd.DataFrame() data['X'] = np.random.normal(size=n) data['Z'] = np.random.normal(size=n) - data['Y'] = 0.5 + 2*data['X'] - 1*data['Z'] + np.random.normal(size=n) + data['Y'] = np.random.poisson(lam=np.exp(0.5 + 2*data['X'] - 1*data['Z']), size=n) data['C'] = 1 data['w'] = np.random.uniform(1, 10, size=n) def psi(theta): - return ee_ridge_regression(theta, - X=data[['C', 'X', 'Z']], y=data['Y'], - penalty=[0, 1, 2], - model='linear', weights=data['w']) + return ee_regression(theta, + X=data[['C', 'X', 'Z']], y=data['Y'], + model='poisson', weights=data['w']) mestr = MEstimator(psi, init=[0., 2., -1.]) @@ -720,31 +702,23 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-3) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-6) + npt.assert_allclose(var_approx, var_exact, atol=1e-7) - def test_exact_bread_logit_ridge(self): - n = 500 - data = pd.DataFrame() - data['X'] = np.random.normal(size=n) - data['Z'] = np.random.normal(size=n) - data['Y'] = np.random.binomial(n=1, p=logistic.cdf(0.5 + 2*data['X'] - 1*data['Z']), size=n) - data['C'] = 1 - data['w'] = np.random.uniform(1, 10, size=n) + def test_exact_bread_glm_normal(self): + d = pd.DataFrame() + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [1, 5, 1, 9, -1, 4, 3, 3, 1, -2, 4, -2, 3, 6, 6, 8, 7, 1, -2, 5] + d['I'] = 1 def psi(theta): - return ee_ridge_regression(theta, - X=data[['C', 'X', 'Z']], y=data['Y'], - penalty=[0, 1, 2], - model='logistic', weights=data['w']) + return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + distribution='normal', link='identity') - mestr = MEstimator(psi, init=[0., 2., -1.]) + mestr = MEstimator(psi, init=[0., 0., 0.]) # Auto-differentation mestr.estimate(solver='lm', deriv_method='exact') @@ -757,210 +731,799 @@ def psi(theta): var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-7) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-5) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-7) + npt.assert_allclose(var_approx, var_exact, atol=1e-6) - def test_exact_bread_logit_gam(self): - n = 1000 - data = pd.DataFrame() - data['X'] = np.random.normal(size=n) - data['Z'] = np.random.normal(size=n) - data['Y'] = np.random.binomial(n=1, p=logistic.cdf(0.5 + 2 * data['X'] - 0.001*data['X']**2 - 1 * data['Z']), - size=n) - data['C'] = 1 - Xvals = np.asarray(data[['C', 'X', 'Z']]) - yvals = np.asarray(data['Y']) - spec = [None, {"knots": [-1, 0, 1], "penalty": 3}, {"knots": [-2, -1, 0, 1, 2], "penalty": 5}] + def test_exact_bread_glm_linbin(self): + d = pd.DataFrame() + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0] + d['I'] = 1 - def psi_regression(theta): - return ee_additive_regression(theta, - X=Xvals, y=yvals, - model='logistic', - specifications=spec) + def psi(theta): + return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + link='identity', distribution='binomial') + + mestr = MEstimator(psi, init=[0.2, 0., 0.]) # Auto-differentation - mestr = MEstimator(psi_regression, init=[0., 2., 0., 0., 1., 0., 0., 0., 0.]) mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method - mestr = MEstimator(psi_regression, init=[0., 2., 0., 0., 1., 0., 0., 0., 0.]) mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-5) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=1e-5) + npt.assert_allclose(var_approx, var_exact, atol=1e-7) - def test_exact_bread_glm_loggamma(self): + def test_exact_bread_glm_loglog(self): d = pd.DataFrame() - d['X'] = np.log([5, 10, 15, 20, 30, 40, 60, 80, 100]) - d['Y'] = [118, 58, 42, 35, 27, 25, 21, 19, 18] + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0] d['I'] = 1 def psi(theta): - return ee_glm(theta, X=d[['I', 'X']], y=d['Y'], - distribution='gamma', link='log') + return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + link='loglog', distribution='binomial') + + mestr = MEstimator(psi, init=[0., 0., 0.]) # Auto-differentation - mestr = MEstimator(psi, init=[0., 0., 1.]) mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method - mestr = MEstimator(psi, init=[0., 0., 1.]) mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-5) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=5e-5) + npt.assert_allclose(var_approx, var_exact, atol=1e-6) - def test_exact_bread_glm_lognb(self): + def test_exact_bread_glm_probit(self): d = pd.DataFrame() d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - d['Y'] = [0, 0, 0, 0, 0, 15, 15, 25, 25, 45, 0, 0, 0, 0, 15, 15, 15, 25, 25, 35] + d['Y'] = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0] d['I'] = 1 def psi(theta): - return ee_glm(theta, X=d[['I', 'X']], y=d['Y'], - distribution='nb', link='log') + return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + link='probit', distribution='binomial') + + mestr = MEstimator(psi, init=[0., 0., 0.]) # Auto-differentation - mestr = MEstimator(psi, init=[0., 0., 1.]) mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method - mestr = MEstimator(psi, init=[0., 0., 1.]) mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - 
bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-7) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=5e-5) + npt.assert_allclose(var_approx, var_exact, atol=1e-7) - def test_gcomputation(self): + def test_exact_bread_glm_cauchy(self): d = pd.DataFrame() - d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, - 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, - 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] - d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] - d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1] - d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, - 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, - 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5] + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0] d['I'] = 1 - d['A1'] = 1 - d['A0'] = 0 - # M-estimator def psi(theta): - return ee_gformula(theta=theta, - y=d['Y'], - X=d[['I', 'A', 'V', 'W']], - X1=d[['I', 'A1', 'V', 'W']], - X0=d[['I', 'A0', 'V', 'W']]) + return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + link='cauchy', distribution='binomial') + + mestr = MEstimator(psi, init=[0., 0., 0.]) # Auto-differentation - mestr = MEstimator(psi, init=[0., ] * 7) mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # Central difference method - mestr = MEstimator(psi, init=[0., ] * 7) mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-7) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=5e-5) + npt.assert_allclose(var_approx, var_exact, atol=1e-7) - def test_gestimation_linear_snm(self): + def test_exact_bread_glm_poisson(self): d = pd.DataFrame() - d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, - 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, - 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] - d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] - d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1] - d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, - 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, - 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5] + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [1, 5, 1, 9, 1, 4, 3, 3, 1, 2, 4, 2, 3, 6, 6, 8, 7, 1, 2, 5] d['I'] = 1 - # M-estimator def psi(theta): - return ee_gestimation_snmm(theta=theta, - y=d['Y'], A=d['A'], - W=d[['I', 'V', 'W']], - V=d[['I', 'V']], - model='linear') + return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + distribution='poisson', link='sqrt') + + mestr = MEstimator(psi, init=[2., 0., 0.]) # Auto-differentation - mestr = MEstimator(psi, init=[0., ] * 5) mestr.estimate(solver='lm', deriv_method='exact') bread_exact = mestr.bread var_exact = mestr.variance # 
Central difference method - mestr = MEstimator(psi, init=[0., ] * 5) mestr.estimate(solver='lm', deriv_method='approx') bread_approx = mestr.bread var_approx = mestr.variance # Checking bread estimates - npt.assert_allclose(bread_approx, - bread_exact, - atol=1e-6) + npt.assert_allclose(bread_approx, bread_exact, atol=1e-5) # Checking variance estimates - npt.assert_allclose(var_approx, - var_exact, - atol=5e-5) + npt.assert_allclose(var_approx, var_exact, atol=1e-6) + + def test_exact_bread_glm_invnormal(self): + d = pd.DataFrame() + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [1, 5, 1, 9, 1, 4, 3, 3, 1, 2, 4, 2, 3, 6, 6, 8, 7, 1, 2, 5] + d['I'] = 1 + + def psi(theta): + return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + distribution='inverse_normal', link='identity') + + mestr = MEstimator(psi, init=[2., 0., 0.]) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, bread_exact, atol=1e-7) + + # Checking variance estimates + npt.assert_allclose(var_approx, var_exact, atol=1e-7) + + def test_exact_bread_glm_tweedie(self): + d = pd.DataFrame() + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [1, 5, 1, 9, 1, 4, 3, 3, 1, 2, 4, 2, 3, 6, 6, 8, 7, 1, 2, 5] + d['I'] = 1 + + def psi(theta): + return ee_glm(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + distribution='tweedie', link='log', + hyperparameter=1.5) + + mestr = MEstimator(psi, init=[2., 0., 0.]) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, bread_exact, atol=1e-7) + + # Checking variance estimates + npt.assert_allclose(var_approx, var_exact, atol=1e-7) + + def test_exact_bread_glm_loggamma(self): + d = pd.DataFrame() + d['X'] = np.log([5, 10, 15, 20, 30, 40, 60, 80, 100]) + d['Y'] = [118, 58, 42, 35, 27, 25, 21, 19, 18] + d['I'] = 1 + + def psi(theta): + return ee_glm(theta, X=d[['I', 'X']], y=d['Y'], + distribution='gamma', link='log') + + # Auto-differentation + mestr = MEstimator(psi, init=[0., 0., 1.]) + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr = MEstimator(psi, init=[0., 0., 1.]) + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, bread_exact, atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, var_exact, atol=5e-5) + + def test_exact_bread_glm_lognb(self): + d = pd.DataFrame() + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [0, 0, 0, 0, 0, 15, 15, 25, 25, 45, 0, 0, 0, 0, 15, 15, 15, 25, 25, 35] + d['I'] = 1 + + def psi(theta): + return ee_glm(theta, 
X=d[['I', 'X']], y=d['Y'], + distribution='nb', link='log') + + # Auto-differentation + mestr = MEstimator(psi, init=[0., 0., 1.]) + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr = MEstimator(psi, init=[0., 0., 1.]) + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, bread_exact, atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, var_exact, atol=1e-5) + + def test_exact_bread_robust_regression(self): + d = pd.DataFrame() + d['X'] = [1, -1, 0, 1, 2, 1, -2, -1, 0, 3, -3, 1, 1, -1, -1, -2, 2, 0, -1, 0] + d['Z'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + d['Y'] = [1, 5, 1, 25, -1, 4, 3, 3, 1, -2, 4, -2, 3, 6, 6, 8, 7, 1, -20, 5] + d['I'] = 1 + + def psi(theta): + return ee_robust_regression(theta, X=d[['I', 'X', 'Z']], y=d['Y'], + model='linear', k=5) + + mestr = MEstimator(psi, init=[0., 0., 0.]) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, bread_exact, atol=1e-5) + + # Checking variance estimates + npt.assert_allclose(var_approx, var_exact, atol=1e-6) + + def test_exact_bread_linear_ridge(self): + n = 500 + data = pd.DataFrame() + data['X'] = np.random.normal(size=n) + data['Z'] = np.random.normal(size=n) + data['Y'] = 0.5 + 2*data['X'] - 1*data['Z'] + np.random.normal(size=n) + data['C'] = 1 + data['w'] = np.random.uniform(1, 10, size=n) + + def psi(theta): + return ee_ridge_regression(theta, + X=data[['C', 'X', 'Z']], y=data['Y'], + penalty=[0, 1, 2], + model='linear', weights=data['w']) + + mestr = MEstimator(psi, init=[0., 2., -1.]) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-6) + + def test_exact_bread_logit_ridge(self): + n = 500 + data = pd.DataFrame() + data['X'] = np.random.normal(size=n) + data['Z'] = np.random.normal(size=n) + data['Y'] = np.random.binomial(n=1, p=logistic.cdf(0.5 + 2*data['X'] - 1*data['Z']), size=n) + data['C'] = 1 + data['w'] = np.random.uniform(1, 10, size=n) + + def psi(theta): + return ee_ridge_regression(theta, + X=data[['C', 'X', 'Z']], y=data['Y'], + penalty=[0, 1, 2], + model='logistic', weights=data['w']) + + mestr = MEstimator(psi, init=[0., 2., -1.]) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-7) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-7) + + def test_exact_bread_logit_gam(self): + n = 1000 + data = 
pd.DataFrame() + data['X'] = np.random.normal(size=n) + data['Z'] = np.random.normal(size=n) + data['Y'] = np.random.binomial(n=1, p=logistic.cdf(0.5 + 2 * data['X'] - 0.001*data['X']**2 - 1 * data['Z']), + size=n) + data['C'] = 1 + Xvals = np.asarray(data[['C', 'X', 'Z']]) + yvals = np.asarray(data['Y']) + spec = [None, {"knots": [-1, 0, 1], "penalty": 3}, {"knots": [-2, -1, 0, 1, 2], "penalty": 5}] + + def psi_regression(theta): + return ee_additive_regression(theta, + X=Xvals, y=yvals, + model='logistic', + specifications=spec) + + # Auto-differentation + mestr = MEstimator(psi_regression, init=[0., 2., 0., 0., 1., 0., 0., 0., 0.]) + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr = MEstimator(psi_regression, init=[0., 2., 0., 0., 1., 0., 0., 0., 0.]) + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-5) + + # Survival + + def test_exact_bread_weibull(self): + times = np.array([1, 2, 3, 5, 2, 3, 4, 3, 1, 4]) + events = np.array([1, 0, 0, 0, 1, 1, 1, 0, 0, 1]) + + def psi(theta): + return ee_weibull_model(theta=theta, + t=times, delta=events) + + mestr = MEstimator(psi, init=[1., 1.]) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-5) + + def test_exact_bread_weibull_aft(self): + d = pd.DataFrame() + d['t'] = [1, 2, 3, 5, 2, 3, 4, 3, 1, 4] + d['d'] = [1, 0, 0, 0, 1, 1, 1, 0, 0, 1] + d['X'] = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0] + d['I'] = 1 + + def psi(theta): + return ee_aft_weibull(theta=theta, + t=d['t'], delta=d['d'], X=d[['X', ]]) + + mestr = MEstimator(psi, init=[1.387, 0.107, 0.911]) + + # Auto-differentation + mestr.estimate(solver='hybr', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='hybr', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-5) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-5) + + # Dose-Response + + def test_exact_doseresp_4plogit(self): + d = load_inderjit() + dose_data = d[:, 1] + 1e-6 + resp_data = d[:, 0] + + def psi(theta): + pl4 = ee_4p_logistic(theta=theta, X=dose_data, y=resp_data) + ed20 = ee_effective_dose_delta(theta[4], y=resp_data, delta=0.20, + steepness=theta[2], ed50=theta[1], + lower=theta[0], upper=theta[3]) + + # Returning stacked estimating equations + return np.vstack([pl4, ed20]) + + mestr = MEstimator(psi, init=[0.48, 3.05, 2.98, 7.79, 1.8]) + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Auto-differentation + mestr.estimate(solver='hybr', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Checking bread 
estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-5) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-5) + + # Causal + + def test_exact_gcomputation(self): + d = pd.DataFrame() + d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] + d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1] + d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5] + d['I'] = 1 + d['A1'] = 1 + d['A0'] = 0 + + # M-estimator + def psi(theta): + return ee_gformula(theta=theta, + y=d['Y'], + X=d[['I', 'A', 'V', 'W']], + X1=d[['I', 'A1', 'V', 'W']], + X0=d[['I', 'A0', 'V', 'W']]) + + mestr = MEstimator(psi, init=[0., ] * 7) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=5e-5) + + def test_exact_ipw(self): + d = pd.DataFrame() + d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] + d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1] + d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5] + d['I'] = 1 + + # M-estimator + def psi(theta): + return ee_ipw(theta=theta, y=d['Y'], A=d['A'], + W=d[['I', 'V', 'W']]) + + mestr = MEstimator(psi, init=[0., ] * 6) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=5e-5) + + def test_exact_ipw_msm(self): + d = pd.DataFrame() + d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] + d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1] + d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + ] + [np.nan, ]*15 + d['I'] = 1 + + # Setting up data + W = d[['I', 'V', 'W']] + X = 
d[['I', 'A']] + msm = d[['I', 'A']] + a = d['A'] + y = d['Y'].fillna(-1) + r = np.where(d['Y'].isna(), 0, 1) + + # M-estimation + def psi(theta): + # Separating parameters out + alpha = theta[:2 + W.shape[1]] # MSM & PS + gamma = theta[2 + W.shape[1]:] # Missing score + + # Estimating equation for IPMW + ee_ms = ee_regression(theta=gamma, X=X, y=r, model='logistic') + pi_m = inverse_logit(np.dot(X, gamma)) + ipmw = r / pi_m + + # Estimating equations for MSM and PS + ee_msm = ee_ipw_msm(alpha, y=y, A=a, W=W, V=msm, + link='log', distribution='poisson', weights=ipmw) + ee_msm = ee_msm * r + return np.vstack([ee_msm, ee_ms]) + + init_vals = [0., 0., ] + [0., 0., 0.] + [0., 0.] + mestr = MEstimator(psi, init=init_vals) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-5) + + def test_exact_aipw(self): + d = pd.DataFrame() + d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] + d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1] + d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5] + d['I'] = 1 + d['A1'] = 1 + d['A0'] = 0 + + def psi(theta): + return ee_aipw(theta, y=d['Y'], A=d['A'], + W=d[['I', 'W']], + X=d[['I', 'A', 'W']], + X1=d[['I', 'A1', 'W']], + X0=d[['I', 'A0', 'W']]) + + mestr = MEstimator(psi, init=[0., ]*8) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread + var_exact = mestr.variance + + # Central difference method + mestr.estimate(solver='lm', deriv_method='approx') + bread_approx = mestr.bread + var_approx = mestr.variance + + # Checking bread estimates + npt.assert_allclose(bread_approx, + bread_exact, + atol=1e-6) + + # Checking variance estimates + npt.assert_allclose(var_approx, + var_exact, + atol=1e-5) + + def test_exact_gestimation_snmm(self): + d = pd.DataFrame() + d['W'] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + d['V'] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0] + d['A'] = [1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1] + d['Y'] = [3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5, + 3, 5, 1, 5, 2, 5, 2, 1, 4, 2, 3, 4, 2, 5, 5] + d['I'] = 1 + + # M-estimator + def psi(theta): + return ee_gestimation_snmm(theta=theta, + y=d['Y'], A=d['A'], + W=d[['I', 'V', 'W']], + V=d[['I', 'V']], + model='linear') + + mestr = MEstimator(psi, init=[0., ] * 5) + + # Auto-differentation + mestr.estimate(solver='lm', deriv_method='exact') + bread_exact = mestr.bread 
+        var_exact = mestr.variance
+
+        # Central difference method
+        mestr.estimate(solver='lm', deriv_method='approx')
+        bread_approx = mestr.bread
+        var_approx = mestr.variance
+
+        # Checking bread estimates
+        npt.assert_allclose(bread_approx,
+                            bread_exact,
+                            atol=1e-6)
+
+        # Checking variance estimates
+        npt.assert_allclose(var_approx,
+                            var_exact,
+                            atol=1e-5)
+
+    def test_robins_sensitivity_mean(self):
+        d = pd.DataFrame()
+        d['I'] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+        d['X'] = [0, 1, 2, 0, 1, 2, 0, 1, 2, 0]
+        d['Y'] = [7, 2, 5, np.nan, 1, 4, 8, np.nan, 1, np.nan]
+        d['delta'] = np.where(d['Y'].isna(), 0, 1)
+
+        def q_function(y_vals, alpha):
+            y_no_miss = np.where(np.isnan(y_vals), 0, y_vals)
+            return alpha * y_no_miss
+
+        def psi(theta):
+            return ee_mean_sensitivity_analysis(theta=theta,
+                                                y=d['Y'], delta=d['delta'], X=d[['I', 'X']],
+                                                q_eval=q_function(d['Y'], alpha=0.5),
+                                                H_function=inverse_logit)
+
+        mestr = MEstimator(psi, init=[0., 0., 0.])
+
+        # Auto-differentiation
+        mestr.estimate(solver='lm', deriv_method='exact')
+        bread_exact = mestr.bread
+        var_exact = mestr.variance
+
+        # Central difference method
+        mestr.estimate(solver='lm', deriv_method='approx')
+        bread_approx = mestr.bread
+        var_approx = mestr.variance
+
+        # Checking bread estimates
+        npt.assert_allclose(bread_approx,
+                            bread_exact,
+                            atol=1e-6)
+
+        # Checking variance estimates
+        npt.assert_allclose(var_approx,
+                            var_exact,
+                            atol=1e-6)

From 03b252280c0ffe59db74dadb4e2e3873b27d37ca Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 16:28:39 -0400
Subject: [PATCH 46/53] tweaking precision of single unit test

---
 tests/test_autodiff_deriv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py
index 4014885..c562f24 100644
--- a/tests/test_autodiff_deriv.py
+++ b/tests/test_autodiff_deriv.py
@@ -908,7 +908,7 @@ def psi(theta):
         npt.assert_allclose(bread_approx, bread_exact, atol=1e-7)
 
         # Checking variance estimates
-        npt.assert_allclose(var_approx, var_exact, atol=1e-7)
+        npt.assert_allclose(var_approx, var_exact, atol=1e-6)
 
     def test_exact_bread_glm_tweedie(self):
         d = pd.DataFrame()
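The next patch replaces the ``where=``-masked call to ``np.log`` with an equivalent ``np.where`` expression, since the masked form is not fully supported by the operator-overloading automatic differentiation. A minimal NumPy sketch of the adopted pattern (the dose values and ED50 below are illustrative, not from the package):

.. code:: python

    import numpy as np

    dose = np.array([0., 5., 10.])    # includes a zero dose
    ed50 = 3.05                       # illustrative ED50 value

    # np.log is still evaluated at dose=0 (a divide-by-zero), but np.where
    # discards that entry and substitutes 0 in its place
    with np.errstate(divide='ignore'):
        nested_log = np.where(dose > 0, np.log(dose / ed50), 0)
    print(nested_log)                 # [0., ~0.494, ~1.187]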
From e091841e54ce4d9b86d8195d0756062db0f3fb25 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 16:46:03 -0400
Subject: [PATCH 47/53] updating dose-response to be easier to maintain

---
 .../estimating_equations/dose_response.py | 42 ++++++---------
 1 file changed, 7 insertions(+), 35 deletions(-)

diff --git a/delicatessen/estimating_equations/dose_response.py b/delicatessen/estimating_equations/dose_response.py
index 04e8536..b482e86 100644
--- a/delicatessen/estimating_equations/dose_response.py
+++ b/delicatessen/estimating_equations/dose_response.py
@@ -122,8 +122,9 @@ def ee_4p_logistic(theta, X, y):
     fx = theta[0] + (theta[3] - theta[0]) / (1 + rho)
 
     # Using a special implementation of natural log here
-    nested_log = np.log(X / theta[1],                  # ... to avoid dose=0 issues only take log
-                        where=0 < X)                   # ... where dose>0 (otherwise puts zero in place)
+    # nested_log = np.log(X / theta[1],                # ... to avoid dose=0 issues only take log
+    #                     where=0 < X)                 # ... where dose>0 (otherwise puts zero in place)
+    nested_log = np.where(X > 0, np.log(X / theta[1]), 0)    # Handling when dose = 0
 
     # Calculate the derivatives for the gradient
     deriv = np.array((1 - 1/(1 + rho),                 # Gradient for lower limit
@@ -230,23 +231,8 @@ def ee_3p_logistic(theta, X, y, lower):
     Inderjit, Streibig JC, & Olofsdotter M. (2002). Joint action of phenolic acid mixtures and its significance in
     allelopathy research. *Physiologia Plantarum*, 114(3), 422-428.
     """
-    # Creating rho to cut down on typing
-    rho = (X / theta[0])**theta[1]
-
-    # Generalized 3PL model function for y-hat
-    fx = lower + (theta[2] - lower) / (1 + rho)
-
-    # Using a special implementation of natural log here
-    nested_log = np.log(X / theta[0],                  # ... to avoid dose=0 issues only take log
-                        where=0 < X)                   # ... where dose>0 (otherwise puts zero in place)
-
-    # Calculate the derivatives for the gradient
-    deriv = np.array(((theta[2]-lower)*theta[1]/theta[0]*rho/(1+rho)**2,    # Gradient for steepness
-                      (theta[2]-lower) * nested_log * rho / (1+rho)**2,     # Gradient for ED50
-                      1 / (1 + rho)), )                                     # Gradient for upper limit
-
-    # Compute gradient and return for each i
-    return -2*(y - fx)*deriv
+    ee_dr = ee_4p_logistic(theta=[lower, theta[0], theta[1], theta[2]], X=X, y=y)    # 4PL with fixed lower limit
+    return ee_dr[1:, :]                                                              # Drop the fixed lower-limit row
 
 
 def ee_2p_logistic(theta, X, y, lower, upper):
@@ -344,22 +330,8 @@ def ee_2p_logistic(theta, X, y, lower, upper):
     Inderjit, Streibig JC, & Olofsdotter M. (2002). Joint action of phenolic acid mixtures and its significance in
     allelopathy research. *Physiologia Plantarum*, 114(3), 422-428.
     """
-    # Creating rho to cut down on typing
-    rho = (X / theta[0])**theta[1]
-
-    # Generalized 3PL model function for y-hat
-    fx = lower + (upper - lower) / (1 + rho)
-
-    # Using a special implementatin of natural log here
-    nested_log = np.log(X / theta[0],                  # ... to avoid dose=0 issues only take log
-                        where=0 < X)                   # ... where dose>0 (otherwise puts zero in place)
-
-    # Calculate the derivatives for the gradient
-    deriv = np.array(((upper-lower)*theta[1]/theta[0]*rho/(1+rho)**2,       # Gradient for steepness
-                      (upper-lower) * nested_log * rho / (1+rho)**2), )     # Gradient for ED50
-
-    # Compute gradient and return for each i
-    return -2*(y-fx)*deriv
+    ee_dr = ee_4p_logistic(theta=[lower, theta[0], theta[1], upper], X=X, y=y)    # 4PL with both limits fixed
+    return ee_dr[1:3, :]                                                          # Keep the ED50 and steepness rows
 
 
 def ee_effective_dose_delta(theta, y, delta, steepness, ed50, lower, upper):
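Because the 3PL and 2PL estimating equations are now thin wrappers, their output should match the corresponding rows of the 4PL version. A quick sketch of that relationship (the dose-response data and parameter values here are made up for illustration):

.. code:: python

    import numpy as np
    from delicatessen.estimating_equations import ee_3p_logistic, ee_4p_logistic

    dose = np.array([1., 2., 4., 8.])
    resp = np.array([7.5, 6.0, 3.2, 1.1])

    # 4PL rows are ordered: lower limit, ED50, steepness, upper limit
    full = ee_4p_logistic(theta=[0.48, 3.05, 2.98, 7.79], X=dose, y=resp)
    part = ee_3p_logistic(theta=[3.05, 2.98, 7.79], X=dose, y=resp, lower=0.48)
    assert np.allclose(full[1:, :], part)    # 3PL is the 4PL with the lower limit pinned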
From 12ac321173890c1e709add9191442079d4e5a49a Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 16:46:18 -0400
Subject: [PATCH 48/53] updating tolerance for cauchy test

---
 tests/test_autodiff_deriv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff_deriv.py
index c562f24..9cfb84e 100644
--- a/tests/test_autodiff_deriv.py
+++ b/tests/test_autodiff_deriv.py
@@ -850,7 +850,7 @@ def psi(theta):
         npt.assert_allclose(bread_approx, bread_exact, atol=1e-7)
 
         # Checking variance estimates
-        npt.assert_allclose(var_approx, var_exact, atol=1e-7)
+        npt.assert_allclose(var_approx, var_exact, atol=1e-6)
 
     def test_exact_bread_glm_poisson(self):
         d = pd.DataFrame()

From d0e7c84b0762292420f5770877dcb03a89ba3ef6 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 16:46:43 -0400
Subject: [PATCH 49/53] Adding check for nan's in bread to give better error back

---
 delicatessen/mestimation.py |  5 +++++
 tests/test_MEstimation.py  | 15 ++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/delicatessen/mestimation.py b/delicatessen/mestimation.py
index 94f50fc..4480a94 100644
--- a/delicatessen/mestimation.py
+++ b/delicatessen/mestimation.py
@@ -301,6 +301,11 @@ def estimate(self, solver='lm', maxiter=5000, tolerance=1e-9, deriv_method='appr
         self.meat = np.dot(evald_theta, evald_theta.T) / self.n_obs    # Meat is dot product of arrays
 
         # STEP 2.3: assembling the sandwich (variance)
+        if np.isnan(self.bread).any():
+            raise ValueError("The bread matrix contains at least one np.nan, so it cannot be inverted. This may be an "
+                             "issue with the provided estimating equations or the evaluated theta. The point estimates "
+                             "were " + str(self.theta) +
+                             ". If using automatic differentiation, try setting deriv_method='approx' instead.")
         if allow_pinv:                                   # Support 1D theta-hat
             bread_invert = np.linalg.pinv(self.bread)    # ... find pseudo-inverse
         else:                                            # Support 1D theta-hat
diff --git a/tests/test_MEstimation.py b/tests/test_MEstimation.py
index 7bdceb2..2f4d610 100644
--- a/tests/test_MEstimation.py
+++ b/tests/test_MEstimation.py
@@ -9,7 +9,8 @@
 
 from delicatessen import MEstimator
 from delicatessen.utilities import inverse_logit
-from delicatessen.estimating_equations import ee_regression
+from delicatessen.estimating_equations import ee_regression, ee_4p_logistic
+from delicatessen.data import load_inderjit
 
 np.random.seed(236461)
 
@@ -138,6 +139,18 @@ def psi(theta):
         with pytest.raises(ValueError, match="A 2-dimensional array is expected"):
             mestimator.estimate()
 
+    def test_error_bread_of_nan(self):
+        d = load_inderjit()
+        dose_data = d[:, 1]
+        resp_data = d[:, 0]
+
+        def psi(theta):
+            return ee_4p_logistic(theta=theta, X=dose_data, y=resp_data)
+
+        mestr = MEstimator(psi, init=[0.48, 3.05, 2.98, 7.79])
+        with pytest.raises(ValueError, match="bread matrix contains at least one np.nan"):
+            mestr.estimate(solver='hybr', deriv_method='exact')
+
     def test_mean_variance_1eq(self):
         """Tests the mean / variance with a single estimating equation.
         """
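The new test leans on a known failure mode: the Inderjit herbicide data include zero doses, so the exact derivative of the 4PL model evaluates the log at zero and propagates ``np.nan`` into the bread. The earlier dose-response test sidesteps this by shifting the doses, as in this sketch:

.. code:: python

    import numpy as np
    from delicatessen.data import load_inderjit

    d = load_inderjit()
    dose_raw = d[:, 1]              # zero doses present; exact bread picks up np.nan
    dose_safe = d[:, 1] + 1e-6      # small offset used by the autodiff test suite
    print(np.sum(dose_raw == 0))    # count of zero-dose observations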
From 537d08b1847624bfda62435825c7b4dfa855d922 Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 7 Sep 2023 16:46:58 -0400
Subject: [PATCH 50/53] autodiff will be v2.0 release

---
 delicatessen/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delicatessen/version.py b/delicatessen/version.py
index e992399..f2dc0e4 100644
--- a/delicatessen/version.py
+++ b/delicatessen/version.py
@@ -1 +1 @@
-__version__ = "1.4"
+__version__ = "2.0"

From 449c5780d9ab486588f1813920d07adadd04e7fb Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Tue, 26 Sep 2023 12:09:08 -0400
Subject: [PATCH 51/53] Noting some caveats about autodiff

---
 docs/Basics.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/docs/Basics.rst b/docs/Basics.rst
index fcd2dd0..a0ad7fe 100644
--- a/docs/Basics.rst
+++ b/docs/Basics.rst
@@ -47,6 +47,17 @@
 automatic differentiation. This approach provides the exact derivative (as opposed to an approximation) and is
 implemented by-hand in ``delicatessen`` via operator overloading. Finally, we use forward-mode because there is no
 time advantage of backward-mode because the Jacobian is square.
 
+Automatic Differentiation Caveats
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+There are some caveats to the use of automatic differentiation. First, some NumPy functionalities are not fully
+supported. For example, ``np.log(x, where=0<x)`` is not supported. An equivalent ``np.where`` expression, such as
+``np.where(0 < x, np.log(x), 0)``, should be used in its place.
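As a usage-level illustration of the two derivative options discussed in these docs, the sketch below compares the bread from both methods for a mean-only estimating equation (illustrative data; for a single parameter the bread here is just the scalar 1):

.. code:: python

    import numpy as np
    from delicatessen import MEstimator

    y = np.array([1., 3., 2., 5., 4.])

    def psi(theta):
        return y - theta                     # estimating equation for the mean

    mestr = MEstimator(psi, init=[0.])
    mestr.estimate(deriv_method='exact')     # forward-mode automatic differentiation
    bread_exact = mestr.bread
    mestr.estimate(deriv_method='approx')    # central difference approximation
    np.testing.assert_allclose(mestr.bread, bread_exact, atol=1e-9)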
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Tue, 26 Sep 2023 12:09:38 -0400
Subject: [PATCH 52/53] Changing np.nan in the bread to a warning

This allows a user to bootstrap if the bread fails to compute

---
 delicatessen/mestimation.py | 40 +++++++++++++++++++------------------
 tests/test_MEstimation.py  |  6 +++++-
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/delicatessen/mestimation.py b/delicatessen/mestimation.py
index 4480a94..a5a7f07 100644
--- a/delicatessen/mestimation.py
+++ b/delicatessen/mestimation.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 from scipy.optimize import newton, root
 from scipy.misc import derivative
@@ -302,20 +304,20 @@ def estimate(self, solver='lm', maxiter=5000, tolerance=1e-9, deriv_method='appr
 
         # STEP 2.3: assembling the sandwich (variance)
         if np.isnan(self.bread).any():
-            raise ValueError("The bread matrix contains at least one np.nan, so it cannot be inverted. This may be an "
-                             "issue with the provided estimating equations or the evaluated theta. The point estimates "
-                             "were " + str(self.theta) +
-                             ". If using automatic differentiation, try setting deriv_method='approx' instead.")
-        if allow_pinv:                                       # Support 1D theta-hat
-            bread_invert = np.linalg.pinv(self.bread)        # ... find pseudo-inverse
-        else:                                                # Support 1D theta-hat
-            bread_invert = np.linalg.inv(self.bread)         # ... find inverse
-        # Two sets of matrix multiplication to get the sandwich variance
-        sandwich = np.dot(np.dot(bread_invert, self.meat), bread_invert.T)
-
-        # STEP 3: updating storage for results
-        self.asymptotic_variance = sandwich      # Asymptotic variance requires division by n (done above)
-        self.variance = sandwich / self.n_obs    # Variance estimate requires division by n^2 (second done here)
+            warnings.warn("The bread matrix contains at least one np.nan, so it cannot be inverted. The variance will "
+                          "not be calculated. This may be an issue with the provided estimating equations or the "
+                          "evaluated theta.",
+                          UserWarning)
+        else:
+            if allow_pinv:                                   # Support 1D theta-hat
+                bread_invert = np.linalg.pinv(self.bread)    # ... find pseudo-inverse
+            else:                                            # Support 1D theta-hat
+                bread_invert = np.linalg.inv(self.bread)     # ... find inverse
+            sandwich = np.dot(np.dot(bread_invert, self.meat), bread_invert.T)    # Compute sandwich
+
+            # STEP 3: updating storage for results
+            self.asymptotic_variance = sandwich      # Asymptotic variance requires division by n (done above)
+            self.variance = sandwich / self.n_obs    # Variance estimate requires division by n^2 (second done here)
 
     def confidence_intervals(self, alpha=0.05):
         r"""Calculate Wald-type :math:`(1 - \alpha) \times` 100% confidence intervals using the point estimates and
@@ -342,9 +344,9 @@ def confidence_intervals(self, alpha=0.05):
         intervals for :math:`\theta_b`
         """
         # Check that estimate() has been called
-        if self.theta is None:
-            raise ValueError("theta has not been estimated yet. "
-                             "estimate() must be called before confidence_intervals()")
+        if self.variance is None:
+            raise ValueError("Either theta has not been estimated yet, or there is a np.nan in the bread matrix. "
+                             "Therefore, confidence_intervals() cannot be called.")
 
         # Check valid alpha value is being provided
         if not 0 < alpha < 1:
             raise ValueError("`alpha` must be 0 < a < 1")
@@ -382,8 +384,8 @@ def z_scores(self, null=0):
         """
         # Check that self.estimate() has been called
-        if self.theta is None:
-            raise ValueError("theta has not been estimated yet. "
-                             "estimate() must be called before confidence_intervals()")
+        if self.variance is None:
+            raise ValueError("Either theta has not been estimated yet, or there is a np.nan in the bread matrix. "
+                             "Therefore, z_scores() cannot be called.")
 
         # Calculating Z-scores
         se = np.sqrt(np.diag(self.variance))    # Extract the standard error estimates from the sandwich
diff --git a/tests/test_MEstimation.py b/tests/test_MEstimation.py
index 2f4d610..bd39e26 100644
--- a/tests/test_MEstimation.py
+++ b/tests/test_MEstimation.py
@@ -148,9 +148,13 @@ def psi(theta):
             return ee_4p_logistic(theta=theta, X=dose_data, y=resp_data)
 
         mestr = MEstimator(psi, init=[0.48, 3.05, 2.98, 7.79])
-        with pytest.raises(ValueError, match="bread matrix contains at least one np.nan"):
+        with pytest.warns(UserWarning, match="bread matrix contains at least one np.nan"):
             mestr.estimate(solver='hybr', deriv_method='exact')
 
+        # Ensuring variance is None but point estimates still exist
+        assert mestr.theta is not None
+        assert mestr.variance is None
+
     def test_mean_variance_1eq(self):
         """Tests the mean / variance with a single estimating equation.
         """

From 099ac0974efd5334b3a6781b54efbe6aa7a8a62c Mon Sep 17 00:00:00 2001
From: Paul Zivich <32672909+pzivich@users.noreply.github.com>
Date: Thu, 28 Sep 2023 08:37:51 -0400
Subject: [PATCH 53/53] tweaking autodiff tests

---
 tests/{test_autodiff_deriv.py => test_autodiff.py} | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
 rename tests/{test_autodiff_deriv.py => test_autodiff.py} (99%)

diff --git a/tests/test_autodiff_deriv.py b/tests/test_autodiff.py
similarity index 99%
rename from tests/test_autodiff_deriv.py
rename to tests/test_autodiff.py
index 9cfb84e..2a5bb17 100644
--- a/tests/test_autodiff_deriv.py
+++ b/tests/test_autodiff.py
@@ -1039,7 +1039,8 @@ def psi(theta):
             return ee_ridge_regression(theta,
                                        X=data[['C', 'X', 'Z']], y=data['Y'],
                                        penalty=[0, 1, 2],
-                                       model='linear', weights=data['w'])
+                                       model='linear', weights=data['w'],
+                                       center=[0, 1, -2])
 
         mestr = MEstimator(psi, init=[0., 2., -1.])