In [1]:
from collections.abc import Iterable
import numpy as np
import itertools
import math
from tqdm import tqdm
from utils import all_keys, type_name, isnumber, isiterable, scale_dict, raise_operand_exception

Напишем класс FwdAAD (Forward-mode Automatic Analytical Differentiation)

In [2]:
class FwdAAD:
    all_names = set()
    def __init__(self, real: float | int, dual: dict):
        self.real = real
        self.dual = dual
        self.original_name = None

    def value(self) -> float | int:
        """
        Get value of the variable.

        :returns: variable's current value
        """
        return self.real

    def get_gradient(self) -> dict:
        """
        Get the first derivatives of the variable with respect to all its child variables.

        :returns: dict which maps child variable's symbol to the derivative of the function w.r.t. this variable
        """
        return self.dual

    def print_gradient(self, precision: int = 6) -> None:
        print(', '.join(['d/d{} = {:.{}f}'.format(key, d, precision) for key, d in sorted(list(self.dual.items()), key=lambda x: x[0] if len(x[0].split('_')) == 1 else int(x[0].split('_')[-1]))]))

    def get_variable(symbol: str, value: float | int = None, ignore_existent: bool = False):
        """
        Create variable symbol.

        :param symbol: unique name to indicate the variable
        :param value: value of the variable
        :param ignore_existent: do not raise an exception if variable with
        name `symbol` already exists
        :returns: FwdAAD  variable
        :raises ValueError: if variable with name `symbol` already exists and
        `ignore_existent` is False
        """
        if symbol not in FwdAAD.all_names or ignore_existent:
            FwdAAD.all_names.add(symbol)
            res = FwdAAD(value, {symbol: 1})
            res.original_name = symbol
            return res
        else:
            raise ValueError(f"Variable with name '{symbol}' already exists."
                             "Please provide a unique name.")
    
    def get_vector(symbol: str, values: Iterable = None, length: int = None, ignore_existent: bool = False) -> list:
        """
        Create a list of variable symbols.

        :param symbol: unique name to indicate the variables. Each variable will have a name in format 'symbol'_i
        :param values: values of the variables
        :param length: length of the vector (is required if `values = None`, otherwise ignored)
        :param ignore_existent: do not raise an exception if a variable with name `symbol` with index in range 
        `[1; len(values)]` already exists 
        :returns: FwdAAD  variables list
        :raises ValueError: if a variable with name `symbol` with index in range `[1; len(values)]` already exists and
        `ignore_existent` is False
        :raises ValueError: if neither `values` nor `length` arguments have been specified
        """
        if values is not None:
            return [FwdAAD.get_variable(symbol + '_' + str(idx + 1), x, ignore_existent) for idx, x in enumerate(values)]
        elif length is not None:
            return [FwdAAD.get_variable(symbol + '_' + str(idx + 1), None, ignore_existent) for idx in range(length)]
        else:
            raise ValueError("Please provide values or specify length of the vector.")

    def set_value(self, value: int | float) -> None:
        """
        Change value of the variable to `value`. Note that this deletes the
        computed gradient for the variable.

        :param value: value of the variables
        :raises ValueError: if `value` is not a number
        """
        if not isnumber(value):
            raise ValueError(f"Expected a number, got '{type_name(value)}'")
        self.real = value
        self.dual = {self.original_name: 1}

    def set_name(self, name: str) -> None:
        """
        Change the symbol of the variable to `name`. Note that this deletes the
        computed gradient for the variable.

        :param name: new name of the variable
        :raises ValueError: if `name` is not a str
        """
        if not isinstance(name, str):
            raise ValueError(f"Expected a 'str', got '{type_name(value)}'")
        derivative = self.dual[self.original_name]
        del self.dual[self.original_name] 
        FwdAAD.all_names.remove(self.original_name)
        self.original_name = name
        FwdAAD.all_names.add(name)
        self.dual[name] = derivative

    def set_vector_values(vector, values: Iterable[int | float]) -> None:
        if not isiterable(vector):
            raise ValueError(f"object '{type_name(vector)}' is not iterable")
        if not isiterable(values):
            raise ValueError(f"object '{type_name(values)}' is not iterable")
        if len(vector) != len(values):
            raise ValueError("Variables vector and values sizes mismatch")
        for var, val in zip(vector, values):
            if not isinstance(var, FwdAAD):
                raise ValueError(f"Expected '{FwdAAD.__name__}', got '{type_name(var)}'")
            var.set_value(val) 

    def __repr__(self):
        return str(self.real)

    def __add__(self, other):
        if isinstance(other, FwdAAD):
            real = self.real + other.real
            dual = {key: self.dual.get(key, 0) + other.dual.get(key, 0) for key in all_keys(self.dual, other.dual)}
            return FwdAAD(real, dual)
        elif isnumber(other):
            return FwdAAD(self.real + other, self.dual)
        else:
            raise_operand_exception(self, other, '+')

    def __pos__(self):
        return self

    def __neg__(self):
        return self * (-1)

    def __sub__(self, other):
        if not (isinstance(other, FwdAAD) or isnumber(other)):
            raise_operand_exception(self, other, '-')
        return self + (-other)

    def __mul__(self, other):
        if isinstance(other, FwdAAD):
            real = self.real * other.real
            dual = {key: self.dual.get(key, 0) * other.real + self.real * other.dual.get(key, 0) for key in all_keys(self.dual, other.dual)}
            return FwdAAD(real, dual)
        elif isnumber(other):
            return FwdAAD(self.real * other, scale_dict(self.dual, other))
        else:
            raise_operand_exception(self, other, '*')
    
    def _inverse(self):
        real2 = self.real * self.real
        dual = {key: -d / real2 for key, d in self.dual.items()}
        return FwdAAD(1 / self.real, dual)
    
    def __truediv__(self, other):
        if isinstance(other, FwdAAD):
            return self * other._inverse()
        elif isnumber(other):
            return self * (1 / other)
        else:
            raise_operand_exception(self, other, '/')
    
    def __pow__(self, other):
        if isinstance(other, FwdAAD):
            real = pow(self.real, other.real)
            # (f^g)' = f^{g-1} * (g*f' + f*ln(f)*g')
            real_x_log_real = self.real * math.log(self.real)
            pow_self_real_other_real_minus_one = pow(self.real, other.real - 1)
            dual = {key: pow_self_real_other_real_minus_one * (other.real * self.dual.get(key, 0) +  real_x_log_real * other.dual.get(key, 0)) for key in all_keys(self.dual, other.dual)}
        elif isnumber(other):
            real = pow(self.real, other)
            term = other * pow(self.real, other - 1)  # optimization
            dual = scale_dict(self.dual, term)
        else:
            raise_operand_exception(self, other, '** or pow()')
        return FwdAAD(real, dual)

    def __rpow__(self, other):
        if isnumber(other):
            real = pow(other, self.real)
            term = real * math.log(other)
            dual = {key: term * d for key, d in self.dual.items()}
            return FwdAAD(real, dual)
        else:
            raise_operand_exception(self, other, '** or pow()')

    __radd__ = __add__
    def __rsub__(self, other):
        return other + (-self)
    __rmul__ = __mul__
    def __rtruediv__(self, other):
        return other * self._inverse()

Пользоваться им нужно так:

In [3]:
# Two independent input variables. ignore_existent=True suppresses the
# "already exists" error, so this cell can be executed more than once.
x: FwdAAD = FwdAAD.get_variable(symbol='x', value=2, ignore_existent=True)
y: FwdAAD = FwdAAD.get_variable(symbol='y', value=np.pi, ignore_existent=True)

def func(x, y):
    """Evaluate x^3 - 2 * x^2 * y^2 + y^3 for any operands supporting +, -, * and **."""
    cube_x = x ** 3
    cross_term = 2 * x ** 2 * y ** 2
    cube_y = y ** 3
    return cube_x - cross_term + cube_y

# Value and gradient of func at the point where x and y currently are.
f: FwdAAD = func(x, y)
f.print_gradient(10)

# Values and names of the variables can be changed afterwards; the expression
# is then evaluated again to obtain the gradient at the new point.
x.set_value(value=1)
y.set_value(value=1)
x.set_name(name='y_0')
y.set_name(name='y_1')

f: FwdAAD = func(x, y)
f.print_gradient(0)

d/dx = -66.9568352087, d/dy = -20.6566692542
d/dy_0 = -1, d/dy_1 = -1


Для удобства реализованы функции `get_vector`, позволяющая сразу получить целый вектор переменных, и `set_vector_values`, меняющая значения у вектора переменных. Замерим скорость вычисления двух требуемых функций на всех упорядоченных наборах из шести аргументов, составленных (с повторениями) из 11 переменных, — всего $11^6 = 1\,771\,561$ вызовов каждой функции.

In [4]:
def f_1(x):
    """Return the sum of the squares of the elements of `x`."""
    squares = np.power(x, 2)
    return np.sum(squares)
def f_2(x):
    """Return the sum of every element of `x` raised to the power of itself."""
    self_powers = np.power(x, x)
    return np.sum(self_powers)

In [5]:
NUM_VARIABLES = 11
NUM_ARGUMENTS = 6

# ignore_existent=True keeps the cell re-runnable: the symbols X_1..X_11 stay
# registered in FwdAAD.all_names after the first execution, and get_vector
# would otherwise raise ValueError on the second one.
x = FwdAAD.get_vector('X', length=NUM_VARIABLES, ignore_existent=True)

# Each function is evaluated on every ordered NUM_ARGUMENTS-tuple drawn from
# the NUM_VARIABLES variables. f_2 is sampled on [1, 2] rather than [0, 1]
# because FwdAAD ** FwdAAD takes math.log of the base, which must be positive.
for function, grid in ((f_1, np.linspace(0, 1, NUM_VARIABLES)),
                       (f_2, np.linspace(1, 2, NUM_VARIABLES))):
    FwdAAD.set_vector_values(x, grid)
    for arguments in tqdm(itertools.product(x, repeat=NUM_ARGUMENTS), total=NUM_VARIABLES ** NUM_ARGUMENTS):
        function(arguments)

  0%|          | 0/1771561 [00:00<?, ?it/s]

100%|██████████| 1771561/1771561 [02:37<00:00, 11268.21it/s]
100%|██████████| 1771561/1771561 [04:11<00:00, 7036.51it/s]


Как видно, вычисление градиента для функции
$$f_1(x_1, \dotsc, x_6) = \sum_{i=1}^6 x_i^{2}$$
работает значительно быстрее, чем для функции
$$f_2(x_1, \dotsc, x_6) = \sum_{i=1}^6 x_i^{x_i}$$
из-за большего количества операций в вычислении градиента $f_2$ (в том числе из-за необходимости вычислять логарифм при каждом возведении в степень).