In [53]:
import autograd.numpy as np
from autograd.core import primitive
from autograd import grad, jacobian, hessian
from autograd.numpy.numpy_grads import unbroadcast
import scipy.stats

In [35]:
@primitive
def MySum(x):
    return 2 * np.sum(x)

def MySameSum(x):
    return 2 * np.sum(x)

def MySum_vjp(g, ans, vs, gvs, x):
    return np.full(x.shape, g) * np.full(x.shape, 2)

MySum.defvjp(MySum_vjp)


In [115]:
@primitive
def BinSum(x, y):
    return y * x ** 2

# It appears that the gradient is always with respect to the argument specified in argnum,
# which defaults to zero (the first argument)

def BinSum_vjp_x(g, ans, vs, gvs, x, y):
    return unbroadcast(vs, gvs, g * 2 * x * y)

global_vs = 0
global_gvs = 0
def BinSum_vjp_y(g, ans, vs, gvs, x, y):
    global global_vs
    global global_gvs
    global_vs = vs
    global_gvs = gvs
    return unbroadcast(vs, gvs, g * 2 * x ** 2)

BinSum.defvjp(BinSum_vjp_x, argnum=0)
BinSum.defvjp(BinSum_vjp_y, argnum=1)

BinSumGradX = grad(BinSum)
BinSumGradY = grad(BinSum, argnum=1)
print BinSum.vjps
print BinSum(5., 0.1)
print BinSumGradX(5., 0.1)
print BinSumGradY(5., 0.1)


{0: <function VJP_0_of_BinSum at 0x7f966fcc8578>, 1: <function VJP_1_of_BinSum at 0x7f966fcc82a8>}
2.5
1.0
50.0


In [147]:
# Looking at the derivative of diag, it appears tha the output is supposed
# to have the same dimension as the input, but with g in the appropriate places?
# anp.diag.defvjp(   lambda g, ans, vs, gvs, x, k=0          : anp.diag(g, k))

# How does unbroadcast work?

@primitive
def ElementwiseProd(x, y):
    return x * y

def ElementwiseProd_vjp(g, ans, vs, gvs, x, y):
#     return unbroadcast(vs, gvs, y) # Wrong.
    return g * y

ElementwiseProd.defvjp(ElementwiseProd_vjp)

def AnotherProd(x, z, y):
    return z * ElementwiseProd(x, y)

AnotherProdJac = jacobian(AnotherProd)

x = np.array([2., 3.])
y = np.array([10., 200.])
z = np.array([100., 1000.])
print AnotherProd(x, y, z)
print AnotherProdJac(x, z, y)
print z * y


[   2000.  600000.]
[[   1000.       0.]
 [      0.  200000.]]
[   1000.  200000.]


In [128]:
x = np.array([5, 1])
y = np.expand_dims(x, 5)
print x.shape
print y.shape
print x ** 2
print np.diag(np.array([5., 6., 7.]), k=1)

(2,)
(2, 1)
[25  1]
[[ 0.  5.  0.  0.]
 [ 0.  0.  6.  0.]
 [ 0.  0.  0.  7.]
 [ 0.  0.  0.  0.]]


In [89]:
MySumGrad = grad(MySum)
MySameSumGrad = grad(MySameSum)

x = np.array([2., 4.])
print MySumGrad(x)
print MySameSumGrad(x)

x_full = np.full(13, x.shape)
print x_full
print x_full.shape

print MySum.vjps[0]

[ 2.  2.]
[ 2.  2.]
[2 2 2 2 2 2 2 2 2 2 2 2 2]
(13,)
<function VJP_0_of_MySum at 0x7f966fcf07d0>


In [54]:
@primitive
def SumSq(x):
    return np.sum(x ** 2)

def SumSq_vjp(g, ans, vs, gvs, x):
    print 'g: '
    print g
    print 'ans: '
    print ans
    print 'vs: '
    print vs
    print 'gvs: '
    print gvs
    print 'x: '
    print x
    print 'Returning.'
    return np.full(x.shape, g) * 2 * x

SumSq.defvjp(SumSq_vjp)

def MyFun(x):
    return 3 * SumSq(x)

SumSqGrad = grad(SumSq)
MyFunGrad = grad(MyFun)
MyFunHess = hessian(MyFun)

x = np.array([2., 4.])
print 'Stand alone:'
print SumSqGrad(x)

print '\nIn function:'
print MyFunGrad(x)

print '\nIn Hessian:'
print MyFunHess(x)

Stand alone:
g: 
1.0
ans: 
Autograd ArrayNode with value 20.0 and 1 progenitors(s)
vs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (2,), 'scalartype': <type 'float'>, 'size': 2}
gvs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (), 'scalartype': <type 'float'>, 'size': 1}
x: 
Autograd ArrayNode with value [ 2.  4.] and 1 progenitors(s)
Returning.
[ 4.  8.]

In function:
g: 
3.0
ans: 
Autograd ArrayNode with value 20.0 and 1 progenitors(s)
vs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (2,), 'scalartype': <type 'float'>, 'size': 2}
gvs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (), 'scalartype': <type 'float'>, 'size': 1}
x: 
Autograd ArrayNode with value [ 2.  4.] and 1 progenitors(s)
Returning.
[ 12.  24.]

In Hessian:
g: 
3.0
ans: 
Autograd ArrayNode with value 20.0 and 2 progenitors(s)
vs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (2,), 'scalartype': <type 'float'>, 'size': 2}
gvs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (), 'scalartype': 

In [51]:
from autograd import jacobian

def MyVecFun(x):

    return np.array([ SumSq(x), 5 * SumSq(x) ])

MyVecJac = jacobian(MyVecFun)
MyVecJac(x)

g: 
0.0
ans: 
Autograd ArrayNode with value 20.0 and 1 progenitors(s)
vs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (2,), 'scalartype': <type 'float'>, 'size': 2}
gvs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (), 'scalartype': <type 'float'>, 'size': 1}
x: 
Autograd ArrayNode with value [ 2.  4.] and 1 progenitors(s)
Returning.
g: 
1.0
ans: 
Autograd ArrayNode with value 20.0 and 1 progenitors(s)
vs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (2,), 'scalartype': <type 'float'>, 'size': 2}
gvs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (), 'scalartype': <type 'float'>, 'size': 1}
x: 
Autograd ArrayNode with value [ 2.  4.] and 1 progenitors(s)
Returning.
g: 
5.0
ans: 
Autograd ArrayNode with value 20.0 and 1 progenitors(s)
vs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (2,), 'scalartype': <type 'float'>, 'size': 2}
gvs: 
ArrayVSpace_{'dtype': dtype('float64'), 'shape': (), 'scalartype': <type 'float'>, 'size': 1}
x: 
Autograd ArrayNode with value [

array([[  4.,   8.],
       [ 20.,  40.]])

In [177]:
# Define my own Hessian

@primitive
def Magnitude(x):
    return np.sum(x ** 3)

def MagnitudeRaw(x):
    return np.sum(x ** 3)

@primitive
def MagnitudeGradPrimitive(x):
    return 3 * (x ** 2)

def MagnitudeGradJac(x):
    return 6 * np.diag(x)

def Magnitude_vjp(g, ans, vs, gvs, x):
    return unbroadcast(vs, gvs, g * MagnitudeGradPrimitive(x))

def MagnitudeGrad_vjp(g, ans, vs, gvs, x):
    return np.matmul(MagnitudeGradJac(x), g)


Magnitude.defvjp(Magnitude_vjp)
MagnitudeGradPrimitive.defvjp(MagnitudeGrad_vjp)

MagnitudeGrad = grad(Magnitude)
MagnitudeHess = hessian(Magnitude)

MagnitudeGradRaw = grad(MagnitudeRaw)
MagnitudeHessRaw = hessian(MagnitudeRaw)

x = np.array([2., 3.])
print Magnitude(x)
print MagnitudeGrad(x)
print MagnitudeHess(x)

print MagnitudeRaw(x)
print MagnitudeGradRaw(x)
print MagnitudeHessRaw(x)



35.0
[ 12.  27.]
[[ 12.   0.]
 [  0.  18.]]
35.0
[ 12.  27.]
[[ 12.   0.]
 [  0.  18.]]


In [191]:
# Making Logistic operate on vectors may be a headache.

# For testing
def LogisticRaw(rho):
    if rho <= 0:
        exp_rho = np.exp(rho)
        return exp_rho / (1 + exp_rho)
    else:
        mexp_rho = np.exp(-rho)
        return 1 / (1 + mexp_rho)

@primitive
def Logistic(rho):
    if rho <= 0:
        exp_rho = np.exp(rho)
        return exp_rho / (1 + exp_rho)
    else:
        mexp_rho = np.exp(-rho)
        return 1 / (1 + mexp_rho)

@primitive
def LogisticGradient(logit_rho):
    return logit_rho * (1 - logit_rho)

def LogisticHessian(logit_rho):
    return 1 - 2 * logit_rho

def Logistic_vjp(g, ans, vs, gvs, x):
    return unbroadcast(vs, gvs, g * LogisticGradient(ans))

def LogisticGradient_vjp(g, ans, vs, gvs, x):
    return unbroadcast(vs, gvs, g * LogisticHessian(x))

Logistic.defvjp(Logistic_vjp)
LogisticGradient.defvjp(LogisticGradient_vjp)

LogisticADGrad = grad(Logistic)
LogisticADHessian = hessian(Logistic)

LogisticRawADGrad = grad(LogisticRaw)
LogisticRawADHessian = hessian(LogisticRaw)

x = 3.6
print Logistic(x) - LogisticRaw(x)
print LogisticADGrad(x) - LogisticRawADGrad(x)
print LogisticADHessian(x) - LogisticRawADHessian(x)



0.0
1.00613961607e-16
-9.02056207508e-17


In [226]:

def Log1mInvLogitRaw(u):
    return -np.log1p(np.exp(u))

@primitive
def Log1mInvLogit(exp_u):
    return -np.log1p(exp_u)
  
# @primitive
def Log1mInvLogitDerivative(exp_u):
    return -exp_u / (1 + exp_u)

# @primitive
# def Log1mInvLogitSecondDerivative(logit_u):
#     return -logit_u * (1 - logit_u)

def Log1mInvLogit_vjp(g, ans, vs, gvs, exp_u):
    return unbroadcast(vs, gvs, g * Log1mInvLogitDerivative(exp_u))

# def Log1mInvLogitDerivative_vjp(g, ans, vs, gvs, logit_u):
#     return unbroadcast(vs, gvs, g * Log1mInvLogitSecondDerivative(logit_u))

def Log1mInvLogitOneArg(u):
    exp_u = np.exp(u)
    return Log1mInvLogit(exp_u)

Log1mInvLogit.defvjp(Log1mInvLogit_vjp, argnum=0)
# Log1mInvLogit.defvjp(lambda g, ans, vs, gvs, exp_u, logit_u: unbroadcast(vs, gvs, 0.), argnum=1)
# Log1mInvLogitDerivative.defvjp(Log1mInvLogitDerivative_vjp)
u = 0.3


Log1mInvLogitGrad = grad(Log1mInvLogitOneArg)
Log1mInvLogitHess = hessian(Log1mInvLogitOneArg)

Log1mInvLogitRawGrad = grad(Log1mInvLogitRaw)
Log1mInvLogitRawHess = hessian(Log1mInvLogitRaw)

print Log1mInvLogitGrad(u)
print Log1mInvLogitHess(u)

print Log1mInvLogitRawGrad(u)
print Log1mInvLogitRawHess(u)

-0.775416290764
-1.10540049589
-0.574442516812
-0.244458311691
