In [1]:
import autograd.numpy as np
from autograd.core import primitive
from autograd import grad, jacobian, hessian
from autograd.numpy.numpy_grads import unbroadcast
import scipy.stats

In [2]:
@primitive
def MySum(x):
    """Return twice the sum of all entries of x.

    Wrapped with autograd's ``primitive`` decorator; the matching VJP is
    registered separately through ``MySum.defvjp``.
    """
    total = np.sum(x)
    return 2 * total

def MySameSum(x):
    """Non-primitive twin of MySum: twice the sum of all entries of x."""
    total = np.sum(x)
    return 2 * total

def MySum_vjp(g, ans, vs, gvs, x):
    """VJP for MySum: the incoming gradient g times 2, filled out to x's shape.

    ans, vs and gvs belong to the VJP call signature but are not used here.
    """
    upstream = np.full(x.shape, g)
    local = np.full(x.shape, 2)
    return upstream * local

# Register MySum_vjp as the vector-Jacobian product used when differentiating MySum.
MySum.defvjp(MySum_vjp)


In [None]:
def MatEncode(mat, K):
    """Pack one triangle of a K-by-K matrix into a flat vector.

    The packed layout is the one EncodeInd / MatDecode read back: the slot for
    the pair (k1, k2) with k2 <= k1 is k2 + k1 * (k1 + 1) / 2.  The values are
    taken from the upper triangle, i.e. slot (k1, k2) holds mat[k2, k1].

    Indexing directly with np.triu_indices walks the upper triangle row by row,
    which only coincides with this layout for K <= 2; for larger K the decoder
    would read back permuted entries.  For K <= 2 the result is unchanged.

    Args:
        mat: K-by-K array.  Only a symmetric matrix round-trips through
            MatDecode, because a single triangle is stored.
        K: matrix dimension.

    Returns:
        1-D array with K * (K + 1) / 2 entries.
    """
    # tril_indices enumerates (0,0), (1,0), (1,1), (2,0), ... -- the decoder's order.
    rows, cols = np.tril_indices(K)
    # Swap the roles of row and column to keep reading the upper triangle.
    return mat[cols, rows]

def EncodeInd(k1, k2):
    """Return the slot of matrix entry (k1, k2) in the packed triangular vector.

    The mapping is symmetric: (k1, k2) and (k2, k1) share one slot, so only
    one triangle is addressed.

    Args:
        k1: row index (non-negative integer).
        k2: column index (non-negative integer).

    Returns:
        Integer position usable directly as an array index.
    """
    def LDInd(k1, k2):
        # k1 * (k1 + 1) is always even, so floor division is exact; using '//'
        # keeps the result an integer even where '/' means true division.
        return k2 + k1 * (k1 + 1) // 2

    if k2 <= k1:
        return LDInd(k1, k2)
    else:
        return LDInd(k2, k1)
    
def MatDecode(vec, K):
    """Rebuild a K-by-K matrix from a packed vector.

    Entry (row, col) is read from vec at the position given by
    EncodeInd(row, col), visiting the entries in row-major order.
    """
    entries = [vec[EncodeInd(row, col)]
               for row in range(K)
               for col in range(K)]
    return np.array(entries).reshape(K, K)

# Round-trip check of MatEncode / MatDecode on a small random matrix.
K = 2
mat = np.random.rand(K * K).reshape(K, K)
# The elementwise product of a matrix with its transpose is symmetric.
mat = mat * mat.T
vec = MatEncode(mat, K)
# Prints the reconstruction error; all zeros means the round trip is exact.
print mat - MatDecode(vec, K)

# Author's note: differentiating the encoder works.  The result is not printed.
MatEncodeJac = jacobian(MatEncode)
MatEncodeJac(mat, K)

# Author's note: differentiating the decoder does not work.
# NOTE(review): the failure mode is not recorded here -- confirm before relying on it.
MatDecodeJac = jacobian(MatDecode)
print MatDecodeJac(vec, K)


In [None]:
@primitive
def BinSum(x, y):
    """Two-argument autograd primitive returning y times x squared."""
    x_squared = x ** 2
    return y * x_squared

# It appears that the gradient is always with respect to the argument specified in argnum,
# which defaults to zero (the first argument)

def BinSum_vjp_x(g, ans, vs, gvs, x, y):
    """VJP of BinSum with respect to x: g * 2 * x * y, passed through unbroadcast."""
    local_grad = g * 2 * x * y
    return unbroadcast(vs, gvs, local_grad)

# Debugging hooks: the most recent vs / gvs handed to BinSum_vjp_y are stored
# here so they can be inspected after a gradient call.
global_vs = 0
global_gvs = 0
def BinSum_vjp_y(g, ans, vs, gvs, x, y):
    """VJP of BinSum with respect to y.

    BinSum(x, y) = y * x ** 2, so its partial derivative in y is x ** 2 and the
    VJP is g * x ** 2 (the earlier version carried a spurious factor of 2).

    Side effect: records vs and gvs in the module-level global_vs / global_gvs.
    """
    global global_vs
    global global_gvs
    global_vs = vs
    global_gvs = gvs
    return unbroadcast(vs, gvs, g * x ** 2)

# Register one VJP per positional argument of BinSum.
BinSum.defvjp(BinSum_vjp_x, argnum=0)
BinSum.defvjp(BinSum_vjp_y, argnum=1)

# Gradient functions for the first argument (default) and the second (argnum=1).
BinSumGradX = grad(BinSum)
BinSumGradY = grad(BinSum, argnum=1)
# Show the registered VJPs, then the value and both partial derivatives at (5, 0.1).
print BinSum.vjps
print BinSum(5., 0.1)
print BinSumGradX(5., 0.1)
print BinSumGradY(5., 0.1)


In [None]:
# Looking at the derivative of diag, it appears that the output is supposed
# to have the same dimension as the input, but with g in the appropriate places?
# anp.diag.defvjp(   lambda g, ans, vs, gvs, x, k=0          : anp.diag(g, k))

# How does unbroadcast work?

@primitive
def ElementwiseProd(x, y):
    """Autograd primitive returning the product x * y."""
    product = x * y
    return product

def ElementwiseProd_vjp(g, ans, vs, gvs, x, y):
    """VJP for ElementwiseProd: the incoming gradient g scaled by y.

    An earlier attempt returned unbroadcast(vs, gvs, y), which ignored g and
    was wrong.
    """
    scaled = g * y
    return scaled

# Register the VJP; no argnum is given, so the default argument position is used.
ElementwiseProd.defvjp(ElementwiseProd_vjp)

def AnotherProd(x, z, y):
    """Return z * ElementwiseProd(x, y); note the parameter order is (x, z, y)."""
    inner = ElementwiseProd(x, y)
    return z * inner

AnotherProdJac = jacobian(AnotherProd)

x = np.array([2., 3.])
y = np.array([10., 200.])
z = np.array([100., 1000.])
print AnotherProd(x, y, z)
print AnotherProdJac(x, z, y)
print z * y


In [None]:
x = np.array([5, 1])
y = np.expand_dims(x, 5)
print x.shape
print y.shape
print x ** 2
print np.diag(np.array([5., 6., 7.]), k=1)

In [None]:
# Compare the gradient obtained through the hand-written VJP (MySum) with the
# one autograd derives by tracing the identical body (MySameSum).
MySumGrad = grad(MySum)
MySameSumGrad = grad(MySameSum)

x = np.array([2., 4.])
print MySumGrad(x)
print MySameSumGrad(x)

# NOTE(review): np.full takes (shape, fill_value), so this builds a length-13
# array filled from x.shape rather than an x-shaped array of 13s -- confirm
# which was intended.
x_full = np.full(13, x.shape)
print x_full
print x_full.shape

# Inspect the VJP stored for argument position 0.
print MySum.vjps[0]

In [None]:
@primitive
def SumSq(x):
    """Autograd primitive returning the sum of the squared entries of x."""
    squares = x ** 2
    return np.sum(squares)

def SumSq_vjp(g, ans, vs, gvs, x):
    """VJP for SumSq that also prints every argument it receives.

    The prints expose what autograd passes to a VJP (g, ans, vs, gvs and the
    primitive's own argument x) each time it is invoked.

    Returns:
        g filled out to x's shape, times 2 * x.
    """
    print 'g: '
    print g
    print 'ans: '
    print ans
    print 'vs: '
    print vs
    print 'gvs: '
    print gvs
    print 'x: '
    print x
    print 'Returning.'
    # d/dx sum(x ** 2) = 2 * x, scaled by the incoming gradient.
    return np.full(x.shape, g) * 2 * x

# Register the printing VJP for SumSq.
SumSq.defvjp(SumSq_vjp)

def MyFun(x):
    """Return three times SumSq(x); used to see the VJP called inside a larger function."""
    sum_of_squares = SumSq(x)
    return 3 * sum_of_squares

# Derivative functions for the bare primitive and for the function wrapping it.
SumSqGrad = grad(SumSq)
MyFunGrad = grad(MyFun)
MyFunHess = hessian(MyFun)

x = np.array([2., 4.])
# Each call below triggers SumSq_vjp, so its debug prints appear before the result.
print 'Stand alone:'
print SumSqGrad(x)

print '\nIn function:'
print MyFunGrad(x)

print '\nIn Hessian:'
print MyFunHess(x)

In [None]:
from autograd import jacobian

def MyVecFun(x):
    """Return the length-2 array [SumSq(x), 5 * SumSq(x)].

    SumSq is deliberately evaluated twice, once per component.
    """
    first = SumSq(x)
    second = 5 * SumSq(x)
    return np.array([first, second])

# Jacobian of the vector-valued function, evaluated at the module-level x.
# NOTE(review): x is whatever an earlier cell left bound -- confirm cell order.
MyVecJac = jacobian(MyVecFun)
MyVecJac(x)

In [None]:
# Define my own Hessian

@primitive
def Magnitude(x):
    """Autograd primitive returning the sum of the cubed entries of x."""
    cubes = x ** 3
    return np.sum(cubes)

def MagnitudeRaw(x):
    """Non-primitive twin of Magnitude: the sum of the cubed entries of x."""
    cubes = x ** 3
    return np.sum(cubes)

@primitive
def MagnitudeGradPrimitive(x):
    """Autograd primitive returning 3 * x ** 2, the gradient of sum(x ** 3)."""
    squared = x ** 2
    return 3 * squared

def MagnitudeGradJac(x):
    """Return 6 * diag(x), the Jacobian of x -> 3 * x ** 2."""
    diagonal = np.diag(x)
    return 6 * diagonal

def Magnitude_vjp(g, ans, vs, gvs, x):
    """VJP for Magnitude: g * MagnitudeGradPrimitive(x), passed through unbroadcast.

    The gradient is itself a primitive, so this expression can be
    differentiated again through MagnitudeGradPrimitive's own VJP.
    """
    scaled_grad = g * MagnitudeGradPrimitive(x)
    return unbroadcast(vs, gvs, scaled_grad)

def MagnitudeGrad_vjp(g, ans, vs, gvs, x):
    """VJP for MagnitudeGradPrimitive: the matrix product MagnitudeGradJac(x) times g."""
    jac = MagnitudeGradJac(x)
    return np.matmul(jac, g)


# Register the hand-written first- and second-order derivative rules.
Magnitude.defvjp(Magnitude_vjp)
MagnitudeGradPrimitive.defvjp(MagnitudeGrad_vjp)

# Derivatives that go through the hand-written VJPs ...
MagnitudeGrad = grad(Magnitude)
MagnitudeHess = hessian(Magnitude)

# ... and the same derivatives obtained by letting autograd trace the raw body.
MagnitudeGradRaw = grad(MagnitudeRaw)
MagnitudeHessRaw = hessian(MagnitudeRaw)

# The two groups of three prints are meant to be compared line by line.
x = np.array([2., 3.])
print Magnitude(x)
print MagnitudeGrad(x)
print MagnitudeHess(x)

print MagnitudeRaw(x)
print MagnitudeGradRaw(x)
print MagnitudeHessRaw(x)



In [None]:
# Making Logistic operate on vectors may be a headache.

# For testing
def LogisticRaw(rho):
    """Logistic function of a scalar rho, written without autograd primitives.

    The exponential is always taken of a non-positive number: exp(rho) when
    rho <= 0 and exp(-rho) otherwise.
    """
    if rho <= 0:
        growth = np.exp(rho)
        return growth / (1 + growth)
    decay = np.exp(-rho)
    return 1 / (1 + decay)

@primitive
def Logistic(rho):
    """Logistic function of a scalar rho, declared as an autograd primitive.

    Same computation as LogisticRaw: the exponential is always taken of a
    non-positive number.
    """
    if rho <= 0:
        growth = np.exp(rho)
        return growth / (1 + growth)
    decay = np.exp(-rho)
    return 1 / (1 + decay)

@primitive
def LogisticGradient(logit_rho):
    """Return logit_rho * (1 - logit_rho) as an autograd primitive.

    Logistic_vjp calls this with the logistic function's output.
    """
    complement = 1 - logit_rho
    return logit_rho * complement

def LogisticHessian(logit_rho):
    """Return 1 - 2 * logit_rho, the derivative of p * (1 - p) with respect to p."""
    doubled = 2 * logit_rho
    return 1 - doubled

def Logistic_vjp(g, ans, vs, gvs, x):
    """VJP for Logistic: g * LogisticGradient(ans), passed through unbroadcast.

    The derivative is computed from the already-evaluated output ans.
    """
    scaled_grad = g * LogisticGradient(ans)
    return unbroadcast(vs, gvs, scaled_grad)

def LogisticGradient_vjp(g, ans, vs, gvs, x):
    """VJP for LogisticGradient: g * LogisticHessian(x), passed through unbroadcast."""
    scaled_grad = g * LogisticHessian(x)
    return unbroadcast(vs, gvs, scaled_grad)

# Register the hand-written derivative rules for the logistic primitives.
Logistic.defvjp(Logistic_vjp)
LogisticGradient.defvjp(LogisticGradient_vjp)

# Derivatives through the hand-written VJPs ...
LogisticADGrad = grad(Logistic)
LogisticADHessian = hessian(Logistic)

# ... and through autograd's own tracing of the raw implementation.
LogisticRawADGrad = grad(LogisticRaw)
LogisticRawADHessian = hessian(LogisticRaw)

# Each printed difference should be zero (up to rounding) if the two routes agree.
x = 3.6
print Logistic(x) - LogisticRaw(x)
print LogisticADGrad(x) - LogisticRawADGrad(x)
print LogisticADHessian(x) - LogisticRawADHessian(x)



In [None]:

        
def Log1mInvLogitRaw(u):
    """Return -log(1 + exp(u)), written without autograd primitives."""
    exp_u = np.exp(u)
    return -np.log1p(exp_u)

@primitive
def Log1mInvLogit(exp_u):
    """Autograd primitive returning -log(1 + exp_u); the caller supplies exp(u)."""
    log_term = np.log1p(exp_u)
    return -log_term
  
def Log1mInvLogitDerivative(exp_u):
    """Derivative of -log(1 + exp_u) with respect to exp_u, i.e. -1 / (1 + exp_u).

    A float numerator is used so that an integer exp_u cannot trigger integer
    (floor) division where '/' divides integers that way.
    """
    return -1.0 / (1 + exp_u)

def Log1mInvLogit_vjp(g, ans, vs, gvs, exp_u):
    """VJP for Log1mInvLogit: g * Log1mInvLogitDerivative(exp_u), passed through unbroadcast."""
    scaled_grad = g * Log1mInvLogitDerivative(exp_u)
    return unbroadcast(vs, gvs, scaled_grad)


def Log1mInvLogitOneArg(u):
    """Evaluate the Log1mInvLogit primitive at exp(u), giving a function of u alone."""
    return Log1mInvLogit(np.exp(u))

# Register the hand-written VJP, then choose the evaluation point.
Log1mInvLogit.defvjp(Log1mInvLogit_vjp)
u = 0.3


# Derivatives of the primitive-based version ...
Log1mInvLogitGrad = grad(Log1mInvLogitOneArg)
Log1mInvLogitHess = hessian(Log1mInvLogitOneArg)

# ... and of the raw version traced entirely by autograd.
Log1mInvLogitRawGrad = grad(Log1mInvLogitRaw)
Log1mInvLogitRawHess = hessian(Log1mInvLogitRaw)

# The first pair of prints should match the second pair.
print Log1mInvLogitGrad(u)
print Log1mInvLogitHess(u)

print Log1mInvLogitRawGrad(u)
print Log1mInvLogitRawHess(u)

In [None]:
# Cache with a closure!

class State:
    """Empty attribute container; fields are attached by the code that creates it."""

def GetLogitVarCache():
    """Build a Log1mInvLogit primitive that reads its values from a shared cache.

    Returns:
        (Log1mInvLogit, Set): Set(u) fills the cache with u, exp(u) and
        exp(u) / (1 + exp(u)); Log1mInvLogit(u) then returns -log1p of the
        cached exp(u), ignoring the u it is called with.
    """
    cache = State()
    # Start with NaNs so that using the primitives before Set() is visible.
    for field in ('u', 'exp_u', 'logit_u'):
        setattr(cache, field, float('nan'))

    def Set(u):
        exp_u = np.exp(u)
        cache.u = u
        cache.exp_u = exp_u
        cache.logit_u = exp_u / (1 + exp_u)

    # The primitives below accept "u" only so that autograd has an argument to
    # differentiate with respect to.  They are functions of the cached values alone.
    @primitive
    def Log1mInvLogit(u):
        return -np.log1p(cache.exp_u)

    @primitive
    def Log1mInvLogitDerivative(u):
        return -cache.logit_u

    # Declared primitive because, as written, autograd cannot differentiate it.
    @primitive
    def Log1mInvLogitSecondDerivative(u):
        return -cache.logit_u * (1 - cache.logit_u)

    def Log1mInvLogit_vjp(g, ans, vs, gvs, u):
        first_order = g * Log1mInvLogitDerivative(u)
        return unbroadcast(vs, gvs, first_order)

    def Log1mInvLogitDerivative_vjp(g, ans, vs, gvs, u):
        second_order = g * Log1mInvLogitSecondDerivative(u)
        return unbroadcast(vs, gvs, second_order)

    Log1mInvLogit.defvjp(Log1mInvLogit_vjp)
    Log1mInvLogitDerivative.defvjp(Log1mInvLogitDerivative_vjp)

    return Log1mInvLogit, Set


# Build one cache-backed primitive plus its setter, and prime the cache at u = 0.3.
GetLogitVarCache_Log1mInvLogit, GetLogitVarCache_Set = GetLogitVarCache()
GetLogitVarCache_Set(0.3)

print '------\n'
# With the cache primed at 0.3, the cached primitive should match the raw function there.
print Log1mInvLogitRaw(0.3)
print GetLogitVarCache_Log1mInvLogit(0.3)

# The primitive reads only the cache, so a different argument yields the same
# value until Set is called again.
print GetLogitVarCache_Log1mInvLogit(0.6)

# NOTE: these assignments rebind the Log1mInvLogit*Grad / *Hess names that an
# earlier cell also defines; from here on the non-Raw names refer to the cached version.
Log1mInvLogitRawGrad = grad(Log1mInvLogitRaw)
Log1mInvLogitGrad = grad(GetLogitVarCache_Log1mInvLogit)

Log1mInvLogitRawHess = hessian(Log1mInvLogitRaw)
Log1mInvLogitHess = hessian(GetLogitVarCache_Log1mInvLogit)

# Wrappers that refresh the cache for u before differentiating at u.
def Log1mInvLogitGradSet(u):
    GetLogitVarCache_Set(u)
    return Log1mInvLogitGrad(u)

def Log1mInvLogitHessSet(u):
    GetLogitVarCache_Set(u)
    return Log1mInvLogitHess(u)

# Each pair of prints compares the cached route with the raw autograd route.
print '------\n'
print Log1mInvLogitGradSet(0.5)
print Log1mInvLogitRawGrad(0.5)

print '------\n'
print Log1mInvLogitHessSet(0.4)
print Log1mInvLogitRawHess(0.4)



In [None]:
import timeit

# Number of repetitions per measurement; each print shows the mean seconds per call.
time_num = 3000
# NOTE(review): the two non-"Set" measurements call the cached version without
# refreshing the cache, so they run on whatever the last Set call stored --
# fine for timing, but the values are not those for 0.3.  Confirm cell order.
print 'Gradients:\n'
print timeit.timeit(lambda: Log1mInvLogitGrad(0.3), number=time_num) / time_num
print timeit.timeit(lambda: Log1mInvLogitRawGrad(0.3), number=time_num) / time_num

print 'Hessians:\n'
print timeit.timeit(lambda: Log1mInvLogitHess(0.3), number=time_num) / time_num
print timeit.timeit(lambda: Log1mInvLogitRawHess(0.3), number=time_num) / time_num

# The "with setting" measurements include the cost of refreshing the cache on every call.
print 'Gradients with setting:\n'
print timeit.timeit(lambda: Log1mInvLogitGradSet(0.4), number=time_num) / time_num
print timeit.timeit(lambda: Log1mInvLogitRawGrad(0.4), number=time_num) / time_num

print 'Hessians with setting:\n'
print timeit.timeit(lambda: Log1mInvLogitHessSet(0.4), number=time_num) / time_num
print timeit.timeit(lambda: Log1mInvLogitRawHess(0.4), number=time_num) / time_num

