# Derivative of abundance_i with respect to traits_i and traits_k

Here I will be testing my solutions for
$\frac{ \partial \hat{N}_i }{ \partial \mathbf{V}_{i,t} }$
and
$\frac{ \partial \hat{N}_i }{ \partial \mathbf{V}_{k,t} }$
(see below)
by calculating the Jacobian using the `theano` package
and comparing those results to my solution.


## Importing packages and setting options

In [1]:
%env OMP_NUM_THREADS=4
%env THEANO_FLAGS='openmp=True'
import sympy
import theano
theano.config.cxx = ""
import theano.tensor as T
import numpy as np
import pandas as pd
from tqdm import tqdm
import math
pd.options.display.max_columns = 10

env: OMP_NUM_THREADS=4
env: THEANO_FLAGS='openmp=True'


## Equations

__Notes:__

- ${}^\text{T}$ represents transpose.
- Elements in __bold__ are matrices
- Multiplication between matrices is always matrix multiplication, not
  element-wise
  

Below are the equations for
(1) abundance for species $i$ at time $t+1$ (${N}_{i,t+1}$),
(2) the partial derivative of species $i$ abundance at time $t+1$ with respect 
to species $i$ traits at time $t$ ($\mathbf{V}_{i,t}$), and
(3) the partial derivative of species $i$ abundance at time $t+1$ with respect 
to species $k$ traits at time $t$ ($\mathbf{V}_{k,t}$, where $k \ne i$).

\begin{align}
N_{i,t+1} &= N_{i,t} F_{i,t} \\
F_{i,t} &=  \exp \left\{
    r_0 - f \mathbf{V}_{i,t}^{\text{T}} \mathbf{C} \mathbf{V}_{i,t} - 
        \alpha_0 \text{e}^{-\mathbf{V}_{i,t}^{\text{T}} \mathbf{V}_{i,t}} 
        \Omega_{i,t}
    \right\} \\
\Omega_{i,t} &= N_{i,t} + \sum_{j \ne i}^{n}{ N_{j,t} \, \text{e}^{- \mathbf{V}_{j,t}^{\text{T}} \mathbf{D} \mathbf{V}_{j,t} } } \\
%
%
    \frac{ \partial N_{i,t+1} }{ \partial \mathbf{V}_{i,t} } &= 2 \, F_{i,t} \, N_{i,t} \left(
        \alpha_0 \, \Omega_{i,t} \, \text{e}^{ -\mathbf{V}_{i,t}^{\text{T}} \mathbf{V}_{i,t} } \, \mathbf{V}_{i,t}^{\text{T}}
        - f \, \mathbf{V}_{i,t}^{\text{T}} \, \mathbf{C}
    \right) \\
%
%
    \frac{ \partial N_{i,t+1} }{ \partial \mathbf{V}_{k,t} } &= 2 \, F_{i,t} \, N_{i,t} \, N_{k,t} \,  \alpha_0 \: \text{e}^{ -\mathbf{V}_{i,t}^{\text{T}} \mathbf{V}_{i,t} - \mathbf{V}_{k,t}^{\text{T}} \mathbf{D} \mathbf{V}_{k,t} } \mathbf{V}_{k,t}^{\text{T}} \mathbf{D} \\
\end{align}


## Read CSV of simulated datasets

In [2]:
# Load the simulated datasets (one dataset per row). Later cells pull the
# columns whose names start with "N" and "V", plus "f", "a0", "eta", "r0", "d".
sims = pd.read_csv("simulated_data.csv")
# Preview the first rows.
sims.head()

Unnamed: 0,V1,V2,V3,V4,V5,...,f,a0,eta,r0,d
0,4.94511,2.869199,6.747126,6.142522,5.629532,...,0.06889,0.112113,-0.33115,1.422746,-0.091228
1,0.718846,1.220364,0.815571,0.868633,0.838021,...,0.309021,0.057579,0.094811,1.237047,0.003429
2,3.369285,1.912974,3.131174,0.046303,1.416252,...,0.118318,0.40141,-0.036977,1.746024,0.01216
3,0.373669,0.283873,0.237735,0.053632,0.062281,...,0.497286,0.49973,0.117188,0.669199,0.081612
4,3.562637,1.635016,5.724176,4.953962,1.060083,...,0.042638,0.307171,-0.467453,0.952351,0.051834


---------


# N_i / V_i


## Functions to compare methods

In [7]:
def fitness(i, V, N, D, C, f, a0, r0):
    """Return the scalar fitness F of species `i`.

    Evaluates exp(r0 - f * Vi' C Vi - a0 * exp(-Vi' Vi) * Omega), where Vi is
    column `i` of `V` and
    Omega = N[i] + sum over j != i of N[j] * exp(-Vj' D Vj).
    """
    n_traits = V.shape[0]
    # Column vector so the matrix products below come out as 1 x 1 arrays.
    v_focal = V[:, i].reshape((n_traits, 1))
    # Abundance of every other species, scaled by that species' own traits.
    others = np.sum([
        N[j] * np.exp(-V[:, j].T @ D @ V[:, j])
        for j in range(N.size)
        if j != i
    ])
    trait_cost = f * v_focal.T @ C @ v_focal
    competition = a0 * np.exp(-1 * v_focal.T @ v_focal) * (N[i] + others)
    log_fitness = r0 - trait_cost - competition
    # Unwrap the 1 x 1 result into a scalar.
    return np.exp(log_fitness)[0, 0]

In [5]:
def automatic(i, V, N, D, C, f, a0, r0):
    """Gradient of Nhat_i with respect to V_i, via theano's autodiff.

    Omega only involves species j != i, so it is evaluated numerically with
    numpy and enters the graph as a constant; the traits of species `i` are
    the only symbolic input. Returns the gradient evaluated at V[:, i].
    """
    competitors = [
        N[j] * np.exp(-V[:,j].T @ D @ V[:,j])
        for j in range(N.size)
        if j != i
    ]
    Omega = N[i] + np.sum(competitors)
    Vi = T.dvector('Vi')
    trait_cost = f * T.dot(T.dot(Vi.T, C), Vi)
    competition = a0 * T.exp(-1 * T.dot(Vi.T, Vi)) * Omega
    Nhat = N[i] * T.exp(r0 - trait_cost - (competition))
    # Compile the symbolic gradient into a callable, then evaluate it.
    grad_fn = theano.function([Vi], T.grad(Nhat, Vi))
    return grad_fn(V[:,i])

In [6]:
def symbolic(i, V, N, D, C, f, a0, r0):
    """Hand-derived gradient of Nhat_i with respect to V_i.

    Implements 2 * F * N[i] * (a0 * Omega * exp(-Vi' Vi) * Vi' - f * Vi' C)
    and returns it as a 1 x q row array, where q = V.shape[0].
    """
    n_traits = V.shape[0]
    vi_col = V[:, i].reshape((n_traits, 1))
    others = [
        N[j] * np.exp(-V[:,j].T @ D @ V[:,j])
        for j in range(N.size)
        if j != i
    ]
    Omega = N[i] + np.sum(others)
    F = fitness(i, V, N, D, C, f, a0, r0)
    # exp(-Vi' Vi) is a 1 x 1 array; take the scalar so it scales the row vector.
    self_decay = (np.exp(-1 * vi_col.T @ vi_col)).item()
    competition_term = a0 * Omega * self_decay * vi_col.T
    cost_term = f * vi_col.T @ C
    return 2 * F * N[i] * (competition_term - cost_term)

In [7]:
def compare_methods(sim_i, abs = False):
    """Compare answers from symbolic and automatic methods for one dataset.

    Parameters
    ----------
    sim_i
        Row label of the global `sims` data frame to evaluate.
    abs : bool
        If True, report absolute differences (automatic - symbolic).
        Otherwise report relative differences (automatic - symbolic) / symbolic;
        elements whose symbolic value is exactly 0 fall back to the absolute
        difference so the result never contains inf/nan from a zero division.
        (The name shadows the builtin `abs`; it is kept because callers pass
        it by keyword.)

    Returns
    -------
    numpy.ndarray of shape (n, 4), one row per species, with columns
    (sim_i, species index, minimum difference, maximum difference).
    """

    # Fill info from data frame:
    is_N = [x.startswith("N") for x in sims.columns]
    is_V = [x.startswith("V") for x in sims.columns]
    N = sims.loc[sim_i, is_N].values
    V = sims.loc[sim_i, is_V].values
    n, q = (N.size, int(V.size / N.size))
    # Column-major reshape: column i of V holds the q traits of species i.
    V = V.reshape((q, n), order = 'F')
    f = sims.loc[sim_i,"f"]
    a0 = sims.loc[sim_i,"a0"]
    r0 = sims.loc[sim_i,"r0"]
    eta = sims.loc[sim_i,"eta"]
    d = sims.loc[sim_i,"d"]
    # D is diagonal with d on the diagonal.
    D = np.zeros((q, q))
    np.fill_diagonal(D, d)
    # C has ones on the diagonal and eta everywhere else.
    C = np.zeros((q, q)) + eta
    np.fill_diagonal(C,1.0)

    # Create output array:
    diffs = np.empty((n, 4))
    diffs[:,0] = sim_i

    # Fill output array:
    for i in range(n):
        # Flatten both so the 1 x q symbolic row lines up with the length-q
        # automatic vector element by element.
        auto = automatic(i, V, N, D, C, f, a0, r0).flatten()
        sym = symbolic(i, V, N, D, C, f, a0, r0).flatten()
        diff = auto - sym
        if not abs:
            # Divide only where the reference value is nonzero.
            nonzero = sym != 0
            diff[nonzero] /= sym[nonzero]
        diffs[i, 1] = i
        diffs[i, 2] = diff.min()
        diffs[i, 3] = diff.max()

    return diffs

### Example of using `compare_methods`:

In [8]:
# Relative differences (abs defaults to False) for the dataset in row 0.
diffs = compare_methods(0)
# Worst case examples:
# column 2 holds each species' minimum difference, column 3 its maximum.
print(diffs[:,2].min())
print(diffs[:,3].max())

-3.6762529600363004e-16
4.501697600905659e-16


## Comparing methods

This takes ~2 minutes.

In [9]:
# Number of rows each compare_methods call returns (one per species); this
# must equal the number of "N" columns in sims.
n_per_rep = 4
# Preallocate room for 100 datasets, with the same 4 columns compare_methods returns.
diffs = np.empty((int(n_per_rep * 100), 4))

In [10]:
# Absolute differences for datasets 0..99, stored in consecutive blocks of
# n_per_rep rows.
for rep in tqdm(range(100)):
    diffs_r = compare_methods(rep, abs = True)
    diffs[(rep * n_per_rep):((rep+1) * n_per_rep),:] = diffs_r

100%|██████████| 100/100 [01:44<00:00,  1.05s/it]


## The results
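The absolute differences between the automatic and symbolic derivatives are on the order of 1e-14 (floating-point round-off), so the two methods agree closely enough that I feel comfortable with my symbolic version.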

In [11]:
# Most negative and most positive difference across all datasets and species.
print(diffs[:,2].min())
print(diffs[:,3].max())

-1.0658141036401503e-14
1.4210854715202004e-14


## Write output to file

To make sure the R version works, too, I'm writing to a CSV file the output from the symbolic version on the 100 datasets.

In [13]:
# The column masks are identical for every row, so build them once.
is_N_col = [x.startswith("N") for x in sims.columns]
is_V_col = [x.startswith("V") for x in sims.columns]
n = np.sum(is_N_col)
q = int(np.sum(is_V_col) / n)
# Output array: one row per dataset, q derivative values per species.
results = np.zeros((100, n * q))

for sim_i in range(100):

    # Pull this dataset's abundances, traits and parameters:
    N = sims.loc[sim_i, is_N_col].values
    V = sims.loc[sim_i, is_V_col].values
    V = V.reshape((q, n), order = 'F')
    f, a0, r0, eta, d = (sims.loc[sim_i, name] for name in ("f", "a0", "r0", "eta", "d"))
    # D: d on the diagonal, zeros elsewhere.
    D = np.diag(np.full(q, d, dtype = float))
    # C: ones on the diagonal, eta elsewhere.
    C = np.full((q, q), eta, dtype = float)
    np.fill_diagonal(C, 1.0)

    # Species i fills columns i*q .. (i+1)*q - 1 of this dataset's row:
    for i in range(0, n):
        sym = symbolic(i, V, N, D, C, f, a0, r0)
        results[sim_i, (i*q):((i+1)*q)] = sym.flatten()

# Make sure first and last aren't zeros:
results[[0, 99], :]

array([[-0.69957108,  0.39839891, -1.65267507, -0.90893944, -0.73986957,
         0.51124581, -3.62398247, -1.3879098 , -5.92892852, -0.52574788,
        -3.81052829, -2.66464366],
       [29.94232632, 11.87861809,  9.37618734, -2.40140465, -1.85601335,
         0.52964214, 38.08711769, 18.07846656, 13.03858434,  0.51854947,
        -1.37822159, -0.76105753]])

In [14]:
# Save the symbolic dN_i/dV_i values (one row per dataset) as comma-separated text.
# NOTE(review): this presumably requires the 'results' directory to exist already — confirm.
np.savetxt('results/dNi_dVi.csv', results, delimiter=',')

---------


# N_i / V_k


## Functions to compare methods

In [3]:
def automatic(i, k, V, N, D, C, f, a0, r0):
    """Gradient of Nhat_i with respect to V_k, via theano's autodiff.

    The traits of species `k` are the only symbolic input. The traits of
    species `i` and the contribution of every species other than `i` and `k`
    are evaluated with numpy and enter the graph as constants. Returns the
    gradient evaluated at V[:, k].
    """
    Ni, Nk = N[i], N[k]
    Vi = V[:,i]
    # Species that are neither i nor k contribute a constant to Omega.
    bystanders = np.array([
        N[j] * np.exp(-V[:,j].T @ D @ V[:,j])
        for j in range(N.size)
        if j not in (i, k)
    ])
    Nj = np.sum(bystanders)
    Vk = T.dvector('Vk')
    k_effect = Nk * T.exp(-1 * T.dot(T.dot(Vk.T, D), Vk))
    trait_cost = f * T.dot(T.dot(Vi.T, C), Vi)
    competition = a0 * T.exp(-1 * T.dot(Vi.T, Vi)) * (Ni + Nj + k_effect)
    Nhat = Ni * T.exp(r0 - trait_cost - (competition))
    # Compile the symbolic gradient into a callable, then evaluate it.
    grad_fn = theano.function([Vk], T.grad(Nhat, Vk))
    return grad_fn(V[:,k])

In [4]:
def symbolic(i, k, V, N, D, C, f, a0, r0):
    """Hand-derived gradient of Nhat_i with respect to V_k.

    Implements 2 * F * N[i] * N[k] * a0 * exp(-Vk' D Vk - Vi' Vi) * Vk' D
    and returns it as a 1 x q row array, where q = V.shape[0].
    """
    n_traits = V.shape[0]
    vi_col = V[:, i].reshape((n_traits, 1))
    vk_col = V[:, k].reshape((n_traits, 1))
    F = fitness(i, V, N, D, C, f, a0, r0)
    # Both quadratic forms are 1 x 1 arrays; combine them, then take the scalar.
    exponent = -1 * vk_col.T @ D @ vk_col - vi_col.T @ vi_col
    scale = 2 * F * N[i] * N[k] * a0 * (np.exp(exponent)).item()
    return scale * vk_col.T @ D

In [8]:
def compare_methods(sim_i, abs = False):
    """Compare answers from symbolic and automatic methods for one dataset.

    Every ordered pair (i, k) with i != k is evaluated.

    Parameters
    ----------
    sim_i
        Row label of the global `sims` data frame to evaluate.
    abs : bool
        If True, report absolute differences (automatic - symbolic).
        Otherwise report relative differences (automatic - symbolic) / symbolic;
        elements whose symbolic value is exactly 0 fall back to the automatic
        value itself (i.e. the absolute difference). (The name shadows the
        builtin `abs`; it is kept because callers pass it by keyword.)

    Returns
    -------
    numpy.ndarray of shape (n * (n - 1), 4), one row per ordered pair, with
    columns (i, k, minimum difference, maximum difference). `sim_i` itself is
    not stored in the output.
    """

    # Fill info from data frame:
    is_N = [x.startswith("N") for x in sims.columns]
    is_V = [x.startswith("V") for x in sims.columns]
    N = sims.loc[sim_i, is_N].values
    V = sims.loc[sim_i, is_V].values
    n, q = (N.size, int(V.size / N.size))
    # Column-major reshape: column i of V holds the q traits of species i.
    V = V.reshape((q, n), order = 'F')
    f = sims.loc[sim_i,"f"]
    a0 = sims.loc[sim_i,"a0"]
    r0 = sims.loc[sim_i,"r0"]
    eta = sims.loc[sim_i,"eta"]
    d = sims.loc[sim_i,"d"]
    # D is diagonal with d on the diagonal.
    D = np.zeros((q, q))
    np.fill_diagonal(D, d)
    # C has ones on the diagonal and eta everywhere else.
    C = np.zeros((q, q)) + eta
    np.fill_diagonal(C,1.0)

    # Create output array. n * (n - 1) equals n! / (n - 2)! but is also
    # defined (as 0) when there are fewer than two species.
    diffs = np.empty((n * (n - 1), 4))
    j = 0
    for i in range(n):
        for k in range(n):
            if k == i:
                continue
            num = automatic(i, k, V, N, D, C, f, a0, r0).flatten()
            sym = symbolic(i, k, V, N, D, C, f, a0, r0).flatten()
            diff = num - sym
            if not abs:
                # Divide only where the reference value is nonzero; where it
                # is zero, num - sym already equals num.
                nonzero = sym != 0
                diff[nonzero] /= sym[nonzero]
            diffs[j, 0] = i
            diffs[j, 1] = k
            diffs[j, 2] = diff.min()
            diffs[j, 3] = diff.max()
            j += 1
    return diffs

### Example of using `compare_methods`:

In [10]:
# Relative differences (abs defaults to False) for the dataset in row 0.
diffs = compare_methods(0)
# Worst case examples:
# column 2 holds each (i, k) pair's minimum difference, column 3 its maximum.
print(diffs[:,2].min())
print(diffs[:,3].max())

-5.51943421868041e-15
7.493273996259699e-15


## Comparing methods

This takes ~4 minutes.

In [11]:
# Rows returned by each compare_methods call: the 4! / (4-2)! = 12 ordered
# (i, k) pairs of 4 species. The 4 must match the number of "N" columns in sims.
n_per_rep = math.factorial(4) // math.factorial(4-2)
# Preallocate room for 100 datasets, with the same 4 columns compare_methods returns.
diffs = np.empty((int(n_per_rep * 100), 4))

In [12]:
# Absolute differences for datasets 0..99, stored in consecutive blocks of
# n_per_rep rows.
for rep in tqdm(range(100)):
    diffs_r = compare_methods(rep, abs = True)
    diffs[(rep * n_per_rep):((rep+1) * n_per_rep),:] = diffs_r

100%|██████████| 100/100 [03:41<00:00,  2.22s/it]


## The results
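The absolute differences between the automatic and symbolic derivatives are on the order of 1e-15 (floating-point round-off), so the two methods agree closely enough that I feel comfortable with my symbolic version.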

In [13]:
# Most negative and most positive difference across all datasets and (i, k) pairs.
print(diffs[:,2].min())
print(diffs[:,3].max())

-1.3322676295501878e-15
6.661338147750939e-16


## Write output to file

To make sure the R version works, too, I'm writing to a CSV file the output from the symbolic version on the 100 datasets.

In [18]:
# The column masks are identical for every row, so build them once.
is_N_col = [x.startswith("N") for x in sims.columns]
is_V_col = [x.startswith("V") for x in sims.columns]
n = np.sum(is_N_col)
q = int(np.sum(is_V_col) / n)
s2 = 0.01
# Number of ordered (i, k) pairs with i != k.
n_perms = math.factorial(n) // math.factorial(n-2)
# Output array: one row per dataset, q derivative values per (i, k) pair.
results = np.zeros((100, n_perms * q))

for sim_i in range(100):

    # Pull this dataset's abundances, traits and parameters:
    N = sims.loc[sim_i, is_N_col].values
    V = sims.loc[sim_i, is_V_col].values
    V = V.reshape((q, n), order = 'F')
    f, a0, r0, eta, d = (sims.loc[sim_i, name] for name in ("f", "a0", "r0", "eta", "d"))
    # D: d on the diagonal, zeros elsewhere.
    D = np.diag(np.full(q, d, dtype = float))
    # C: ones on the diagonal, eta elsewhere.
    C = np.full((q, q), eta, dtype = float)
    np.fill_diagonal(C, 1.0)

    # Pair number j fills columns j*q .. (j+1)*q - 1 of this dataset's row:
    j = 0
    for i in range(0, n):
        others = [x for x in range(0, n) if x != i]
        for k in others:
            sym = symbolic(i, k, V, N, D, C, f, a0, r0)
            results[sim_i, (j*q):((j+1)*q)] = sym.flatten()
            j += 1


# Make sure first and last aren't zeros:
results[[0, 99], :]

array([[-1.50648644e-31, -1.38067291e-31, -4.49654073e-32,
        -1.42169682e-32, -1.20020224e-32, -1.65001366e-32,
        -4.36978186e-34, -6.30604858e-34, -5.63058843e-34,
        -4.02567963e-29, -2.33573685e-29, -5.49265163e-29,
        -2.01413463e-30, -1.70034066e-30, -2.33759380e-30,
        -6.19072147e-32, -8.93385335e-32, -7.97692099e-32,
        -2.29601448e-14, -1.33216900e-14, -3.13269033e-14,
        -1.21725662e-14, -1.11559799e-14, -3.63325141e-15,
        -3.53082893e-17, -5.09535246e-17, -4.54957368e-17,
        -9.74580631e-04, -5.65460766e-04, -1.32972128e-03,
        -5.16684339e-04, -4.73533684e-04, -1.54219256e-04,
        -4.87603779e-05, -4.11637095e-05, -5.65910317e-05],
       [ 2.82386633e-01,  2.42173607e-01,  6.62734346e-02,
         1.30066489e+00,  5.83867865e-01,  4.03317334e-01,
         8.27726858e-03,  1.61209441e-01,  1.11448960e-01,
         2.54017695e-08,  8.53276805e-09,  6.19584435e-09,
         2.81282551e-08,  1.26267606e-08,  8.72216429e-

In [19]:
# Save the symbolic dN_i/dV_k values (one row per dataset) as comma-separated text.
# NOTE(review): this presumably requires the 'results' directory to exist already — confirm.
np.savetxt('results/dNi_dVk.csv', results, delimiter=',')