# Derivative of abundance_i with respect to abundance_i and abundance_k

Here I will be testing my solutions for
$\frac{ \partial \hat{N}_i }{ \partial N_i }$
and
$\frac{ \partial \hat{N}_i }{ \partial N_k }$
(see below)
by calculating the Jacobian using the `theano` package
and comparing those results to my solution.


## Importing packages and setting options

In [110]:
%env OMP_NUM_THREADS=4
%env THEANO_FLAGS='openmp=True'
import sympy
import theano
theano.config.cxx = ""
import theano.tensor as T
import numpy as np
import pandas as pd
from tqdm import tqdm
import math
pd.options.display.max_columns = 10

env: OMP_NUM_THREADS=4
env: THEANO_FLAGS='openmp=True'


## Equations

__Notes:__

- ${}^\text{T}$ represents transpose.
- Elements in __bold__ are matrices
- Multiplication between matrices is always matrix multiplication, not
  element-wise
  

Below are the equations for
(1) abundance for species $i$ at time $t+1$ (${N}_{i,t+1}$),
(2) the partial derivative of species $i$ abundance at time $t+1$ with respect 
to species $i$ abundance at time $t$, and
(3) the partial derivative of species $i$ abundance at time $t+1$ with respect 
to species $k$ abundance at time $t$.

\begin{align}
N_{i,t+1} &= N_{i,t} F_t \\
F_t &=  \exp \left\{
    r_0 - f \mathbf{V}_{i,t}^{\text{T}} \mathbf{C} \mathbf{V}_{i,t} - 
        \alpha_0 \text{e}^{-\mathbf{V}_{i,t}^{\text{T}} \mathbf{V}_{i,t}} 
        \left(
            N_{i,t} + \sum_{j \ne i}^{n}{ N_j \text{e}^{- \mathbf{V}_{j,t}^{\text{T}} \mathbf{D} \mathbf{V}_{j,t} } }
        \right)
    \right\} \\
%
%
    \frac{ \partial N_{i,t+1} }{ \partial N_{i,t} } &= F_t \left( 1 - N_{i,t} \: \alpha_0 \: \text{e}^{ -\mathbf{V}_{i,t}^{\text{T}} \mathbf{V}_{i,t} } \right) \\
%
%
    \frac{ \partial N_{i,t+1} }{ \partial N_{k,t} } &= - F_t \: N_{i,t} \: \alpha_0 \: \text{e}^{ -\mathbf{V}_{i,t}^{\text{T}} \mathbf{V}_{i,t} - \mathbf{V}_{k,t}^{\text{T}} \mathbf{D} \mathbf{V}_{k,t} } \\
\end{align}



## Read CSV of simulated datasets

In [111]:
sims = pd.read_csv("simulated_data.csv")
sims.head()

Unnamed: 0,V1,V2,V3,V4,V5,...,f,a0,eta,r0,d
0,4.94511,2.869199,6.747126,6.142522,5.629532,...,0.06889,0.112113,-0.33115,1.422746,-0.091228
1,0.718846,1.220364,0.815571,0.868633,0.838021,...,0.309021,0.057579,0.094811,1.237047,0.003429
2,3.369285,1.912974,3.131174,0.046303,1.416252,...,0.118318,0.40141,-0.036977,1.746024,0.01216
3,0.373669,0.283873,0.237735,0.053632,0.062281,...,0.497286,0.49973,0.117188,0.669199,0.081612
4,3.562637,1.635016,5.724176,4.953962,1.060083,...,0.042638,0.307171,-0.467453,0.952351,0.051834


---------


# N_i / N_i


## Functions to compare methods

In [112]:
def fitness(i, V, N, D, C, f, a0, r0):
    """Fitness function."""
    q = V.shape[0]
    Vi = V[:,i]
    Vi = Vi.reshape((q, 1))
    Nj_sum = np.sum([N[j] * np.exp(-V[:,j].T @ D @ V[:,j]) for j in range(N.size) if j != i])
    Ni = N[i]
    F = np.exp(
        r0 - f * Vi.T @ C @ Vi - 
        ( a0 * np.exp(-1 * Vi.T @ Vi) * (Ni + Nj_sum))
    )
    return F[0,0]

In [113]:
def automatic(i, V, N, D, C, f, a0, r0):
    """Automatic differentiation using theano pkg"""
    Vi = V[:,i]
    Nj_sum = np.sum([N[j] * np.exp(-V[:,j].T @ D @ V[:,j]) for j in range(N.size) if j != i])
    Ni = T.dscalar('Ni')
    Nhat = Ni * T.exp(
        r0 - f * T.dot(T.dot(Vi.T, C), Vi) - 
        ( a0 * T.exp(-1 * T.dot(Vi.T, Vi)) * (Ni + Nj_sum))
    )
    J = T.grad(Nhat, Ni)
    num_fun = theano.function([Ni], J)
    out_array = num_fun(N[i])
    return out_array

In [114]:
def symbolic(i, V, N, D, C, f, a0, r0):
    """Symbolic differentiation using math"""
    q = V.shape[0]
    Vi = V[:,i]
    Vi = Vi.reshape((q, 1))
    F = fitness(i, V, N, D, C, f, a0, r0)
    dNhat = F * (1 - N[i] * a0 * (np.exp(-1 * Vi.T @ Vi)).item())
    return dNhat

In [115]:
def compare_methods(sim_i, abs = False):
    """Compare answers from symbolic and automatic methods"""
    
    # Fill info from data frame:
    N = sims.loc[sim_i, [x.startswith("N") for x in sims.columns]].values
    V = sims.loc[sim_i, [x.startswith("V") for x in sims.columns]].values
    n, q = (N.size, int(V.size / N.size))
    V = V.reshape((q, n), order = 'F')
    f = sims.loc[sim_i,"f"]
    a0 = sims.loc[sim_i,"a0"]
    r0 = sims.loc[sim_i,"r0"]
    eta = sims.loc[sim_i,"eta"]
    d = sims.loc[sim_i,"d"]
    D = np.zeros((q, q))
    np.fill_diagonal(D, d)
    C = np.zeros((q, q)) + eta
    np.fill_diagonal(C,1.0)
    
    # Create output array:
    diffs = np.empty((n, 4))
    diffs[:,0] = sim_i
    
    # Fill output array:
    for i in range(n):
        auto = automatic(i, V, N, D, C, f, a0, r0)
        sym =   symbolic(i, V, N, D, C, f, a0, r0)
        if abs:
            diff = auto - sym
        else:
            diff = (auto - sym) / sym
        diff = diff.flatten()
        diffs[i, 1] = i
        diffs[i, 2] = diff.min()
        diffs[i, 3] = diff.max()
    
    return diffs

### Example of using `compare_methods`:

In [116]:
diffs = compare_methods(0)
# Worst case examples:
print(diffs[:,2].min())
print(diffs[:,3].max())

0.0
4.659349095435551e-16


## Comparing methods

This takes ~30 seconds.

In [117]:
n_per_rep = 4
diffs = np.empty((int(n_per_rep * 100), 4))

In [118]:
for rep in tqdm(range(100)):
    diffs_r = compare_methods(rep, abs = True)
    diffs[(rep * n_per_rep):((rep+1) * n_per_rep),:] = diffs_r

100%|██████████| 100/100 [00:27<00:00,  3.67it/s]


## The results
They appear to be extremely similar, enough so that I feel comfortable with my symbolic version.

In [119]:
print(diffs[:,2].min())
print(diffs[:,3].max())

-8.881784197001252e-16
6.661338147750939e-16


## Write output to file

To make sure the R version works, too, I'm writing to a CSV file the output from the symbolic version on the 100 datasets.

In [120]:
n = np.sum([x.startswith("N") for x in sims.columns])
q = int(np.sum([x.startswith("V") for x in sims.columns]) / n)
# Output array
results = np.zeros((100, n * q))

for sim_i in range(100):
    
    # Fill info from data frame:
    N = sims.loc[sim_i, [x.startswith("N") for x in sims.columns]].values
    V = sims.loc[sim_i, [x.startswith("V") for x in sims.columns]].values
    V = V.reshape((q, n), order = 'F')
    f = sims.loc[sim_i,"f"]
    a0 = sims.loc[sim_i,"a0"]
    r0 = sims.loc[sim_i,"r0"]
    eta = sims.loc[sim_i,"eta"]
    d = sims.loc[sim_i,"d"]
    D = np.zeros((q, q))
    np.fill_diagonal(D, d)
    C = np.zeros((q, q)) + eta
    np.fill_diagonal(C,1.0)

    # Fill output array:
    for i in range(0, n):
        sym = symbolic(i, V, N, D, C, f, a0, r0)
        results[sim_i, (i*q):((i+1)*q)] = sym.flatten()

# Make sure first and last aren't zeros:
results[[0, 99], :]

array([[0.4016391 , 0.4016391 , 0.4016391 , 0.35741785, 0.35741785,
        0.35741785, 1.51619382, 1.51619382, 1.51619382, 2.73714043,
        2.73714043, 2.73714043],
       [0.48889062, 0.48889062, 0.48889062, 0.28321771, 0.28321771,
        0.28321771, 0.07808013, 0.07808013, 0.07808013, 0.43248949,
        0.43248949, 0.43248949]])

In [121]:
np.savetxt('results/dNi_dNi.csv', results, delimiter=',')

---------


# N_i / N_k


## Functions to compare methods

In [122]:
def automatic(i, k, V, N, D, C, f, a0, r0):
    """Automatic differentiation using theano pkg"""
    Vi = V[:,i]
    Vk = V[:,k]
    Nj = np.sum(np.array([N[j] * np.exp(-V[:,j].T @ D @ V[:,j]) for j in range(N.size) if j != i and j != k]))
    Ni = N[i]
    Nk = T.dscalar('Nk')
    Nhat = Ni * T.exp(
        r0 - f * T.dot(T.dot(Vi.T, C), Vi) - 
        ( a0 * T.exp(-1 * T.dot(Vi.T, Vi)) * 
         (Ni + Nj + Nk * T.exp(-1 * T.dot(T.dot(Vk.T, D), Vk))))
    )
    J = T.grad(Nhat, Nk)
    num_fun = theano.function([Nk], J)
    out_array = num_fun(N[k])
    return out_array

In [123]:
def symbolic(i, k, V, N, D, C, f, a0, r0):
    """Symbolic differentiation using math"""
    q = V.shape[0]
    Vi = V[:,i]
    Vi = Vi.reshape((q, 1))
    Vk = V[:,k]
    Vk = Vk.reshape((q, 1))
    F = fitness(i, V, N, D, C, f, a0, r0)
    dNhat = -F * N[i] * a0 * np.exp(-1 * Vk.T @ D @ Vk - Vi.T @ Vi)
    return dNhat

In [124]:
def compare_methods(sim_i, abs = False):
    """Compare answers from symbolic and automatic methods"""
    
    # Fill info from data frame:
    N = sims.loc[sim_i, [x.startswith("N") for x in sims.columns]].values
    V = sims.loc[sim_i, [x.startswith("V") for x in sims.columns]].values
    n, q = (N.size, int(V.size / N.size))
    V = V.reshape((q, n), order = 'F')
    f = sims.loc[sim_i,"f"]
    a0 = sims.loc[sim_i,"a0"]
    r0 = sims.loc[sim_i,"r0"]
    eta = sims.loc[sim_i,"eta"]
    d = sims.loc[sim_i,"d"]
    D = np.zeros((q, q))
    np.fill_diagonal(D, d)
    C = np.zeros((q, q)) + eta
    np.fill_diagonal(C,1.0)
    
    # Create output array:
    diffs = np.empty((math.factorial(n) // math.factorial(n-2), 4))
    j = 0
    for i in range(0, n):
        for k in [x for x in range(0, n) if x != i]:
            num = automatic(i, k, V, N, D, C, f, a0, r0)
            sym = symbolic(i, k, V, N, D, C, f, a0, r0)
            num = num.flatten()
            sym = sym.flatten()
            if abs:
                diff = num - sym
            else:
                diff = (num - sym) / sym
                if np.any(sym == 0):
                    for l in [x for x in range(0, diff.size) if sym[x] == 0]:
                        diff[l] = num[l];
            diffs[j, 0] = i
            diffs[j, 1] = k
            diffs[j, 2] = diff.min()
            diffs[j, 3] = diff.max()
            j += 1
    return diffs

### Example of using `compare_methods`:

In [125]:
diffs = compare_methods(0)
# Worst case examples:
print(diffs[:,2].min())
print(diffs[:,3].max())

-5.750445668955183e-15
7.16611041363844e-15


## Comparing methods

This takes ~2 minutes.

In [126]:
n_per_rep = math.factorial(4) // math.factorial(4-2)
diffs = np.empty((int(n_per_rep * 100), 4))

In [127]:
for rep in tqdm(range(100)):
    diffs_r = compare_methods(rep, abs = True)
    diffs[(rep * n_per_rep):((rep+1) * n_per_rep),:] = diffs_r

100%|██████████| 100/100 [01:28<00:00,  1.13it/s]


## The results
They appear to be extremely similar, enough so that I feel comfortable with my symbolic version.

In [128]:
print(diffs[:,2].min())
print(diffs[:,3].max())

-2.220446049250313e-16
2.220446049250313e-16


## Write output to file

To make sure the R version works, too, I'm writing to a CSV file the output from the symbolic version on the 100 datasets.

In [129]:
n = np.sum([x.startswith("N") for x in sims.columns])
q = int(np.sum([x.startswith("V") for x in sims.columns]) / n)
s2 = 0.01
n_perms = math.factorial(n) // math.factorial(n-2)
# Output array
results = np.zeros((100, n_perms * q))

for sim_i in range(100):
    
    # Fill info from data frame:
    N = sims.loc[sim_i, [x.startswith("N") for x in sims.columns]].values
    V = sims.loc[sim_i, [x.startswith("V") for x in sims.columns]].values
    V = V.reshape((q, n), order = 'F')
    f = sims.loc[sim_i,"f"]
    a0 = sims.loc[sim_i,"a0"]
    r0 = sims.loc[sim_i,"r0"]
    eta = sims.loc[sim_i,"eta"]
    d = sims.loc[sim_i,"d"]
    D = np.zeros((q, q))
    np.fill_diagonal(D, d)
    C = np.zeros((q, q)) + eta
    np.fill_diagonal(C,1.0)
    
    # Fill output array:
    j = 0
    for i in range(0, n):
        for k in [x for x in range(0, n) if x != i]:
            sym = symbolic(i, k, V, N, D, C, f, a0, s2)
            results[sim_i, (j*q):((j+1)*q)] = sym.flatten()
            j += 1


# Make sure first and last aren't zeros:
results[[0, 99], :]

array([[-6.50940801e-33, -6.50940801e-33, -6.50940801e-33,
        -3.67566611e-34, -3.67566611e-34, -3.67566611e-34,
        -3.94551730e-35, -3.94551730e-35, -3.94551730e-35,
        -1.51293987e-30, -1.51293987e-30, -1.51293987e-30,
        -5.20735950e-32, -5.20735950e-32, -5.20735950e-32,
        -5.58966086e-33, -5.58966086e-33, -5.58966086e-33,
        -8.62893263e-16, -8.62893263e-16, -8.62893263e-16,
        -5.25966898e-16, -5.25966898e-16, -5.25966898e-16,
        -3.18801878e-18, -3.18801878e-18, -3.18801878e-18,
        -3.66269058e-05, -3.66269058e-05, -3.66269058e-05,
        -2.23255191e-05, -2.23255191e-05, -2.23255191e-05,
        -1.26065464e-06, -1.26065464e-06, -1.26065464e-06],
       [-1.98280598e-02, -1.98280598e-02, -1.98280598e-02,
        -8.99846379e-02, -8.99846379e-02, -8.99846379e-02,
        -3.31065119e-02, -3.31065119e-02, -3.31065119e-02,
        -1.83206958e-09, -1.83206958e-09, -1.83206958e-09,
        -1.94601305e-09, -1.94601305e-09, -1.94601305e-

In [130]:
np.savetxt('results/dNi_dNk.csv', results, delimiter=',')