In [1]:
###############################################
## Numpy/PyTorch implementation of Poisson Inverse Gaussian Distribution
## See: https://github.com/cran/gamlss.dist
## See (count distributions - page197): http://www.gamlss.com/wp-content/uploads/2013/01/gamlss-manual.pdf 
##
## Author: Chris Meaney
## Date: August 2021
###############################################

In [2]:
## Dependency modules
import numpy as np
import pandas as pd
import torch
from scipy.special import gammaln
from sinfo import sinfo

In [3]:
##########################################################
## Use pandas to import data, and store as DataFrame
## Data are 1) response/target variable (number of fish = count random variable), 2) lake size (single continous feature/predictor)
##########################################################
dat = pd.read_csv('C://Users//ChristopherMeaney//Desktop//PyTorch_Stuff//pytorch_count_dists//species.csv', encoding='latin1')
dat.head(n=15)

Unnamed: 0,fish,lake,x,scale_x
0,10,5,1.609438,-1.53343
1,37,41,3.713572,-0.901903
2,60,171,5.141664,-0.473281
3,113,25719,10.154985,1.031399
4,99,59596,10.995344,1.283621
5,13,1,0.0,-2.016481
6,30,44,3.78419,-0.880708
7,114,58016,10.968474,1.275556
8,112,19477,9.87699,0.947962
9,17,10,2.302585,-1.325392


In [4]:
## Describe the data
dat.fish.describe()

count     70.000000
mean      41.742857
std       47.849609
min        5.000000
25%       14.000000
50%       21.500000
75%       47.500000
max      245.000000
Name: fish, dtype: float64

In [5]:
mu_ = dat.fish.mean()
mu_

41.74285714285714

In [6]:
sigma_ = np.sqrt(dat.fish.var())
sigma_

47.84960912241293

In [7]:
###################################################
## Numpy implementation of Poisson Inverse Gaussian Loss/Density Function
## Function is basically a Numpy implementation of Rigby et al gamlss.dist R Code (which calls tofyPIG2.c)
## https://github.com/cran/gamlss.dist/tree/master/src/tofyPIG2.c
###################################################
def d_PIG_np(x, mu=1, sigma=1, log=True): 
    ## Determine length of data vector and parameters 
    ly = np.max(np.array([len(x), len(mu), len(sigma)]))  
    #x = np.repeat(a=x, repeats=ly)      
    nsigma = np.repeat(a=sigma, repeats=ly)
    nmu = np.repeat(a=mu, repeats=ly)
    ## Initial vectors to store computed PIG density values
    ny = int(len(x))
    maxyp1 = np.max(x) + 1
    tofY = np.zeros(shape=(maxyp1))
    sumlty = np.zeros(shape=(ly))
    ## Big for loop to compute PIG density (or log-density)
    ## This is directly from Rigby et al: tofyPIG2.c code.
    for i in range(1, ny+1):
        iy = x[i-1] + 1
        tofY[0] = nmu[i-1] * ((1 + 2*nsigma[i-1]*nmu[i-1])**(-0.5))
        sumT = 0 
        ## Start inner loop to compute rest of PIG density
        if (x[i-1]==0):
            sumT = 0
        else:
            for j in range(1, iy):
                tofY[j] = ((nsigma[i-1] * ((2*(j)-1)/nmu[i-1])) + (1/tofY[j-1])) * ((tofY[0])**2)
                sumT = sumT + np.log(tofY[j-1])
        sumlty[i-1] = sumT
    ## Add the kernel of the PIG density back to other constant component
    logfy = -gammaln(x+1) + (1 - np.sqrt(1 + 2*sigma*mu))/sigma + sumlty
    ## log={T,F} flag: T=return log-density; F=return density
    if(log==False):
        fy = np.exp(logfy)
    else:
        fy = logfy
    ## Return log density function to user
    return fy

d_PIG_np(x=np.arange(10), mu=np.array([2]), sigma=np.array([2]))

array([-1.        , -1.40546511, -1.99325177, -2.54181772, -3.0204051 ,
       -3.43882735, -3.81108203, -4.14828042, -4.45842434, -4.74722951])

In [16]:
###################################################
## PyTorch implementation of Poisson Inverse Gaussian Loss/Density Function
## Function is basically a Numpy implementation of Rigby et al gamlss.dist R Code (which calls tofyPIG2.c)
## https://github.com/cran/gamlss.dist/tree/master/src/tofyPIG2.c
###################################################
def d_PIG_th(x, mu, sigma): 
    ## Determine length of data vector and parameters 
    ly = int(torch.max(torch.Tensor([len(x), len(mu), len(sigma)])).item())
    #x = np.repeat(a=x, repeats=ly)      
    nsigma = sigma.repeat(ly)
    nmu = mu.repeat(ly)
    ## Initial vectors to store computed PIG density values
    ny = int(len(x))
    maxyp1 = torch.max(x) + 1
    tofY = torch.zeros(maxyp1)
    sumlty = torch.zeros(ly)
    ## Big for loop to compute PIG density (or log-density)
    ## This is directly from Rigby et al: tofyPIG2.c code.
    for i in torch.arange(1, ny+1):
        iy = x[i.item()-1] + 1
        tofY[0] = nmu[i.item()-1] * ((1 + 2*nsigma[i.item()-1]*nmu[i.item()-1])**(-0.5))
        sumT = torch.Tensor([0]) 
        ## Start inner loop to compute rest of PIG density
        if (x[i.item()-1]==0):
            sumT = torch.Tensor([0])
        else:
            for j in torch.arange(1, iy):
                tofY[j.item()] = ((nsigma[i.item()-1] * ((2*(j.item())-1)/nmu[i.item()-1])) + (1/tofY[j.item()-1])) * ((tofY[0])**2)
                sumT = sumT + np.log(tofY[j.item()-1])
        sumlty[i.item()-1] = sumT
    ## Add the kernel of the PIG density back to other constant component
    logfy = -torch.lgamma(x+1) + (1 - torch.sqrt(1 + 2*sigma*mu))/sigma + sumlty
    ## Return log density function to user
    return logfy

d_PIG_th(x=torch.arange(10), mu=torch.Tensor([2]), sigma=torch.Tensor([2]))

tensor([-1.0000, -1.4055, -1.9933, -2.5418, -3.0204, -3.4388, -3.8111, -4.1483,
        -4.4584, -4.7472])

In [9]:
## WARNING: read me... 
## Below is user-defined R implementation of PIG (Poisson Inverse Gaussian) density
## We also compare against gamlss.dist::dPIG() code rolled out in gamlss.dist package
## Documentation on page 197: http://www.gamlss.com/wp-content/uploads/2013/01/gamlss-manual.pdf
## You will see that above Numpy and PyTorch implementations agree with R output (up to many decimal places)

In [10]:
'''
> library(gamlss.dist)
>
> ###################################################
> ## User-defined R code for PIG (Poisson Inverse Gaussian) Distribution
> ###################################################
> d_PIG <- function(x, mu=1, sigma=1 , log=FALSE) { 
+ ## Warning messages on paramter and data space constraint violations
+     if (any(mu <= 0) )  stop(paste("mu must be greater than 0 ", "\n", "")) 
+     if (any(sigma <= 0) )  stop(paste("sigma must be greater than 0 ", "\n", "")) 
+     if (any(x < 0) )  stop(paste("x must be >=0", "\n", ""))  
+     ## Determine length of data vector and parameters 
+     ly <- max(length(x), length(mu), length(sigma)) 
+     x <- rep(x, length=ly)      
+     nsigma <- rep(sigma, length=ly)
+     nmu <- rep(mu, length=ly)
+     ## Initial vectors to store computed PIG density values
+     ny <- as.integer(length(x))
+     maxyp1 <- max(x) + 1
+     tofY <- rep(NA_real_, maxyp1)
+     sumlty <- rep(NA_real_, ly)
+     ## Big for loop to compute PIG density (or log-density)
+     ## This is directly from Rigby et al: tofyPIG2.c code.
+     ## I **think** it looks like its implementing recursive mixed-Pois prob calc
+     ## This is likely why (for large vectors, with large counts) that is done in C
+     for (i in 1:ny) {
+     iy <- x[i] + 1
+     tofY[1] <- nmu[i] * ((1 + 2*nsigma[i]*nmu[i])^(-0.5))
+     sumT <- 0 
+     ## Start inner loop to compute rest of PIG density
+     if (x[i]==0) {
+     sumT <- 0
+     } else {
+     for (j in 1:(iy-1)) {
+     tofY[j + 1] <- ((nsigma[i] * ((2*(j)-1)/nmu[i])) + (1/tofY[j])) * ((tofY[1])^2)
+     sumT <- sumT + log(tofY[j])
+     }
+     }
+     sumlty[i] <- sumT
+     }
+     ## Add the kernel of the PIG density back to other constant component
+     logfy <- -lgamma(x+1) + (1 - sqrt(1 + 2*sigma*mu))/sigma + sumlty
+     ## log={T,F} flag: T=return log-density; F=return density
+     if(log==FALSE) {
+     fy <- exp(logfy)
+     } else {
+     fy <- logfy
+     }
+     ## Return log density function to user
+     return(fy)
+ }
 
## User defined R implementation of PIG distribution
> d_PIG(x=0:9, mu=2, sigma=2, log=TRUE)
 [1] -1.000000 -1.405465 -1.993252 -2.541818 -3.020405 -3.438827 -3.811082 -4.148280 -4.458424 -4.747230

## gamlss.dist implementation of PIG distribution
> dPIG(x=0:9, mu=2, sigma=2, log=TRUE)
 [1] -1.000000 -1.405465 -1.993252 -2.541818 -3.020405 -3.438827 -3.811082 -4.148280 -4.458424 -4.747230 
'''
;

''

In [11]:
##############################################
## PIG Model - try to learn MLE of fish count data; via AutoGrad/SGD implementation in PyTorch
############################################## 

In [12]:
## Instantiate data tensor, and variable for (binomial) model parameters
x = torch.autograd.Variable(torch.from_numpy(dat.fish.to_numpy())).type(torch.FloatTensor)
l_mu = torch.autograd.Variable(torch.rand(1), requires_grad=True)
l_sigma = torch.autograd.Variable(torch.rand(1), requires_grad=True) 

In [13]:
def pig_nll(x, mu, sigma): 
    ## Determine length of data vector and parameters 
    ly = int(torch.max(torch.Tensor([len(x), len(mu), len(sigma)])).item())
    #x = np.repeat(a=x, repeats=ly)      
    nsigma = sigma.repeat(ly)
    nmu = mu.repeat(ly)
    ## Initial vectors to store computed PIG density values
    ny = int(len(x))
    maxyp1 = x.max().item() + 1
    tofY = torch.zeros(int(maxyp1))
    sumlty = torch.zeros(ly)
    ## Big for loop to compute PIG density (or log-density)
    ## This is directly from Rigby et al: tofyPIG2.c code.
    ## I **think** it looks like its implementing recursive mixed-Pois prob calc
    ## This is likely why (for large vectors, with large counts) that is done in C
    for i in torch.arange(1, ny+1, dtype=torch.int32):
        iy = x[i.item()-1] + 1
        tofY[0] = nmu[i.item()-1] * ((1 + 2*nsigma[i.item()-1]*nmu[i.item()-1])**(-0.5))
        sumT = torch.Tensor([0]) 
        ## Start inner loop to compute rest of PIG density
        if (x[i.item()-1]==0):
            sumT = torch.Tensor([0])
        else:
            for j in torch.arange(1, iy, dtype=torch.int32):
                tofY[j.item()] = ((nsigma[i.item()-1] * ((2*(j.item())-1)/nmu[i.item()-1])) + (1/tofY[j.item()-1])) * ((tofY[0])**2)
                sumT = sumT + torch.log(tofY[j.item()-1])
        sumlty[i.item()-1] = sumT
    ## Add the kernel of the PIG density back to other constant component
    logfy = -torch.lgamma(x+1) + (1 - torch.sqrt(1 + 2*sigma*mu))/sigma + sumlty
    ## Return neg log lik to user
    nll = -torch.sum(logfy)
    return nll

In [14]:
# torch.autograd.set_detect_anomaly(True)

## Learning rate
learning_rate_mu = 2e-5
learning_rate_sigma = 2e-5

## Training loop
for t in range(25000):
    ## Backprop on negative log likelihood loss
    NLLpig = pig_nll(x=x, mu=l_mu, sigma=l_sigma) 
    NLLpig.backward()
    ## Logging to console
    if t % 1000 == 0:
        print("Iteration = ", t, 
              "loglik  =", NLLpig.data.numpy(), 
              "lmu =", l_mu.data.numpy(), 
              "lsigma =", l_sigma.data.numpy(),  
              "dL/dlmu = ", l_mu.grad.data.numpy(), 
              "dL/dlsigma = ", l_sigma.grad.data.numpy())
    ## SGD update of parms
    l_mu.data -= learning_rate_mu * l_mu.grad.data
    l_sigma.data -= learning_rate_sigma * l_sigma.grad.data
    ## Zero the gradients
    l_mu.grad.data.zero_()
    l_sigma.grad.data.zero_()
    

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor []], which is output 0 of SelectBackward, is at version 2992; expected version 2991 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [15]:
########################
## Print session info to console 
########################
sinfo()

-----
numpy       1.20.3
pandas      1.3.1
sinfo       0.3.1
torch       1.9.0
-----
IPython             7.26.0
jupyter_client      6.1.12
jupyter_core        4.7.1
jupyterlab          3.1.7
notebook            6.4.3
-----
Python 3.9.6 (default, Aug 18 2021, 15:44:49) [MSC v.1916 64 bit (AMD64)]
Windows-10-10.0.19042-SP0
8 logical CPU cores, Intel64 Family 6 Model 126 Stepping 5, GenuineIntel
-----
Session information updated at 2021-08-20 16:51
