In [1]:
###############################################
## PyTorch Poisson Distribution Likelihood Optimization Examples
## Author: Chris Meaney
## Date: August 2021
###############################################

In [2]:
## Dependency modules
import numpy as np
import pandas as pd
import torch
from sinfo import sinfo

In [3]:
##########################################################
## Use pandas to import data, and store as data.frame
## Data are 1) response/target variable (number of fish = count random variable), 2) lake size (single continous feature/predictor)
##########################################################
dat = pd.read_csv('C://Users//ChristopherMeaney//Desktop//PyTorch_Stuff//pytorch_count_dists//species.csv', encoding='latin1')
dat.head(n=15)

Unnamed: 0,fish,lake,x,scale_x
0,10,5,1.609438,-1.53343
1,37,41,3.713572,-0.901903
2,60,171,5.141664,-0.473281
3,113,25719,10.154985,1.031399
4,99,59596,10.995344,1.283621
5,13,1,0.0,-2.016481
6,30,44,3.78419,-0.880708
7,114,58016,10.968474,1.275556
8,112,19477,9.87699,0.947962
9,17,10,2.302585,-1.325392


In [4]:
## Describe the data
dat.fish.describe()

count     70.000000
mean      41.742857
std       47.849609
min        5.000000
25%       14.000000
50%       21.500000
75%       47.500000
max      245.000000
Name: fish, dtype: float64

In [5]:
mu_ = dat.fish.mean()
mu_

41.74285714285714

In [6]:
sigma_ = np.sqrt(dat.fish.var())
sigma_

47.84960912241293

In [7]:
################################################
## Poisson Model
################################################

In [8]:
## Instantiate data tensor, and variable for (Poisson) model parameters
x = torch.autograd.Variable(torch.from_numpy(dat.fish.to_numpy())).type(torch.FloatTensor)
l_mu = torch.autograd.Variable(torch.rand(1), requires_grad=True) 
## Log scale mean parm
## ---Note: I think the estimates might be slightly diff, if est unconstrained vs. constrained (log-link)
## ---Note: Re-parameterizing to log-scale seems to accelerate convergence

In [9]:
def poisson_nll(x, log_mu):
    nll = -torch.sum(-torch.exp(log_mu) + x*torch.log(torch.exp(log_mu)) - torch.lgamma(x))
    return nll

In [10]:
## Learning rate
learning_rate = 2e-4

## Training loop
for t in range(100):
    ## Backprop on negative log likelihood loss
    NLLp = poisson_nll(x=x, log_mu=l_mu)
    NLLp.backward()
    ## Logging to console
    if t % 10 == 0:
        print("Iteration = ", t, 
              "loglik  =", NLLp.data.numpy(), 
              "l_mu =", l_mu.data.numpy(), 
              "dL/dlmu = ", l_mu.grad.data.numpy())
    ## SGD update of parms
    l_mu.data -= learning_rate * l_mu.grad.data
    ## Zero the gradients
    l_mu.grad.data.zero_()
    

Iteration =  0 loglik  = 8156.642 l_mu = [0.41156012] dL/dlmu =  [-2816.3582]
Iteration =  10 loglik  = 1272.9039 l_mu = [3.7072985] dL/dlmu =  [-69.94849]
Iteration =  20 loglik  = 1272.0527 l_mu = [3.7315245] dL/dlmu =  [-0.01147461]
Iteration =  30 loglik  = 1272.0531 l_mu = [3.7315283] dL/dlmu =  [0.]
Iteration =  40 loglik  = 1272.0531 l_mu = [3.7315283] dL/dlmu =  [0.]
Iteration =  50 loglik  = 1272.0531 l_mu = [3.7315283] dL/dlmu =  [0.]
Iteration =  60 loglik  = 1272.0531 l_mu = [3.7315283] dL/dlmu =  [0.]
Iteration =  70 loglik  = 1272.0531 l_mu = [3.7315283] dL/dlmu =  [0.]
Iteration =  80 loglik  = 1272.0531 l_mu = [3.7315283] dL/dlmu =  [0.]
Iteration =  90 loglik  = 1272.0531 l_mu = [3.7315283] dL/dlmu =  [0.]


In [11]:
## Final estimate of Poisson mean parm
[l_mu.data.numpy(), np.exp(l_mu.data.numpy())]

[array([3.7315283], dtype=float32), array([41.742855], dtype=float32)]

In [12]:
## Compare against "intercept only" Poisson regression model

In [13]:
'''
> ## Poisson model
> library(gamlss)

> R_PO <- gamlss(fish ~ 1, data = species, family = "PO")
GAMLSS-RS iteration 1: Global Deviance = 3006.626 
GAMLSS-RS iteration 2: Global Deviance = 3006.626 
> summary(R_PO)
******************************************************************
Family:  c("PO", "Poisson") 

Call:  gamlss(formula = fish ~ 1, family = "PO", data = species) 

Fitting method: RS() 

------------------------------------------------------------------
Mu link function:  log
Mu Coefficients:
            Estimate Std. Error t value            Pr(>|t|)    
(Intercept)   3.7315     0.0185   201.7 <0.0000000000000002 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

------------------------------------------------------------------
No. of observations in the fit:  70 
Degrees of Freedom for the fit:  1
      Residual Deg. of Freedom:  69 
                      at cycle:  2 
 
Global Deviance:     3006.626 
            AIC:     3008.626 
            SBC:     3010.875 
******************************************************************
'''
;

''

In [14]:
################################
## Extend above example to Poisson regression
## y=Fish-count; x=Lake-size ==> y ~ b0 + b1*x
## Goal is to estimate Poisson regression parms: {b0, b1}
################################

In [15]:
## Instantiate data tensor, and variable for (Poisson) model parameters
x = torch.autograd.Variable(torch.from_numpy(dat.scale_x.to_numpy())).type(torch.FloatTensor)
y = torch.autograd.Variable(torch.from_numpy(dat.fish.to_numpy())).type(torch.FloatTensor)
b0 = torch.autograd.Variable(torch.rand(1), requires_grad=True) 
b1 = torch.autograd.Variable(torch.rand(1), requires_grad=True) 

In [16]:
def poisson_nll(x, y, b0, b1):
    nll = -torch.sum(-torch.exp(b0 + b1*x) + y*torch.log(torch.exp(b0 + b1*x)) - torch.lgamma(y))
    return nll

In [17]:
## Learning rate
learning_rate_b0 = 2e-4
learning_rate_b1 = 2e-4

## Training loop
for t in range(200):
    ## Backprop on negative log likelihood loss
    NLLp = poisson_nll(x=x, y=y, b0=b0, b1=b1)
    NLLp.backward()
    ## Logging to console
    if t % 10 == 0:
        print("Iteration = ", t, 
              "loglik  =", NLLp.data.numpy(), 
              "b0 =", b0.data.numpy(), 
              "b1 =", b1.data.numpy(), 
              "dL/db0 = ", b0.grad.data.numpy(),
              "dL/db1 = ", b1.grad.data.numpy()
             )
    ## SGD update of parms
    b0.data -= learning_rate_b0 * b0.grad.data
    b1.data -= learning_rate_b1 * b1.grad.data
    ## Zero the gradients
    b0.grad.data.zero_()
    b1.grad.data.zero_()
    

Iteration =  0 loglik  = 7293.042 b0 = [0.7016194] b1 = [0.0300743] dL/db0 =  [-2780.746] dL/db1 =  [-1713.5363]
Iteration =  10 loglik  = 762.66534 b0 = [3.312585] b1 = [0.8754502] dL/db0 =  [-229.77246] dL/db1 =  [207.63013]
Iteration =  20 loglik  = 717.02075 b0 = [3.5063746] b1 = [0.6965788] dL/db0 =  [-16.412598] dL/db1 =  [15.612061]
Iteration =  30 loglik  = 716.8185 b0 = [3.5186248] b1 = [0.684908] dL/db0 =  [-0.86694336] dL/db1 =  [0.82788086]
Iteration =  40 loglik  = 716.8178 b0 = [3.5192666] b1 = [0.68429524] dL/db0 =  [-0.04492188] dL/db1 =  [0.04321289]
Iteration =  50 loglik  = 716.8179 b0 = [3.5192997] b1 = [0.6842634] dL/db0 =  [-0.00268555] dL/db1 =  [0.00219727]
Iteration =  60 loglik  = 716.81775 b0 = [3.5193014] b1 = [0.68426186] dL/db0 =  [-0.00048828] dL/db1 =  [0.00012207]
Iteration =  70 loglik  = 716.81775 b0 = [3.5193014] b1 = [0.68426186] dL/db0 =  [-0.00048828] dL/db1 =  [0.00012207]
Iteration =  80 loglik  = 716.81775 b0 = [3.5193014] b1 = [0.68426186] dL/

In [18]:
## Final estimate of Poisson regression parms: {b0,b1}
[b0.data.numpy(), b1.data.numpy()]

[array([3.5193014], dtype=float32), array([0.68426186], dtype=float32)]

In [19]:
## Compare above Poisson regression estimates obtained from Poisson SGD; against those from GAMLSS package 
## Note: we NEED to scale the covariate/feature vector "x" (lake-size), in order to obtain reasonable parm estimates (convergence, etc.)

In [20]:
'''
> ## Poisson regression model
> library(gamlss)

> R_PO <- gamlss(fish ~ scale_x, data = species, family = "PO")
GAMLSS-RS iteration 1: Global Deviance = 1896.156 
GAMLSS-RS iteration 2: Global Deviance = 1896.156 
> summary(R_PO)
******************************************************************
Family:  c("PO", "Poisson") 

Call:  gamlss(formula = fish ~ scale_x, family = "PO", data = species) 

Fitting method: RS() 

------------------------------------------------------------------
Mu link function:  log
Mu Coefficients:
            Estimate Std. Error t value            Pr(>|t|)    
(Intercept)  3.51930    0.02256  155.97 <0.0000000000000002 ***
scale_x      0.68426    0.02198   31.14 <0.0000000000000002 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

------------------------------------------------------------------
No. of observations in the fit:  70 
Degrees of Freedom for the fit:  2
      Residual Deg. of Freedom:  68 
                      at cycle:  2 
 
Global Deviance:     1896.156 
            AIC:     1900.156 
            SBC:     1904.653 
******************************************************************
'''
;

''

In [21]:
########################
## Print session info to console 
########################
sinfo()

-----
numpy       1.20.3
pandas      1.3.1
sinfo       0.3.1
torch       1.9.0
-----
IPython             7.26.0
jupyter_client      6.1.12
jupyter_core        4.7.1
jupyterlab          3.1.7
notebook            6.4.3
-----
Python 3.9.6 (default, Aug 18 2021, 15:44:49) [MSC v.1916 64 bit (AMD64)]
Windows-10-10.0.19042-SP0
8 logical CPU cores, Intel64 Family 6 Model 126 Stepping 5, GenuineIntel
-----
Session information updated at 2021-08-21 01:18
