# Second-order training of (deep) generalized linear models

## Introduction to GLM


### Model for count observations

## Example of linear Poisson regression with artificial data

In [None]:
def towdir(s):
    """Return the path of the dataset file `s` inside the ./datasets_book/ folder."""
    dataset_dir = './datasets_book/'
    return dataset_dir + s

import deepglmlib.utils as utils
import numpy as np

In [None]:
import importlib
# Re-execute the already imported `utils` module so that the notebook uses its
# current source without restarting the kernel.
importlib.reload(utils)

In [None]:
import gc
# Run a full garbage-collection pass; the value returned by gc.collect() is
# the cell's last expression.
gc.collect()

In [None]:
import psutil

# Snapshot of the system-wide RAM statistics.
memory = psutil.virtual_memory()
# Report `available` rather than `free`: `available` is the memory that can be
# given to processes without swapping and is the quantity `percent` is derived
# from, so the two printed figures are consistent with each other.
# The leading blank in the first string aligns both lines, because print()
# inserts a space between its arguments (i.e. right after the "\n").
print(f" Memory used      : {memory.percent} %\n",
      f"Memory available : { round(memory.available / (1024.0 ** 3),2)} GB")

In [None]:
!nvidia-smi

### Dataset

The datasets are loaded as follows.

In [None]:
n  = 5000 # number of rows
p1 = 7 # number of coefficients, intercept included
p  = p1-1 # number of explanatory variables (p1 without the intercept)

# Echo the three sizes as a sanity check.
print(n, p1, p)

In [None]:
# Read the Poisson data set from whitespace-separated text files located in
# the folder returned by towdir().
# Design matrices of the train / test splits.
X_train   = np.loadtxt(towdir("poisson_n5000_d7_Xtrain.txt"))
X_test    = np.loadtxt(towdir("poisson_n5000_d7_Xtest.txt"))
# Responses, cast to 64-bit integers.
y_train   = np.loadtxt(towdir("poisson_n5000_d7_ytrain.txt")).astype(np.int64)
y_test    = np.loadtxt(towdir("poisson_n5000_d7_ytest.txt")).astype(np.int64)
# NOTE(review): beta0 / mu0 are presumably the coefficients and means used to
# simulate the data (mu0 is used further down as the reference mean) - confirm.
beta0     = np.loadtxt(towdir("poisson_n5000_d7_beta0.txt"))
mu0       = np.loadtxt(towdir("poisson_n5000_d7_mu0.txt"))
# Integer index arrays of the two splits; used further down to index mu0.
idx_test  = np.loadtxt(towdir("poisson_n5000_d7_idxtest.txt")).astype(np.int64)
idx_train = np.loadtxt(towdir("poisson_n5000_d7_idxtrain.txt")).astype(np.int64)

In [None]:
# Number of rows and columns of the train and test design matrices.
n_train, p_train = X_train.shape
n_test, p_test   = X_test.shape

### Poisson regression fitting with numpy

#### Checking the target variable

#### Training of the regression model

In [None]:
import statsmodels.api as stm
# Reference fit of a Poisson regression with statsmodels. X_train is passed as
# is: no constant column is added here.
# NOTE(review): despite the `ols` naming, this is a Poisson model, not an
# ordinary-least-squares fit; the names are kept because later cells reuse them.
ols= stm.Poisson(y_train, X_train)
fit_ols_train = ols.fit()
# Summary table of the fit; it is stored but not displayed by this cell.
olssumy= fit_ols_train.summary()

In [None]:
# Coefficients estimated by statsmodels, reshaped to a column vector with one
# row per coefficient.
beta_stm = fit_ols_train.params
beta_stm = beta_stm.reshape((len(beta_stm),1))

## Poisson fitting with numpy and full batches or mini-batches

In [None]:
from scipy.special import gammaln

def f_poisson_logLik(beta,X,y,name=None):
    """Return the Poisson log-likelihood of `y` for a log-link linear model.

    With eta = X @ beta and mu = exp(eta), the value returned is
    sum(y * log(mu) - mu - log(y!)), where log(y!) is computed as gammaln(y + 1).

    Parameters
    ----------
    beta : array-like
        Coefficients; flattened into a column vector, so 1-D, (p, 1) and
        (1, p) inputs are all accepted.
    X : ndarray
        Design matrix with one column per coefficient.
    y : array-like
        Observed counts; converted to a float64 column vector.
    name : str, optional
        When given, the log-likelihood rounded to 4 decimals is printed as
        "<name>= value".

    Returns
    -------
    numpy.float64
        Log-likelihood summed over all observations.
    """
    beta = np.asarray(beta).reshape(-1, 1)
    y = np.asarray(y, dtype=np.float64).reshape(-1, 1)
    eta = X @ beta
    # log(mu) is eta itself. Using it directly avoids log(exp(eta)), which
    # turns into 0 * -inf or inf - inf (both NaN) as soon as exp() underflows
    # or overflows.
    logL = np.sum(y * eta - np.exp(eta) - gammaln(y + 1))
    if name is not None:
        print(name+"=",np.round(logL,4))
    return logL

In [None]:
def f_mu_mse_cor_poisson(X,y,fit,mu0,isprint=None):
    """Score a fitted Poisson regression against the reference means `mu0`.

    Parameters
    ----------
    X, y : ndarray
        Design matrix and responses the fit is evaluated on.
    fit : dict
        Must provide "beta" (coefficients) and "algo" (label used in the
        printout).
    mu0 : ndarray
        Reference means, flattened before the comparison.
    isprint : any, optional
        Any value other than None prints the mean squared error and the
        log-likelihood, both rounded to 4 decimals.

    Returns
    -------
    dict
        "mu" (predicted means exp(X @ beta), flattened), "msemu" (mean squared
        error against mu0), "cormu" (correlation with mu0), "logL"
        (log-likelihood from f_poisson_logLik) and "fit" (the input dict).
    """
    coefs = fit["beta"]
    label = fit["algo"]

    predicted = np.exp(X @ coefs).ravel()
    squared_err = (predicted - mu0.ravel()) ** 2
    mse = squared_err.mean()
    # Off-diagonal entry of the 2x2 correlation matrix.
    correlation = np.corrcoef(mu0.ravel(), predicted)[0, 1]
    log_lik = f_poisson_logLik(coefs, X, y)

    if isprint is not None:
        print("mse_mu_" + label + "=", np.round(mse, 4),
              "logLik_" + label + "=", np.round(log_lik, 4))

    report = {
        "mu": predicted,
        "msemu": mse,
        "cormu": correlation,
        "logL": log_lik,
        "fit": fit,
    }
    return report

## Poisson training with pytorch

### First order procedure

In [None]:
import deepglmlib.utils as utils

In [None]:
import torch.nn as nn
import torch
import copy

In [None]:
# Keep only the first 500 rows of each split for the experiments below.
X_train = X_train[0:500,:]
y_train = y_train[0:500]
X_test  = X_test[0:500,:]
y_test  = y_test[0:500]

# The index arrays are truncated the same way; they are used later to pick the
# matching entries of mu0.
idx_train = idx_train[0:500]
idx_test = idx_test[0:500]

In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Wrap the numpy arrays as float32 tensors. Column 0 of X is dropped
# (presumably the intercept column, since the linear layer built later has its
# own bias term - TODO confirm).
dt_train = TensorDataset( torch.from_numpy(X_train[:,1:].astype(np.float32)), 
                          torch.from_numpy(y_train.astype(np.float32)) )

dt_test  = TensorDataset( torch.from_numpy(X_test[:,1:].astype(np.float32)), 
                          torch.from_numpy(y_test.astype(np.float32)) )

batch_size= 8
# Mini-batches are served in storage order (shuffle=False) with one worker.
dl_train = DataLoader(dt_train, batch_size= batch_size, shuffle=False,num_workers=1)
dl_test  = DataLoader(dt_test, batch_size= batch_size, shuffle=False,num_workers=1)
# n_* / p_* are overwritten with the shapes of the feature tensors, so p_* no
# longer counts the dropped first column.
n_train, p_train = dl_train.dataset.tensors[0].shape
n_test, p_test   = dl_test.dataset.tensors[0].shape

print(n_train, p_train, n_test, p_test)

In [None]:
def f_update_model(model,loss,optimizer,device,b=None,Xb=None,yb=None,grad_clip=2.0):
    """Apply one plain gradient-descent step with element-wise gradient clipping.

    Every parameter that has a gradient gets its gradient clamped in place to
    [-grad_clip, grad_clip] and is then updated as p <- p - lr * grad, where
    lr is read from the first parameter group of `optimizer`.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose parameters are updated in place.
    optimizer : torch.optim.Optimizer
        Only used as the source of the learning rate.
    loss, device, b, Xb, yb
        Not used by this function; they are part of the signature under which
        it is handed over as the `update_model` callback.
    grad_clip : float, optional
        Bound of the element-wise clipping (default 2.0).
    """
    alpha_t = optimizer.param_groups[0]['lr'] #here constant!
    # The update must not be recorded by autograd.
    with torch.no_grad():
        for param in model.parameters():
            # Parameters that received no gradient are left untouched.
            if param.grad is None:
                continue
            param.grad.clamp_(min=-grad_clip, max=grad_clip)
            param.sub_(alpha_t * param.grad)

In [None]:
# Use the first GPU when CUDA is available, the CPU otherwise.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

name_model = "PoissonRegression"
nbmax_epoqs = 80
debug_out   = 1
alpha_t     = 0.0001

# A single linear layer with bias, mapping the p_train inputs to one output.
layers = []
layers.append(nn.Linear(p_train,1, bias=True))

# Train through the library routine; f_update_model is handed over as the
# parameter-update callback.
resus_gdth = utils.f_train_my_glm(dl_train, dl_test, layers, name_model,                   
               nbmax_epoqs=nbmax_epoqs, debug_out=debug_out, 
               alpha_t=alpha_t, transform_yb = utils.transform_yb,
               transform_yhatb = utils.transform_yhatb, device=device, 
               update_model=f_update_model,printed=2,)

In [None]:
def fun_model2vector(model):
    """Flatten all parameters of `model` into one 1-D numpy vector.

    The parameter tensors are rotated by one position before being
    concatenated, so the first tensor yielded by model.parameters() ends up
    last. For a single nn.Linear layer (weight, then bias) the result is
    therefore [bias, weights...], i.e. intercept first.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose parameters are read; it is not modified.

    Returns
    -------
    numpy.ndarray
        1-D array holding every parameter value.
    """
    # .cpu() is a no-op for CPU tensors and makes the conversion work for
    # models living on another device as well.
    flat = [p.detach().cpu().numpy().ravel() for p in model.parameters()]
    flat = flat[1:] + flat[:1]
    return np.concatenate(flat, axis=0)

# Move the trained model to the CPU and extract its parameters as a column
# vector.
beta_gdth = fun_model2vector(resus_gdth["model"].to(torch.device("cpu"))).ravel()
beta_gdth = beta_gdth.reshape(len(beta_gdth),1)

# Score the gradient-descent fit on both splits. mu0 is indexed with the split
# indices (presumably so that the reference means line up with the rows of X).
fit_gdth         = {"beta":beta_gdth,"algo":"gdth"}
quali_gdth_test  = f_mu_mse_cor_poisson(X_test,y_test,fit_gdth,mu0[idx_test])
quali_gdth_train = f_mu_mse_cor_poisson(X_train,y_train,fit_gdth,mu0[idx_train])

# Report the log-likelihood on each split.
logL_gdth_train = quali_gdth_train["logL"]
logL_gdth_test = quali_gdth_test["logL"]
print(f"logL_gdth_train= {logL_gdth_train:5.2f}")
print(f"logL_gdth_test = {logL_gdth_test:5.2f}")

In [None]:
import statsmodels.api as stm
# Refit the statsmodels Poisson regression on the current X_train / y_train,
# which were cut down to their first 500 rows in an earlier cell.
# NOTE(review): `ols` is a Poisson model, not an ordinary-least-squares fit.
ols= stm.Poisson(y_train, X_train)
fit_ols_train = ols.fit()
# Estimated coefficients as a column vector.
beta_stm = fit_ols_train.params
beta_stm = beta_stm.reshape((len(beta_stm),1))

# Score the statsmodels fit on both splits with the same routine as the other
# methods.
fit_stm        = {"beta":beta_stm,"algo":"stm"}
quali_stm_test = f_mu_mse_cor_poisson(X_test,y_test,fit_stm,mu0[idx_test])
quali_stm_train = f_mu_mse_cor_poisson(X_train,y_train,fit_stm,mu0[idx_train])

# Report the log-likelihood on each split.
logL_stm_train = quali_stm_train["logL"]
logL_stm_test = quali_stm_test["logL"]
print(f"logL_stm_train = {logL_stm_train:5.2f}")
print(f"logL_stm_test  = {logL_stm_test:5.2f}")

### Example of training with a second-order PyTorch optimizer

In [None]:
# Weight of the L1 penalty; read by loss_yy_model each time it is called.
lambda_l1 = 0.01
def loss_yy_model(lossb,model):
    """Return the batch loss plus an L1 penalty on the first parameter tensor.

    The penalty is sum(|w| + 1e-6) over the entries of the first tensor
    yielded by model.parameters(), scaled by the module-level `lambda_l1`.
    Further parameter tensors (e.g. a bias) are not penalised.
    """
    first_param = list(model.parameters())[0]
    l1_penalty = (first_param.abs() + 0.000001).sum()
    return lossb + lambda_l1 * l1_penalty

In [None]:
# Rebuild the loaders with mini-batches of 32 rows, still served in storage
# order.
dl_train = DataLoader(dt_train, batch_size= 32, shuffle=False,num_workers=1)
dl_test  = DataLoader(dt_test, batch_size= 32, shuffle=False,num_workers=1)

# Use the first GPU when CUDA is available, the CPU otherwise.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

name_model = "PoissonRegression"
nbmax_epoqs = 8
debug_out   = 1
alpha_t     = 0.001

# A single linear layer with bias, mapping the p_train inputs to one output.
layers_regress = []
layers_regress.append(nn.Linear(p_train,1, bias=True))

# Same library routine as before, now with name_optimizer="LBFGS" and the
# L1-penalised loss wrapper loss_yy_model.
# NOTE(review): update_model=f_update_model is still passed here; confirm
# whether the library uses it when an optimizer name is given.
resus_lbfgsth = utils.f_train_my_glm(dl_train, dl_test, layers_regress, name_model,                   
               nbmax_epoqs=nbmax_epoqs, debug_out=debug_out, 
               alpha_t=alpha_t, transform_yb = utils.transform_yb,
               transform_yhatb = utils.transform_yhatb, device=device, 
               update_model=f_update_model,printed=1,
               name_optimizer="LBFGS", nbmax_iter_lbgs=30,
               loss_yy_model=loss_yy_model)

In [None]:
# Move the LBFGS-trained model to the CPU and extract its parameters as a
# column vector.
beta_lbfgsth = fun_model2vector(resus_lbfgsth["model"].to(torch.device("cpu"))).ravel()
beta_lbfgsth = beta_lbfgsth.reshape(len(beta_lbfgsth),1)

# Score the LBFGS fit on both splits with the same routine as the other
# methods.
fit_lbfgsth         = {"beta":beta_lbfgsth,"algo":"lbfgsth"}
quali_lbfgsth_test  = f_mu_mse_cor_poisson(X_test,y_test,fit_lbfgsth,mu0[idx_test])
quali_lbfgsth_train = f_mu_mse_cor_poisson(X_train,y_train,fit_lbfgsth,mu0[idx_train])

# Report the log-likelihood on each split.
logL_lbfgsth_train = quali_lbfgsth_train["logL"]
logL_lbfgsth_test = quali_lbfgsth_test["logL"]
print(f"logL_lbfgsth_train = {logL_lbfgsth_train:5.2f}")
print(f"logL_lbfgsth_test  = {logL_lbfgsth_test:5.2f}")

In [None]:
import pandas as pd

# Comparison table of the three fitting procedures, one row per method.
method_s = ["gd-mb-torch",
            "lbfgs-mb-torch",
            "stm (module)"]

# Test-split log-likelihood of each method.
logLik_s = [quali_gdth_test["logL"],
            quali_lbfgsth_test["logL"],
            quali_stm_test["logL"]]

# Test-split mean squared error between the predicted and reference means.
mse_mu_s = [quali_gdth_test["msemu"],
            quali_lbfgsth_test["msemu"],
            quali_stm_test["msemu"]]

# Step counts: "tmax" of each torch training result and, for statsmodels, the
# iteration count reported by its optimizer (read from the fit instead of
# being hard-coded, so it stays correct when the data or the fit change).
nbstep_s = [resus_gdth["tmax"],
            resus_lbfgsth["tmax"],
            fit_ols_train.mle_retvals["iterations"]]

n_train_s = [n_train,n_train,n_train]
n_test_s  = [n_test,n_test,n_test]
p_s       = [p_train,p_train,p_train]

results = [method_s, logLik_s, mse_mu_s, nbstep_s,
           n_train_s, n_test_s, p_s]

column_names = ["algo", "logL_te",
                "mse(mu_hat,mu)_te", "nb_steps_tr",
                "n_train", "n_test", "nb_vars"]

# Build the frame column by column so every column keeps its own dtype
# (transposing a list of mixed lists would make every column `object`).
results_pd = pd.DataFrame(dict(zip(column_names, results)))

# Print floats with 4 decimals and keep the table on a single block of lines.
with pd.option_context('display.float_format', '{:.4f}'.format,
                       'display.expand_frame_repr', False):
    print(results_pd.to_string(index=False))#, header=False