In [100]:
import numpy as np
import pandas as pd
import scipy.stats.mstats as sci
from scipy.stats import norm

class zipModel(object):
    """Result container and text reports for a zero-inflated Poisson model.

    The object does not fit anything itself: a caller fills in the
    attributes below and then uses :meth:`printModel`, :meth:`summary`
    and :meth:`covar` to print reports to stdout.

    Attributes read by the reports:
        coeff_count / coeff_zero: 1-D numpy arrays of coefficients for the
            count model and the zero-inflation model (intercept first).
        var_names_count / var_names_zero: coefficient labels, intercept
            label in position 0.
        response_name: name of the response variable.
        residuals: sequence of residuals summarised by :meth:`summary`.
        vcov: square covariance matrix whose rows/columns are ordered as
            the count coefficients followed by the zero coefficients.
        iters, loglike: reported verbatim at the end of :meth:`summary`.
    """

    # p-values below this threshold are displayed as the text "<2e-16"
    _PVAL_FLOOR = 2e-16

    def __init__(self):
        self.converged = False
        self.iters = 0                    # number of iterations for convergence
        self.loglike = 0                  # log-likelihood

        self.coeff_count = np.array([0])  # coefficient vector for count model
        self.coeff_zero = np.array([0])   # coefficient vector for zero-inflation model
        self.residuals = np.array([0])    # function needs to be implemented
        self.vcov = 0

        # string vars
        self.var_names_count = ['']       # variable names, intercept in position 0
        self.var_names_zero = ['']        # variable names for the zero-inflation model
        self.response_name = ''           # name of the response variable

        self.call = ''                    # formula string, built by printModel()

        ### continue adding variables relevant to zip model

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _rhs(names):
        """Join the predictor names (everything after the intercept label).

        Returns "1" when there are no predictors, so an intercept-only
        model still yields a readable formula instead of an IndexError
        or an empty term.
        """
        predictors = list(names[1:])
        return " + ".join(predictors) if predictors else "1"

    def _build_call(self):
        """Return the formula string 'zip(formula = y ~ count terms | zero terms)'."""
        return ("zip(formula = " + self.response_name + " ~ "
                + self._rhs(self.var_names_count) + " | "
                + self._rhs(self.var_names_zero) + ")")

    @staticmethod
    def _print_coef_block(header, names, coeffs):
        """Print a header followed by one row of names and one row of values."""
        width = coeffs.shape[0]
        print(header + "\n")
        for row in (names, coeffs):
            print(('{:>12}' * width).format(*row))
        print("\n")

    @classmethod
    def _format_pvalues(cls, pvals):
        """Return display strings for p-values: '<2e-16' or the value rounded to 5 places."""
        return ["<2e-16" if p < cls._PVAL_FLOOR else str(round(p, 5))
                for p in pvals]

    @staticmethod
    def _print_coef_table(header, names, coeffs, se, z, p_text):
        """Print one '|'-separated coefficient table with a dashed rule under the header row."""
        print(header + "\n")
        coeff_label = ['', 'Estimate', 'Std. Error', 'z value', 'Pr(>|z|)']
        rows = [coeff_label] + list(zip(names, coeffs,
                                        np.round(se, 4), np.round(z, 3), p_text))
        for i, row in enumerate(rows):
            line = '|'.join(str(x).rjust(12) for x in row)
            print(line)
            if i == 0:
                print('-' * len(line))
        print('\n')

    # ------------------------------------------------------------------
    # public reports
    # ------------------------------------------------------------------
    def covar(self):
        """Print the covariance matrix (rounded to 5 places) as a labelled DataFrame.

        Count-model labels are prefixed with "ct_" and zero-model labels
        with "0_" so the two intercepts stay distinguishable.
        """
        COUNT_STR = "ct_"
        ZERO_STR = "0_"

        var_names = [COUNT_STR + s for s in self.var_names_count]
        var_names += [ZERO_STR + s for s in self.var_names_zero]

        A = np.round(self.vcov, 5)
        df = pd.DataFrame(A, index=var_names, columns=var_names)
        print(df)

    def printModel(self):
        """Rebuild ``self.call`` and print the call plus both coefficient vectors."""
        MODEL1_HEADER = "Count model cefficients (poisson log link)"
        MODEL2_HEADER = "Zero-inflation model coefficients (binomial with logit link)"

        # All zero-model predictors are included (not just the first one).
        self.call = self._build_call()

        # print string representation of model
        print("\nCall:\n" + self.call + "\n")

        # part 1: poisson count model
        self._print_coef_block(MODEL1_HEADER, self.var_names_count, self.coeff_count)

        # part 2: logit model for predicting excess zeros
        self._print_coef_block(MODEL2_HEADER, self.var_names_zero, self.coeff_zero)

    def summary(self):
        """Print call, residual quantiles, both coefficient tables and fit info.

        Standard errors are the square roots of the diagonal of ``vcov``;
        z = estimate / std. error; p-values are two-sided normal tail
        probabilities.
        """
        RESIDUAL_OUTPUT = "Pearson residuals:"
        MODEL1_HEADER = "Count model cefficients (poisson log link): "
        MODEL2_HEADER = "Zero-inflation model coefficients (binomial with logit link): "

        ## chunk 1: output call, formula
        # Use the stored call if there is one; otherwise derive it so that
        # summary() is usable without a prior printModel().
        call = self.call or self._build_call()
        print("\nCall:\n" + call + "\n")

        ## chunk 2: output pearson residuals -- residuals function still needs to be implemented
        resid_summ = np.round(
            sci.mquantiles(self.residuals, prob=[0, 0.25, 0.5, 0.75, 1.0]), 5)
        resid_str = ['Min', '1Q', 'Median', '3Q', 'Max']
        print(RESIDUAL_OUTPUT + '\n')
        for line in [resid_str, resid_summ]:
            print(('{:>10}' * len(resid_summ)).format(*line))
        print("\n")

        ## compute z statistics
        kc = self.coeff_count.shape[0]       # number of coefficients in count model

        # standard errors for all coefficients; the first kc entries belong
        # to the count model, the remainder to the zero-inflation model
        se = np.sqrt(np.diagonal(self.vcov))
        se_count, se_zero = se[:kc], se[kc:]

        z_count = self.coeff_count / se_count
        z_zero = self.coeff_zero / se_zero

        # two-sided p-values, formatted for display
        pc_format = self._format_pvalues(2 * norm.cdf(-np.abs(z_count)))
        pz_format = self._format_pvalues(2 * norm.cdf(-np.abs(z_zero)))

        ## chunk 3: output count model coefficients
        self._print_coef_table(MODEL1_HEADER, self.var_names_count,
                               self.coeff_count, se_count, z_count, pc_format)

        ## chunk 4: output zero-inflation model coefficients
        self._print_coef_table(MODEL2_HEADER, self.var_names_zero,
                               self.coeff_zero, se_zero, z_zero, pz_format)
        print('---')

        ## chunk 5: Number of iterations, log-likelihood
        print("Number of iterations in BFGS optimization: " + str(self.iters))
        print("Log-likelihood: " + str(self.loglike))
        
        

In [108]:
# Smoke test: populate a zipModel by hand and exercise each report method.
zip1 = zipModel()

zip1.coeff_count = np.array([1.598, -1.043, 0.834])
zip1.coeff_zero = np.array([1.2974, -0.5643])

zip1.var_names_count = ['(Intercept)', 'child', 'camper']
zip1.var_names_zero = ['(Intercept)', 'persons']
zip1.response_name = 'count'

zip1.residuals = [-1.2369, -0.7540, -0.6080, -0.1921, 24.0847]

# 5x5 covariance matrix: rows/columns 0-2 are the count coefficients,
# rows/columns 3-4 the zero-inflation coefficients. A plain 2-D ndarray is
# used because np.matrix is no longer recommended by NumPy.
zip1.vcov = np.array([
    [ 0.0073167777, -0.0007435476, -0.0071897373, -0.0012913251,  0.0011246277],
    [-0.0007435476,  0.0099976511, -0.0005546518, -0.0077196665,  0.0057985617],
    [-0.0071897373, -0.0005546518,  0.0087659761,  0.0020388830, -0.0016532783],
    [-0.0012913251, -0.0077196665,  0.0020388830,  0.1397655073, -0.0551097454],
    [ 0.0011246277,  0.0057985617, -0.0016532783, -0.0551097454,  0.0265571924],
])


print('\n\nTesting print() function output')
zip1.printModel()
print('----------------------------------------------------------')

print('\n\nTesting summary() function output')
zip1.summary()
print('\n')
print('----------------------------------------------------------')

print('\n\nTesting covar() function output\n')
zip1.covar()
print('----------------------------------------------------------')




Testing print() function output

Call:
zip(formula = count ~ child + camper | persons)

Count model cefficients (poisson log link)

 (Intercept)       child      camper
       1.598      -1.043       0.834


Zero-inflation model coefficients (binomial with logit link)

 (Intercept)     persons
      1.2974     -0.5643


----------------------------------------------------------


Testing summary() function output

Call:
zip(formula = count ~ child + camper | persons)

Pearson residuals:

       Min        1Q    Median        3Q       Max
   -1.2369  -0.89887    -0.608   7.09094   24.0847


Count model cefficients (poisson log link): 

            |    Estimate|  Std. Error|     z value|    Pr(>|z|)
----------------------------------------------------------------
 (Intercept)|       1.598|      0.0855|      18.682|      <2e-16
       child|      -1.043|         0.1|     -10.431|      <2e-16
      camper|       0.834|      0.0936|       8.908|      <2e-16


Zero-inflation model coeffic