## Poisson Regression of NINO3.4, MDR SSTA, and QBO

#### Import packages

In [138]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from statsmodels.genmod.generalized_estimating_equations import GEE
from statsmodels.genmod.cov_struct import (Exchangeable,
    Independence,Autoregressive)
from statsmodels.genmod.families import Poisson

In [137]:
#!conda install statsmodels

Solving environment: done


  current version: 4.5.1
  latest version: 4.5.2

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.



#### Read in data from netCDF

In [139]:
ds = xr.open_dataset('/data/jennie/Pobs_start_season_1950_2016.nc')

In [140]:
starts = ds.starts

In [141]:
starts1 = ds.sel(X=-50,Y=14)


In [143]:
starts1a=starts1.starts.values

In [144]:
dMDR=xr.open_dataset('/data/jennie/Pkaplan_MDR_season_1950_2016.nc')

In [145]:
MDR=dMDR.MDR.values

In [146]:
dNINO3p4=xr.open_dataset('/data/jennie/Pkaplan_NINO3p4_season_1950_2016.nc')

In [147]:
NINO3p4=dNINO3p4.NINO3p4.values

In [148]:
dQBO=xr.open_dataset('/data/jennie/Pncep_QBO30mb_season_1950_2016.nc')

In [149]:
QBO=dQBO.QBO.values

In [150]:
subject= np.ones(67)

In [151]:
data = {'starts': starts1a,
        'MDR': MDR,
        'NINO3p4': NINO3p4,
         'QBO': QBO,'subject': subject}


In [152]:
y = list(range(0,67))

In [153]:
df = pd.DataFrame(data, index=[y])
df


Unnamed: 0,MDR,NINO3p4,QBO,starts,subject
0,-0.189863,-0.769151,-5.722588,0.0,1.0
1,0.114655,0.408132,-4.862255,0.0,1.0
2,0.237360,-0.248826,-6.171616,0.0,1.0
3,0.125122,0.368024,-1.336268,0.0,1.0
4,-0.242273,-0.635435,-13.017033,0.0,1.0
5,0.106813,-0.956393,6.063123,0.0,1.0
6,-0.260626,-0.740760,-14.943699,0.0,1.0
7,0.080522,0.677707,5.854491,0.0,1.0
8,0.361595,0.153841,-15.525922,0.0,1.0
9,-0.242420,-0.381926,5.698317,0.0,1.0


In [155]:
fam = Poisson()
ind = Independence()
model1 = GEE.from_formula("starts ~ MDR + NINO3p4 + QBO", "subject", df, cov_struct=ind, family=fam)
result1 = model1.fit()
print(result1.summary())


                               GEE Regression Results                              
Dep. Variable:                      starts   No. Observations:                   67
Model:                                 GEE   No. clusters:                        1
Method:                        Generalized   Min. cluster size:                  67
                      Estimating Equations   Max. cluster size:                  67
Family:                            Poisson   Mean cluster size:                67.0
Dependence structure:         Independence   Num. iterations:                     8
Date:                     Tue, 08 May 2018   Scale:                           1.000
Covariance type:                    robust   Time:                         19:40:38
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.9003   3.85e-16  -7.54e+15      0.000      -2.900      -2.900
MDR    

In [156]:
result1.scale

1.0

#### 1.0 the variance is equal to the mean - perfect!

In [157]:
result1.params

Intercept   -2.900295
MDR         -0.426472
NINO3p4     -0.287436
QBO         -0.007589
dtype: float64

In [158]:
result1.params.MDR

-0.4264715922028683

In [166]:
result1

<statsmodels.genmod.generalized_estimating_equations.GEEResultsWrapper at 0x7f115f185908>