## Chapter 8: Generalized Linear Models

In [43]:
import pandas as pd
import numpy as np

from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

In [11]:
# Odds Ratio

In [12]:
# Odds ratio when the probability goes from 0.01 to 0.1:
# each probability p is first converted to its odds p / (1 - p).
odds_hi = 0.1 / (1 - 0.1)
odds_lo = 0.01 / (1 - 0.01)
odds_hi / odds_lo

11.0

In [13]:
# Odds ratio when the probability goes from 0.9 to 0.99:
# each probability p is first converted to its odds p / (1 - p).
odds_hi = 0.99 / (1 - 0.99)
odds_lo = 0.9 / (1 - 0.9)
odds_hi / odds_lo

10.999999999999988

In [14]:
# The ratio of probabilities

In [15]:
# Plain ratio of the two probabilities 0.1 and 0.01
p_hi, p_lo = 0.1, 0.01
p_hi / p_lo

10.0

In [17]:
# Plain ratio of the two probabilities 0.99 and 0.9
p_hi, p_lo = 0.99, 0.9
p_hi / p_lo

1.0999999999999999

---

In [21]:
# Read the space-delimited data file. It has no header row, so the columns
# get integer labels; the first field (label 0) becomes the row index.
df = pd.read_csv(
    "bookprogs/bookdead.dat",
    sep=' ',
    header=None,
    index_col=[0],
)
df.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1,1,48,0,0,12,0,0,0
1,2,1,49,0,0,12,0,0,0
1,3,1,50,0,0,12,0,0,0
1,4,1,51,0,0,12,0,0,0
1,5,1,52,0,0,12,0,0,0


In [196]:
%%time
# Program for dichotomous probit model

# number of iteratios
itr = 5000

# data
x = df[np.arange(2,9)].values
y = df[9].values

# create variables, set values, and write out starting values
b = np.zeros((itr, 7))
vb = np.linalg.inv(x.T@x)
ch = np.linalg.cholesky(vb)

# write output to file and screen
with open('dprob_gibbs.csv', 'w+') as file:

    #begin MCMC simulation
    for i in range(1,itr):

        #simulate latent data from truncated normal distributions
        u = np.random.uniform(size=len(y))
        xb = x@b[i]
        
        cdf = stats.norm.cdf(0, loc=xb)
        ystar = stats.norm.ppf(q = y*(u + cdf) + u*(-1)**y*cdf, loc=xb, scale=x[:,0])

        #simulate beta vector from appropriate mvn
        b[i] = vb@x.T@ystar + np.random.normal(size=7).T@ch
            
        # Write to file
        file.write(str(i)+'\t'.join(['%f'%F for F in b[i]])+'\n')

        if (i%100==0):
            print(i,'\t'+'\t'.join(['%f'%F for F in b[i]]))

100 	-0.994585	0.003030	-0.002508	0.015980	-0.003556	0.007173	0.000492
200 	-0.900120	-0.022737	-0.007086	0.020795	0.005562	-0.122692	0.000467
300 	-0.991803	0.013172	0.008143	0.011771	-0.006887	0.126523	-0.000380
400 	-0.982592	0.013170	-0.028085	0.004544	-0.002148	0.069989	-0.000998
500 	-0.913366	0.003583	-0.025367	0.001723	0.000186	-0.002970	0.000642
600 	-0.938131	0.012775	-0.000488	-0.010602	-0.003202	-0.049367	0.001668
700 	-0.891301	-0.013826	-0.006620	-0.000188	0.002562	-0.017991	-0.000042
800 	-0.876646	0.012525	0.004599	-0.009599	-0.003301	-0.043253	0.001376
900 	-0.815088	0.000043	-0.007287	0.006313	-0.000498	0.037550	-0.000514
1000 	-0.898410	0.005594	-0.022471	0.012772	-0.001375	0.006654	0.000190
1100 	-0.956556	0.011706	-0.038807	0.006569	0.000536	-0.045363	0.001352
1200 	-0.895554	0.008292	-0.014510	0.007947	-0.004007	0.047546	0.000064
1300 	-0.924267	-0.011751	0.000286	-0.015026	0.005740	-0.051247	-0.000792
1400 	-1.037889	0.032979	-0.007221	-0.005448	-0.008583	0.10744

In [46]:
# Trace plot: the values stored in column 1 of b, in iteration order
beta1_trace = go.Scatter(y=b[:, 1])
iplot([beta1_trace], show_link=False)

In [51]:
# Distribution plot of the draws stored in column 1 of b,
# with the histogram bars switched off.
group_labels = ['Draws']
colors = ['#333F44']
hist_data = [b[:, 1]]

fig = ff.create_distplot(
    hist_data,
    group_labels,
    colors=colors,
    show_hist=False,
)

# Title the figure and hide the legend before rendering
layout_options = dict(
    title='$\\beta_{1}$',
    legend=dict(traceorder='normal'),
    showlegend=False,
)
fig['layout'].update(**layout_options)
iplot(fig, show_link=False)

---

In [193]:
%%time
# Program for dichotomous probit model

# number of iteratios
itr = 5000

# data
x = df[np.arange(2,9)].values
y = df[9].values

# create variables, set values, and write out starting values
b = np.zeros((itr, 7))
vb = np.linalg.inv(x.T@x)
ch = np.linalg.cholesky(vb)

# write output to file and screen
with open('dprob_gibbs.csv', 'w+') as file:

    #begin MCMC simulation
    for i in range(1,itr):

        #simulate latent data from truncated normal distributions
        u = np.random.uniform(size=len(y))
        xb = x@b[i]
        
        cdf = stats.norm.cdf(0,loc=xb)
        U = np.random.uniform(low=y*cdf, high=(1-y)*cdf)
        ystar = stats.norm.ppf(U, loc=xb)
#         ystar = stats.norm.ppf(q=y*u + u*(-1)**y*stats.norm.cdf(0, loc=xb, scale=x[:,0]) + 
#                     y*stats.norm.cdf(0, loc=xb, scale=1), loc=xb, scale=x[:,0])

        #simulate beta vector from appropriate mvn
        b[i] = vb@x.T@ystar + np.random.normal(size=7).T@ch
            
        # Write to file
        file.write(str(i)+'\t'.join(['%f'%F for F in b[i]])+'\n')

        if (i%100==0):
            print(i,'\t'+'\t'.join(['%f'%F for F in b[i]]))

100 	-0.763930	0.004596	-0.009509	0.018647	-0.001836	0.040822	-0.000208
200 	-0.752577	0.046491	0.003825	-0.016321	-0.015078	0.135247	0.001117
300 	-0.802813	-0.039278	-0.008731	0.006130	0.014510	-0.250626	0.000922
400 	-0.776554	0.027295	-0.009995	-0.002109	-0.009070	0.093275	0.000259
500 	-0.890219	0.019959	-0.000036	0.010672	-0.004813	0.148575	-0.000572
600 	-0.707323	-0.021157	0.005140	-0.000162	0.006834	-0.145446	0.000839
700 	-0.662959	-0.033795	-0.004190	-0.000615	0.011197	-0.190647	0.000475
800 	-0.858728	0.017232	0.009942	-0.021393	-0.005363	0.086175	-0.000210
900 	-0.753728	0.012787	-0.000886	-0.012778	-0.004696	0.009967	0.000235
1000 	-0.849933	0.024186	-0.008102	-0.008731	-0.005452	0.085175	0.000112
1100 	-0.810077	0.025821	0.016339	-0.030764	-0.008040	0.109061	-0.000134
1200 	-0.823209	-0.007555	0.012839	-0.009890	0.003491	-0.029392	-0.000472
1300 	-0.859863	0.024162	0.015092	-0.009253	-0.007450	0.142048	-0.000773
1400 	-0.909584	0.013350	0.004314	0.010761	-0.003768	-0.007

In [194]:
# Trace plot of column 1 of b against iteration number
traces = [go.Scatter(y=b[:, 1])]
iplot(traces, show_link=False)

In [195]:
# Distribution plot (histogram bars disabled) for the draws in column 1 of b
hist_data = [b[:, 1]]
group_labels = ['Draws']
colors = ['#333F44']

distplot_kwargs = dict(show_hist=False, colors=colors)
fig = ff.create_distplot(hist_data, group_labels, **distplot_kwargs)

# Set the title, keep the legend order 'normal', and hide the legend
fig['layout'].update(
    title='$\\beta_{1}$',
    legend=dict(traceorder='normal'),
    showlegend=False,
)
iplot(fig, show_link=False)

In [170]:
# Overlay a histogram of Z on the normal density with mean 1.645 and unit scale.
# NOTE(review): Z is not defined in any cell visible in this notebook; it must
# come from an earlier or deleted cell — confirm it exists on a fresh
# Restart & Run All.
xs = np.linspace(-3,5, 100)
# 'probability density' scales the bars so their total area is 1, which puts
# them on the same scale as the pdf curve ('probability' would make the bar
# heights sum to 1 and depend on the bin width).
trace = go.Histogram(x=Z, 
                      histnorm='probability density')
pdf = go.Scatter(x = xs,
                 y = stats.norm.pdf(xs, loc = 1.645))
iplot([trace, pdf], show_link=False)