In [111]:
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import pandas
import inspect
import argparse
import arviz as az
import pickle
import theano
import theano.tensor.extra_ops as te
#import theano.tensor as tt

from pandas.api.types import CategoricalDtype

In [2]:
#######################
## utility functions ##
#######################

def get_num_of_levels(series):
    '''Get the number of unique levels of a factor'''
    return series.cat.categories.shape[0]

In [3]:
data = pandas.read_csv('../data/data.for.aaron.csv') #22264

In [4]:
data = data[data.distance != 'filler'] #5480
data.subject = data.subject.astype('category')
data.judgment = (data.judgment-1).astype(CategoricalDtype(ordered=True))
data.item = data.item.astype('category')
data.dependency = data.dependency.astype('category')
data.island = data.island.astype('category')
data.structure = data.structure.astype(CategoricalDtype(categories=['non', 'island'], ordered=False))
data.distance = data.distance.astype(CategoricalDtype(categories=['short', 'long'], ordered=False))

In [5]:
def init_did():
    '''try:
        with open('params/dependency_islandistance', 'rb') as f:
            return pickle.load(f)
        #return pickle.load(open('params/dependency_island_distance', 'rb'))
        with open('params/dependency_island_distance', 'rb') as f:
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            return u.load()
        #return pickle.load(open('params/dependency_island_structure'))
    except IOError:
        print('random initialization of dependency_island_distance'        )'''
    return np.random.normal(0., 1.,
                                size=[get_num_of_levels(data.dependency),
                                      get_num_of_levels(data.island),
                                      get_num_of_levels(data.distance)])
def init_dis():
    
    '''try:
        return pickle.load(open('params/dependency_island_structure'))
    
    except IOError:
        print('random initialization of dependency_island_structure')'''
    return np.random.normal(0., 1.,
                                size=[get_num_of_levels(data.dependency),
                                      get_num_of_levels(data.island),
                                      get_num_of_levels(data.structure)])


def init_violation_intercept():
    
    '''try:
        violation = pickle.load(open('params/violation', encoding='rb'))
        return violation.min()
                    
    except IOError:
    print('deterministic initialization of violation_intercept')'''
    return 0.

def init_violation_scale():

    '''try:
        violation = pickle.load(open('params/violation'))
        intercept = init_violation_intercept()
        violation_zeroed = violation - intercept
        return violation_zeroed.max()/args.numofviolations
        
    except IOError:
        print('deterministic initialization of violation_scale')'''
    return 1.

    
def init_violation():
    '''try:
        violation = pickle.load(open('params/violation', encoding='rb'))

        if args.unboundedviolation:
            return violation
        else:
            violation_scale = init_violation_scale()
            violation_zeroed = violation - init_violation_intercept()

            if args.violationtype == 'gradient':
                if args.violationintercept:
                    return violation_zeroed/violation_scale
                else:
                    return violation/violation_scale
                    
            else:
                if args.violationintercept:
                    return np.round(violation_zeroed/violation_scale)
                else:
                    return np.round(violation/violation_scale)
                            
    except IOError:
        print('random initialization of violation')'''
        
    if args.unboundedviolation:
        return np.random.exponential(1.,
                                     size=[get_num_of_levels(data.dependency),
                                           get_num_of_levels(data.island)])

    elif args.violationtype == 'categorical':
        return np.random.binomial(n=args.numofviolations,
                                  p=.5,
                                  size=[get_num_of_levels(data.dependency),
                                        get_num_of_levels(data.island)])

    elif args.violationtype == 'gradient':
        return np.random.uniform(low=0.,
                                 high=args.numofviolations,
                                 size=[get_num_of_levels(data.dependency),
                                       get_num_of_levels(data.island)])

def init_item():
    '''try:
        return pickle.load(open('params/intercepts_item'))
    except IOError:
        print('random initialization of intercepts_item')'''
    return np.random.normal(0., 1., size=get_num_of_levels(data.item))


def init_subjadd():
    '''try:
        return pickle.load(open('params/intercepts_subj_add'))
    except IOError:
        print('random initialization of intercepts_subj_add')'''
    return np.random.normal(0., 1., size=get_num_of_levels(data.subject))

def init_subjmult():
    '''try:
        return pickle.load(open('params/intercepts_subj_mult'))
    except IOError:
        print('random initialization of intercepts_subj_mult')'''
    return np.ones(get_num_of_levels(data.subject))
    
def init_jump():
    '''try:
        return pickle.load(open('params/jump'))
    except IOError:'''
    return sp.stats.expon.rvs(scale=.1,
                                  size=get_num_of_levels(data.judgment)-1)

In [70]:
####################
## argument "parser"
####################

class Args:
    data = '../data/data.for.aaron.csv'
    outputdir = './model/'
    
    unboundedviolation = False
    violationtype = 'gradient' # 'none', 'gradient', 'categorical'
    violationform = 'marginal' # 'marginal', 'joint'
    numofviolations = 1
    violationintercept = False
    additivesubjrandomeffects = True
    multiplicativesubjrandomeffects = False
    
    iterations = 1100000
    burnin = 100000
    thinning = 1000
    sampleonlyviolation = False

args = Args()

In [107]:
basic_model = pm.Model()
with basic_model:
    
    ###################
    ## fixed effects ##
    ###################
    
    dependency_island_distance = pm.Normal(name='dependency_island_distance',
                                           mu=0.,
                                           tau=1e-6,
                                           observed=init_did()
                                          )
    dependency_island_structure = pm.Normal(name='dependency_island_structure',
                                            mu=0.,
                                            tau=1e-6,
                                           observed=init_dis())
    
    fixed_tensor = pm.Deterministic('fixed_tensor', dependency_island_distance[:,:,:,None] +\
                                    dependency_island_structure[:,:,None,:])
    
    if args.unboundedviolation:
        
        violation = pm.Exponential(name='violation_ub',
                                   lam=0.5,
                                   observed=init_violation())
        
        no_violations = pm.Deterministic('no_violations', fixed_tensor[data.dependency.cat.codes,
                                        data.island.cat.codes,
                                        data.distance.cat.codes,
                                        data.structure.cat.codes])
        violations = pm.Deterministic('violations', violation[data.dependency.cat.codes,
                                        data.island.cat.codes])
        
        fixed = pm.Deterministic('fixed', no_violations - data.distance.cat.codes *\
                                 data.structure.cat.codes )#* violations); ValueError: Length of Elemwise{mul,no_inplace}.0 cannot be determined
        
    if args.violationtype == 'none':
        
        fixed = pm.Deterministic('fixed', fixed_tensor[data.dependency.cat.codes,
                                data.island.cat.codes,
                                data.distance.cat.codes,
                                data.structure.cat.codes])
    else:
        
        violation_intercept = pm.Exponential(name='violation_intercept', lam=0.5)
        
        violation_scale = pm.Exponential(name='violation_scale',
                                        lam=1.*args.numofviolations,
                                        observed=init_violation_scale())
        if args.violationform == 'joint':
            violation_propensity = pm.Normal(name='violation_propensity',
                                            mu=0.,
                                            tau=1e-6,
                                            observed=np.random.normal(0., 1.,
                                                                      size=[get_num_of_levels(data.dependency),
                                                                            get_num_of_levels(data.island),
                                                                            args.numofviolations]))

            violation_prob = pm.InvLogit(name='violation_prob', ltheta=violation_propensity)

            if args.violationtype == 'categorical':
                violation = pm.Bernoulli(name='violation',
                                        p=violation_prob,
                                        observed=False)

            else:
                violation = violation_prob
                
        elif args.violationform == 'marginal':

            if args.violationtype == 'categorical':
                violation = pm.DiscreteUniform(name='violation',
                                                lower=0,
                                                upper=args.numofviolations,
                                                observed=init_violation())

            else:
                violation = pm.DiscreteUniform(name='violation',
                                    lower=0,
                                    upper=args.numofviolations,
                                    observed=init_violation())
                
        no_violations = pm.Deterministic('no_violations', fixed_tensor[data.dependency.cat.codes,
                                        data.island.cat.codes,
                                        data.distance.cat.codes,
                                        data.structure.cat.codes])
        if args.violationform == 'joint':
            violation_sum = violation.sum(axis=2)[data.dependency.cat.codes,
                                                data.island.cat.codes]

        else:
            violation_sum = pm.Deterministic('violation_sum', violation[data.dependency.cat.codes,
                                    data.island.cat.codes])
        
        if args.violationintercept:            
            fixed = pm.Deterministic('fixed', no_violations -\
                            data.distance.cat.codes * data.structure.cat.codes * violation_intercept -\
                            data.distance.cat.codes * data.structure.cat.codes * violation_scale * violation_sum)
        else:
            fixed = pm.Deterministic('fixed', no_violations -\
                    data.distance.cat.codes * data.structure.cat.codes)# * violation_scale * violation_sum); ValueError: Length of Elemwise{mul,no_inplace}.0 cannot be determined
    
    ####################              
    ## random effects ##
    ####################
    intercepts_item_prior = pm.Gamma(name='intercepts_item_prior', 
                                    alpha=0.001,
                                    beta=1/0.001,
                                    observed=sp.stats.expon.rvs(scale=.1))

    intercepts_item = pm.Normal(name='intercepts_item',
                                mu=0.,
                                tau=intercepts_item_prior,
                                observed=init_item())
    
    ################
    ## likelihood ##
    ################
    
    jump = pm.Gamma(name='jump', 
                    alpha=0.001,
                    beta=1/0.001,
                    observed=init_jump())


    if args.additivesubjrandomeffects:

        intercepts_subj_add_prior = pm.Gamma(name='intercepts_subj_add_prior', 
                                        alpha=0.001,
                                        beta=1/0.001,
                                        observed=sp.stats.expon.rvs(scale=.1))
            
        intercepts_subj_add = pm.Normal(name='intercepts_subj_add',
                                    mu=0.,
                                    tau=intercepts_subj_add_prior,
                                    observed=init_subjadd())
        
        
        param = pm.Deterministic('param', fixed + intercepts_subj_add[np.array(data.subject.cat.codes)] + intercepts_item[np.array(data.item.cat.codes)])
        
        #@pymc.deterministic
        #def param(fixed=fixed, intercepts_subj_add=intercepts_subj_add, intercepts_item=intercepts_item):
        #    return fixed + intercepts_subj_add[np.array(data.subject.cat.codes)] + intercepts_item[np.array(data.item.cat.codes)]
    else:
        
        param = pm.Deterministic('param', fixed + intercepts_item[np.array(data.item.cat.codes)])
        
        #@pymc.deterministic
        #def param(fixed=fixed, intercepts_item=intercepts_item):
        #    return fixed + intercepts_item[np.array(data.item.cat.codes)]
        
    if args.multiplicativesubjrandomeffects:

        intercepts_subj_mult_prior = pm.Gamma(name='intercepts_subj_mult_prior', 
                                        alpha=0.001,
                                        beta=1/0.001,
                                        observed=sp.stats.expon.rvs(scale=.1))
            
        intercepts_subj_mult = pm.Exponential(name='intercepts_subj_mult',
                                        beta=intercepts_subj_mult_prior,
                                        observed=init_subjmult())
        
        jump_warped = (1/intercepts_subj_mult[data.subject.cat.codes,None])*jump
        log_prob = pm.Deterministic('log_prob', te.cumsum(jump_warped, axis=1)-param[:,None])
        
        #@pymc.deterministic
        #def log_prob(jump=jump, param=param, intercepts_subj_mult=intercepts_subj_mult):
        #    jump_warped = (1/intercepts_subj_mult[data.subject.cat.codes,None])*jump
        #
        #    return np.cumsum(jump_warped, axis=1)-param[:,None]
    else:
        
        log_prob = pm.Deterministic('log_prob', te.cumsum(jump)[None,:]-param[:,None])
        
        #@pymc.deterministic
        #def log_prob(jump=jump, param=param):
        #    return np.cumsum(jump)[None,:]-param[:,None]
        
    #@pymc.deterministic
    def prob(log_prob=log_prob):
        cdfs = 1 / (1+np.exp(-log_prob))
        
        zeros = np.zeros(data.shape[0])[:,None]
        ones = np.ones(data.shape[0])[:,None]

        return np.append(cdfs, ones, axis=1) - np.append(zeros, cdfs, axis=1)


    judgment = pm.Categorical(name='judgment',
                                p=prob,
                                observed=data.judgment)
    
    #map_estimate = pm.find_MAP(model=basic_model)
basic_model

<pymc3.model.Model at 0x7f831f7994c0>

In [108]:
map_estimate = pm.find_MAP(model=basic_model)




In [110]:
map_estimate

{'violation_intercept_log__': array(-10.991222),
 'fixed_tensor': array([[[[-0.75276331,  1.16565912],
          [-1.25463661,  0.66378582]],
 
         [[ 0.30025481, -3.4047187 ],
          [ 0.87856272, -2.82641078]],
 
         [[-0.00590855,  2.71674692],
          [-2.26441047,  0.45824501]],
 
         [[-0.12898169, -1.50108491],
          [ 1.67400189,  0.30189867]]],
 
 
        [[[ 1.28322867,  4.89449521],
          [-0.81071129,  2.80055525]],
 
         [[ 0.4349657 , -0.90824697],
          [ 0.37037967, -0.972833  ]],
 
         [[ 1.96095663,  0.9708424 ],
          [ 0.78762259, -0.20249164]],
 
         [[ 0.17372748, -0.69948565],
          [-0.74381455, -1.61702768]]],
 
 
        [[[ 0.10888286, -1.30621876],
          [-0.87262325, -2.28772487]],
 
         [[-2.09027937, -3.30515512],
          [ 1.37814566,  0.16326992]],
 
         [[-0.13112647,  1.03738961],
          [-1.70058096, -0.53206488]],
 
         [[-1.5357104 , -1.73206361],
          [ 0.70248784

In [109]:
pm.fit(n=10000, method='fmin_l_bfgs_b', model=basic_model)

KeyboardInterrupt: 

In [10]:
basic_model.fit(method='fmin_l_bfgs_b', iterlim=100000)

AttributeError: 'Model' object has no attribute 'fit'

In [49]:
basic_model = pm.Model()
with basic_model:
    
    ###################
    ## fixed effects ##
    ###################
    
    dependency_island_distance = pm.Normal(name='dependency_island_distance',
                                           mu=0.,
                                           tau=1e-6,
                                           observed=init_did()
                                          )
    dependency_island_structure = pm.Normal(name='dependency_island_structure',
                                            mu=0.,
                                            tau=1e-6,
                                           observed=init_dis())
    #@pm.deterministic
    #def fixed_tensor(did=dependency_island_distance, dis=dependency_island_structure):
    #    return pm.Deterministic("fixed_tensor", did[:,:,:,None] + dis[:,:,None,:])
    
    fixed_tensor = pm.Deterministic("fixed_tensor", dependency_island_distance[:,:,:,None] +\
                                    dependency_island_structure[:,:,None,:])
    
    
    
    if args.unboundedviolation:
        
        violation = pm.Exponential(name='violation',
                                   lam=0.5,
                                   observed=init_violation())
        
        no_violations = pm.Deterministic('no_violations', fixed_tensor[data.dependency.cat.codes,
                                        data.island.cat.codes,
                                        data.distance.cat.codes,
                                        data.structure.cat.codes])
        violations = pm.Deterministic('violations', violation[data.dependency.cat.codes,
                                        data.island.cat.codes])
        
        fixed = pm.Deterministic('fixed', no_violations - data.distance.cat.codes *\
                                 data.structure.cat.codes)# * violations)
        
        '''def fixed(fixed_tensor=fixed_tensor, violation=violation):
            no_violations = fixed_tensor[data.dependency.cat.codes,
                                        data.island.cat.codes,
                                        data.distance.cat.codes,
                                        data.structure.cat.codes]

            violations = violation[data.dependency.cat.codes,
                                data.island.cat.codes]

            return no_violations - data.distance.cat.codes * data.structure.cat.codes * violations'''
        
    if args.violationtype == 'none':
        
        fixed = pm.Deterministic('fixed', fixed_tensor[data.dependency.cat.codes,
                                data.island.cat.codes,
                                data.distance.cat.codes,
                                data.structure.cat.codes])
        
        '''def fixed(fixed_tensor=fixed_tensor):
            return fixed_tensor[data.dependency.cat.codes,
                                data.island.cat.codes,
                                data.distance.cat.codes,
                                data.structure.cat.codes]'''
    else:
        
        violation_intercept = pm.Exponential(name='violation_intercept', lam=0.5)
        
        violation_scale = pm.Exponential(name='violation_scale',
                                        lam=1.*args.numofviolations,
                                        observed=init_violation_scale())

        
        if args.violationform == 'joint':
            violation_propensity = pm.Normal(name='violation_propensity',
                                            mu=0.,
                                            tau=1e-6,
                                            observed=np.random.normal(0., 1.,
                                                                      size=[get_num_of_levels(data.dependency),
                                                                            get_num_of_levels(data.island),
                                                                            args.numofviolations]))

            violation_prob = pm.InvLogit(name='violation_prob', ltheta=violation_propensity)

            if args.violationtype == 'categorical':
                violation = pm.Bernoulli(name='violation',
                                        p=violation_prob,
                                        observed=False)

            else:
                violation = violation_prob

        elif args.violationform == 'marginal':

            if args.violationtype == 'categorical':
                violation = pm.DiscreteUniform(name='violation',
                                                lower=0,
                                                upper=args.numofviolations,
                                                observed=init_violation())

            else:
                violation = pm.DiscreteUniform(name='violation',
                                    lower=0,
                                    upper=args.numofviolations,
                                    observed=init_violation())

                
            
        #@pymc.deterministic
        '''def fixed(fixed_tensor=fixed_tensor, violation=violation, violation_intercept=violation_intercept, violation_scale=violation_scale):
            no_violations = fixed_tensor[data.dependency.cat.codes,
                                        data.island.cat.codes,
                                        data.distance.cat.codes,
                                        data.structure.cat.codes]

            if args.violationform == 'joint':
                violation_sum = violation.sum(axis=2)[data.dependency.cat.codes,
                                                    data.island.cat.codes]

            else:
                violation_sum = violation[data.dependency.cat.codes,
                                        data.island.cat.codes]

            if args.violationintercept:            
                return no_violations -\
                data.distance.cat.codes * data.structure.cat.codes * violation_intercept -\
                data.distance.cat.codes * data.structure.cat.codes * violation_scale * violation_sum
            else:
                return no_violations -\
                data.distance.cat.codes * data.structure.cat.codes * violation_scale * violation_sum'''
        
        no_violations = pm.Deterministic('no_violations', fixed_tensor[data.dependency.cat.codes,
                                        data.island.cat.codes,
                                        data.distance.cat.codes,
                                        data.structure.cat.codes])
        if args.violationform == 'joint':
            violation_sum = violation.sum(axis=2)[data.dependency.cat.codes,
                                                data.island.cat.codes]

        else:
            violation_sum = pm.Deterministic('violation_sum', violation[data.dependency.cat.codes,
                                    data.island.cat.codes])
        
        if args.violationintercept:            
            fixed = pm.Deterministic('fixed', no_violations -\
                            data.distance.cat.codes * data.structure.cat.codes * violation_intercept -\
                            data.distance.cat.codes * data.structure.cat.codes * violation_scale * violation_sum)
        else:
            fixed = pm.Deterministic('fixed', no_violations -\
                    data.distance.cat.codes * data.structure.cat.codes * violation_scale * violation_sum)
        
        
            
    ####################              
    ## random effects ##
    ####################
    intercepts_item_prior = pm.Gamma(name='intercepts_item_prior', 
                                    alpha=0.001,
                                    beta=1/0.001,
                                    observed=sp.stats.expon.rvs(scale=.1))

    intercepts_item = pm.Normal(name='intercepts_item',
                                mu=0.,
                                tau=intercepts_item_prior,
                                observed=init_item())
    
    ################
    ## likelihood ##
    ################
    
    jump = pm.Gamma(name='jump', 
                    alpha=0.001,
                    beta=1/0.001,
                    observed=init_jump())


    if args.additivesubjrandomeffects:

        intercepts_subj_add_prior = pm.Gamma(name='intercepts_subj_add_prior', 
                                        alpha=0.001,
                                        beta=1/0.001,
                                        observed=sp.stats.expon.rvs(scale=.1))
            
        intercepts_subj_add = pm.Normal(name='intercepts_subj_add',
                                    mu=0.,
                                    tau=intercepts_subj_add_prior,
                                    observed=init_subjadd())

        
        #@pymc.deterministic
        def param(fixed=fixed, intercepts_subj_add=intercepts_subj_add, intercepts_item=intercepts_item):
            return fixed + intercepts_subj_add[tt.stacklists(data.subject.cat.codes)] + intercepts_item[tt.stacklists(data.item.cat.codes)]
    else:

        #@pymc.deterministic
        def param(fixed=fixed, intercepts_item=intercepts_item):
            return fixed + intercepts_item[tt.stacklists(data.item.cat.codes)]

        
    if args.multiplicativesubjrandomeffects:

        intercepts_subj_mult_prior = pm.Gamma(name='intercepts_subj_mult_prior', 
                                        alpha=0.001,
                                        beta=1/0.001,
                                        observed=sp.stats.expon.rvs(scale=.1))
            
        intercepts_subj_mult = pm.Exponential(name='intercepts_subj_mult',
                                        beta=intercepts_subj_mult_prior,
                                        observed=init_subjmult())

        #@pymc.deterministic
        def log_prob(jump=jump, param=param, intercepts_subj_mult=intercepts_subj_mult):
            jump_warped = (1/intercepts_subj_mult[data.subject.cat.codes,None])*jump

            return np.cumsum(jump_warped, axis=1)-param[:,None]
    else:

        #@pymc.deterministic
        def log_prob(jump=jump, param=param):
            return np.cumsum(jump)[None,:]-param[:,None]
    
    #@pymc.deterministic
    def prob(log_prob=log_prob):
        cdfs = 1 / (1+np.exp(-log_prob))
        
        zeros = np.zeros(data.shape[0])[:,None]
        ones = np.ones(data.shape[0])[:,None]

        return np.append(cdfs, ones, axis=1) - np.append(zeros, cdfs, axis=1)


    judgment = pm.Categorical(name='judgment',
                                p=prob,
                                observed=data.judgment)
    


ValueError: Variable name violation already exists.

In [155]:
args.numofviolations

2

In [157]:
violation[0]

IndexError: too many indices for array

In [143]:
dependency_island_distance[:,:,:,None] + dependency_island_structure[:,:,None,:]

Elemwise{add,no_inplace}.0

In [141]:
type(dependency_island_distance)

pymc3.model.ObservedRV

In [137]:
fixed_tensor

<function __main__.fixed_tensor(did=dependency_island_distance, dis=dependency_island_structure)>

In [129]:
map_estimate = pm.find_MAP(model=basic_model)

logp = -2,532.9, ||grad|| = 0.00053917: 100%|██████████| 18/18 [00:00<00:00, 2593.35it/s]  


In [130]:
basic_model

<pymc3.model.Model at 0x7f97cb254f40>

In [131]:
map_estimate

{'violation_intercept_log__': array(-10.991222),
 'violation': array(0),
 'violation_intercept': array(1.68489537e-05)}

In [103]:
map_estimate

{'violation_log__': array(-10.991222), 'violation': array(1.68489537e-05)}

In [64]:
data

Unnamed: 0,subject,survey,order,judgment,item,condition,zscores,dependency,island,structure,distance
0,A10H33HEZVZI8U,7.3,17,1.0,D.whe.isl.lg.08,D.whe.isl.lg,-0.906327,DLINK,whe,island,long
1,A10H33HEZVZI8U,7.3,23,3.0,D.whe.isl.lg.04,D.whe.isl.lg,0.000000,DLINK,whe,island,long
2,A10H33HEZVZI8U,7.3,20,3.0,D.whe.isl.sh.05,D.whe.isl.sh,0.000000,DLINK,whe,island,short
3,A10H33HEZVZI8U,7.3,30,5.0,D.whe.isl.sh.01,D.whe.isl.sh,0.906327,DLINK,whe,island,short
4,A10H33HEZVZI8U,7.3,11,4.0,D.whe.non.lg.06,D.whe.non.lg,0.453163,DLINK,whe,non,long
...,...,...,...,...,...,...,...,...,...,...,...
22259,AZIY6IFOS4CVC,2.4,21,6.0,WH.np.isl.sh.06,WH.np.isl.sh,0.984166,WH,np,island,short
22260,AZIY6IFOS4CVC,2.4,13,1.0,WH.np.non.lg.03,WH.np.non.lg,-0.890436,WH,np,non,long
22261,AZIY6IFOS4CVC,2.4,30,2.0,WH.np.non.lg.07,WH.np.non.lg,-0.515516,WH,np,non,long
22262,AZIY6IFOS4CVC,2.4,15,6.0,WH.np.non.sh.04,WH.np.non.sh,0.984166,WH,np,non,short


In [66]:
locals()

  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())


{'__name__': '__main__',
 '__doc__': 'Automatically created module for IPython interactive environment',
 '__package__': None,
 '__loader__': None,
 '__spec__': None,
 '__builtin__': <module 'builtins' (built-in)>,
 '__builtins__': <module 'builtins' (built-in)>,
 '_ih': ['',
  'import pymc3 as pm\nimport pandas',
  'import pymc3 as pm\nimport pandas\nimport pickle',
  'with pm.Model():',
  "data = pandas.read_csv('../data/data_for_aaron.csv')",
  "data = pandas.read_csv('../data/data.for.aaron.csv')",
  'data',
  'import pymc3 as pm\nimport matplotlib.pyplot as plt\nimport pandas\nimport pickle',
  'basic_model = pm.Model()\n\nwith basic_model:\n\n    # Priors for unknown model parameters\n    alpha = pm.Normal("alpha", mu=0, sigma=10)\n    beta = pm.Normal("beta", mu=0, sigma=10, shape=2)\n    sigma = pm.HalfNormal("sigma", sigma=1)\n\n    # Expected value of outcome\n    mu = alpha + beta[0] * X1 + beta[1] * X2\n\n    # Likelihood (sampling distribution) of observations\n    Y_obs =

In [62]:
inspect.getfullargspec(pm.Normal)

FullArgSpec(args=['cls', 'name'], varargs='args', varkw='kwargs', defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={})

In [37]:
data

Unnamed: 0,subject,survey,order,judgment,item,condition,zscores,dependency,island,structure,distance
0,A10H33HEZVZI8U,7.3,17,1.0,D.whe.isl.lg.08,D.whe.isl.lg,-0.906327,DLINK,whe,island,long
1,A10H33HEZVZI8U,7.3,23,3.0,D.whe.isl.lg.04,D.whe.isl.lg,0.000000,DLINK,whe,island,long
2,A10H33HEZVZI8U,7.3,20,3.0,D.whe.isl.sh.05,D.whe.isl.sh,0.000000,DLINK,whe,island,short
3,A10H33HEZVZI8U,7.3,30,5.0,D.whe.isl.sh.01,D.whe.isl.sh,0.906327,DLINK,whe,island,short
4,A10H33HEZVZI8U,7.3,11,4.0,D.whe.non.lg.06,D.whe.non.lg,0.453163,DLINK,whe,non,long
...,...,...,...,...,...,...,...,...,...,...,...
22259,AZIY6IFOS4CVC,2.4,21,6.0,WH.np.isl.sh.06,WH.np.isl.sh,0.984166,WH,np,island,short
22260,AZIY6IFOS4CVC,2.4,13,1.0,WH.np.non.lg.03,WH.np.non.lg,-0.890436,WH,np,non,long
22261,AZIY6IFOS4CVC,2.4,30,2.0,WH.np.non.lg.07,WH.np.non.lg,-0.515516,WH,np,non,long
22262,AZIY6IFOS4CVC,2.4,15,6.0,WH.np.non.sh.04,WH.np.non.sh,0.984166,WH,np,non,short


In [11]:
# True parameter values
alpha, sigma = 1, 1
beta = [1, 2.5]

# Size of dataset
size = 100

# Predictor variable
X1 = np.random.randn(size)
X2 = np.random.randn(size) * 0.2

# Simulate outcome variable
Y = alpha + beta[0] * X1 + beta[1] * X2 + np.random.randn(size) * sigma

In [12]:
basic_model = pm.Model()

with basic_model:

    # Priors for unknown model parameters
    alpha = pm.Normal("alpha", mu=0, sigma=10)
    beta = pm.Normal("beta", mu=0, sigma=10, shape=2)
    sigma = pm.HalfNormal("sigma", sigma=1)

    # Expected value of outcome
    mu = alpha + beta[0] * X1 + beta[1] * X2

    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=Y)

In [3]:
with pm.Model():

SyntaxError: unexpected EOF while parsing (<ipython-input-3-0123a0e44a54>, line 1)