In [1]:
import pandas as pd
import numpy as np
import scipy.optimize as opt

#### Read in Data

In [2]:
# Load the physician panel from the Excel workbook shipped with the notebook.
physicians_raw = pd.read_excel("PhysDiff.xls")
# Generate the physician ID by integer-dividing the row position by 6.
# NOTE(review): presumably every physician contributes exactly 6 consecutive
# rows -- confirm against the data before relying on this grouping.
df = physicians_raw.assign(ID=physicians_raw.index // 6)
df.head()

Unnamed: 0,Period Pres,Upto Pres,Cum Det,Rnd 1,Rnd 2,Rnd 3,Rnd 4,Rnd 5,Rnd 6,Rnd 7,Rnd 8,Rnd 9,Rnd 10,Last Obs,ID
0,0,1,0,1.322058,1.331552,-0.458844,1.03048,-0.100846,-0.284677,0.513185,1.174519,1.942139,-0.305398,0,0
1,0,1,0,1.606255,0.863877,0.876901,-0.050385,2.009774,-0.171038,-1.165551,-0.580196,0.91909,0.503233,0,0
2,0,1,0,-1.270226,-0.814384,-2.648688,-0.545644,-2.365354,1.574516,0.405809,-0.771119,-0.471666,-0.85162,0,0
3,0,1,0,1.045399,-0.000841,-0.798137,-0.309227,0.779146,0.325385,0.610192,0.301137,0.296502,-1.642758,0,0
4,1,1,1,0.540844,0.741571,-0.409022,-0.335746,0.282179,0.201364,-0.154678,-0.843039,0.610842,0.762996,0,0


Period Pres: Indicator equal to 1 in the period in which the physician first prescribes (0 otherwise)
    
Upto Pres: Indicator equal to 1 for periods up to and including the first prescription (0 afterwards), because the likelihood discards all subsequent observations

Cum Det: Cumulative number of calls for each physician

Rnd 1-10: Standard normal random draws 1-10, used to simulate the likelihood


#### Bayesian Learning

In [3]:
def crit(params, D=10, data=None):
    '''
    Negative simulated log likelihood of the Bayesian Learning Model.

    Input:
    params: sequence (alpha_0, alpha, sigma_square), in that order
        alpha_0      -> IQP, the prior term of the posterior belief
        alpha        -> TQP, the mean of each signal
        sigma_square -> ADSV, the variance of each signal (must be > 0)
    D: number of simulation draws; columns "Rnd 1" .. "Rnd D" must exist
    data: DataFrame with columns "ID", "Cum Det", "Period Pres", "Upto Pres"
        and "Rnd 1" .. "Rnd D"; defaults to the notebook-level `df`

    Returns:
    -LL, where LL is the sum over physicians of the log of the likelihood
    averaged over the D draws. Returns +inf when sigma_square is not
    strictly positive.

    Side effects:
    Stores the per-physician likelihood table (one row per ID, one column per
    draw plus "avg") in the global `liktab` for inspection. `data` itself is
    never modified.
    '''
    global liktab

    #Step 0: Starting values
    IQP, TQP, ADSV = params
    if data is None:
        data = df

    # A non-positive variance turns the sqrt below into NaN, and the pandas
    # reductions further down silently skip NaN, which would hand the
    # optimizer a misleadingly good objective value. Reject it explicitly.
    if not ADSV > 0:
        return np.inf

    #Step 1: Standard normal draws (given in this dataset), shape (rows, D)
    draws = data[["Rnd " + str(i) for i in range(1, D + 1)]].to_numpy(dtype=float)
    calls = data["Cum Det"].to_numpy(dtype=float)[:, None]
    adopted = data["Period Pres"].to_numpy(dtype=float)[:, None]
    kept = data["Upto Pres"].to_numpy(dtype=float)[:, None]

    #Step 2: transform the draws into the sum of `calls` signals ~ N(TQP, ADSV)
    signal_sum = TQP * calls + np.sqrt(ADSV * calls) * draws

    #Step 3: Posterior belief
    belief = (IQP + signal_sum / ADSV) / (1 + calls / ADSV)

    #Step 4-7: Likelihood of adoption. The logistic function is evaluated
    # through logaddexp so that large |belief| cannot overflow into NaN.
    p_adopt = np.exp(-np.logaddexp(0.0, -belief))
    p_wait = np.exp(-np.logaddexp(0.0, belief))
    # Rows with "Upto Pres" == 0 are raised to the power 0 and contribute 1.
    row_lik = (p_adopt * adopted + p_wait * (1 - adopted)) ** kept

    #Step 8: product over each physician's rows, one column per draw
    liktab = (
        pd.DataFrame(row_lik, columns=range(1, D + 1))
        .groupby(data["ID"].to_numpy())
        .prod()
    )

    #Step 9: Physicians' average likelihood across draws
    liktab["avg"] = liktab.mean(axis=1)

    #Step 10: Sum of log likelihood
    LL = np.sum(np.log(liktab["avg"]))

    return -LL

In [4]:
# Step 11: maximise the log likelihood over (alpha_0, alpha, sigma_square)
# by minimising the negative log likelihood returned by `crit`.
para_init = [-1, 2, 3]
results = opt.minimize(fun=crit, x0=para_init, method='L-BFGS-B', tol=1e-10)
# Estimates come back in the same order as the starting values.
a1, a2, a3 = results.x
print(
    " alpha_0:", a1, "\n",
    "alpha", a2, "\n",
    "sigma^2:", a3, "\n",
    "Maximized Log Likelihood:", -results.fun,
)

 alpha_0: -1.2914972474901312 
 alpha 2.5918291877903616 
 sigma^2: 2.407976762122733 
 Maximized Log Likelihood: -1868.740615803639


#### Latent-Class (LC) Model (allows for some unobserved heterogeneity across physicians)

Try a latent-class heterogeneity model with 3 segments

In [5]:
def _segment_avg_likelihood(IQP, TQP, ADSV, data, D):
    '''
    Per-physician simulated likelihood for one latent segment.

    Returns a Series indexed by physician ID holding the likelihood of that
    physician's rows, averaged over the D draws "Rnd 1" .. "Rnd D".
    `data` is not modified.
    '''
    draws = data[["Rnd " + str(i) for i in range(1, D + 1)]].to_numpy(dtype=float)
    calls = data["Cum Det"].to_numpy(dtype=float)[:, None]
    adopted = data["Period Pres"].to_numpy(dtype=float)[:, None]
    kept = data["Upto Pres"].to_numpy(dtype=float)[:, None]

    # Sum of `calls` signals drawn from N(TQP, ADSV), built from standard normals.
    signal_sum = TQP * calls + np.sqrt(ADSV * calls) * draws
    # Posterior belief after observing those signals.
    belief = (IQP + signal_sum / ADSV) / (1 + calls / ADSV)
    # Logistic probabilities via logaddexp so large |belief| cannot overflow.
    p_adopt = np.exp(-np.logaddexp(0.0, -belief))
    p_wait = np.exp(-np.logaddexp(0.0, belief))
    # Rows with "Upto Pres" == 0 are raised to the power 0 and contribute 1.
    row_lik = (p_adopt * adopted + p_wait * (1 - adopted)) ** kept

    per_physician = pd.DataFrame(row_lik).groupby(data["ID"].to_numpy()).prod()
    return per_physician.mean(axis=1)


def crit2(params, D=10, data=None):
    '''
    Negative simulated log likelihood of the 3-segment latent-class
    Bayesian Learning Model.

    Input:
    params: sequence of 11 values
        (alpha_0, alpha, sigma_square) for segment 1,
        (alpha_0, alpha, sigma_square) for segment 2,
        (alpha_0, alpha, sigma_square) for segment 3,
        lam1, lam2 -- logits of the segment 1 and segment 2 shares; the
        segment 3 logit is fixed at 0
    D: number of simulation draws; columns "Rnd 1" .. "Rnd D" must exist
    data: DataFrame with columns "ID", "Cum Det", "Period Pres", "Upto Pres"
        and "Rnd 1" .. "Rnd D"; defaults to the notebook-level `df`

    Returns:
    -LL, where LL is the sum over physicians of the log of the share-weighted
    mixture of the three segment likelihoods. Returns +inf when any
    sigma_square is not strictly positive.

    Raises:
    ValueError if params does not contain exactly 11 values.
    '''
    values = np.asarray(params, dtype=float)
    if values.shape != (11,):
        raise ValueError("crit2 expects 11 parameters, got shape %s" % (values.shape,))
    if data is None:
        data = df

    segments = values[:9].reshape(3, 3)   # rows: (alpha_0, alpha, sigma_square)
    lam1, lam2 = values[9:]

    # A non-positive variance yields NaNs that the pandas reductions below
    # would silently skip, producing a misleadingly good objective value.
    if not np.all(segments[:, 2] > 0):
        return np.inf

    # Segment shares: softmax over (lam1, lam2, 0), shifted by the maximum so
    # that large logits cannot overflow.
    logits = np.array([lam1, lam2, 0.0])
    weights = np.exp(logits - logits.max())
    shares = weights / weights.sum()

    agg_seg = sum(
        share * _segment_avg_likelihood(IQP, TQP, ADSV, data, D)
        for share, (IQP, TQP, ADSV) in zip(shares, segments)
    )

    LL = np.sum(np.log(agg_seg))

    return -LL

In [16]:
# Step 11: maximise the log likelihood of the 3-segment latent-class model.
# Parameters are (alpha_0, alpha, sigma_square) for segments 1, 2 and 3,
# followed by the two share logits lam1 and lam2.
para_init=[-1.3,2.6,2.4,-1,4,4,-2,1,1,0,0]
results2 = opt.minimize(crit2, para_init,tol=1e-10, method='BFGS')
a1,a2,a3,b1,b2,b3,c1,c2,c3, lam1,lam2 = results2.x
# The three estimates printed here are those of segment 1. The likelihood must
# come from `results2` (this fit); reading `results.fun` would repeat the
# single-segment model's value.
print(" alpha_0:",a1,"\n",
      "alpha",a2,"\n",
      "sigma^2:",a3,"\n",
      "Maximized Log Likelihood:",-results2.fun)

 alpha_0: -1.291497104008225 
 alpha 2.5918382289828994 
 sigma^2: 2.407986247552964 
 Maximized Log Likelihood: -1868.740615803639


In [17]:
# Segment shares implied by the fitted logits. Segment 3 is the base class,
# so its numerator is 1 (logit fixed at 0).
share_denominator = 1 + np.exp(lam1) + np.exp(lam2)
print("Probability of belonging to seg 1:", np.exp(lam1) / share_denominator)
print("Probability of belonging to seg 2:", np.exp(lam2) / share_denominator)
print("Probability of belonging to seg 3:", 1 / share_denominator)

Probability of belonging to seg 1: 0.7265481930964626
Probability of belonging to seg 2: 0.14992474382310497
Probability of belonging to seg 3: 0.1235270630804324


In [18]:
# Side-by-side table of the fitted parameters, one column per segment.
segment_estimates = {
    "Seg1": [a1, a2, a3],
    'Seg2': [b1, b2, b3],
    'Seg3': [c1, c2, c3],
}
parameter_labels = ['alpha_0', 'alpha', 'sigma^2']
pd.DataFrame(segment_estimates, index=parameter_labels)

Unnamed: 0,Seg1,Seg2,Seg3
alpha_0,-1.291497,-1.291498,-1.291497
alpha,2.591838,2.591736,2.591736
sigma^2,2.407986,2.407882,2.407868
