In [102]:
import pandas as pd
import numpy as np
import scipy.optimize as opt

#### Read in Data


In [9]:
#Data Description
pd.read_excel("ED_Data_for_957physicians.xlsx")

Unnamed: 0,variable,variable descriptoin
0,g_p_key,Physician id
1,time,Sequence of patient visits
2,num,Total number of physician prescription during ...
3,pres_product,Name of prescribed drug
4,pres_choice,"Viagra: 1, Levitra: 2, Cialis: 3"
5,pres_week_end,Week of prescription
6,day_dt,Date of prescription
7,visitid,Patient visit ID (Note: they are all new patie...
8,num_Levitra,Number of Levitra's detailing visits between p...
9,num_Viagra,Number of Viagra's detailing visits between pa...


In [103]:
#ED Data
df=pd.read_excel("ED_Data_for_957physicians.xlsx",sheet_name="Data_for_957_physicians")
df.head()

Unnamed: 0,g_p_key,time,num,pres_product,pres_choice,pres_week_end,day_dt,visitid,num_Levitra,num_Viagra,num_Cialis
0,3,1,15,Levitra,2,2003-09-12,2003-09-11,6533581,2,0,0
1,3,2,15,Levitra,2,2003-12-05,2003-12-01,7431074,4,0,0
2,3,3,15,Cialis,3,2003-12-12,2003-12-08,7497237,0,0,1
3,3,4,15,Levitra,2,2003-12-12,2003-12-09,7510596,1,0,0
4,3,5,15,Viagra,1,2003-12-19,2003-12-15,7593560,0,0,0


In [104]:
#Cumulative number of physician prescription 
df["Cum_Lev"]=df["num_Levitra"]
for i in range(1,df.shape[0]):
    if df.loc[i,"g_p_key"]==df.loc[i-1,"g_p_key"]:
        df.loc[i,"Cum_Lev"]=df.loc[i,"num_Levitra"]+df.loc[i-1,"Cum_Lev"]
    else:
        df.loc[i,"Cum_Lev"]=df.loc[i,"num_Levitra"]
        
df["Cum_Via"]=df["num_Viagra"]
for i in range(1,df.shape[0]):
    if df.loc[i,"g_p_key"]==df.loc[i-1,"g_p_key"]:
        df.loc[i,"Cum_Via"]=df.loc[i,"num_Viagra"]+df.loc[i-1,"Cum_Via"]
    else:
        df.loc[i,"Cum_Via"]=df.loc[i,"num_Viagra"]

df["Cum_Cia"]=df["num_Cialis"]
for i in range(1,df.shape[0]):
    if df.loc[i,"g_p_key"]==df.loc[i-1,"g_p_key"]:
        df.loc[i,"Cum_Cia"]=df.loc[i,"num_Cialis"]+df.loc[i-1,"Cum_Cia"]
    else:
        df.loc[i,"Cum_Cia"]=df.loc[i,"num_Cialis"]

In [105]:
df=pd.concat([df,pd.get_dummies(df.pres_product)],axis=1,sort=False)
df.head()

Unnamed: 0,g_p_key,time,num,pres_product,pres_choice,pres_week_end,day_dt,visitid,num_Levitra,num_Viagra,num_Cialis,Cum_Lev,Cum_Via,Cum_Cia,Cialis,Levitra,Viagra
0,3,1,15,Levitra,2,2003-09-12,2003-09-11,6533581,2,0,0,2,0,0,0,1,0
1,3,2,15,Levitra,2,2003-12-05,2003-12-01,7431074,4,0,0,6,0,0,0,1,0
2,3,3,15,Cialis,3,2003-12-12,2003-12-08,7497237,0,0,1,6,0,1,1,0,0
3,3,4,15,Levitra,2,2003-12-12,2003-12-09,7510596,1,0,0,7,0,1,0,1,0
4,3,5,15,Viagra,1,2003-12-19,2003-12-15,7593560,0,0,0,7,0,1,0,0,1


In [106]:
#Random Draw
D=10
RD1=np.random.random(D*df.shape[0]).reshape(D,df.shape[0])
RD2=np.random.random(D*df.shape[0]).reshape(D,df.shape[0])

In [134]:
RD1[:,1]

array([0.03990187, 0.1589869 , 0.0605135 , 0.02954185, 0.07159782,
       0.17865023, 0.27270029, 0.18866067, 0.72800891, 0.07177571])

#### Bayesian Learning

In [107]:
grouped=df.groupby("g_p_key").mean()

In [135]:
def crit(params):
    '''
    Function computes the negative log likelihood of Bayesian Learning Model
    '''  
    #Step 0: Starting values
    IQP_L, TQP_L, DSV_L, IQP_C, TQP_C, DSV_C,TQP_V,\
    Pers_L, Pers_C, Pers_V, ADSV_L, ADSV_C=params
    
    #Step 1: Standard normal draws
    #Have created RD1 for Levitra and RD2 for Cialis

    #New dataframe for storing likelihood for each physician
    global liktab
    liktab=pd.DataFrame({"g_p_key":grouped.index})
    
    #Step 2: transform distributions
    
    for i in range(D):
        df["NDr_L "+str(i)]=TQP_L*df["Cum_Lev"]+np.sqrt(ADSV_L*df["Cum_Lev"])*RD1[i]
        df["NDr_C "+str(i)]=TQP_C*df["Cum_Cia"]+np.sqrt(ADSV_C*df["Cum_Cia"])*RD2[i]
    
    #Step3: Posterior belief:
        df["qp_L "+str(i)]=(IQP_L+df["NDr_L "+str(i)]/ADSV_L)/(1+df["Cum_Lev"]/ADSV_L)
        df["qp_C "+str(i)]=(IQP_C+df["NDr_C "+str(i)]/ADSV_C)/(1+df["Cum_Cia"]/ADSV_C)
       
        
    #Step 4-7: Likelihood of adoption
        exp_L=np.exp(df["qp_L "+str(i)]+df["num_Levitra"]*Pers_L)
        exp_C=np.exp(df["qp_C "+str(i)]+df["num_Cialis"]*Pers_C)
        exp_V=np.exp(TQP_V+df["num_Viagra"]*Pers_V)
       
        df["prp "+str(i)]=(exp_L*df["Levitra"]+exp_C*df["Cialis"]+
                           exp_V*df["Viagra"])/(exp_L+exp_C+exp_V)
        
    #Step 8:
        liktab[i]=df.groupby("g_p_key")[["prp "+str(i)]].prod()
        
    #Step 9: Physicians' average likelihood
    liktab=liktab.drop(columns=['g_p_key'])
    liktab["avg"]=liktab.mean(axis=1)
    
    #Step 10: Sum of log likelihood
    liktab["LL"]=np.log(liktab["avg"])
    LL=np.sum(np.log(liktab["avg"]))
    
    return -LL

In [94]:
-0.714421618999881
0.675465212636976
-0.656960770622585
-1.82315894038913
1.38239627273411
-0.239824712994106
0.479406013445692
0.139745295932796
0.139745295932796
0.139745295932796
0.518424553727328
0.786765758794559


0.786765758794559

In [136]:
#Step 11: Maximize the LL 
para_init=[-1,1,-1,-2,1,0.1,0.1,0.1,0.1,0.1,1,1]
results = opt.minimize(crit, para_init, tol=1e-10,method='L-BFGS-B')
results.x

ValueError: operands could not be broadcast together with shapes (9900,) (10,) 

In [123]:
results.fun

2774.7052136279362

In [122]:
liktab

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,avg,LL
0,,,,,,,,,,,,
1,,,,,,,,,,,,
2,,,,,,,,,,,,
3,1.313184e-07,2.678204e-07,2.365305e-07,1.869370e-07,1.835802e-07,2.675260e-07,2.047775e-07,1.744827e-07,1.716554e-07,2.796774e-07,2.104305e-07,-15.374110
4,,,,,,,,,,,,
5,,,,,,,,,,,,
6,,,,,,,,,,,,
7,2.547690e-50,3.151147e-50,3.928042e-50,2.659976e-50,3.090051e-50,3.856779e-50,1.624563e-50,5.978162e-50,3.131645e-50,8.439666e-51,3.081202e-50,-114.003935
8,,,,,,,,,,,,
9,5.299073e-07,4.276913e-07,2.308098e-07,3.925624e-07,3.104530e-07,5.339803e-07,3.386613e-07,3.088978e-07,5.837119e-07,2.810757e-07,3.937751e-07,-14.747486


In [132]:
for i in range(D):
    dd[i]=df.groupby("g_p_key")[["prp "+str(i)]].prod())

In [133]:
dd

Unnamed: 0,g_p_key,0,1,2,3,4,5,6,7,8,9
0,3,,,,,,,,,,
1,7,,,,,,,,,,
2,9,,,,,,,,,,
3,12,-15.845641,-15.132949,-15.257188,-15.492494,-15.510614,-15.134049,-15.401342,-15.561440,-15.577777,-15.089629
4,14,,,,,,,,,,
5,17,,,,,,,,,,
6,23,,,,,,,,,,
7,24,-114.194068,-113.981488,-113.761114,-114.150937,-114.001067,-113.779422,-114.644015,-113.341141,-113.987696,-115.298897
8,30,,,,,,,,,,
9,31,-14.450564,-14.664864,-15.281672,-14.750570,-14.985233,-14.442907,-14.898265,-14.990255,-14.353858,-15.084642


In [129]:
dd=pd.DataFrame({"g_p_key":grouped.index})
dd

Unnamed: 0,g_p_key
0,3
1,7
2,9
3,12
4,14
5,17
6,23
7,24
8,30
9,31
