In [2]:
import pandas as pd
import numpy as np

# Student number is 425288, so the assigned series is brand 88
# (B&B, FLSMN, PKY, REST BRAND margarine tube market 3).
brand = "brand88"

# Each workbook contributes one column for the chosen brand.
# Names end in _raw to avoid shadowing IPython's display().
sales_raw = pd.read_excel("sales.xls")[brand]
price_raw = pd.read_excel("price.xls")[brand]
display_raw = pd.read_excel("displ.xls")[brand]
coupon_raw = pd.read_excel("coupon.xls")[brand]

# Sales and price are log-transformed; display and coupon are used as read.
# A constant column is appended so the regressions can fit an explicit intercept.
data = pd.DataFrame({
    "logsales": np.log(sales_raw),
    "logprice": np.log(price_raw),
    "display": display_raw,
    "coupon": coupon_raw,
}).assign(intercept=1)

# Response vector and design matrix (intercept column first).
regressors = ["intercept", "logprice", "display", "coupon"]
y = data["logsales"]
X = data[regressors]

data.describe()

Unnamed: 0,logsales,logprice,display,coupon,intercept
count,124.0,124.0,124.0,124.0,124.0
mean,4.536251,0.296617,0.08871,0.854839,1.0
std,0.256217,0.016849,0.285478,0.353692,0.0
min,3.828641,0.195238,0.0,0.0,1.0
25%,4.356709,0.288781,0.0,1.0,1.0
50%,4.564348,0.297397,0.0,1.0,1.0
75%,4.718499,0.304982,0.0,1.0,1.0
max,5.257495,0.330023,1.0,1.0,1.0


Frequentist estimation

In [3]:
from sklearn.linear_model import LinearRegression

# X already contains its own "intercept" column, so sklearn is told not to add one.
reg = LinearRegression(fit_intercept=False)
reg.fit(X, y)

# In-sample R^2 of the fitted model.
r_squared = reg.score(X, y)
print("R2 ", r_squared)

# Coefficients in the column order of X.
coefficients = reg.coef_
print("\nCoefficients", coefficients)

R2  0.04064190974067994

Coefficients [ 4.12687887  1.57347144  0.14798799 -0.08244147]


Drawing from the conditional distributions

In [4]:
def draw_sigma(X,y,beta):
    """Draw one variance value conditional on the coefficients ``beta``.

    The draw is ``scale / chi2`` where

    * ``scale = (y - X beta)'(y - X beta) + beta'beta`` and
    * ``chi2`` is a chi-square variate with ``len(y) + len(beta)`` degrees
      of freedom, taken from NumPy's global random generator.

    Parameters
    ----------
    X : 2-D array-like with one row per observation.
    y : 1-D array-like of responses, same length as the rows of ``X``.
    beta : 1-D array-like of coefficients, one per column of ``X``.

    Returns
    -------
    A single scalar draw.
    """
    # Residuals of the linear fit at the current coefficients.
    residuals = y - np.dot(X, beta)

    # Residual sum of squares plus the squared norm of beta.
    sum_sq_resid = np.dot(residuals.T, residuals)
    beta_sq_norm = np.dot(beta, beta)

    # N + k degrees of freedom.
    degrees_of_freedom = len(y) + len(beta)
    chi2_draw = np.random.chisquare(degrees_of_freedom)

    return (sum_sq_resid + beta_sq_norm) / chi2_draw

def draw_beta(X,y,var):
    """Draw one coefficient vector conditional on the variance ``var``.

    The draw comes from a multivariate normal with

    * mean ``(X'X + I)^-1 X'y`` and
    * covariance ``var * (X'X + I)^-1``,

    using NumPy's global random generator. The identity matrix is sized
    from the number of columns of ``X``, so the function works for any
    number of regressors rather than only four.

    Parameters
    ----------
    X : 2-D array-like (must support ``.T``), one row per observation.
    y : 1-D array-like of responses, same length as the rows of ``X``.
    var : scalar variance that scales the covariance matrix.

    Returns
    -------
    1-D ``numpy.ndarray`` with one entry per column of ``X``.
    """
    # Number of regressors k, read from X instead of being fixed at 4.
    k = np.shape(X)[1]

    # X'X + I_k
    inv = np.dot(X.T,X)+np.identity(k)
    # (X'X + I_k)^-1
    inv = np.linalg.inv(inv)

    # (X'X + I_k)^-1 X'y
    mean = np.dot(np.dot(inv, X.T) ,y)

    # var * (X'X + I_k)^-1 -- with a scalar var, np.dot is plain scaling.
    cov = np.dot(var, inv)

    return np.random.multivariate_normal(mean, cov)

Gibbs Sampler

In [41]:
def gibbs_sampler(X,y,n):
    """Run ``n`` alternating conditional draws and return the whole chain.

    Each iteration first calls ``draw_sigma`` with the previous row's
    coefficients, then calls ``draw_beta`` with the value just drawn.
    The chain's width and column labels are derived from the number of
    columns of ``X`` instead of being fixed at four coefficients; for a
    four-column ``X`` the result is laid out exactly as before.

    Parameters
    ----------
    X : 2-D array-like design matrix, one row per observation.
    y : 1-D array-like of responses.
    n : number of iterations to run.

    Returns
    -------
    ``pandas.DataFrame`` with ``n + 1`` rows and columns
    ``B0 .. B{k-1}, Sigma``. Row 0 is the all-zero starting state, not a
    draw. The ``Sigma`` column stores the value returned by ``draw_sigma``,
    i.e. the quantity passed to ``draw_beta`` as its variance.
    """
    # Number of coefficients k; the extra last column holds the variance draw.
    k = np.shape(X)[1]
    estimates =  np.zeros((n+1, k+1))

    for i in range(n):
        # Variance conditional on the previous coefficients ...
        estimates[i+1,k] = draw_sigma(X,y, estimates[i,0:k])
        # ... then coefficients conditional on that fresh variance.
        estimates[i+1,0:k] = draw_beta(X,y,estimates[i+1,k])

    columns = ['B%d' % j for j in range(k)] + ['Sigma']
    return pd.DataFrame(estimates, columns=columns)
    
# Seed NumPy's global generator, which draw_sigma and draw_beta pull from via
# np.random.*, so that a fresh Restart & Run All reproduces the same chain.
# Without a seed every run yields different draws and different tables.
GIBBS_SEED = 42
# Number of sampler iterations (the returned frame has one extra initial row).
N_DRAWS = 500000

np.random.seed(GIBBS_SEED)
samples = gibbs_sampler(X,y,N_DRAWS)
display(samples.head())

Unnamed: 0,B0,B1,B2,B3,Sigma
0,0.0,0.0,0.0,0.0,0.0
1,4.679233,-0.348117,-2.476817,0.342874,20.700421
2,3.890466,1.710345,0.071939,0.11823,0.827477
3,3.902835,1.533521,0.207367,0.170906,0.197948
4,3.765064,2.089872,-2e-06,0.159605,0.20576


Estimates

In [116]:
# Rows before position `burnin` are dropped from the chain as burn-in.
burnin = 10000
cleaned = samples.iloc[burnin:,:]

# No thinning: it would only make the estimates less precise, and neither
# plotting nor storage space is a concern here.

# Compute each summary once and reuse it for both the display and the difference.
summary_stats = ["mean", "var"]
full_summary = samples.agg(summary_stats)
cleaned_summary = cleaned.agg(summary_stats)

print("Estimates of the full sample\n")
display(full_summary.transpose())

print("\nEstimates cleaned of burnin of ",burnin," samples\n")
display(cleaned_summary.transpose())

print("\nDifference between full sample and cleaned sample\n")

# Last expression of the cell, so the notebook renders it as the output.
(full_summary - cleaned_summary).transpose()

Estimates of the full sample



Unnamed: 0,mean,var
B0,4.018096,0.026472
B1,1.217255,0.192008
B2,0.131402,0.019854
B3,0.132278,0.012794
Sigma,0.215273,0.001613



Estimates cleaned of burnin of  10000  samples



Unnamed: 0,mean,var
B0,4.018103,0.026441
B1,1.217236,0.192028
B2,0.131424,0.019838
B3,0.132277,0.012793
Sigma,0.215236,0.000773



Difference between full sample and cleaned sample



Unnamed: 0,mean,var
B0,-7e-06,3.059521e-05
B1,1.9e-05,-1.982134e-05
B2,-2.2e-05,1.544635e-05
B3,1e-06,8.236014e-07
Sigma,3.7e-05,0.0008401857
