Preparing the data

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm

data = pd.read_csv("data425288.csv")

T = 25
N = data.ID.max()

y = np.log(data.Sales.values.reshape((N,T)))

X = np.ones((T,2))
X[:,1] = np.log(data.Price[:T])

Log-likelihood evaluation

In [282]:
def LogL(theta, pi, y, X):
    mu = np.dot(X,theta.T) #(25, 3)
    mu = np.repeat(mu[np.newaxis, :, :], N, axis=0)  #(500,25,K)
    y = np.repeat(y[:, :, np.newaxis], K, axis=2)  #(500,25,K)
    
    #pdfs
    probs = norm.pdf(y,mu,1) #(500,25,K)
    
    #prod^T probs
    segments = np.prod(probs, axis=1) #(500,K)
    
    #sum^K pi_c prod^T probs
    combined = np.dot(segments, pi) #(500,)
    
    #sum^N log sum^K pi_c prod^T probs
    LogL = np.log(combined).sum() #(1,)
    return LogL

Expectation step

In [296]:
def EStep(theta,pi,y,X):
    mu = np.dot(X,theta.T) #(25, K)
    mu = np.repeat(mu[np.newaxis, :, :], N, axis=0)  #(500,25,K)
    y = np.repeat(y[:, :, np.newaxis], K, axis=2)  #(500,25,K)

    #pdfs
    probs = norm.pdf(y,mu,1) #(500,25,K)
    
    #prod^T probs
    segments = np.prod(probs, axis=1) #(500,K)
    
    # prod^T probs times diagonal of pi
    numerators = np.dot(segments, np.diag(pi))
    
    #divide numeratats by denominators (= sum of row)
    W = numerators / numerators.sum(axis=1, keepdims=True)
    
    return W

W = EStep(theta, pi, y, X)
display(W.shape)
display(W)

  app.launch_new_instance()


(500, 3)

array([[nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       ...,
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan]])

Maximization step

In [291]:
def MStep(W,y,X):
    # 1/N sum^N w_ic
    pi = W.mean(axis=0) #(K,)
    
    # sum^N w_ic sum^T log S_it
    numerator = np.dot(y.sum(axis=1), W) #(K,)
    
    #  / T * sum^N w_ic
    alpha = numerator / T * W.sum(axis=0)
    
    # / sum^N w_ic * sum^T log p_t (scalar)
    beta =  numerator /  W.sum(axis=0) - X[:,1].sum()
    
    # [a', b']
    theta = np.stack((alpha, beta), axis=-1)
    
    return theta, pi
    
theta, pi = MStep(W,y,X)

display(pi)    

display(theta)    



array([1.28456541e-62, 1.00000000e+00, 1.41244539e-48])

array([[3.42025539e-118, 1.82517534e+002],
       [2.33222867e+006, 2.08465800e+002],
       [4.12633583e-090, 1.82076605e+002]])

EM algorithm

In [297]:
#test params
K = 3

np.random.seed(1234)
alpha = np.random.rand(K) * 3 + 3
beta = np.random.rand(K) - 1
theta = np.stack((alpha, beta), axis=-1)

pi = np.ones(K)/K

    
likelihood = LogL(theta, pi, y, X)
display(likelihood.shape)
display(likelihood)

()

-158825.53215931126

In [None]:
def EM(K,y,X):
    
    

Estimation implementation

In [None]:
def Estimate(K, X=X, y=y, seed=1234):