In [3]:
from __future__ import division
from ps4_utils import load_data,load_experiment
from ps4_utils import AbstractGenerativeModel
from ps4_utils import save_submission
from scipy.special import logsumexp
import numpy as np
# Dataset file name handed to load_experiment / load_data in the experiment cells.
data_fn = "datasets-ps4.h5"
# Number of randomly drawn mixture-component configurations tried per experiment.
MAX_OUTER_ITER = 15

In [100]:
# Scratch check: reducing over axis 0 collapses the rows, leaving one total per column.
jwn = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(jwn.sum(axis=0))

[5 7 9]


In [101]:
class MixtureModel(AbstractGenerativeModel):
    """Per-class mixture of independent Bernoulli components, fitted with EM.

    For class ``c`` with ``K`` components the model keeps

    * ``params['pi'][c]``    -- length-``K`` vector of mixing weights, and
    * ``params['theta'][c]`` -- ``D x K`` matrix whose entry ``[d, k]`` is used
      as p(feature d = 1 | component k).

    NOTE(review): the likelihood uses ``X`` and ``1 - X``, so the features are
    presumably binary (0/1) -- confirm against the data loader.
    """

    def __init__(self, CLASSES, NUM_FEATURES, NUM_MIXTURE_COMPONENTS, MAX_ITER=50, EPS=1e-7):
        """Set up hyper-parameters and untrained parameter containers.

        Args:
            CLASSES: forwarded unchanged to ``AbstractGenerativeModel``.
            NUM_FEATURES: forwarded unchanged to ``AbstractGenerativeModel``.
            NUM_MIXTURE_COMPONENTS: iterable giving the number of mixture
                components for each class, indexed by class.
            MAX_ITER: number of EM iterations run by ``fit``.
            EPS: ``theta`` is clamped to ``[EPS, 1 - EPS]`` after every update.
        """
        super().__init__(CLASSES, NUM_FEATURES)

        self.epsilon = EPS              # clamp theta in [eps, 1-eps]
        self.max_iter = MAX_ITER        # iterations of EM algo
        self.num_mixture_components = NUM_MIXTURE_COMPONENTS

        self.params = {}
        # mixture probabilities: uniform until fitted
        self.params['pi'] = [np.repeat(1/k, k) for k in self.num_mixture_components]
        # component probabilities: placeholders until fitted
        self.params['theta'] = [np.zeros((self.num_features, k)) for k in self.num_mixture_components]

    def pack_params(self, X, class_idx):
        """Fit the mixture for one class and store its parameters.

        Args:
            X: container indexable by class; ``X[class_idx]`` is passed to ``fit``.
            class_idx: index of the class to fit.
        """
        pi, theta = self.fit(X[class_idx], class_idx)
        self.params['pi'][class_idx] = pi
        self.params['theta'][class_idx] = theta

    def classify(self, X):
        """Return, for every row of ``X``, the class with the largest mixture log-likelihood.

        The score of class ``c`` is ``log sum_k pi_k p(x | theta_k)``; no class
        prior is added.
        """
        pi = self.params['pi']
        theta = self.params['theta']
        scores = [self.findP(X, pi[c], theta[c])[1] for c in range(self.num_classes)]
        # C x N  ->  N x C, then pick the best class per row
        return np.vstack(scores).T.argmax(-1)

    # --- E-step
    def updateLatentPosterior(self, X, pi, theta, _):
        """Return the ``N x K`` responsibilities p(component k | x_n).

        The fourth positional argument is accepted for interface compatibility
        and ignored.
        """
        logits, norm = self.findP(X, pi, theta)
        return np.exp(logits - norm[:, np.newaxis])

    # --- M-step (1)
    @staticmethod
    def updatePi(gamma):
        """Mixing weights: column sums of the responsibilities divided by the number of rows."""
        return np.sum(gamma, axis=0) / len(gamma)

    # -- M-step (2)
    @staticmethod
    def updateTheta(X, gamma):
        """Responsibility-weighted feature means, returned as a ``D x K`` matrix.

        A component whose responsibilities are all zero would otherwise yield
        0/0 = NaN, which ``np.clip`` does not remove and which then poisons
        every later E-step. The denominator is therefore floored at the
        smallest positive normal float, so such a column becomes 0 instead.
        """
        mass = np.maximum(np.sum(gamma, axis=0), np.finfo(float).tiny)
        return X.T @ gamma / mass

    @staticmethod
    def findP(X, pi, theta):
        """Compute joint and marginal log-probabilities under one class's mixture.

        Returns:
            ``(res, norm)`` where ``res[n, k] = log pi_k + log p(x_n | theta_k)``
            (``N x K``) and ``norm[n] = logsumexp_k res[n, k]`` (length ``N``).
        """
        # A component whose weight collapsed to exactly 0 correctly gets
        # log-weight -inf; only the accompanying warning is silenced.
        with np.errstate(divide='ignore'):
            log_pi = np.log(pi)                       # K

        one_prob = np.log(theta)                      # DxK
        zero_prob = np.log(1 - theta)                 # DxK
        res = log_pi + X @ one_prob + (1 - X) @ zero_prob   # NxK (log_pi broadcast over rows)

        return res, logsumexp(res, axis=1)            # NxK, N

    @staticmethod
    def randomAssignment(X, nmm):
        """Build an initial ``D x nmm`` theta from a random hard assignment.

        Every row of ``X`` is added to one uniformly chosen component; each
        component's accumulated vector is then divided by its own sum.
        A component with no accumulated mass (no rows assigned, or only
        all-zero rows) would give 0/0 = NaN, so it is initialised to the
        uniform vector ``1/D`` instead.
        """
        X = np.asarray(X)
        theta = np.zeros((nmm, X.shape[1]))

        # one random draw per sample, in sample order
        for x in X:
            theta[np.random.randint(0, nmm)] += x

        row_mass = np.sum(theta, axis=1)[:, np.newaxis]
        empty = (row_mass == 0).ravel()
        theta[empty] = 1.0
        row_mass[empty] = theta.shape[1]

        return (theta / row_mass).T

    # --- execute EM procedure
    def fit(self, X, class_idx):
        """Run ``max_iter`` EM iterations on the samples of one class.

        There is no convergence test; the loop always runs ``self.max_iter``
        times.

        Args:
            X: sample matrix of the class (rows are samples).
            class_idx: class index, used to look up the component count and the
                starting mixing weights.

        Returns:
            ``(pi, theta)`` -- mixing weights (length ``K``) and clamped
            component parameters (``D x K``).
        """
        nmm = self.num_mixture_components[class_idx]
        pi = self.params['pi'][class_idx]
        theta = np.clip(self.randomAssignment(X, nmm), self.epsilon, 1 - self.epsilon)

        for _ in range(self.max_iter):
            gamma = self.updateLatentPosterior(X, pi, theta, nmm)
            pi = self.updatePi(gamma)
            theta = np.clip(self.updateTheta(X, gamma), self.epsilon, 1 - self.epsilon)

        return pi, theta

In [65]:
class NaiveBayesModel(AbstractGenerativeModel):
    """Naive Bayes classifier with one Bernoulli parameter per (class, feature).

    ``params['p'][i][j]`` is used as p(feature j = 1 | class i).

    NOTE(review): the likelihood uses ``X`` and ``1 - X``, so the features are
    presumably binary (0/1) -- confirm against the data loader.
    """

    def __init__(self, CLASSES, NUM_FEATURES, EPS=1e-12):
        """
        Args:
            CLASSES: forwarded unchanged to ``AbstractGenerativeModel``.
            NUM_FEATURES: forwarded unchanged to ``AbstractGenerativeModel``;
                also the length of each per-class parameter vector.
            EPS: small constant added inside the logarithms in ``classify``.
        """
        super().__init__(CLASSES, NUM_FEATURES)

        # for numerical stability
        self.eps = EPS

        # p[i][j] = p (jth feature = 1 | class i)
        # One independent array per class: `[arr] * n` would store the *same*
        # array object n times, so an in-place edit of one class would leak
        # into all the others.
        self.params = {'p': [np.zeros(NUM_FEATURES) for _ in range(self.num_classes)]}

    def pack_params(self, X, class_idx):
        """Estimate and store the feature probabilities of one class from ``X[class_idx]``."""
        self.params['p'][class_idx] = self.fit(X[class_idx])

    def classify(self, X):
        """Return the index of the highest-scoring class for every row of ``X``.

        The score is the Bernoulli log-likelihood of the row under each class;
        no class prior is added.
        """
        p = np.array(self.params['p'])            # CxD

        # eps keeps both logarithms finite when p is exactly 0 or 1
        one_prob = np.log(p + self.eps)           # CxD
        zero_prob = np.log(1 + self.eps - p)      # CxD

        # this is NxC
        logits = X @ one_prob.T + (1 - X) @ zero_prob.T

        # take maximum along rows
        return np.argmax(logits, axis=1)

    def fit(self, X) -> np.ndarray:
        """Return the per-feature column sums of ``X`` divided by the number of rows."""
        return np.sum(X, axis=0) / len(X)

In [102]:
experiment_name = "sentiment_analysis"
# --- SENTIMENT ANALYSIS setup
Xtrain, Xval, num_classes, num_features = load_experiment(data_fn, experiment_name)

# -- build naive bayes model for sentiment analysis
print("SENTIMENT ANALYSIS -- NAIVE BAYES MODEL:")
nbm = NaiveBayesModel(num_classes, num_features)
nbm.train(Xtrain)
print("ACCURACY ON VALIDATION: " + str(nbm.val(Xval)))

# -- build mixture model for sentiment analysis
# Random search over the number of components per class. The model with the
# best validation accuracy is remembered, so the submission below is not
# simply whatever configuration happened to be drawn last.
# NOTE(review): assumes mm.val returns a scalar accuracy (it prints as one) -- confirm.
print("SENTIMENT ANALYSIS -- MIXTURE MODEL:")
best_mm, best_acc = None, -np.inf
for trial in range(MAX_OUTER_ITER):
    num_mixture_components = np.random.randint(2, 15, num_classes)
    print("COMPONENTS: " + " ".join(str(k) for k in num_mixture_components))
    mm = MixtureModel(num_classes, num_features, num_mixture_components)
    mm.train(Xtrain)
    acc = mm.val(Xval)
    print("ACCURACY ON VALIDATION: " + str(acc))
    if acc > best_acc:
        best_mm, best_acc = mm, acc

print("BEST ACCURACY ON VALIDATION: " + str(best_acc))

# submit to kaggle (predictions of the best model found above)
Xkaggle = load_data(data_fn, experiment_name, "kaggle")
save_submission("mm-{}-submission.csv".format(experiment_name), best_mm.classify(Xkaggle))

SENTIMENT ANALYSIS -- NAIVE BAYES MODEL:
ACCURACY ON VALIDATION: 0.74
SENTIMENT ANALYSIS -- MIXTURE MODEL:
COMPONENTS: 12 5
ACCURACY ON VALIDATION: 0.714
COMPONENTS: 6 9
ACCURACY ON VALIDATION: 0.68
COMPONENTS: 8 6
ACCURACY ON VALIDATION: 0.712
COMPONENTS: 14 4
ACCURACY ON VALIDATION: 0.714
COMPONENTS: 4 13
ACCURACY ON VALIDATION: 0.696
COMPONENTS: 8 6
ACCURACY ON VALIDATION: 0.742
COMPONENTS: 4 3
ACCURACY ON VALIDATION: 0.712
COMPONENTS: 4 6
ACCURACY ON VALIDATION: 0.746
COMPONENTS: 12 9
ACCURACY ON VALIDATION: 0.698
COMPONENTS: 12 11
ACCURACY ON VALIDATION: 0.736
COMPONENTS: 9 13
ACCURACY ON VALIDATION: 0.728
COMPONENTS: 8 9
ACCURACY ON VALIDATION: 0.71
COMPONENTS: 14 14
ACCURACY ON VALIDATION: 0.71
COMPONENTS: 7 7
ACCURACY ON VALIDATION: 0.718
COMPONENTS: 13 3
ACCURACY ON VALIDATION: 0.714
Saved: mm-sentiment_analysis-submission.csv


In [103]:
experiment_name = "mnist"
# --- MNIST DIGIT CLASSIFICATION setup
Xtrain, Xval, num_classes, num_features = load_experiment(data_fn, experiment_name)

# -- build naive bayes model for mnist digit classification
print("MNIST DIGIT CLASSIFICATION -- NAIVE BAYES MODEL:")
nbm = NaiveBayesModel(num_classes, num_features)
nbm.train(Xtrain)
print("ACCURACY ON VALIDATION: " + str(nbm.val(Xval)))
print()

# -- build mixture model for mnist digit classification
# Random search over the number of components per class. The model with the
# best validation accuracy is remembered, so the submission below is not
# simply whatever configuration happened to be drawn last.
# NOTE(review): assumes mm.val returns a scalar accuracy (it prints as one) -- confirm.
print("MNIST DIGIT CLASSIFICATION -- MIXTURE MODEL:")
best_mm, best_acc = None, -np.inf
for trial in range(MAX_OUTER_ITER):
    num_mixture_components = np.random.randint(2, 15, num_classes)
    print("COMPONENTS: " + " ".join(str(k) for k in num_mixture_components))
    mm = MixtureModel(num_classes, num_features, num_mixture_components)
    mm.train(Xtrain)
    acc = mm.val(Xval)
    print("ACCURACY ON VALIDATION: " + str(acc))
    if acc > best_acc:
        best_mm, best_acc = mm, acc

print("BEST ACCURACY ON VALIDATION: " + str(best_acc))

# submit to kaggle (predictions of the best model found above)
Xkaggle = load_data(data_fn, experiment_name, "kaggle")
save_submission("mm-{}-submission.csv".format(experiment_name), best_mm.classify(Xkaggle))

MNIST DIGIT CLASSIFICATION -- NAIVE BAYES MODEL:
ACCURACY ON VALIDATION: 0.7355

MNIST DIGIT CLASSIFICATION -- MIXTURE MODEL:
COMPONENTS: 3 7 14 12 9 11 14 8 8 10
ACCURACY ON VALIDATION: 0.7865
COMPONENTS: 5 14 6 13 3 4 9 14 2 8
ACCURACY ON VALIDATION: 0.779
COMPONENTS: 2 7 9 10 5 8 8 9 7 9
ACCURACY ON VALIDATION: 0.7855
COMPONENTS: 12 4 13 5 14 3 14 14 13 6
ACCURACY ON VALIDATION: 0.7615
COMPONENTS: 9 4 4 7 2 6 3 8 7 14
ACCURACY ON VALIDATION: 0.7755
COMPONENTS: 12 13 5 7 14 4 13 2 5 7
ACCURACY ON VALIDATION: 0.771
COMPONENTS: 14 10 8 2 2 2 14 5 12 11
ACCURACY ON VALIDATION: 0.757
COMPONENTS: 13 10 9 7 8 13 3 12 12 14
ACCURACY ON VALIDATION: 0.772
COMPONENTS: 9 12 5 3 9 12 13 10 14 5
ACCURACY ON VALIDATION: 0.7705
COMPONENTS: 5 3 9 14 2 10 2 11 8 7
ACCURACY ON VALIDATION: 0.781
COMPONENTS: 2 5 3 7 10 12 2 4 10 14
ACCURACY ON VALIDATION: 0.761
COMPONENTS: 9 11 9 13 9 5 7 9 9 8
ACCURACY ON VALIDATION: 0.7855
COMPONENTS: 14 8 14 11 8 13 4 8 6 10
ACCURACY ON VALIDATION: 0.775
COMPONENTS: 