In [3]:
from __future__ import division
from ps4_utils import load_data,load_experiment
from ps4_utils import AbstractGenerativeModel
from ps4_utils import save_submission
from scipy.special import logsumexp
import numpy as np
# File name passed to load_experiment / load_data by the experiment cells
# (presumably an HDF5 file, judging by the .h5 extension).
data_fn = "datasets-ps4.h5"
# Number of randomly drawn mixture-component configurations tried per experiment.
MAX_OUTER_ITER = 15

In [None]:
class MixtureModel(AbstractGenerativeModel):
    """Per-class mixture of Bernoulli components, fit with EM.

    For every class c the model keeps mixing weights ``pi_c`` (length
    ``NUM_MIXTURE_COMPONENTS[c]``) and per-component feature probabilities
    ``theta_c`` (``NUM_FEATURES`` by ``NUM_MIXTURE_COMPONENTS[c]``).

    NOTE(review): the Bernoulli likelihood assumes every feature is 0/1 --
    confirm against the data set and the problem-set statement.
    """

    def __init__(self, CLASSES, NUM_FEATURES, NUM_MIXTURE_COMPONENTS, MAX_ITER=50, EPS=10**(-7)):
        """Store hyper-parameters and allocate per-class parameter slots.

        CLASSES, NUM_FEATURES  -- forwarded to AbstractGenerativeModel.__init__
        NUM_MIXTURE_COMPONENTS -- sequence with one component count per class
        MAX_ITER               -- number of EM iterations run by ``fit``
        EPS                    -- floor used to keep pi and theta away from 0/1
        """
        AbstractGenerativeModel.__init__(self, CLASSES, NUM_FEATURES)
        self.num_mixture_components = NUM_MIXTURE_COMPONENTS # list of num_mixture_components (length num_classes)
        self.max_iter = MAX_ITER # max iterations of EM
        self.epsilon = EPS # stability floor; applied in fit() by clipping pi and theta
        self.params = { # lists of length CLASSES
            'pi': [np.repeat(1/k,k) for k in self.num_mixture_components], # uniform pi_c for each class
            'theta': [np.zeros((self.num_features,k)) for k in self.num_mixture_components], # placeholder theta_c, replaced by fit()
        }

    def pack_params(self, X, class_idx):
        """Fit class ``class_idx`` on ``X[class_idx]`` and store the result."""
        pi,theta = self.fit(X[class_idx],class_idx) # fit parameters
        self.params['pi'][class_idx] = pi # update member variable pi
        self.params['theta'][class_idx] = theta #update member variable theta

    # make classification based on which mixture model gives higher probability to generating point xi
    def classify(self, X):
        """Return, for each row of X, the index of the class whose mixture
        assigns it the highest log-likelihood (no class prior is applied)."""
        P = list()
        pi = self.params['pi']
        theta = self.params['theta']
        for c in range(self.num_classes):
            _,Pc = self.findP(X, pi[c], theta[c])
            P.append(Pc)
        return np.vstack(P).T.argmax(-1) # np.array of class predictions for each data point in X

    # --- E-step
    def updateLatentPosterior(self, X, pi, theta, num_mixture_components): # update the latent posterior
        """Compute responsibilities gamma[n, k] = p(z_n = k | x_n).

        ``num_mixture_components`` is accepted for interface compatibility;
        the component count is implied by the shapes of ``pi`` and ``theta``.
        Returns an array of shape (rows of X, components) whose rows sum to 1.
        """
        log_joint, log_norm = self.findP(X, pi, theta)
        # Normalise in log space first so exp() never sees large magnitudes.
        gamma = np.exp(log_joint - log_norm[:, np.newaxis])
        return gamma

    # --- M-step (1)
    @staticmethod
    def updatePi(gamma): #update the pi component using the posteriors (gammas)
        """Return the mixing weights: mean responsibility per component
        (vector of length ``gamma.shape[1]``)."""
        pi_c = gamma.sum(axis=0) / gamma.shape[0]
        return pi_c

    # -- M-step (2)
    @staticmethod
    def updateTheta(X, gamma): #update theta component using posteriors (gammas)
        """Return the responsibility-weighted feature means.

        Result has shape (columns of X, components). Values are NOT clipped
        here; ``fit`` clips them into [eps, 1 - eps].
        """
        component_mass = gamma.sum(axis=0)
        # A component whose responsibilities all underflowed to 0 would give 0/0;
        # flooring the denominator yields theta = 0 for it instead of NaN.
        component_mass = np.maximum(component_mass, np.finfo(float).tiny)
        theta_c = (X.T @ gamma) / component_mass
        return theta_c

    @staticmethod
    def findP(X, pi, theta):
        """Log joint of each row of X with each mixture component.

        Returns ``(t, logsumexp(t, axis=1))`` where
        ``t[n, k] = log pi[k] + sum_d [x_nd log theta[d,k] + (1 - x_nd) log(1 - theta[d,k])]``
        and the second value is the per-row log-likelihood under the mixture.

        ``theta`` must lie strictly inside (0, 1) (as produced by ``fit``);
        exact 0 or 1 entries lead to -inf/NaN.
        """
        log_not_theta = np.log1p(-theta)
        # sum_d x log(theta) + (1-x) log(1-theta)
        #   = x @ (log(theta) - log(1-theta)) + sum_d log(1-theta)
        log_odds = np.log(theta) - log_not_theta
        t = np.log(pi) + X @ log_odds + log_not_theta.sum(axis=0)
        return t, logsumexp(t, axis=1)

    # --- execute EM procedure
    def fit(self, X, class_idx):
        """Run ``self.max_iter`` EM iterations on the data of one class.

        X is assumed to be a (num points, num_features) array -- TODO confirm
        against the loader. Returns ``(pi, theta)`` for ``class_idx``; the
        caller (``pack_params``) is responsible for storing them.
        """
        max_iter = self.max_iter
        eps = self.epsilon
        pi = self.params['pi'][class_idx]
        num_mixture_components = self.num_mixture_components[class_idx]
        # Random, asymmetric start: identical components would receive identical
        # updates forever, and the zero placeholder would give log(0).
        theta = np.random.uniform(0.25, 0.75, size=(self.num_features, num_mixture_components))
        for _ in range(max_iter):
            # E-step
            gamma = self.updateLatentPosterior(X, pi, theta, num_mixture_components)
            # M-step (1): keep every weight >= eps so log(pi) stays finite, then renormalise
            pi = np.maximum(self.updatePi(gamma), eps)
            pi = pi / pi.sum()
            # M-step (2): keep theta strictly inside (0, 1) so both logs stay finite
            theta = np.clip(self.updateTheta(X, gamma), eps, 1 - eps)
        return pi, theta #pi and theta, given class_idx

In [42]:
class NaiveBayesModel(AbstractGenerativeModel):
    """Bernoulli naive Bayes: one per-feature "on" probability vector per class."""

    def __init__(self, CLASSES, NUM_FEATURES, EPS=10**(-12)):
        """Store the stability constant and allocate one parameter vector per class.

        CLASSES, NUM_FEATURES -- forwarded to AbstractGenerativeModel.__init__
        EPS                   -- added inside the logs in ``classify`` to avoid log(0)
        """
        super().__init__(CLASSES, NUM_FEATURES)

        # for numerical stability
        self.eps = EPS

        # p[i][j] = p (jth feature = 1 | class i)
        # One independent array per class (list multiplication would alias a single array).
        self.params = {'p': [np.zeros(NUM_FEATURES) for _ in range(self.num_classes)]}

    def pack_params(self, X, class_idx):
        """Fit class ``class_idx`` on ``X[class_idx]`` and store the result."""
        self.params['p'][class_idx] = self.fit(X[class_idx])

    def classify(self, X):
        """Return, for each row of X, the class index with the highest
        Bernoulli log-likelihood (no class prior is applied)."""
        # CxD matrix of per-class feature probabilities
        p = np.array(self.params['p'])

        # each of these is CxD
        one_prob = np.log(p + self.eps)
        zero_prob = np.log(1 + self.eps - p)

        # this is NxC
        logits = X @ one_prob.T + (1 - X) @ zero_prob.T

        # take maximum along rows
        return np.argmax(logits, axis=1)

    def fit(self, X) -> np.ndarray:
        """Return the per-feature mean of X, i.e. the fraction of rows with the feature set."""
        return np.sum(X, axis=0) / len(X)

In [14]:
experiment_name = "sentiment_analysis"
# --- SENTIMENT ANALYSIS setup
Xtrain, Xval, num_classes, num_features = load_experiment(data_fn, experiment_name)

# -- build naive bayes model for sentiment analysis
print("SENTIMENT ANALYSIS -- NAIVE BAYES MODEL:")
nbm = NaiveBayesModel(num_classes, num_features)
nbm.train(Xtrain)
print("ACCURACY ON VALIDATION: " + str(nbm.val(Xval)))

# -- build mixture model for sentiment analysis
# Random search over component counts; remember the model that scores best on
# validation so the submission is not simply whichever configuration ran last.
# assumes val() returns a numeric accuracy -- TODO confirm against ps4_utils
print("SENTIMENT ANALYSIS -- MIXTURE MODEL:")
best_mm, best_acc = None, None
for _ in range(MAX_OUTER_ITER):
    num_mixture_components =  np.random.randint(2,15,num_classes)
    print("COMPONENTS: " + " ".join(str(k) for k in num_mixture_components))
    mm = MixtureModel(num_classes, num_features, num_mixture_components)
    mm.train(Xtrain)
    acc = mm.val(Xval)
    print("ACCURACY ON VALIDATION: " + str(acc))
    # The first model is always kept, so a NaN score cannot leave best_mm unset.
    if best_mm is None or acc > best_acc:
        best_mm, best_acc = mm, acc

# submit to kaggle (using the best model found above)
Xkaggle = load_data(data_fn, experiment_name, "kaggle")
save_submission("mm-{}-submission.csv".format(experiment_name), best_mm.classify(Xkaggle))

SENTIMENT ANALYSIS -- NAIVE BAYES MODEL:
(827, 4287)
(1448, 4287)


NameError: name 'predictions' is not defined

In [43]:
experiment_name = "mnist"
# --- MNIST DIGIT CLASSIFICATION setup
Xtrain, Xval, num_classes, num_features = load_experiment(data_fn, experiment_name)

# -- build naive bayes model for mnist digit classification
print("MNIST DIGIT CLASSIFICATION -- NAIVE BAYES MODEL:")
nbm = NaiveBayesModel(num_classes, num_features)
nbm.train(Xtrain)
print("ACCURACY ON VALIDATION: " + str(nbm.val(Xval)))

# -- build mixture model for mnist digit classification
# Random search over component counts; remember the model that scores best on
# validation so the submission is not simply whichever configuration ran last.
# assumes val() returns a numeric accuracy -- TODO confirm against ps4_utils
print("MNIST DIGIT CLASSIFICATION -- MIXTURE MODEL:")
best_mm, best_acc = None, None
for _ in range(MAX_OUTER_ITER):
    num_mixture_components =  np.random.randint(2,15,num_classes)
    print("COMPONENTS: " + " ".join(str(k) for k in num_mixture_components))
    mm = MixtureModel(num_classes, num_features, num_mixture_components)
    mm.train(Xtrain)
    acc = mm.val(Xval)
    print("ACCURACY ON VALIDATION: " + str(acc))
    # The first model is always kept, so a NaN score cannot leave best_mm unset.
    if best_mm is None or acc > best_acc:
        best_mm, best_acc = mm, acc

# submit to kaggle (using the best model found above)
Xkaggle = load_data(data_fn, experiment_name, "kaggle")
save_submission("mm-{}-submission.csv".format(experiment_name), best_mm.classify(Xkaggle))

MNIST DIGIT CLASSIFICATION -- NAIVE BAYES MODEL:
ACCURACY ON VALIDATION: 0.7355


'\n# -- build mixture model for mnist digit classification\nprint("MNIST DIGIT CLASSIFICATION -- MIXTURE MODEL:")\nfor i in range(MAX_OUTER_ITER):\n    num_mixture_components =  np.random.randint(2,15,num_classes)\n    print("COMPONENTS: " + " ".join(str(i) for i in num_mixture_components))\n    mm = MixtureModel(num_classes, num_features, num_mixture_components)\n    mm.train(Xtrain)\n    print("ACCURACY ON VALIDATION: " + str(mm.val(Xval)))\n    \n# submit to kaggle\nXkaggle = load_data(data_fn, experiment_name, "kaggle")\nsave_submission("mm-{}-submission.csv".format(experiment_name), mm.classify(Xkaggle))\n'