In [8]:
%matplotlib inline
from scipy import stats
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn

seaborn.set_context("talk")
np.set_printoptions(precision=4)

In [None]:
"""
1.
"""
def p12():
    """Plot the chi distribution's density for several degrees of freedom.

    One curve per value of ``df`` is drawn on a single set of axes over
    x in [0, 15], with the y-axis clipped to [0, 1]. The figure is shown
    via ``plt.show()``; nothing is returned.
    """
    grid = np.linspace(0, 15, 1000)
    fig, ax = plt.subplots(1, 1)

    # One pdf curve per degrees-of-freedom setting.
    for df in (1, 2, 5, 10, 25, 50, 100):
        density = stats.chi.pdf(grid, df)
        ax.plot(grid, density, label=r'$df=%i$' % df)

    # `ax` is the current axes, so these match the pyplot-level calls.
    ax.set_xlim(0, 15)
    ax.set_ylim(0, 1)
    ax.set_xlabel('$x$')
    ax.set_ylabel(r'$f(x)$')
    ax.set_title(r'$\chi\ \mathrm{Distribution}$')
    ax.legend()

    plt.show()
    
def p13(df):
    """Plot the CDF of the chi distribution with ``df`` degrees of freedom.

    The x-range spans the 0.1% to 99.9% quantiles of the distribution,
    sampled at 100 evenly spaced points. The figure is shown via
    ``plt.show()``; nothing is returned.
    """
    rv = stats.chi(df)

    # Restrict the plot to the central 99.8% of the probability mass.
    lower = stats.chi.ppf(0.001, df)
    upper = stats.chi.ppf(0.999, df)
    grid = np.linspace(lower, upper, 100)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(grid, rv.cdf(grid))
    ax.set_xlabel('$x$')
    ax.set_ylabel(r'$F(x)$')
    ax.set_title(r'$\chi\ \mathrm{Distribution}$')
    plt.show()

p12()
p13(df=100)

In [None]:
"""
4.
""" 
class NaiveBayesClassifier:
    """Naive Bayes classifier over integer-valued features with Dirichlet smoothing.

    Every sample is a length-``D`` vector whose entries are integers in
    ``[0, K)``. Training accumulates counts, so ``fit`` may be called
    repeatedly on successive batches. The smoothed log-probabilities are
    (re)computed lazily on the first ``predict`` call after a ``fit``.

    Attributes:
        alpha, alpha0: class prior pseudo-counts (length C) and their sum.
        beta, beta0: feature-value pseudo-counts (length K) and their sum.
        C, K, D: number of classes, feature values and features.
        N: total number of samples seen.
        N_c: (C,) samples seen per class.
        N_cj: (C, D) sum of the feature values per class and feature.
        N_ckj: (C, K, D) number of samples of class c whose feature j equals k.
        pi: (C,) log class probabilities (set by ``predict``).
        mu: (C, D, K) log class-conditional probabilities (set by ``predict``).
        flushed: whether ``pi``/``mu`` reflect the current counts.
    """

    def __init__(self, alpha, beta, n_features):
        """Create an untrained classifier.

        Args:
            alpha: length-C sequence of Dirichlet pseudo-counts for the classes.
            beta: length-K sequence of Dirichlet pseudo-counts for the
                values a single feature can take.
            n_features: number of features D in every sample.
        """
        # 1 x C vector; dirichlet prior for class distr.
        self.alpha = alpha
        self.alpha0 = sum(alpha)

        # 1 x K vector; dirichlet prior for class conditional distr.
        self.beta = beta
        self.beta0 = sum(beta)

        # dimensions of data
        self.C = len(self.alpha)
        self.K = len(self.beta)
        self.D = n_features

        # counts
        self.N = 0
        self.N_c = np.zeros(self.C, dtype=int)
        self.N_cj = np.zeros((self.C, self.D), dtype=int)
        self.N_ckj = np.zeros((self.C, self.K, self.D), dtype=int)

        self.flushed = False

    def _as_feature_matrix(self, X):
        """Return ``X`` as an (N, D) int array, rejecting bad shapes or values."""
        X = np.asarray(X).astype(int)
        if X.ndim != 2 or X.shape[1] != self.D:
            raise ValueError(
                "expected X of shape (N, %d), got %s" % (self.D, X.shape))
        if X.size and (X.min() < 0 or X.max() >= self.K):
            raise ValueError(
                "feature values must lie in [0, %d)" % self.K)
        return X

    def fit(self, X, y):
        """Add a batch of labelled samples to the running counts.

        Args:
            X: (N, D) array of feature values in ``[0, K)``; cast to int.
            y: (N,) array of class labels in ``[0, C)``.

        Raises:
            ValueError: if ``X`` has the wrong shape, contains values
                outside ``[0, K)``, or ``y`` does not have one label per row.
        """
        # Validate before touching any counter so a rejected batch leaves
        # the model unchanged.
        X = self._as_feature_matrix(X)
        y = np.asarray(y)
        N = X.shape[0]
        if y.shape != (N,):
            raise ValueError(
                "expected y of shape (%d,), got %s" % (N, y.shape))

        print("Fitting model")
        cols = np.arange(self.D)
        for c in range(self.C):
            X_c = X[y == c]
            self.N_c[c] += X_c.shape[0]
            self.N_cj[c] += X_c.sum(axis=0, dtype=int)
            # Unbuffered scatter-add: N_ckj[c, X_c[n, j], j] += 1 for all n, j.
            np.add.at(self.N_ckj[c], (X_c, cols), 1)

        self.N += N
        self.flushed = False

    def predict(self, X):
        """Return the most probable class for every row of ``X``.

        Args:
            X: (N, D) array of feature values in ``[0, K)``; cast to int.

        Returns:
            (N,) integer array of predicted class indices.

        Raises:
            ValueError: if ``X`` has the wrong shape or contains values
                outside ``[0, K)``.
        """
        X = self._as_feature_matrix(X)

        if not self.flushed:
            print("Flushing")
            self.pi = (np.log(self.N_c + np.asarray(self.alpha))
                       - np.log(self.N + self.alpha0))
            # beta is indexed by the feature VALUE k (last axis after the
            # transpose), not by the class; this makes every mu[c, j, :]
            # a properly normalised log-distribution.
            beta = np.asarray(self.beta)
            self.mu = (np.log(self.N_ckj.transpose(0, 2, 1) + beta)
                       - np.log(self.N_c + self.beta0)[:, None, None])
            self.flushed = True

        print("Predicting labels")
        cols = np.arange(self.D)
        scores = np.empty((X.shape[0], self.C))
        # Row-at-a-time keeps the temporary at (C, D) instead of (N, C, D).
        for i, x in enumerate(X):
            scores[i] = self.pi + self.mu[:, cols, x].sum(axis=1)
        return np.argmax(scores, axis=1)
        
import utils
    
def p4(max_train_batches=2):
    """Train the naive Bayes classifier on IMDB batches and evaluate it.

    Args:
        max_train_batches: number of training batches to consume before
            stopping (default 2, matching the original hard-coded cap).
            Pass ``None`` to train on every batch.

    Returns:
        ``(nb, accuracy)``: the fitted ``NaiveBayesClassifier`` and the
        fraction of correctly classified test samples (``nan`` if the test
        iterator yields nothing).
    """
    # load data
    train_iter, _val_iter, test_iter, text_field = utils.load_imdb(batch_size=1000)

    # initialize classifier
    alpha = np.ones(2)
    # NOTE(review): 281 + 1 presumably is the largest per-document word
    # count plus one (feature values 0..281) -- confirm against the data.
    beta = np.ones(281 + 1)
    # NOTE(review): presumably the vocabulary size -- confirm.
    n_features = 245703
    nb = NaiveBayesClassifier(alpha, beta, n_features)

    # train
    for i, batch in enumerate(train_iter):
        if max_train_batches is not None and i >= max_train_batches:
            break
        print(i)
        X = utils.bag_of_words(batch, text_field).data.numpy()
        # Shift labels down by one so they index classes from 0.
        y = batch.label.data.numpy() - 1
        nb.fit(X, y)

    # test
    n, n_err = 0, 0
    for i, batch in enumerate(test_iter):
        print(i)

        X = utils.bag_of_words(batch, text_field).data.numpy()
        y_pred = nb.predict(X)
        y = batch.label.data.numpy() - 1

        n += len(y)
        n_err += int(np.sum(y_pred != y))
        # running accuracy after this batch
        print(1 - n_err / n)

    accuracy = 1 - n_err / n if n else float("nan")
    return nb, accuracy

In [22]:
"""
5. & 6.
"""
import utils
import torch
from torch.autograd import Variable
from torch import optim
from collections import OrderedDict

def build_model(input_dim, output_dim):
    """Build the classifier network: linear layer -> ReLU -> log-softmax.

    The three stages are registered under the names ``"linear"``,
    ``"relu"`` and ``"softmax"``.

    Args:
        input_dim: number of input features.
        output_dim: number of output classes.

    Returns:
        A ``torch.nn.Sequential`` holding the three layers in order.
    """
    net = torch.nn.Sequential()
    # computes w_c^T x + b_c
    net.add_module("linear", torch.nn.Linear(input_dim, output_dim))
    # non-linearity (a tanh variant was tried here previously)
    net.add_module("relu", torch.nn.ReLU())
    # log softmax term
    net.add_module("softmax", torch.nn.LogSoftmax())
    return net

def train(model, loss, reg_weight, optimizer, x_val, y_val):
    """Run one optimisation step with an L1 penalty and return the loss.

    The objective is ``loss(model(x), y) + reg_weight * sum(|p|)`` where
    ``p`` is the first tensor yielded by ``model.parameters()`` (for a
    model from ``build_model`` that is the linear layer's weight matrix).

    Args:
        model: the network to train.
        loss: criterion called as ``loss.forward(prediction, target)``.
        reg_weight: multiplier of the L1 penalty; 0 disables it.
        optimizer: optimizer bound to ``model``'s parameters.
        x_val: input tensor for this batch.
        y_val: target tensor for this batch.

    Returns:
        The scalar value of the regularised loss before the update.
    """
    # Take in x and y and make variable.
    x = Variable(x_val)
    y = Variable(y_val)

    # Resets the gradients to 0
    optimizer.zero_grad()

    # Computes the function above. (log softmax w_c^T x + b_c)
    fx = model.forward(x)

    # Computes loss. Gives a scalar.
    output = loss.forward(fx, y)
    if reg_weight:
        # L1 norm taken directly from the parameter, so it works for any
        # model shape and needs no zero "target" tensor.
        param = next(model.parameters())
        output = output + reg_weight * param.abs().sum()

    # Backpropagate to get the gradients.
    output.backward()

    # updates the weights
    optimizer.step()

    # Newer PyTorch returns a 0-dim tensor (use .item()); very old versions
    # return a 1-element tensor that must be indexed instead.
    loss_value = output.data
    return loss_value.item() if hasattr(loss_value, "item") else loss_value[0]

def predict(model, x_val):
    """Return the index of the highest-scoring class for each input row.

    Args:
        model: network whose ``forward`` yields one score per class along
            axis 1.
        x_val: input tensor for the batch.

    Returns:
        A numpy array holding the arg-max over axis 1 of the model output.
    """
    inputs = Variable(x_val, requires_grad=False)
    scores = model.forward(inputs).data
    class_ids = scores.numpy().argmax(axis=1)
    return class_ids

def main():
    """Train and evaluate the classifier once per L1 regularisation weight.

    For every weight in the sweep a fresh model is trained for one pass
    over the training iterator and scored on the test iterator. The mean
    training cost, test accuracy, the five highest- and lowest-weighted
    words per class and the fraction of near-zero weights are printed.
    """
    torch.manual_seed(42)
    n_features, n_classes = 245703, 2

    for reg_weight in [0, 0.001, 0.01, 0.1, 1]:
        train_iter, val_iter, test_iter, text_field = utils.load_imdb(batch_size=1000)

        # Fresh model, negative log-likelihood criterion and Adam optimizer
        # for each regularisation setting.
        model = build_model(n_features, n_classes)
        loss = torch.nn.NLLLoss(size_average=True)
        optimizer = optim.Adam(model.parameters())

        # Single pass over the training data.
        cost = 0.
        num_batches = 0
        for batch in train_iter:
            features = utils.bag_of_words(batch, text_field).data
            labels = batch.label.data - 1
            cost += train(model, loss, reg_weight, optimizer, features, labels)
            num_batches += 1

        # Accuracy on the test data.
        n = 0
        n_corr = 0
        for batch in test_iter:
            features = utils.bag_of_words(batch, text_field).data
            labels = (batch.label.data - 1).numpy()
            y_pred = predict(model, features)

            n += len(labels)
            n_corr += sum(y_pred == labels)

        print("Lambda %f, cost = %f, acc = %.2f%%"
              % (reg_weight, cost / num_batches, 100. * n_corr / n))

        # Inspect the first parameter tensor: one row of word weights per class.
        weights = next(model.parameters()).data.numpy()
        itos = text_field.vocab.itos
        heaviest, lightest = [], []
        for w in weights:
            order = np.argsort(w)
            heaviest.append([itos[word_id] for word_id in order[-5:][::-1]])
            lightest.append([itos[word_id] for word_id in order[:5][::-1]])

        near_zero = np.sum(np.abs(weights) < 1e-4)
        sparsity = near_zero / (weights.shape[0] * weights.shape[1])

        print(">>> Heaviest Words\nClass 0: {}\nClass 1: {}".format(heaviest[0], heaviest[1]))
        print(">>> Lightest Words\nClass 0: {}\nClass 1: {}".format(lightest[0], lightest[1]))
        print(">>> Sparsity: {}".format(sparsity))

In [19]:
main()

Loading Data
2
1




Lambda 0.000000, cost = 0.591833, acc = 85.40%
>>> Heaviest Words
Class 0: ['boring', 'wasted', 'waste', 'guess', 'horrible']
Class 1: ['excellent', 'entertaining', 'favorite', 'loved', 'shows']
>>> Lightest Words
Class 0: ['great.', 'excellent', 'wonderful.', 'favorite', 'loved']
Class 1: ['poor', 'save', 'boring', 'horrible', 'waste']
>>> Sparsity: 0.011467096453848752
Loading Data
2
1


KeyboardInterrupt: 