# Day 2

## Exercise 2.1

In [1]:
import lxmls.readers.sentiment_reader as srs
from lxmls.deep_learning.utils import AmazonData

corpus = srs.SentimentCorpus("books")
data = AmazonData(corpus=corpus)

corpus, data

(<lxmls.readers.sentiment_reader.SentimentCorpus at 0x74e75eb13290>,
 <lxmls.deep_learning.utils.AmazonData at 0x74e75ebd82f0>)

In [2]:
import numpy as np
from lxmls.deep_learning.utils import glorot_weight_init, index2onehot
from lxmls.deep_learning.utils import logsumexp, Model

class NumpyLogLinear(Model):
    def __init__(self, **config):
        # Initialize parameters
        weight_shape = (config['input_size'], config['num_classes'])

        # after Xavier Glorot et al
        self.weight = glorot_weight_init(weight_shape, 'softmax')
        self.bias = np.zeros((1, config['num_classes']))
        self.learning_rate = config['learning_rate']

    def log_forward(self, input=None):
        """Forward pass of the computation graph"""

        # Linear transformation
        z = np.dot(input, self.weight.T) + self.bias

        # Softmax implemented in log domain
        log_tilde_z = z - logsumexp(z, axis=1, keepdims=True)

        return log_tilde_z
    
    def predict(self, input=None):
        """Prediction: most probable class index"""
        return np.argmax(np.exp(self.log_forward(input)), axis=1)
    
    def update(self, input=None, output=None):
        """Stochastic Gradient Descent update"""

        # Probabilities of each class
        # Extract probabilities to use them later
        class_probabilities = np.exp(self.log_forward(input))
        batch_size, num_classes = class_probabilities.shape

        # Error derivative at softmax layer
        # Calculate the gradient of the loss regarding logits
        I = index2onehot(output, num_classes)
        error = (class_probabilities - I) / batch_size

        # Weight gradient
        # Calculate the gradient of the loss regarding weights
        # Accumulate the gradient of the loss regarding logits with the
        # gradient of the logits regarding weights
        gradient_weight = np.zeros(self.weight.shape)
        for l in range(batch_size):
            gradient_weight += np.outer(error[l, :], input[l, :])

        # Bias gradient
        # Calculate the gradient of the loss regarding bias
        gradient_bias = np.sum(error, axis=0, keepdims=True)

        # SGD update
        # Update weights and bias with the gradients
        self.weight = self.weight - self.learning_rate * gradient_weight
        self.bias = self.bias - self.learning_rate * gradient_bias

In [3]:
# Instantiate model
model = NumpyLogLinear(
    input_size=corpus.nr_features,
    num_classes=2,
    learning_rate=0.05
)

# Define number of epochs and batch size
num_epochs = 10
batch_size = 30

# Instantiate data iterators
train_batches = data.batches('train', batch_size=batch_size)
test_set = data.batches('test', batch_size=None)[0]

# Check initial accuracy
hat_y = model.predict(input=test_set['input'])
accuracy = 100*np.mean(hat_y == test_set['output'])
print("Initial accuracy %2.2f %%" % accuracy)

Initial accuracy 51.25 %


In [4]:
# Epoch loop
for epoch in range(num_epochs):
    # Batch loop
    for batch in train_batches:
        model.update(input=batch['input'], output=batch['output'])
        # Prediction for this epoch
        hat_y = model.predict(input=test_set['input'])
        # Evaluation
        accuracy = 100*np.mean(hat_y == test_set['output'])
        print("Epoch %d: accuracy %2.2f %%" % (epoch+1, accuracy))

Epoch 1: accuracy 52.75 %
Epoch 1: accuracy 52.25 %
Epoch 1: accuracy 53.25 %
Epoch 1: accuracy 51.25 %
Epoch 1: accuracy 56.00 %
Epoch 1: accuracy 57.25 %
Epoch 1: accuracy 55.25 %
Epoch 1: accuracy 57.00 %
Epoch 1: accuracy 56.75 %
Epoch 1: accuracy 57.25 %
Epoch 1: accuracy 57.00 %
Epoch 1: accuracy 57.50 %
Epoch 1: accuracy 60.00 %
Epoch 1: accuracy 59.75 %
Epoch 1: accuracy 59.25 %
Epoch 1: accuracy 59.50 %
Epoch 1: accuracy 60.50 %
Epoch 1: accuracy 62.50 %
Epoch 1: accuracy 62.00 %
Epoch 1: accuracy 63.00 %
Epoch 1: accuracy 62.75 %
Epoch 1: accuracy 63.50 %
Epoch 1: accuracy 63.00 %
Epoch 1: accuracy 63.00 %
Epoch 1: accuracy 65.00 %
Epoch 1: accuracy 66.50 %
Epoch 1: accuracy 66.25 %
Epoch 1: accuracy 60.75 %
Epoch 1: accuracy 62.75 %
Epoch 1: accuracy 67.25 %
Epoch 1: accuracy 66.50 %
Epoch 1: accuracy 64.50 %
Epoch 1: accuracy 69.00 %
Epoch 1: accuracy 67.50 %
Epoch 1: accuracy 68.00 %
Epoch 1: accuracy 68.00 %
Epoch 1: accuracy 67.00 %
Epoch 1: accuracy 68.25 %
Epoch 1: acc