# Homework 2

## Part 1

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
# Input vector 1.0 .. 10.0, wrapped so that gradients are tracked on `x`.
v_data = [float(value) for value in range(1, 11)]
vector = torch.Tensor(v_data)
x = Variable(vector, requires_grad=True)

In [3]:
# Reduce the vector to its single largest element and show the raw value.
y = torch.max(x)
print(y.data)


 10
[torch.FloatTensor of size 1]



In [4]:
# Absolute difference between the maximum of `x` and a fixed target of 12.
target = Variable(torch.Tensor([12]), requires_grad=True)
error = (y - target).abs()
print(error.data)


 2
[torch.FloatTensor of size 1]



Now we have the function in the form `f(x) = |max(x) - 12|`, where `f(x)` is `error`.

Let's use `error.backward()` to compute the gradient of the absolute error with respect to every element of `x`.

In [5]:
# Backpropagate from the scalar `error`. This fills in `x.grad` (and `target.grad`,
# since both were created with requires_grad=True).
error.backward()
# d(error)/dx: one entry per element of `x`.
print(x.grad)

Variable containing:
 0
 0
 0
 0
 0
 0
 0
 0
 0
-1
[torch.FloatTensor of size 10]



This is correct: the last element of the array is the maximum, so it is the only element that affects the error. Its gradient is -1 because `max(x) = 10` is below the target `12`, so increasing that element by one decreases the absolute error by one.

## Part 2

In [6]:
import torch.nn as nn
import torch.nn.functional as F

* `vocab_size` - number of input words
* `embedding_dim` - dimension of a word vector
* `dropout_prob` - probability with which each activation of the hidden layer is zeroed (dropped) during training
* `num_classes` - number of possible outputs
* `sum_rows(x)` - returns the sum of rows, i.e. sum of word embeddings 
* `mean_rows(x)` - similar to `sum_rows`, but also divides by the number of rows/words

In [7]:
class CbowSum(nn.Module):
    """Bag-of-words classifier that pools word embeddings by summation.

    Pipeline: embedding lookup -> sum over dimension 1 -> Linear(embedding_dim, 100)
    -> ReLU -> dropout -> Linear(100, num_classes). The output is unnormalised
    class scores (no softmax is applied here).

    Args:
        num_classes: size of the output layer.
        vocab_size: number of rows in the embedding table.
        embedding_dim: width of each embedding vector.
        dropout_prob: probability passed to ``nn.Dropout``.
    """

    def __init__(self, num_classes, vocab_size, embedding_dim, dropout_prob):
        super(CbowSum, self).__init__()
        # Layers are declared in the order in which forward() applies them.
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.lin1 = nn.Linear(embedding_dim, 100)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(100, num_classes)

    def forward(self, x):
        """Map index tensor ``x`` to class scores.

        Assumes ``x`` is laid out as (batch, seq_len) so that dimension 1 of the
        embedded tensor runs over words -- TODO confirm against the data iterator.
        """
        pooled = self.sum_rows(self.embed(x))
        hidden = self.dropout(F.relu(self.lin1(pooled)))
        return self.fc(hidden)

    def sum_rows(self, x):
        """Collapse dimension 1 of ``x`` by summing over it."""
        return x.sum(1)

In [8]:
class CbowMean(nn.Module):
    """Bag-of-words classifier that pools word embeddings by averaging.

    Pipeline: embedding lookup -> mean over dimension 1 -> Linear(embedding_dim, 100)
    -> ReLU -> dropout -> Linear(100, num_classes). The output is unnormalised
    class scores (no softmax is applied here).

    Args:
        num_classes: size of the output layer.
        vocab_size: number of rows in the embedding table.
        embedding_dim: width of each embedding vector.
        dropout_prob: probability passed to ``nn.Dropout``.
    """

    def __init__(self, num_classes, vocab_size, embedding_dim, dropout_prob):
        super(CbowMean, self).__init__()
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.lin1 = nn.Linear(embedding_dim, 100)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(100, num_classes)

    def forward(self, x):
        """Map index tensor ``x`` to class scores.

        Assumes ``x`` is laid out as (batch, seq_len) so that dimension 1 of the
        embedded tensor runs over words -- TODO confirm against the data iterator.
        """
        x = self.embed(x)
        x = self.mean_rows(x)
        x = F.relu(self.lin1(x))
        x = self.dropout(x)
        return self.fc(x)

    def mean_rows(self, x):
        """Average ``x`` over dimension 1.

        The reduction and the divisor must refer to the same dimension. The
        previous version summed over dimension 1 but divided by ``x.shape[0]``,
        which only yields a mean when both dimensions happen to be equal.
        Every position along dimension 1 is counted, including any padding.
        """
        return torch.mean(x, 1)

## Part 3

In [9]:
def evaluate_interpolated(data_iter, models):
    """Evaluate an ensemble that averages the class probabilities of ``models``.

    Every model is switched to evaluation mode. For each batch the softmax
    outputs of all models are averaged, and the summed negative log-likelihood
    and the number of correct argmax predictions are accumulated. A summary
    line is printed at the end.

    Requires PyTorch >= 0.4.1 (``torch.no_grad``, ``Tensor.item``,
    ``reduction='sum'``).

    Args:
        data_iter: iterable of batches exposing ``.text`` and ``.label``, with a
            ``.dataset`` attribute that supports ``len()``. Labels are expected
            to be 1-based class ids (they are shifted down by one here).
        models: non-empty collection of modules that map ``batch.text`` to
            per-class scores of shape (batch, num_classes).

    Returns:
        Accuracy in percent as a Python float.

    Raises:
        ValueError: if ``models`` is empty.
    """
    models = list(models)
    if not models:
        raise ValueError("evaluate_interpolated needs at least one model")

    for model in models:
        model.eval()

    corrects, total_loss = 0, 0.0
    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        for batch in data_iter:
            text = batch.text
            # subtract one from label ID because we don't have <unk> labels;
            # done out of place so the batch's own label tensor is left intact
            target = batch.label - 1

            probabilities = 0
            for model in models:
                probabilities = probabilities + F.softmax(model(text), dim=1)
            probabilities = probabilities / len(models)

            # `probabilities` are already normalised, so use NLL on their log.
            # F.cross_entropy expects raw scores and would apply softmax again.
            loss = F.nll_loss(torch.log(probabilities), target, reduction='sum')
            total_loss += loss.item()

            predictions = probabilities.max(dim=1)[1].view(target.size())
            corrects += (predictions == target).sum().item()

    size = len(data_iter.dataset)
    avg_loss = total_loss / size
    accuracy = 100.0 * corrects / size
    print('\nEvaluation - loss: {:.6f}  acc: {:.4f}%({}/{}) \n'.format(avg_loss,
                                                                     accuracy,
                                                                     corrects,
                                                                     size))
    return accuracy


In [None]:
class CnnText(nn.Module):
    """Three-layer 1-D convolutional text classifier.

    Pipeline: embedding -> Conv1d(embedding_dim, 32, k=3) -> ReLU -> max-pool(2)
    -> Conv1d(32, 64, k=3) -> ReLU -> Conv1d(64, 64, k=3) -> ReLU
    -> max over the remaining sequence positions -> dropout -> Linear(64, num_classes).
    The output is unnormalised class scores.

    NOTE(review): the convolutions use no padding, so inputs that are too short
    will make a convolution fail (by my count at least 12 positions are needed)
    -- confirm the data always satisfies this.

    Args:
        num_classes: size of the output layer.
        vocab_size: number of rows in the embedding table.
        embedding_dim: width of each embedding vector (input channels of conv1).
        dropout_prob: probability passed to ``nn.Dropout``.
    """

    def __init__(self, num_classes, vocab_size, embedding_dim, dropout_prob):
        super(CnnText, self).__init__()
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.conv1 = nn.Conv1d(embedding_dim, 32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1)
        self.conv3 = nn.Conv1d(64, 64, kernel_size=3, stride=1)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map index tensor ``x`` of shape (batch, seq_len) to class scores."""
        # Conv1d wants (batch, channels, seq_len); the embedding yields
        # (batch, seq_len, channels), so swap the last two axes.
        features = self.embed(x).permute(0, 2, 1)

        features = F.max_pool1d(F.relu(self.conv1(features)), 2)
        for conv in (self.conv2, self.conv3):
            features = F.relu(conv(features))

        # Global max-pool: a kernel as wide as the sequence leaves one value per channel.
        pooled = F.max_pool1d(features, features.size(2))
        flat = pooled.view(-1, 64)
        return self.fc(self.dropout(flat))