### Affine Maps

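An affine map is a function $f(x) = Ax + b$ for a matrix $A$ and vectors $x$ and $b$. In PyTorch, `nn.Linear` implements such a map, with $A$ and $b$ as learnable parameters.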

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global RNG so the random weights and tensors below are reproducible.
torch.manual_seed(1)

<torch._C.Generator at 0x7fda47ab0490>

In [7]:
# nn.Linear(5, 3) is an affine map from 5 input features to 3 output features.
lin = nn.Linear(5, 3)
# Two random input rows with 5 features each.
data = torch.randn(2, 5)
print(data)

tensor([[ 0.3133, -1.1352,  0.3773, -0.2824, -2.5667],
        [-1.4303,  0.5009,  0.5438, -0.4057,  1.1341]])


In [8]:
# Apply the affine map to both rows: input of shape (2, 5) -> output of shape (2, 3).
print(lin(data))

tensor([[ 0.7629, -0.9630,  0.0438],
        [ 0.2494,  0.6150,  1.1286]])


### Non-Linearities
#### Most common ones:

* tanh(x)
* sigmoid(x)
* ReLU(x)

In [12]:
# ReLU works element-wise: negative entries become 0, non-negative entries are unchanged.
data = torch.randn(2, 2)
print(data)
print(F.relu(data))

tensor([[ 0.0939,  1.2381],
        [-1.3459,  0.5119]])
tensor([[ 0.0939,  1.2381],
        [ 0.0000,  0.5119]])


### Logistic Regression Bag-of-Words classifier

In [15]:
# Toy training corpus: each entry pairs a tokenised sentence with its language label.
data = [
    (text.split(), language)
    for text, language in (
        ("gano el mundial francia", "SPANISH"),
        ("give me the ball", "ENGLISH"),
        ("no creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]

# Held-out sentences in the same (tokens, label) form.
test_data = [
    (text.split(), language)
    for text, language in (
        ("Yo creo que si", "SPANISH"),
        ("I sure hope so", "ENGLISH"),
    )
]

# Give every distinct word in train + test a unique index, in order of first appearance.
word_to_idx = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault only inserts unseen words; the current dict size is the next free index.
        word_to_idx.setdefault(word, len(word_to_idx))
print(word_to_idx)

NUM_LABELS = 2
VOCAB_SIZE = len(word_to_idx)

class BoWClassifier(nn.Module):
    """Logistic-regression classifier over bag-of-words count vectors.

    A single affine layer maps a vocabulary-sized count vector to one score
    per label; the scores are then normalised to log-probabilities.
    """

    def __init__(self, num_labels, vocab_size):
        """Create the affine layer.

        Args:
            num_labels: number of output classes.
            vocab_size: length of the bag-of-words input vector.
        """
        super(BoWClassifier, self).__init__()

        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels.

        Args:
            bow_vec: float tensor whose last dimension has size ``vocab_size``,
                e.g. shape ``(1, vocab_size)`` or an unbatched ``(vocab_size,)``.

        Returns:
            Tensor with the same leading dimensions as ``bow_vec`` and a last
            dimension of size ``num_labels``.
        """
        # nn.Linear puts the label scores on the last axis, so normalise over
        # dim=-1: identical to dim=1 for 2-D input, and it also accepts a
        # single unbatched vector (where dim=1 would be out of range).
        return F.log_softmax(self.linear(bow_vec), dim=-1)

def make_bow_vector(sentence, word_to_idx):
    """Turn a tokenised sentence into a bag-of-words count vector.

    Args:
        sentence: iterable of word tokens; every token must be a key of
            ``word_to_idx`` (an unknown token raises ``KeyError``).
        word_to_idx: mapping from word to its column index.

    Returns:
        Float tensor of shape ``(1, len(word_to_idx))`` whose entry at a
        word's index is the number of times that word occurs in ``sentence``.
    """
    counts = torch.zeros(len(word_to_idx))
    # Resolve each token to its column first, then tally the occurrences.
    columns = [word_to_idx[token] for token in sentence]
    for column in columns:
        counts[column] += 1
    # Add a leading batch dimension of size 1.
    return counts.view(1, -1)

def make_target(label, label_to_idx):
    """Encode a label as the class-index tensor that ``nn.NLLLoss`` expects.

    Args:
        label: label name; must be a key of ``label_to_idx``.
        label_to_idx: mapping from label name to integer class index.

    Returns:
        1-element tensor of dtype ``torch.long`` holding the class index.
    """
    # torch.tensor with an explicit dtype replaces the legacy
    # torch.LongTensor(...) type constructor.
    return torch.tensor([label_to_idx[label]], dtype=torch.long)

# Instantiate the classifier; its parameters start out randomly initialised.
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# Show the untrained parameters of the linear layer.
for param in model.parameters():
    print(param)
    
# Sanity-check a forward pass on the first training sentence. no_grad() is used
# because nothing is trained here, so no gradient bookkeeping is needed.
with torch.no_grad():
    sample = data[0]
    bow_vector = make_bow_vector(sample[0], word_to_idx)
    log_probs = model(bow_vector)
    print(log_probs)

{'gano': 0, 'el': 1, 'mundial': 2, 'francia': 3, 'give': 4, 'me': 5, 'the': 6, 'ball': 7, 'no': 8, 'creo': 9, 'que': 10, 'sea': 11, 'una': 12, 'buena': 13, 'idea': 14, 'No': 15, 'it': 16, 'is': 17, 'not': 18, 'a': 19, 'good': 20, 'to': 21, 'get': 22, 'lost': 23, 'at': 24, 'Yo': 25, 'si': 26, 'I': 27, 'sure': 28, 'hope': 29, 'so': 30}
Parameter containing:
tensor([[-0.0208, -0.0062, -0.1695, -0.1156, -0.1049, -0.0768,  0.1277,
         -0.0587, -0.1342,  0.0691,  0.0575,  0.1163, -0.0929,  0.0389,
         -0.0654, -0.0404, -0.1431, -0.0819, -0.0550,  0.0768,  0.0328,
          0.0444,  0.1793,  0.1750,  0.1225,  0.0057, -0.1242,  0.1404,
         -0.0449, -0.0145, -0.1547],
        [-0.0355, -0.1158,  0.1650, -0.1553, -0.1400, -0.0061, -0.0971,
          0.0643, -0.0691, -0.0844,  0.0102,  0.1300, -0.1263,  0.0843,
          0.1154,  0.1757, -0.1257,  0.0435, -0.1328,  0.1533, -0.0697,
          0.1082,  0.0053, -0.0140, -0.0057,  0.0305,  0.0847,  0.0288,
          0.0548, -0.1616,  0

In [16]:
label_to_idx = {"SPANISH": 0, "ENGLISH": 1}

NUM_EPOCHS = 100     # full passes over the training sentences
LEARNING_RATE = 0.1  # SGD step size


def report_test_predictions(model, test_data, word_to_idx, probe_word="creo"):
    """Print the model's predictions on the test set and one probe weight column.

    Prints the log-probabilities for every test sentence, followed by the
    column of the model's first parameter that belongs to ``probe_word``
    (one entry per label).

    Args:
        model: classifier to evaluate.
        test_data: iterable of ``(tokens, label)`` pairs; labels are not used.
        word_to_idx: mapping from word to its column index.
        probe_word: word whose weight column is printed.
    """
    with torch.no_grad():
        for instance, _label in test_data:
            print(model(make_bow_vector(instance, word_to_idx)))
    # Kept outside no_grad(), as in the original cell, so the printed tensor
    # is formatted exactly as before.
    print(next(model.parameters())[:, word_to_idx[probe_word]])


# run on test data before training to see before and after
report_test_predictions(model, test_data, word_to_idx)

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    for instance, label in data:
        # clear gradients: PyTorch accumulates them across backward() calls
        model.zero_grad()

        bow_vec = make_bow_vector(instance, word_to_idx)
        target = make_target(label, label_to_idx)

        # forward pass, negative log-likelihood loss, then one SGD step
        log_probs = model(bow_vec)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

# same report after training, for comparison with the one above
report_test_predictions(model, test_data, word_to_idx)

tensor([[-0.7056, -0.6808]])
tensor([[-0.7542, -0.6356]])
tensor(1.00000e-02 *
       [ 6.9103, -8.4358])
tensor([[-0.1498, -1.9726]])
tensor([[-0.6581, -0.7295]])
tensor([ 0.4835, -0.4988])
