In [2]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's random number generator so the randomly drawn values in this
# notebook (torch.randn samples, nn.Linear initial weights) are repeatable.
torch.manual_seed(1)

<torch._C.Generator at 0x7f32908ce588>

In [2]:
# Affine-map demo: a 5 -> 3 linear layer is applied to each row of a (2, 5) input.
# The layer is created before the input is sampled; both draw from the seeded RNG,
# so reordering these statements would change the printed numbers.
lin = nn.Linear(5, 3) # maps x from R^5 to R^3
data = autograd.Variable(torch.randn(2, 5)) # shape (2, 5)
print(lin(data)) # shape (2, 3)

Variable containing:
 0.3130  0.2576  1.3546
 1.0007  0.6433  0.4951
[torch.FloatTensor of size 2x3]



In [3]:
# ReLU demo: print a random (2, 2) input, then the same values after F.relu,
# which replaces negative entries with zero and keeps the others unchanged.
data = autograd.Variable(torch.randn(2, 2)) # shape(2, 2)
print(data)
print(F.relu(data))

Variable containing:
 1.2182  0.2117
-1.0613 -1.9441
[torch.FloatTensor of size 2x2]

Variable containing:
 1.2182  0.2117
 0.0000  0.0000
[torch.FloatTensor of size 2x2]



In [4]:
# Softmax demo on a 1-D vector of 5 random scores.
data = autograd.Variable(torch.randn(5))
print(data)

# Pass dim explicitly: calling softmax/log_softmax without it relies on the
# deprecated implicit-dimension choice. For a 1-D input that choice is dim 0,
# so the printed values are unchanged.
probs = F.softmax(data, dim=0) # shape (5), entries are probabilities
print(probs)
print(probs.sum()) # probabilities sum to 1
print(F.log_softmax(data, dim=0)) # element-wise log of the probabilities

Variable containing:
-0.9596
 0.5489
-0.9901
-0.3826
 1.5037
[torch.FloatTensor of size 5]

Variable containing:
 0.0500
 0.2258
 0.0485
 0.0890
 0.5868
[torch.FloatTensor of size 5]

Variable containing:
 1
[torch.FloatTensor of size 1]

Variable containing:
-2.9964
-1.4880
-3.0269
-2.4195
-0.5331
[torch.FloatTensor of size 5]



In [3]:
# Training corpus: each entry is (list of whitespace-separated tokens, language label).
data = [
    (text.split(), lang)
    for text, lang in (
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]

# Held-out sentences, in the same (tokens, label) format.
test_data = [
    (text.split(), lang)
    for text, lang in (
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    )
]

# Assign every distinct token (training and test sentences alike) the next free
# integer id, in order of first appearance.
word2idx = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault only inserts when the word is new, so ids stay contiguous.
        word2idx.setdefault(word, len(word2idx))
print(word2idx)

n_labels = 2
vocab_size = len(word2idx) # 26

class BowClassifier(nn.Module): # inheriting from nn.Module
    """Bag-of-words classifier: one linear layer followed by log-softmax.

    Args:
        n_labels: number of output classes.
        vocab_size: length of the bag-of-words count vector fed to the model.
    """

    def __init__(self, n_labels, vocab_size):
        super(BowClassifier, self).__init__()
        # maps input from R^vocab_size to R^n_labels
        self.linear = nn.Linear(vocab_size, n_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels.

        Args:
            bow_vec: word-count tensor whose last dimension has size vocab_size,
                e.g. shape (1, vocab_size) as produced by make_bow_vec.

        Returns:
            Tensor with the same leading dimensions as `bow_vec` and a last
            dimension of size n_labels, holding log-probabilities that
            exponentiate to a distribution along that last dimension.
        """
        # Normalise over the label dimension explicitly. Omitting `dim` relies on
        # the deprecated implicit choice, which picks dim 0 for 3-D inputs;
        # dim=-1 gives the same result for 1-D and 2-D inputs and is also
        # correct when extra leading batch dimensions are present.
        return F.log_softmax(self.linear(bow_vec), dim=-1)

def make_bow_vec(sent, word2idx):
    """Build the bag-of-words count vector for one tokenized sentence.

    Args:
        sent: iterable of tokens; every token must be a key of `word2idx`
            (an unknown token raises KeyError).
        word2idx: mapping from token to its integer position in the vector.

    Returns:
        Float tensor of shape (1, vocab_size) where entry [0, i] is the number
        of times the token with id i occurs in `sent`.
    """
    counts = torch.zeros(len(word2idx)) # shape (vocab_size)
    for token in sent:
        slot = word2idx[token]
        counts[slot] = counts[slot] + 1
    # Add a leading batch dimension of size 1.
    row_shape = (1, -1)
    return counts.view(*row_shape) # shape (1, vocab_size)

def make_target(label, label2idx):
    """Encode a label as a one-element LongTensor holding its class index.

    Args:
        label: a key of `label2idx` (an unknown label raises KeyError).
        label2idx: mapping from label to integer class index.

    Returns:
        LongTensor of shape (1) containing label2idx[label].
    """
    class_index = label2idx[label]
    return torch.LongTensor([class_index]) # shape (1)

# Instantiate the classifier; its single linear layer maps a vocab_size-long
# count vector to n_labels scores.
model = BowClassifier(n_labels, vocab_size)

# Show the freshly initialised (not yet trained) weight matrix and bias.
for param in model.parameters():
    print(param) # shape (2, 26) and (2)
    
# Sanity-check the forward pass on the first training sentence.
sample = data[0] # (sent, label)
bow_vec = make_bow_vec(sample[0], word2idx) # shape (1, 26)
log_probs = model(autograd.Variable(bow_vec)) # shape (1, 2)
print(log_probs)


{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}
Parameter containing:

Columns 0 to 9 
-0.0325  0.1950  0.0864  0.1697 -0.1961 -0.1459 -0.0775  0.1957 -0.1386 -0.1035
 0.1483 -0.1061 -0.1854  0.0135  0.0669  0.1624 -0.0324 -0.0168  0.0230 -0.0272

Columns 10 to 19 
-0.1599 -0.0406 -0.1231 -0.0440 -0.0606  0.0666 -0.0405  0.1708  0.0152  0.1358
-0.1411  0.1722 -0.1184  0.1092  0.1180  0.0847  0.1837  0.1188 -0.0732 -0.1597

Columns 20 to 25 
-0.0317 -0.0732  0.0726  0.0096 -0.1159 -0.0222
 0.0754  0.0071  0.1476  0.1432  0.1548  0.1291
[torch.FloatTensor of size 2x26]

Parameter containing:
-0.1628
 0.1293
[torch.FloatTensor of size 2]

Variable containing:
-0.8630 -0.5480
[torch.FloatTensor of size 1x2]



In [5]:
# Class index of each language label. These indices are the NLLLoss targets and
# therefore also the column order of the model's log-probability output.
label2idx = {"SPANISH": 0, "ENGLISH": 1}


def report_test_predictions():
    """Print the model's current predictions for the test set.

    Prints the (1, n_labels) log-probabilities for every sentence in
    `test_data`, followed by the column of the linear layer's weight matrix
    that corresponds to the word "creo" (one weight per label).
    """
    for test_sent, _ in test_data:
        test_vec = autograd.Variable(make_bow_vec(test_sent, word2idx))
        print(model(test_vec))
    # Read the weights through the named layer instead of relying on the
    # weight matrix being the first entry yielded by model.parameters().
    print(model.linear.weight[:, word2idx["creo"]])


# Snapshot before training, for comparison with the one printed afterwards.
report_test_predictions()

# NLLLoss takes log-probabilities, which is what BowClassifier.forward returns.
loss_fun = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

## training
for epoch in range(100):
    for sent, label in data:
        # step 1. clear the gradients accumulated for the previous sentence
        model.zero_grad()

        # step 2. build the bag-of-words vector and the target vector
        bow_vec = autograd.Variable(make_bow_vec(sent, word2idx))
        target = autograd.Variable(make_target(label, label2idx))

        # step 3. forward pass and loss computation
        log_probs = model(bow_vec)
        loss = loss_fun(log_probs, target)

        # step 4. backward pass and parameter update
        loss.backward()
        optimizer.step()

## testing
# Same report as before training, now using the updated parameters.
report_test_predictions()

Variable containing:
-1.2611 -0.3332
[torch.FloatTensor of size 1x2]

Variable containing:
-1.1140 -0.3978
[torch.FloatTensor of size 1x2]

Variable containing:
-0.1599
-0.1411
[torch.FloatTensor of size 2]

Variable containing:
-0.2209 -1.6186
[torch.FloatTensor of size 1x2]

Variable containing:
-3.0979 -0.0462
[torch.FloatTensor of size 1x2]

Variable containing:
 0.3315
-0.6325
[torch.FloatTensor of size 2]

