In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
from torchtext.vocab import Vectors, GloVe

In [2]:
TEXT = torchtext.data.Field()
LABEL = torchtext.data.Field(sequential=False)
train, val, test = torchtext.datasets.SST.splits(
    TEXT, LABEL,
    filter_pred=lambda ex: ex.label != 'neutral')

TEXT.build_vocab(train)
LABEL.build_vocab(train)
n_vocab = len(TEXT.vocab)

url = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.simple.vec'
TEXT.vocab.load_vectors(vectors=Vectors('wiki.simple.vec', url=url))
n_comps = TEXT.vocab.vectors.size(1)

BATCH_SIZE = 50
train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
    (train, val, test), batch_size=BATCH_SIZE, device=-1, repeat=False)

In [90]:
class ConvNetClassifier(nn.Module):
    
    def __init__(self, vecs, dropout_rate=0.5):
        super(ConvNetClassifier, self).__init__()
        self.vecs = vecs
        self.conv3 = nn.Conv2d(in_channels=1, out_channels=100, kernel_size=(3, n_comps))
        self.conv4 = nn.Conv2d(in_channels=1, out_channels=100, kernel_size=(4, n_comps))
        self.conv5 = nn.Conv2d(in_channels=1, out_channels=100, kernel_size=(5, n_comps))
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout2d(p=dropout_rate)
        self.linear = nn.Linear(300, 1)
    
    def forward(self, text, training=False):
        while text.size(0) < 5:
            text = torch.cat([text, torch.ones((1, text.size(1))).long()], 0)
        sent_length, batch_size = text.size()
        X = self.vecs[text.data.view(-1,)].view(sent_length, batch_size, n_comps)
        X = X.permute(1, 0, 2)
        X = X.data.unsqueeze_(1)
        X = Variable(X)
        
        # Extract and pool convolutional features
        X3 = F.relu(self.conv3(X))
        X3 = F.max_pool2d(X3, (X3.size(2), 1))
        X4 = F.relu(self.conv4(X))
        X4 = F.max_pool2d(X4, (X4.size(2), 1))
        X5 = F.relu(self.conv5(X))
        X5 = F.max_pool2d(X5, (X5.size(2), 1))
        
        # Dropout for regularization
        if training:
            X3 = self.dropout(X3)
            X4 = self.dropout(X4)
            X5 = self.dropout(X5) 
        
        # Final layer
        X = torch.cat([X3, X4, X5], 1).squeeze()
        probs = F.sigmoid(self.linear(X))
        return torch.cat([probs, 1-probs], 1)

In [91]:
vecs = Variable(TEXT.vocab.vectors, requires_grad=True)
cn = ConvNetClassifier(vecs)
loss_function = nn.NLLLoss()
optimizer = optim.Adam(cn.parameters(), lr=0.001)
optimizer2 = optim.Adam([cn.vecs], lr=0.0001)
#optimizer = optim.SGD(cn.parameters(), lr=0.03, weight_decay=0.01)
#optimizer = optim.Adadelta(cn.parameters(), lr=0.1)
#max_vec_size = 5

for i in range(100):
    train_iter.init_epoch()
    for batch in train_iter:
        cn.zero_grad()
        probs = cn(batch.text, training=True)
        log_probs = torch.log(probs)
        y = batch.label - 1
        loss = loss_function(log_probs, y)
        loss.backward()
        optimizer.step()
#         optimizer2.step()
        
        # Regularization
#         for w in cn.parameters():
#             w_2norm = w.data.norm(2)
#             if w_2norm > max_vec_size:
#                 w.data = max_vec_size / w_2norm * w.data
    print('Iteration #{}: {}'.format(i, loss.data.numpy()[0]))
#cn.linear.weight.data *= 0.5

Iteration #0: 0.5692576766014099
Iteration #1: 0.5276330709457397
Iteration #2: 0.4444142282009125
Iteration #3: 0.4883195161819458
Iteration #4: 0.3524026572704315
Iteration #5: 0.3241563141345978
Iteration #6: 0.42746302485466003
Iteration #7: 0.19237355887889862
Iteration #8: 0.2106371819972992
Iteration #9: 0.08412794768810272
Iteration #10: 0.060294173657894135
Iteration #11: 0.21543025970458984
Iteration #12: 0.09555546939373016


KeyboardInterrupt: 

In [42]:
cn.linear.weight.data *= 0.5

In [101]:
lengths = []
train_iter.init_epoch()
for batch in train_iter:
    lengths.append(batch.text.size(0))

In [105]:
np.max(lengths)

52

In [18]:
def evaluate(model, data_iter):
    data_iter.init_epoch()
    N = len(data_iter.data())
    n_correct = 0
    data_iter.init_epoch()
    for batch in data_iter:
        probs = model(batch.text)
        _, y_predicted = probs.max(1)
        y_true = batch.label - 1
        n_correct += (y_true == y_predicted).sum().float()
    return (n_correct / N).data.numpy()[0]

In [92]:
evaluate(cn, train_iter)

0.99407512

In [95]:
evaluate(cn, test_iter)

0.78583199

In [229]:
cn.linear.weight.data.norm(2)

2.999999962180317

In [239]:
test_linear.weight.data = test_linear.weight.data * 0.5

In [248]:
cn.linear.weight.data.norm(2)

1.4999999810901585

In [241]:
test_linear.weight

Parameter containing:

Columns 0 to 9 
 0.0919  0.0995 -0.0591 -0.0713 -0.0962 -0.0985 -0.0943 -0.0954  0.0753 -0.0675

Columns 10 to 19 
-0.0731 -0.0407 -0.0846  0.0974 -0.0862 -0.0913  0.0820 -0.0800  0.0970  0.0920

Columns 20 to 29 
 0.0867  0.0855 -0.0874  0.1046  0.0522  0.1021 -0.0905 -0.0797  0.0556 -0.0818

Columns 30 to 39 
-0.0948 -0.0248  0.0984  0.0363 -0.0818 -0.0781 -0.0681  0.0765 -0.0971  0.0563

Columns 40 to 49 
-0.0753 -0.1091 -0.0936 -0.0554 -0.0742 -0.0950 -0.1096  0.0906 -0.0903 -0.0933

Columns 50 to 59 
-0.0866 -0.0796 -0.0287 -0.0763 -0.1002 -0.0852 -0.0730 -0.0770 -0.1007  0.0883

Columns 60 to 69 
-0.0883 -0.1000 -0.0548 -0.0860  0.0809 -0.0816  0.0910 -0.0962  0.0917  0.0973

Columns 70 to 79 
-0.0786  0.1067  0.0942  0.0807 -0.0860 -0.0528  0.0952  0.0870  0.0752 -0.0895

Columns 80 to 89 
-0.1118  0.0762  0.0893 -0.0818  0.0850 -0.1047 -0.1015  0.0950  0.0759 -0.0389

Columns 90 to 99 
-0.0879 -0.0937  0.0425 -0.0923 -0.0821 -0.0937  0.0584 -0.0809  0.072

In [125]:
f = cn.linear.weight * 0.5

In [142]:
max_vec_size = 3
w_2norm = cn.linear.weight.data.norm(2)
cn.linear.weight.data = max_vec_size / w_2norm * cn.linear.weight.data

In [129]:
cn

TypeError: cannot assign 'torch.autograd.variable.Variable' as parameter 'weight' (torch.nn.Parameter or None expected)

In [123]:
g = f.weight * 0.5

In [97]:
test_iter.init_epoch()
N = len(test_iter.data())
n_correct = 0
test_iter.init_epoch()
for batch in test_iter:
    probs = cn(batch.text)
    _, y_predicted = probs.max(1)
    y_true = batch.label - 1
    if (y_true == y_predicted).sum() != 50:
        raise ValueError()
    n_correct += (y_true == y_predicted).sum().float()

RuntimeError: bool value of Variable objects containing non-empty torch.ByteTensor is ambiguous

In [115]:
probs[:, 0][~(y_true == y_predicted).data]

Variable containing:
 0.3969
 0.4813
 0.5928
 0.4208
 0.4645
 0.2857
[torch.FloatTensor of size 6]