Deep learning consists of composing linearities with non-linearities in clever ways. The introduction of non-linearities allows for powerful models. In this section, we will play with these core components, make up an objective function, and see how the model is trained.

In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x7fecb3649eb0>

In [2]:
lin = nn.Linear(5, 3) # maps from R^5 to R^3
lin

Linear(in_features=5, out_features=3, bias=True)

In [4]:
data = autograd.Variable(torch.randn(2, 5))
print(lin(data))

tensor([[ 0.1755, -0.3268, -0.5069],
        [-0.6602,  0.2260,  0.1089]], grad_fn=<ThAddmmBackward>)


In [5]:
# In pytorch, most non-linearities are in torch.functional (we have it imported as F)
# Note that non-linearites typically don't have parameters like affine maps do.
# That is, they don't have weights that are updated during training.
data = autograd.Variable(torch.randn(2, 2))
print(data)
print(F.relu(data))

tensor([[-0.5404, -2.2102],
        [ 2.1130, -0.0040]])
tensor([[0.0000, 0.0000],
        [2.1130, 0.0000]])


In [8]:
# Softmax is also in torch.nn.functional
data = autograd.Variable(torch.randn(5))
print(data)
print(F.softmax(data))
print(F.softmax(data).sum())  # Sums to 1 because it is a distribution!
print(F.log_softmax(data))  # theres also log_softmax

tensor([ 2.2820, -1.2080,  1.1120,  2.2174, -0.4269])
tensor([0.4264, 0.0130, 0.1324, 0.3998, 0.0284])
tensor(1.)
tensor([-0.8523, -4.3423, -2.0222, -0.9168, -3.5611])


  after removing the cwd from sys.path.
  """
  


In [9]:
# Example: Logistic Regression bag of Words Classifier. 

data = [("me gusta comer en la cafeteria".split(), "SPANISH"), 
        ("Give it to me".split(), "ENGLISH"), 
        ("No creo que sea una buena idea".split(), "SPANISH"),
        ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]

testData = [("Yo creo que si".split(), "SPANISH"),
            ("it is lost on me".split(), "ENGLISH")]

# mapping each word in the vocab to a unique integer which will be
# its index into the bag of words vector
wordToIx = {}

for sent, _ in data + testData:
    for word in sent: 
        if word not in wordToIx:
            wordToIx[word] = len(wordToIx)
            
print(wordToIx)

VOCAB_SIZE = len(wordToIx)
NUM_LABELS = 2


{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}


In [11]:
class BoWClassifier(nn.Module): # inheriting from nn.Module
    
    def __init__(self, numLabels, vocabSize):
        # Calls the init function of nn.Module
        super(BoWClassifier, self).__init__()

        # Define the parameters that you will need.  In this case, we need A and b,
        # the parameters of the affine mapping.
        # nn.Linear() provides the affine map.
        self.linear = nn.Linear(vocabSize, numLabels)
        
    def forward(self, bowVector):
        # Pass the input through the linear layer, then pass
        # that through log_softmax
        return F.log_softmax(self.linear(bowVector))
    
    
    
def makeBowVector(sentence, wordToIx):
    

In [13]:
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)


# BoWClassifier (our module) will store knowledge of the nn.Linear's
# parameters (? when you assign a component to a class variable in the
# __init__ function of our module?)
for param in model.parameters():
    print(param) # parameters are: numlabels, and vocabsize
    

Parameter containing:
tensor([[ 0.1692,  0.1300,  0.1222,  0.1394,  0.1240,  0.0507, -0.1341, -0.1647,
         -0.0899, -0.0228, -0.1202,  0.0717,  0.0607, -0.0444,  0.0754,  0.0634,
          0.1197,  0.1321, -0.0664,  0.1916, -0.0227, -0.0067, -0.1851, -0.1262,
         -0.1146, -0.0839],
        [ 0.1394, -0.0641, -0.1466,  0.0755,  0.0628,  0.1270, -0.1015,  0.0425,
         -0.0714, -0.0441, -0.1563, -0.0894, -0.0601,  0.0839,  0.0358,  0.0484,
          0.1957,  0.1911,  0.1338,  0.0062, -0.1357,  0.1533, -0.0490, -0.0159,
         -0.1690, -0.0387]], requires_grad=True)
Parameter containing:
tensor([-0.1265,  0.1802], requires_grad=True)


In [14]:
# To run the model, pass in a BoW vector but wrapped in an
# autograd.Variable
sample = data[0]

bowVector = make_bow_vector(sample[0], wordToIx)
logProbabilities = model(autograd.Variable(bowVector))
print("bowVector: ", bowVector)
print("logprobs: ", logProbabilities)

NameError: name 'make_bow_vector' is not defined