### Basic steps of setting up a pytorch model
1. __Data:__ preprocessing is outside the scope of this tutorial, the input is expect to be some torch tensor type
2. __Network:__ input parameters, target parameters
3. __Loss function__
4. __Optimiser__
5. __Training function__
6. __Prediction function:__ use the trained network to make predictions on unseen input
7. __Train the network__ using the given loss function and optimiser
8. __Plot__ the losses, accuracy, etc..


##### 1 - Data
Working with torch tensors

In [12]:
import torch

# every seed produces the same random array
torch.manual_seed(0)
tensor_1 = torch.rand(2,2)

tensor_2 = torch.rand(2,2)

torch.manual_seed(0)
tensor_3 = torch.rand(2,2)

print('Random tensor as seed 0:', tensor_1)
print('Different random tensor without seed 0:',tensor_2)
print('Same tensor as the first one, because we use seed 0:',tensor_3)


Random tensor as seed 0: 
 0.5488  0.5928
 0.7152  0.8443
[torch.FloatTensor of size 2x2]

Different random tensor without seed 0: 
 0.6028  0.8579
 0.5449  0.8473
[torch.FloatTensor of size 2x2]

Same tensor as the first one, because we use seed 0: 
 0.5488  0.5928
 0.7152  0.8443
[torch.FloatTensor of size 2x2]



In [26]:
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(0)
    tensor_4 = torch.rand(2,2).cuda()

if torch.cuda.is_available():
    torch.cuda.manual_seed_all(0)
    tensor_5 = torch.rand(2,2).cuda()
    
print('Random tensor as seed 0:', tensor_4)
print('Random tensor as seed 0:', tensor_5)

Random tensor as seed 0: 
 0.0202  0.3682
 0.8326  0.9572
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]

Random tensor as seed 0: 
 0.7782  0.1404
 0.8700  0.8701
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]



In [34]:
# Tensors on CPU vs GPU

if torch.cuda.is_available():
    tensor_1 = tensor_1.cuda()  # convert to gpu tensor with .cuda()

print(type(tensor_1))

tensor_1 = tensor_1.cpu()  # back to CPU with .cpu()
print(type(tensor_1))

<class 'torch.cuda.FloatTensor'>
<class 'torch.FloatTensor'>


#### (re)size
###### inplace is faster due to memory efficiency

In [82]:
%timeit tensor_4 - tensor_5

4.94 µs ± 30.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [83]:
%timeit tensor_4.sub(tensor_5)

4.61 µs ± 15.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [84]:
%timeit tensor_4.sub_(tensor_5)

3.29 µs ± 5.21 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


###### subtract

In [45]:
print(tensor_1.size()) 
print(type(tensor_4)) # size works the same on cuda tensors
print(tensor_4.size())

torch.Size([2, 2])
<class 'torch.cuda.FloatTensor'>
torch.Size([2, 2])


In [54]:
print(tensor_4.add(tensor_1.cuda())) # to add two tensor, they have to be the same type
print(tensor_1.add(torch.rand(2,2).float())) # both GPU/CPU and float/long/double


 0.5690  0.9611
 1.5478  1.8014
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]


 1.0050  0.8094
 1.2836  0.9795
[torch.FloatTensor of size 2x2]



In [64]:
print(tensor_4.sub(tensor_5))
print(tensor_4)                 # sub subtracts tensor_4 with tensor_5 == tensor_4 - tensor_5
print(tensor_4.sub_(tensor_5))
print(tensor_4)                 # sub_ replaces tensor_4 with tensor_5


-5.9757 -1.2071
-5.9727 -5.9777
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]


-5.1975 -1.0667
-5.1026 -5.1076
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]


-5.9757 -1.2071
-5.9727 -5.9777
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]


-5.9757 -1.2071
-5.9727 -5.9777
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]



###### multiply elementwise

In [86]:
print('tensor_4:',tensor_4)
print('tensor_5:',tensor_5)
# mul multiplies tensor_4 with tensor_5 == tensor_4 - tensor_5
print('Multiply elementwise:',tensor_4.mul(tensor_5))    
print('Multiply elementwise:',tensor_4 * tensor_5)
    # mul_ without using additional memory 
print('Multiply inplace:',tensor_4.mul_(tensor_5))    
    

tensor_4: 
1.00000e+06 *
 -1.4362 -0.0468
 -1.8477 -1.8479
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]

tensor_5: 
 0.7782  0.1404
 0.8700  0.8701
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]

Multiply elementwise: 
1.00000e+06 *
 -1.1176 -0.0066
 -1.6075 -1.6078
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]

Multiply elementwise: 
1.00000e+06 *
 -1.1176 -0.0066
 -1.6075 -1.6078
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]

Multiply inplace: 
1.00000e+06 *
 -1.1176 -0.0066
 -1.6075 -1.6078
[torch.cuda.FloatTensor of size 2x2 (GPU 0)]



#### 2 - Network

In [1]:
#parameters
vocab_size = len(vocabulary)

class Net(nn.Module):
    def __init__(self, vocab_size, max_len, embedding_size, num_channels, output_size, dropout):
        super(Net, self).__init__()
        
        # initialise a Network object containing e.g. the following layers  
            # an embedding layer 
        self.embedding = nn.Embedding(vocab_size, embedding_size)
            # a Linear hidden layer 
        self.hidden = nn.Linear(input_size, output_size)
            # a convolutional layer taking input with 1 channel
        self.conv = nn.Conv2d(in_channels=1, out_channels=num_channels, kernel_size=(3,embedding_size))
            # an LSTM taking word embeddings as inputs, outputting hidden states
        self.lstm = nn.LSTM(embedding_dimension, hidden_dimension)
            # a quasi-recurrent NN input: 100, output: 200, dropout
        self.qrnn = QRNN(100, 200, dropout=.5) 
        
    
    def init_hidden(self):
        # Initialise hidden states for an lstm
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (autograd.Variable(torch.zeros(1, 1, self.hidden_dim)),
                autograd.Variable(torch.zeros(1, 1, self.hidden_dim)))
    
    
    def forward(self, x):
        # determine how the input is passed through the layers
        # in example for input x, where x is a sentence        
            # convert the vector of words to a vector of word embeddings
        word_embeddings = self.embedding(x)
            # run the word_embedding vector through an lstm
        lstm_out, self.hidden = self.lstm(word_embeddings.view(len(x), 1, -1), self.hidden)
            # activation function for the convolutional layer: relu
            # the squeeze removes 1 argument / decrease the arity, 
            #      e.g. [batch_size,num_channels, Height, Width]
            #      becomes [batch_size,num_channels, Height]
        x_conv = F.relu(self.conv(x)).squeeze(3)
            # 
        
        

NameError: name 'vocabulary' is not defined