# Official Tutorial

In [1]:
import torch

In [2]:
x = torch.empty(5, 3)
print(x)

tensor([[ 0.0000e+00, -1.0842e-19,  2.7031e+01],
        [ 2.5250e-29,  2.6822e-06,  1.3556e-19],
        [ 1.3563e-19,  7.7992e+17,  1.3563e-19],
        [ 4.5071e+16,  8.1153e+17,  1.1720e-19],
        [ 2.9503e-39,  0.0000e+00,  0.0000e+00]])


In [3]:
x = torch.rand(5, 3)
print(x)

tensor([[0.4623, 0.1123, 0.5099],
        [0.7040, 0.8428, 0.0811],
        [0.8695, 0.4567, 0.3373],
        [0.3029, 0.3624, 0.9264],
        [0.2278, 0.7987, 0.7506]])


In [4]:
x = torch.zeros(5, 3, dtype=torch.long)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [5]:
x = torch.tensor([5.5, 3])
print(x)

tensor([5.5000, 3.0000])


In [6]:
x = x.new_zeros(5, 3, dtype=torch.double)
print(x)
x = torch.rand_like(x, dtype=torch.float)
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float64)
tensor([[0.6795, 0.5048, 0.8795],
        [0.5588, 0.9571, 0.6739],
        [0.4770, 0.7770, 0.1838],
        [0.9943, 0.8883, 0.1376],
        [0.5609, 0.7841, 0.9244]])


In [7]:
print(x.size())

torch.Size([5, 3])


In [8]:
y = torch.rand(5, 3)
print(x + y)

tensor([[1.4854, 1.3192, 1.2581],
        [0.8637, 1.5184, 1.5421],
        [1.1069, 0.8176, 0.3216],
        [1.7459, 1.4823, 0.6049],
        [0.9067, 1.1304, 1.0031]])


In [9]:
print(torch.add(x, y))

tensor([[1.4854, 1.3192, 1.2581],
        [0.8637, 1.5184, 1.5421],
        [1.1069, 0.8176, 0.3216],
        [1.7459, 1.4823, 0.6049],
        [0.9067, 1.1304, 1.0031]])


In [10]:
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)

tensor([[1.4854, 1.3192, 1.2581],
        [0.8637, 1.5184, 1.5421],
        [1.1069, 0.8176, 0.3216],
        [1.7459, 1.4823, 0.6049],
        [0.9067, 1.1304, 1.0031]])


In [11]:
print(y)

tensor([[0.8059, 0.8145, 0.3786],
        [0.3049, 0.5613, 0.8682],
        [0.6298, 0.0406, 0.1378],
        [0.7515, 0.5940, 0.4673],
        [0.3458, 0.3463, 0.0787]])


In [12]:
y.add_(x)
print(y)

tensor([[1.4854, 1.3192, 1.2581],
        [0.8637, 1.5184, 1.5421],
        [1.1069, 0.8176, 0.3216],
        [1.7459, 1.4823, 0.6049],
        [0.9067, 1.1304, 1.0031]])


In [13]:
x[:, 1]

tensor([0.5048, 0.9571, 0.7770, 0.8883, 0.7841])

In [14]:
x = torch.rand(4, 4)
y = x.view(16)
z = x.view(-1, 8)
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [15]:
x = torch.rand(1)
print(x)
print(x.item())

tensor([0.0996])
0.09964334964752197


In [16]:
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


In [17]:
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [18]:
a.add_(1)
print(a, b)

tensor([2., 2., 2., 2., 2.]) [2. 2. 2. 2. 2.]


In [19]:
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


# [Community Tutorial](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py)

In [20]:
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms

### Autograd

In [21]:
x = torch.tensor(1., requires_grad=True)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)

y = w * x + b # build computational graph

y.backward() # compute gradients

print(x.grad)
print(w.grad)
print(b.grad)

tensor(2.)
tensor(1.)
tensor(1.)


In [22]:
x = torch.randn(10, 3)
y = torch.randn(10, 2)

linear = nn.Linear(3, 2) # FC layer
print('w: ', linear.weight)
print('b: ', linear.bias)

criterion = nn.MSELoss() # choose loss function
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)

pred = linear(x) # forward pass
loss = criterion(pred, y) # loss
print('loss: ', loss.item())

loss.backward()
print('dL/dw: ', linear.weight.grad)
print('dL/db: ', linear.bias.grad)

optimizer.step()
# linear.weight.data.sub_(0.01 * linear.weight.grad.data)
# linear.bias.data.sub_(0.01 * linear.bias.grad.data)


pred = linear(x)
loss = criterion(pred, y)
print('loss after 1 step optim: ', loss.item())

w:  Parameter containing:
tensor([[-0.5238, -0.0291, -0.4118],
        [ 0.1364, -0.3697,  0.0664]], requires_grad=True)
b:  Parameter containing:
tensor([ 0.4930, -0.1225], requires_grad=True)
loss:  2.039693832397461
dL/dw:  tensor([[-0.5169, -0.6291, -0.9528],
        [-0.0033, -0.7767, -0.0712]])
dL/db:  tensor([0.6300, 0.0826])
loss after 1 step optim:  2.014127731323242


### Numpy

In [23]:
x = np.array([[1, 2], [3, 4]])
y = torch.from_numpy(x)
z = y.numpy()

print(x)
print(y)
print(z)

[[1 2]
 [3 4]]
tensor([[1, 2],
        [3, 4]])
[[1 2]
 [3 4]]


### Input pipeline

In [24]:
train_dataset = torchvision.datasets.CIFAR10(root='./data',
                                             train=True,
                                             transform=transforms.ToTensor(),
                                             download=True)
image, label = train_dataset[0]
print(image.size())
print(label)

Files already downloaded and verified
torch.Size([3, 32, 32])
6


In [25]:
# Wrap the training dataset in a loader that yields shuffled mini-batches of 64.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64,
                                           shuffle=True)

# Pull a single mini-batch by hand.  Use the built-in next(): the iterator
# returned by iter(DataLoader) implements __next__, whereas the Python-2 style
# .next() alias is not available on current PyTorch releases.
data_iter = iter(train_loader)
images, labels = next(data_iter)
print("size of one mini-batch: ", images.size(), labels.size())

size of one mini-batch:  torch.Size([64, 3, 32, 32]) torch.Size([64])


In [26]:
for images, labels in train_loader:
    pass

### Create custom dataset pipeline

In [27]:
class CustomDataset(torch.utils.data.Dataset):
    """Skeleton dataset showing the three methods a custom dataset defines.

    The bodies are placeholders: every index yields the same dummy pair and
    the dataset reports a length of one.
    """

    def __init__(self):
        """Placeholder constructor; a real dataset would set up its file paths here."""

    def __getitem__(self, index):
        """Return the data pair for ``index`` (here always the dummy pair ``(1, 2)``).

        A real implementation would:
          1. read one sample from file (e.g. with numpy.fromfile),
          2. pre-process it (e.g. with torchvision.transforms),
          3. return the resulting data pair.
        """
        sample = (1, 2)
        return sample

    def __len__(self):
        """Return the total number of samples in the dataset (here 1)."""
        dataset_size = 1
        return dataset_size

# Feed the custom dataset through a DataLoader exactly like a built-in one.
custom_dataset = CustomDataset()
train_loader = torch.utils.data.DataLoader(dataset=custom_dataset,
                                           batch_size=64,
                                           shuffle=True)

# Fetch the first batch with the built-in next(); the iterator's legacy
# .next() method is not available on current PyTorch releases.
data_iter = iter(train_loader)
images, labels = next(data_iter)
print(images, labels)

tensor([1]) tensor([2])


### Pretrained model

In [28]:
# Download and load pretrained ResNet-18
# NOTE(review): newer torchvision releases replace the `pretrained` flag with a
# `weights=` argument and warn on this call -- confirm against the installed version.
resnet = torchvision.models.resnet18(pretrained=True)

In [29]:
# Only tune the top layer of the model
for param in resnet.parameters():
    param.requires_grad = False

# Replace the FC for finetuning.
resnet.fc = nn.Linear(resnet.fc.in_features, 100)

images = torch.randn(64, 3, 224, 224)
outputs = resnet(images)
print(outputs.size())

torch.Size([64, 100])


### Save and load models

In [30]:
# Save and load the entire model.

# torch.save(resnet, 'model.ckpt')
# model = torch.load('model.ckpt')

In [31]:
# !du -sh model.ckpt

In [32]:
# Save and load only the model params. Recommended.

# torch.save(resnet.state_dict(), 'params.ckpt')
# resnet.load_state_dict(torch.load('params.ckpt'))

In [33]:
# !du -sh params.ckpt

In [34]:
import torch

x = torch.tensor([1., 2., 3], requires_grad=True)

# With requires_grad=True, you can still do all the operations you previously
# could
y = torch.tensor([4., 5., 6], requires_grad=True)
z = x + y
print(z)

# BUT z knows something extra.
print(z.grad_fn)

tensor([5., 7., 9.], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x121d3c5c0>


In [35]:
s = z.sum()
print(s)
print(s.grad_fn)

tensor(21., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x121d3c0f0>


In [36]:
s.backward()
print(x.grad)
print(y.grad)

tensor([1., 1., 1.])
tensor([1., 1., 1.])


In [37]:
x = torch.randn(2, 2)
y = torch.randn(2, 2)
print(x.requires_grad, y.requires_grad)
z = x + y
# So you can't backprop through z
print(z.grad_fn)

False False
None


In [38]:
x = x.requires_grad_()
y = y.requires_grad_()
# z contains enough information to compute gradients, as we saw above
z = x + y
print(z.grad_fn)

<AddBackward0 object at 0x121d3c1d0>


In [39]:
print(z.requires_grad)

True


In [40]:
new_z = z.detach()
print(new_z.grad_fn)

None


In [41]:
print(x.requires_grad)
print((x**2).requires_grad)

True
True


In [42]:
with torch.no_grad():
    print((x ** 2).requires_grad)

False


In [43]:
import torch.nn.functional as F
data = torch.randn(5)
print(data)
print(F.softmax(data, dim=0))
print(F.log_softmax(data, dim=0))

tensor([-0.0467,  1.4598, -0.1100,  1.4990, -2.1744])
tensor([0.0888, 0.4006, 0.0834, 0.4166, 0.0106])
tensor([-2.4213, -0.9147, -2.4846, -0.8756, -4.5490])


In [44]:
# Toy training corpus: (tokenised sentence, language label) pairs.
data = [
    ("me gusta comer en la cafeteria".split(), "SPANISH"),
    ("Give it to me".split(), "ENGLISH"),
    ("No creo que sea una buena idea".split(), "SPANISH"),
    ("No it is not a good idea to get lost at sea".split(), "ENGLISH"),
]

# Held-out sentences used to inspect the model before and after training.
test_data = [
    ("Yo creo que si".split(), "SPANISH"),
    ("it is lost on me".split(), "ENGLISH"),
]

# word_to_ix assigns every distinct word (train and test) the next free
# integer, in order of first appearance.  That integer is the word's position
# in the bag-of-words vector.
word_to_ix = {}
for sentence, _label in data + test_data:
    for token in sentence:
        # setdefault only inserts when the token is new, so an existing index
        # is never overwritten.
        word_to_ix.setdefault(token, len(word_to_ix))
print(word_to_ix)


# Dimensions of the classifier: input width and number of output classes.
NUM_LABELS = 2
VOCAB_SIZE = len(word_to_ix)

{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}


In [45]:
import torch.nn as nn
import torch.optim as optim

class BoWClassifier(nn.Module):  # subclass of nn.Module
    """Bag-of-words classifier: a single affine layer followed by log-softmax."""

    def __init__(self, num_labels, vocab_size):
        """Create the affine map from bag-of-words counts to label scores.

        Args:
            num_labels: width of the output (one score per label).
            vocab_size: width of the input bag-of-words vector.
        """
        # Run nn.Module's own initialiser; always do this in an nn.Module subclass.
        super().__init__()

        # The only parameters needed are the matrix and bias of the affine
        # mapping, both of which nn.Linear provides.  The input width is
        # vocab_size (one count per word) and the output width is num_labels
        # (one score per label).
        self.linear = nn.Linear(in_features=vocab_size, out_features=num_labels)

        # The log-softmax applied in forward() has no parameters, so nothing
        # is registered for it here.

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for ``bow_vec``.

        The input goes through the linear layer and the result through
        log-softmax taken over dimension 1.
        """
        label_scores = self.linear(bow_vec)
        # Non-linearities such as log_softmax live in torch.nn.functional.
        return F.log_softmax(label_scores, dim=1)

In [46]:
def make_bow_vector(sentence, word_to_ix):
    """Turn a tokenised sentence into a single-row bag-of-words count tensor.

    Args:
        sentence: iterable of words; each must be a key of ``word_to_ix``
            (an unknown word raises KeyError).
        word_to_ix: mapping from word to its column index.

    Returns:
        Tensor of shape (1, len(word_to_ix)) whose entries count how often
        each word occurs in ``sentence``.
    """
    counts = torch.zeros(len(word_to_ix))
    for column in (word_to_ix[token] for token in sentence):
        counts[column] += 1
    # Add a leading dimension of size 1 so the result is a one-row matrix.
    return counts.view(1, -1)

def make_target(label, label_to_ix):
    """Wrap the integer index of ``label`` in a one-element LongTensor.

    Args:
        label: key to look up in ``label_to_ix``.
        label_to_ix: mapping from label to its integer class index.

    Returns:
        A LongTensor holding the single class index.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])

In [47]:
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

In [48]:
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.1038,  0.1838, -0.1130, -0.1359, -0.1720, -0.1334, -0.0376,  0.1931,
         -0.1626,  0.0117,  0.0868, -0.1569, -0.1342, -0.0620, -0.0829,  0.0184,
          0.1634,  0.1766, -0.1519, -0.0599,  0.0041,  0.1608,  0.0227, -0.0122,
          0.0574,  0.0859],
        [ 0.1924, -0.1295, -0.0802,  0.0099, -0.1556,  0.0938, -0.0824, -0.1131,
         -0.1594,  0.0292, -0.0626, -0.0591,  0.1925,  0.0847,  0.0426,  0.1206,
         -0.1187, -0.0515, -0.1537, -0.1297, -0.0676,  0.0928,  0.1558, -0.1668,
         -0.0299, -0.1631]], requires_grad=True)
Parameter containing:
tensor([0.1314, 0.1217], requires_grad=True)


In [49]:
with torch.no_grad():
    sample = data[0]
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)

tensor([[-0.9103, -0.5149]])


In [50]:
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}


In [51]:
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Print the matrix column corresponding to "creo"
print(next(model.parameters()).size())

print(next(model.parameters())[:, word_to_ix["creo"]])

tensor([[-0.5530, -0.8562]])
tensor([[-0.4308, -1.0497]])
torch.Size([2, 26])
tensor([ 0.0868, -0.0626], grad_fn=<SelectBackward>)


In [52]:
# Negative log-likelihood loss, applied below to the model's log-probabilities.
loss_function = nn.NLLLoss()
# Plain SGD over every parameter of the model, with learning rate 0.1.
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Usually you want to pass over the training data several times.
# 100 epochs is far more than a real data set would need, but this toy set
# has only a handful of instances.  On real data, somewhere between 5 and 30
# epochs is usually reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients, so clear
        # them out before each instance.
        model.zero_grad()

        # Step 2. Build the BOW vector and wrap the target in a tensor as an
        # integer.  For example, if the target is SPANISH we wrap the
        # integer 0; the loss function then knows that the 0th element of
        # the log probabilities is the log probability corresponding to
        # SPANISH.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Step 3. Run the forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss, back-propagate to obtain the gradients,
        # and update the parameters by calling optimizer.step().
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

In [53]:
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Index corresponding to Spanish goes up, English goes down!
print(next(model.parameters())[:, word_to_ix["creo"]])

tensor([[-0.0781, -2.5889]])
tensor([[-2.2109, -0.1161]])
tensor([ 0.5741, -0.5499], grad_fn=<SelectBackward>)
