In [1]:
import torch
import torch.autograd
from torch.autograd import Variable

In [2]:
# Hyper-parameters shared by the cells below.
learning_rate = 1e-6  # step size used in the manual weight update
# rows of x/y, input width, hidden width, output width
N, D_in, H, D_out = 64, 1000, 100, 10

In [4]:
# produce random input data; we don't need gradients on it. A plain tensor
# has requires_grad=False by default, so the deprecated Variable wrapper
# is not needed.
x = torch.randn(N, D_in)

In [5]:
# .data gives access to the underlying tensor; here we look at rows 0-9
# of column index 1 (the first ten entries of the second column)
x.data[:10, 1]

tensor([ 0.1561,  0.4000, -0.8615, -0.4835, -0.3076,  0.3016, -0.9939,  0.4709,
         0.4287,  0.5394])

In [6]:
# variables support the same convenience operations as tensors, e.g. clamp
# (reference: https://pytorch.org/docs/stable/generated/torch.clamp.html).
# Negative entries become 0; show the first ten entries of the second column.
torch.clamp(x, min=0.)[:10, 1]

tensor([0.1561, 0.4000, 0.0000, 0.0000, 0.0000, 0.3016, 0.0000, 0.4709, 0.4287,
        0.5394])

In [7]:
# produce random labels; no gradients needed on them either. A plain tensor
# has requires_grad=False by default, so the deprecated Variable wrapper
# is not needed.
y = torch.randn(N, D_out)

In [8]:
# the weights, however, must accumulate gradients: request that directly
# from the tensor factory instead of going through the deprecated Variable
# wrapper
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

In [9]:
# gradients are stored on the .grad attribute, which is only populated
# once backprop has run; print() is used because a bare None would
# display nothing
print(w1.grad)

None


In [10]:
# forward pass: linear layer -> clamp at zero (ReLU) -> linear layer
# torch.mm reference:
# https://pytorch.org/docs/stable/generated/torch.mm.html?highlight=mm#torch.mm
y_pred = torch.mm(torch.clamp(torch.mm(x, w1), min=0.), w2)
# sum of squared differences between prediction and labels
loss = torch.sum(torch.pow(y_pred - y, 2))

In [11]:
# every variable records how it was computed, so the loss can be
# backpropagated through that recorded history
torch.autograd.backward(loss)

In [12]:
# after backward() the gradient is populated; show the first ten entries
# of its second column
w1.grad[:10, 1]

tensor([-12386.5020,  -2801.8186,  -1365.5881,  10750.7256,  -3033.4141,
        -10002.8525,  15974.6494,  -7102.8984,  -2417.5420,   5025.6353])

In [16]:
## this class implements a ReLU function; the backward pass applies the
# partial derivative: the incoming gradient is kept where x >= 0. and
# zeroed where x < 0.
class ReLU(torch.autograd.Function):
    """Rectified linear unit with a hand-written backward pass.

    Use it through ``ReLU.apply(tensor)``; both methods are static and
    receive an autograd context object (``ctx``) as their first argument,
    not an instance of this class.
    """

    @staticmethod
    def forward(ctx, x):
        """Return ``x`` with every negative entry replaced by 0.

        The input is saved on ``ctx`` so that ``backward`` can rebuild the
        mask of negative positions.
        """
        ctx.save_for_backward(x)
        return x.clamp(min=0.)

    @staticmethod
    def backward(ctx, grad_y):
        """Return the gradient w.r.t. the input, given the gradient w.r.t. the output.

        Entries of ``grad_y`` at positions where the saved input was
        negative are set to 0; all other entries pass through unchanged.
        """
        x, = ctx.saved_tensors
        # masked_fill is out-of-place, so grad_y itself is never modified
        return grad_y.masked_fill(x < 0., 0.)

In [17]:
# autograd Functions are used through the class itself (forward/backward are
# static methods); instantiating one is deprecated, so bind the class rather
# than an instance. f.apply(...) works the same way.
f = ReLU

In [20]:
# "calling" the object with 'apply' invokes the .forward member function
# a plain tensor is sufficient as input: the Variable wrapper is deprecated,
# and torch.tensor is the recommended way to build a tensor from a list
f.apply(torch.tensor([3., -3.]))

tensor([3., 0.])

In [21]:
# draw a new N x D_in input tensor from a standard normal distribution
x = torch.randn((N, D_in))

In [22]:
# display the tensor (the notebook shows large tensors in abbreviated form)
x

tensor([[ 0.4367, -1.5982, -0.2146,  ...,  0.3118, -0.2496, -0.7362],
        [-1.4542, -0.5148,  0.9165,  ...,  0.0098,  1.6181, -0.9094],
        [-0.2675, -0.2521,  0.1953,  ...,  0.7910,  1.2410, -0.0043],
        ...,
        [-1.2094,  1.1401,  0.7764,  ...,  1.4210, -0.9036,  1.1800],
        [-1.4131,  0.2246, -0.4104,  ..., -0.9007, -1.2825,  0.5247],
        [-0.7374, -0.0160, -0.3361,  ...,  1.4680,  1.3157, -0.8704]])

In [23]:
# the dimensions of your tensor, reported as a torch.Size
x.size()

torch.Size([64, 1000])

In [24]:
# the type of your tensor, returned as a string naming the tensor class
x.type()

'torch.FloatTensor'

In [25]:
# on what device does your tensor live? (returns a torch device object)
x.device

device(type='cpu')

In [26]:
# move the tensor to a target device (GPU / CPU); here the target is the CPU
x = x.to(torch.device("cpu"))

In [27]:
# reinterpret the second order tensor as one large first order tensor;
# -1 lets view() infer the length (N * D_in) instead of hard-coding 64000,
# so the cell keeps working when N or D_in are changed
x.view(-1)

tensor([ 0.4367, -1.5982, -0.2146,  ...,  1.4680,  1.3157, -0.8704])

In [28]:
# create random inputs, labels and weights as plain tensors for the
# manual forward / backward pass in the following cells
x = torch.randn((N, D_in))    # inputs
y = torch.randn((N, D_out))   # labels
w1 = torch.randn((D_in, H))   # weights of the first linear layer
w2 = torch.randn((H, D_out))  # weights of the second linear layer

In [29]:
# first step of the manual forward pass: multiply inputs by the
# first-layer weights
h = torch.mm(x, w1)

In [30]:
# dimensions of the hidden layer output: one row per input row, H columns
h.size()

torch.Size([64, 100])

In [31]:
# manual "RELU" activation: every negative entry is replaced by zero
h_relu = torch.clamp(h, min=0.)

In [32]:
# second linear layer: activations times the second-layer weights
y_pred = torch.mm(h_relu, w2)

In [33]:
# the "loss" of the network: sum of squared differences between the
# prediction and the labels
loss = torch.sum(torch.pow(y_pred - y, 2))

In [34]:
# display the loss computed in the previous cell
loss

tensor(24643262.)

In [35]:
# backpropagate by hand: loss -> y_pred -> (w2, h_relu) -> h -> w1
grad_y_pred = (y_pred - y) * 2  # derivative of the summed squares
grad_w2 = torch.mm(h_relu.t(), grad_y_pred)
grad_h_relu = torch.mm(grad_y_pred, w2.t())
# the clamp passes no gradient where its input h was negative
grad_h = grad_h_relu.masked_fill(h < 0, 0.)
grad_w1 = torch.mm(x.t(), grad_h)
# same value as the learning_rate set in the configuration cell
learning_rate = 1e-6

In [36]:
# perform a learning step: gradient descent moves each weight AGAINST its
# gradient, so both updates must subtract (adding would increase the loss
# with respect to w2)
w1 -= learning_rate * grad_w1
w2 -= learning_rate * grad_w2

In [37]:
# create a 3 x 3 second order tensor of 32-bit floats, filled with zeroes
torch.zeros(3, 3, dtype=torch.float32)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [40]:
# build a 2 x 1 tensor from a nested Python list and display it
t = torch.tensor([[2.], [1.]])
t

tensor([[2.],
        [1.]])

In [42]:
# squeeze: drop dimensions of size one, i.e. a tensor of shape Ax1
# becomes a tensor of shape A
ts = torch.squeeze(t)
ts

tensor([2., 1.])

In [43]:
# dot product of the vector with itself (a special case of matrix
# multiplication)
torch.dot(ts, ts)

tensor(5.)

In [44]:
# transpose the 2 x 1 tensor into a 1 x 2 tensor
torch.t(t)

tensor([[2., 1.]])

In [45]:
# concatenate five copies of t along the first dimension, but show only
# the first 5 rows of the result
torch.cat((t,) * 5, dim=0)[:5]

tensor([[2.],
        [1.],
        [2.],
        [1.],
        [2.]])