In [9]:
from __future__ import print_function
import torch

In [2]:
# torch.empty only allocates storage; it does not initialise it, so the
# displayed values are arbitrary and will differ from run to run.
x = torch.empty((3, 2))
x

tensor([[5.8270e-10, 5.9669e-07],
        [8.2990e-33, 1.3563e-19],
        [1.3563e-19, 1.3563e-19]])

In [98]:
# Two 2x2 integer matrices and their matrix product (mm is a true matrix
# multiplication, not an element-wise product).
x = torch.tensor([[2, 3], [1, 0]])
y = torch.tensor([[2, 1], [4, 6]])
z = torch.mm(x, y)

# Show the operands first, then the product, one tensor per line.
print(x, y, z, sep="\n")



tensor([[2, 3],
        [1, 0]])
tensor([[2, 1],
        [4, 6]])
tensor([[16, 20],
        [ 2,  1]])


In [99]:
# A 2x3 tensor of zeros with an explicitly requested 32-bit integer dtype.
x = torch.zeros((2, 3), dtype=torch.int32)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0]], dtype=torch.int32)


In [100]:
# A 3x2 integer tensor built from a nested Python list (one inner list per row).
y = torch.tensor([
    [2, 3],
    [4, 6],
    [3, 1],
])
y

tensor([[2, 3],
        [4, 6],
        [3, 1]])

In [101]:
# Draw standard-normal samples with the same shape as y. The dtype is
# overridden to a floating-point type instead of being inherited from y.
x = torch.randn_like(y, dtype=torch.float32)
print(x)

tensor([[ 0.1565, -1.0627],
        [-0.8772, -1.2116],
        [-1.9453,  1.4866]])


In [102]:
# Element-wise sum of x and y (the operator form of torch.add).
print(x + y)

tensor([[2.1565, 1.9373],
        [3.1228, 4.7884],
        [1.0547, 2.4866]])


In [103]:
# view() exposes the same 16 elements under a different shape.
x = torch.randn(4, 4)
y = x.view(2, 8)
z = x.view(-1, 1)  # -1 asks PyTorch to infer this dimension from the others
print(x.shape, y.shape, z.shape)

torch.Size([4, 4]) torch.Size([2, 8]) torch.Size([16, 1])


In [10]:
# Runs only when a CUDA device is present; otherwise this cell does nothing.
if torch.cuda.is_available():
    device = torch.device("cuda:0")         # device object for CUDA device 0
    x = x.to(device)                        # move x over (``.to("cuda")`` also works)
    y = torch.ones_like(x, device=device)   # create a tensor directly on the GPU
    z = torch.add(x, y)
    print(z)
    print(z.to("cpu", torch.double))        # ``.to`` can change device and dtype together

In [11]:
# Element type and device used by the training cells below.
dtype = torch.float

# Use the first GPU when CUDA is available and fall back to the CPU otherwise,
# so the notebook runs unchanged on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Problem dimensions:
#   N     - batch size
#   D_in  - input dimension
#   H     - hidden dimension
#   D_out - output dimension
N = 64
D_in, D_out = 1000, 10
H = 100

# Random input batch and random targets
x = torch.randn((N, D_in), dtype=dtype, device=device)
y = torch.randn((N, D_out), dtype=dtype, device=device)

# Random starting weights for the two layers
w1 = torch.randn((D_in, H), dtype=dtype, device=device)
w2 = torch.randn((H, D_out), dtype=dtype, device=device)

# Step size for gradient descent
learning_rate = 1e-6

In [None]:
# Method 1: train the two-layer network with hand-written gradients.
for t in range(500):
    # Forward pass: linear layer -> ReLU -> linear layer
    h = torch.mm(x, w1)
    h_relu = torch.clamp(h, min=0)
    y_pred = torch.mm(h_relu, w2)

    # Sum-of-squared-errors loss as a Python number; report every 100th step
    loss = (y_pred - y).pow(2).sum().item()
    if (t + 1) % 100 == 0:
        print(t, loss)

    # Backward pass: apply the chain rule by hand, from the loss back to w1.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = torch.mm(h_relu.t(), grad_y_pred)
    grad_h_relu = torch.mm(grad_y_pred, w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0  # ReLU passes no gradient where its input was negative
    grad_w1 = torch.mm(x.t(), grad_h)

    # Gradient-descent step, applied to the weights in place
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

In [None]:
# Method 2: Autograd
#
# Autograd only records operations on tensors created with
# requires_grad=True. The weights are therefore re-created here as
# gradient-tracking leaf tensors; this also starts training from fresh random
# weights instead of the ones already updated in place by method 1.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

for t in range(500):
    # Forward pass: intermediate values need not be kept by hand, because
    # autograd records the graph required for the backward pass.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute loss using Tensors. loss is a 0-dimensional (scalar) Tensor;
    # loss.item() returns the Python number it holds.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Backward pass: populates w1.grad and w2.grad with d(loss)/d(weight).
    loss.backward()

    # The update itself must not be recorded by autograd, hence no_grad().
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights; backward()
        # accumulates into .grad rather than overwriting it.
        w1.grad.zero_()
        w2.grad.zero_()