In [1]:
import torch

In [3]:
# torch.empty allocates a 5x3 tensor without initialising it, so the values
# displayed are whatever happened to be in that memory.
x = torch.empty(5, 3) 
x

tensor([[ 0.0000e+00, -2.0000e+00, -1.2186e-01],
        [-4.6577e-10,  1.1210e-44,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])

In [4]:
x = torch.rand(5, 3) # randomly initialised 5x3 matrix
x

tensor([[0.2478, 0.0824, 0.1978],
        [0.4305, 0.6214, 0.7132],
        [0.2340, 0.5285, 0.0092],
        [0.2450, 0.1403, 0.8357],
        [0.5795, 0.2510, 0.5374]])

In [6]:
x = torch.zeros(5, 3, dtype=torch.long) # all zeros; torch.long is the 64-bit integer dtype (int64)
x

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [15]:
# Build a tensor directly from Python data; the integer 3 is stored as 3.0
# alongside 5.5 (see the displayed result).
x = torch.tensor([5.5, 3])
x

tensor([5.5000, 3.0000])

In [16]:
# new_ones builds a tensor of ones from an existing tensor; the shape is given
# explicitly and the dtype is overridden to float64 here.
x = x.new_ones(5, 3, dtype=torch.double)
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

In [17]:
# randn_like draws normally distributed values into a tensor with the same
# shape as x; the dtype is requested explicitly.
x = torch.randn_like(x, dtype=torch.float64)
x

tensor([[-1.2030,  0.6490, -2.1166],
        [-0.0439,  1.2690, -0.8937],
        [-0.7852, -0.4742, -0.1192],
        [-1.1624,  1.0911,  0.3486],
        [-0.8253,  0.6827,  0.7319]], dtype=torch.float64)

In [18]:
# Shape of x, returned as a torch.Size.
x.size()

torch.Size([5, 3])

In [19]:
y = torch.rand(5, 3)
# Element-wise addition with the + operator; the displayed result is float64.
x + y

tensor([[-0.6449,  0.7664, -1.9098],
        [ 0.1971,  1.9429, -0.3246],
        [ 0.0136,  0.2684,  0.0570],
        [-0.4141,  2.0222,  0.8237],
        [-0.0473,  1.5313,  1.3927]], dtype=torch.float64)

In [20]:
# The same element-wise addition, written as a function call.
torch.add(x, y)

tensor([[-0.6449,  0.7664, -1.9098],
        [ 0.1971,  1.9429, -0.3246],
        [ 0.0136,  0.2684,  0.0570],
        [-0.4141,  2.0222,  0.8237],
        [-0.0473,  1.5313,  1.3927]], dtype=torch.float64)

In [22]:
# Preallocate the destination, then let torch.add write the sum into it via out=.
result = torch.empty(5,3)
torch.add(x, y, out=result)

tensor([[-0.6449,  0.7664, -1.9098],
        [ 0.1971,  1.9429, -0.3246],
        [ 0.0136,  0.2684,  0.0570],
        [-0.4141,  2.0222,  0.8237],
        [-0.0473,  1.5313,  1.3927]])

In [23]:
# In-place addition: the trailing underscore means y itself is overwritten with
# y + x. Re-running this cell adds x again.
y.add_(x)

tensor([[-0.6449,  0.7664, -1.9098],
        [ 0.1971,  1.9429, -0.3246],
        [ 0.0136,  0.2684,  0.0570],
        [-0.4141,  2.0222,  0.8237],
        [-0.0473,  1.5313,  1.3927]])

In [26]:
# NumPy-style indexing: take column 1 from every row.
y[:, 1]

tensor([0.7664, 1.9429, 0.2684, 2.0222, 1.5313])

In [27]:
x = torch.randn(4, 4)
# view reinterprets the same 16 elements under a new shape.
y = x.view(16)
# -1 lets PyTorch infer that dimension from the others (16 / 8 = 2 rows).
z = x.view(-1, 8)
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [36]:
x = torch.randn(1)
# .item() extracts the plain Python number from a one-element tensor.
print(x.item())
# Convert y to a NumPy array. detach() is the supported replacement for the
# legacy `.data` attribute and yields the same values.
print(y.detach().numpy())

-0.48633334040641785
[ 1.632343    1.4551424   0.46008462 -0.13353217  0.25555608  0.39920273
 -0.16383615 -0.3670207  -0.8976133   0.4620882   2.3067892   0.5209202
 -0.51228076  1.901018   -2.1824098   0.45676297]


In [38]:
# One-dimensional tensor of five ones, used below to show the NumPy bridge.
a = torch.ones(5)
a

tensor([1., 1., 1., 1., 1.])

In [39]:
# NumPy view of the tensor: b tracks later in-place changes to a, as the next
# cell shows.
b = a.numpy()
b

array([1., 1., 1., 1., 1.], dtype=float32)

In [40]:
# Mutate the tensor in place; the array b obtained from a.numpy() changes too.
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [41]:
import numpy as np

In [42]:
a = np.ones(5)
# Wrap the NumPy array as a tensor (float64, matching the array's dtype).
b = torch.from_numpy(a)
# Write the sum back into a itself; the printed tensor b reflects the change.
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [43]:
# Runs only when a CUDA device is available; otherwise the cell prints nothing.
if torch.cuda.is_available():
    device = torch.device('cuda')
    # Create y directly on the GPU with the same shape as x.
    y = torch.ones_like(x, device=device)
    # Move x to the GPU so both operands live on the same device.
    x = x.to(device)
    z = x + y
    print(z)
    # .to can move the tensor back to the CPU and change its dtype in one call.
    print(z.to('cpu', torch.double))

In [44]:
# Report whether PyTorch can use a CUDA device in this environment.
torch.cuda.is_available()

False

### NumPy: 两层神经网络

In [49]:
import numpy as np

# Problem sizes: N samples, D_in input features, H hidden units, D_out outputs.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and regression targets.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialised weights of the two linear layers (no bias terms).
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6

for t in range(500):
    # Forward pass: linear -> ReLU -> linear.
    h = x @ w1
    h_relu = np.maximum(h, 0)
    y_pred = h_relu @ w2

    # Sum-of-squares loss, reported on every 100th iteration.
    loss = np.square(y_pred - y).sum()
    if (t + 1) % 100 == 0:
        print(t, loss)

    # Backward pass: hand-derived gradients of the loss w.r.t. w2 and w1.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T @ grad_y_pred
    grad_h_relu = grad_y_pred @ w2.T
    # The gradient is zeroed wherever the pre-activation h was negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = x.T @ grad_h

    # Gradient-descent step, applied to the weights in place.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

99 415.27842548826584
199 1.2939693810198238
299 0.006505725096035654
399 4.568204340976558e-05
499 4.210609435117912e-07


### PyTorch: Tensors

In [51]:
import torch

# Element type and device shared by every tensor in this cell. They are passed
# to each tensor constructor below, so changing them here changes where and how
# the whole computation runs.
dtype = torch.float
device = torch.device('cpu')

# Problem sizes: N samples, D_in input features, H hidden units, D_out outputs.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and regression targets.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Randomly initialised weights of the two linear layers (no bias terms).
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

learning_rate = 1e-6

for t in range(500):
    # Forward pass: linear -> ReLU (clamp at zero) -> linear.
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    # Sum-of-squares loss as a Python float, reported on every 100th iteration.
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backward pass: hand-derived gradients of the loss w.r.t. w2 and w1.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    # The gradient is zeroed wherever the pre-activation h was negative.
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Gradient-descent step, applied to the weights in place.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2



99 533.8038940429688
199 2.9597513675689697
299 0.02357589267194271
399 0.0004286114126443863
499 6.083580228732899e-05


### PyTorch: Tensors and autograd

In [53]:
import torch

# Element type and device shared by every tensor in this cell. They are passed
# to each tensor constructor below, so changing them here changes where and how
# the whole computation runs.
dtype = torch.float
device = torch.device('cpu')

# Problem sizes: N samples, D_in input features, H hidden units, D_out outputs.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and regression targets (no gradients needed for these).
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# requires_grad=True makes autograd record operations on the weights so that
# loss.backward() can fill in w1.grad and w2.grad.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6

for t in range(500):
    # Forward pass: linear -> ReLU (clamp at zero) -> linear.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Sum-of-squares loss, kept as a tensor so it can be differentiated.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Backward pass: autograd computes the gradients of loss w.r.t. w1 and w2.
    loss.backward()

    # The weight update itself must not be recorded by autograd.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Reset the gradients; backward() accumulates into .grad otherwise.
        w1.grad.zero_()
        w2.grad.zero_()
        



99 818.8666381835938
199 5.86128044128418
299 0.05427195876836777
399 0.0008007416035979986
499 7.849973917473108e-05


### PyTorch: nn

In [62]:
import torch

# Problem sizes: N samples, D_in input features, H hidden units, D_out outputs.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and regression targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Linear -> ReLU -> Linear, assembled from ready-made nn layers.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# Summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-3

for t in range(500):
    # Forward pass and loss; the loss is reported on every 100th iteration.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Clear the previous gradients, then backpropagate the current loss.
    model.zero_grad()
    loss.backward()

    # Manual gradient-descent step; no_grad keeps the update out of autograd.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

99 4.05460518493328e-08
199 3.5683730276181436e-12
299 1.5318963545590814e-12
399 1.2887943750400366e-12
499 1.7085662312038563e-12


### PyTorch: optim

In [71]:
import torch

# Problem sizes: N samples, D_in input features, H hidden units, D_out outputs.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and regression targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Linear -> ReLU -> Linear, assembled from ready-made nn layers.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# Summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-3

# Adam takes over the parameter update that was written by hand before.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(500):
    # Forward pass and loss; the loss is reported on every 100th iteration.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Clear old gradients, backpropagate, then let the optimizer update the
    # model parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    

99 0.005109058693051338
199 1.4961180738737312e-07
299 1.5158239247137217e-11
399 9.082659971348939e-12
499 9.398692761564131e-12


### PyTorch: 自定义 nn Modules

In [73]:
import torch

class TwoLayerNet(torch.nn.Module):
    """Two linear layers with a ReLU-style clamp between them."""

    def __init__(self, D_in, H, D_out):
        """Create the layers.

        Args:
            D_in: number of input features.
            H: number of hidden units.
            D_out: number of output features.
        """
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """Return the network output for input x."""
        hidden = self.linear1(x)
        # Clamping at zero is the ReLU non-linearity.
        activated = hidden.clamp(min=0)
        return self.linear2(activated)
    
# Problem sizes: N samples, D_in input features, H hidden units, D_out outputs.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and regression targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Instantiate the custom module defined in this cell.
model = TwoLayerNet(D_in, H, D_out)

# Summed squared error, minimised with SGD.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

for t in range(500):
    # Forward pass and loss; the loss is reported on every 100th iteration.
    y_pred = model(x)
    loss = criterion(y_pred, y)
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Clear old gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

99 1.414453691950257e-07
199 1.3299141518943736e-12
299 1.8025613553879216e-12
399 1.2347064560006005e-12
499 1.5774735033849407e-12
