In [1]:
# Optional one-time environment setup, left commented out so running the
# cell does not install anything:
# !conda install pytorch torchvision -c pytorch -y
import torch
# Report the installed PyTorch version.
print(torch.__version__)
# Displayed as the cell result: whether this install can use a CUDA device.
torch.cuda.is_available()

1.1.0


False

In [5]:
# Build a 2x2 tensor from a nested Python list. torch.Tensor produces the
# default floating-point type, hence the trailing dots in the output.
tensor_array = torch.Tensor([
    [1, 2],
    [4, 5],
])
tensor_array

tensor([[1., 2.],
        [4., 5.]])

In [6]:
# Allocate a 3x3 float tensor without initializing its contents.
# torch.empty spells out "shape only, values are whatever was in memory".
tensor_uninitialized = torch.empty(3, 3)

In [7]:
# Total number of elements in the tensor, i.e. the product of its
# dimensions (3 * 3 here).
tensor_uninitialized.numel()

9

In [8]:
# torch.rand allocates the tensor and fills it in one step with random
# values drawn uniformly from [0, 1).
tensor_initialized = torch.rand((2, 3))
tensor_initialized

tensor([[0.1392, 0.5731, 0.0988],
        [0.2479, 0.1450, 0.4872]])

In [9]:
# torch.randn samples from the standard normal distribution; converting the
# samples to 32-bit integers leaves mostly 0 and +/-1, as the output shows.
tensor_int = torch.randn(5, 3).int()
tensor_int

tensor([[ 0,  0, -1],
        [ 0,  1,  0],
        [ 1,  0,  0],
        [-1, -1,  0],
        [ 0,  1,  0]], dtype=torch.int32)

In [10]:
# Build a 64-bit integer ("long") tensor from float literals.
# The values are first materialized as a float tensor and then cast, so the
# float -> integer conversion is done by PyTorch itself. The legacy
# torch.LongTensor([1.0, ...]) spelling depends on Python implicitly
# coercing floats to ints, which newer Python/PyTorch combinations can
# reject with a TypeError.
tensor_long = torch.tensor([1.0, 2.0, 3.0]).long()
tensor_long

tensor([1, 2, 3])

In [11]:
# byte: unsigned 8-bit integer, representable range 0~255.
# Out-of-range inputs wrap modulo 256 when cast: 261 -> 5 and -5 -> 251.
# The values are built as a regular integer tensor and cast explicitly so
# the wrap-around is demonstrated by the cast itself; passing out-of-range
# literals straight to a uint8 constructor can raise an overflow error on
# newer PyTorch releases.
tensor_byte = torch.tensor([0, 261, 1, -5]).to(torch.uint8)
tensor_byte

tensor([  0,   5,   1, 251], dtype=torch.uint8)

In [12]:
# A 1-D tensor holding ten ones (floating point, per the displayed values).
tensor_ones = torch.ones(10)
tensor_ones

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [13]:
# A 1-D tensor holding ten zeros (floating point, per the displayed values).
tensor_zeroes = torch.zeros(10)
tensor_zeroes

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [14]:
# 3x3 identity matrix: ones on the diagonal, zeros everywhere else.
tensor_eye = torch.eye(3)
tensor_eye

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [15]:
# Indices of every non-zero element, one (row, column) pair per line; for
# the identity matrix these are exactly the diagonal positions.
non_zero = tensor_eye.nonzero()
non_zero

tensor([[0, 0],
        [1, 1],
        [2, 2]])

In [16]:
# ones_like builds a tensor of ones with the same shape as tensor_eye (3x3).
tensor_ones_shape_eye = torch.ones_like(tensor_eye)
tensor_ones_shape_eye

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [17]:
# Same call applied to the index tensor non_zero: the result takes its 3x2
# shape, and the displayed ones are integers rather than floats, matching
# the integer index tensor it was modeled on.
tensor_ones_shape_xyz = torch.ones_like(non_zero)
tensor_ones_shape_xyz

tensor([[1, 1],
        [1, 1],
        [1, 1]])

In [18]:
# Methods whose name ends in an underscore work in place: they modify the
# tensor they are called on. Here every entry of a random 3x3 tensor is
# overwritten with 3; fill_ hands back that same tensor, which the cell shows.
initial_tensor = torch.rand((3, 3))
initial_tensor.fill_(3)

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])

In [19]:
# Out-of-place arithmetic: the sum is stored in a brand-new tensor and
# initial_tensor keeps its current values.
new_tensor = initial_tensor + 4
new_tensor

tensor([[7., 7., 7.],
        [7., 7., 7.],
        [7., 7., 7.]])

In [20]:
# In-place variant: add_ (trailing underscore) updates initial_tensor itself.
# Re-running this cell adds 5 again, so the cell is not idempotent.
initial_tensor.add_(5)
initial_tensor

tensor([[8., 8., 8.],
        [8., 8., 8.],
        [8., 8., 8.]])

In [21]:
# NumPy interop: start from a small integer array.
# NOTE(review): notebook convention is to keep all imports in the first cell.
import numpy as np
numpy_arr = np.array([1,2,3])
numpy_arr

array([1, 2, 3])

In [22]:
# Wrap the NumPy array as a tensor. The cells that follow show that an
# in-place edit of numpy_arr is visible through `tensor`, i.e. the two
# objects share the same data rather than holding separate copies.
tensor = torch.from_numpy(numpy_arr)
tensor

tensor([1, 2, 3], dtype=torch.int32)

In [23]:
# Convert the tensor back into a NumPy array.
numpy_from_tensor = tensor.numpy()
numpy_from_tensor

array([1, 2, 3])

In [24]:
# Change one element of the original array in place; displaying `tensor`
# afterwards reveals whether the change carries over.
numpy_arr[1] = 4
numpy_arr

array([1, 4, 3])

In [25]:
# The tensor created with from_numpy reflects the edit made to numpy_arr.
tensor

tensor([1, 4, 3], dtype=torch.int32)

In [15]:
# Slicing demo: start from a fresh random 2x3 tensor.
# NOTE(review): this cell's execution count (15) is lower than that of the
# slicing cells after it, and their saved outputs show different values, so
# it appears to have been re-run later. Restart & Run All to resync outputs.
initial_tensor = torch.rand((2, 3))
initial_tensor

tensor([[0.0094, 0.9486, 0.7098],
        [0.4454, 0.2457, 0.5283]])

In [28]:
# Pick a single entry: row 0, then column 2. The result is a 0-d tensor.
one_element = initial_tensor[0][2]
one_element

tensor(0.5047)

In [29]:
# Slice: keep every row, drop the first column (columns 1 onward).
partial_matrix = initial_tensor[:,1:]
partial_matrix

tensor([[0.5179, 0.5047],
        [0.7637, 0.7021]])

In [30]:
# size() reports the tensor's dimensions as a torch.Size.
initial_tensor.size()

torch.Size([2, 3])

In [32]:
# .shape is the attribute spelling; it displays the same torch.Size as size().
initial_tensor.shape

torch.Size([2, 3])

In [35]:
# view plays the role of NumPy's reshape: present the same six elements
# of the 2x3 tensor as 3x2.
resized_tensor = initial_tensor.view(3,2)
resized_tensor.shape

torch.Size([3, 2])

In [36]:
# -1 asks view to infer that dimension from the element count: six elements
# laid out in six columns leave a single row.
resized_tensor = initial_tensor.view(-1,6)
resized_tensor.shape

torch.Size([1, 6])

In [37]:
# Show the 1x6 result.
resized_tensor

tensor([[0.7986, 0.5179, 0.5047, 0.0437, 0.7637, 0.7021]])

In [16]:
# Display the 2x3 tensor that the sort in the next cell operates on.
initial_tensor

tensor([[0.0094, 0.9486, 0.7098],
        [0.4454, 0.2457, 0.5283]])

In [18]:
# Sort the values within each row in ascending order. The call returns both
# the sorted values and, for each of them, its position in the original row.
sorted_tensor, sorted_indices = initial_tensor.sort()

In [19]:
# Values of each row in ascending order.
sorted_tensor

tensor([[0.0094, 0.7098, 0.9486],
        [0.2457, 0.4454, 0.5283]])

In [20]:
# For each sorted value, the column it occupied in the original row.
sorted_indices

tensor([[0, 2, 1],
        [1, 0, 2]])

In [21]:
# Sort along dim=0, i.e. order the values within each column independently.
# The result is bound to sorted_x rather than `sorted` so that Python's
# built-in sorted() is not shadowed for the rest of the kernel session.
x = torch.randn(3, 4)
sorted_x, indices = torch.sort(x, dim=0)
sorted_x

tensor([[-1.5325, -0.5147, -0.0353, -1.8224],
        [-0.9944, -0.2572,  0.4852, -0.0822],
        [ 0.1894,  1.2826,  0.9643,  0.1617]])

In [23]:
# From the dim=0 sort: indices[i, j] is the row of x that holds the i-th
# smallest value of column j.
indices

tensor([[2, 0, 0, 2],
        [0, 1, 2, 1],
        [1, 2, 1, 0]])

# 热身：用numpy实现两层神经网络

$h = W_1X + b_1$
$a = \max(0, h)$
$\hat{y} = W_2a + b_2$

In [1]:
import numpy as np

# Batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random training inputs and targets.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialized weights for the two linear layers (no bias terms).
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
    # Forward pass: linear -> ReLU -> linear.
    h = x @ w1                   # (N, H)
    h_relu = np.maximum(h, 0)    # (N, H)
    y_pred = h_relu @ w2         # (N, D_out)

    # Sum-of-squared-errors loss over the whole batch.
    loss = ((y_pred - y) ** 2).sum()
    print(it, loss)

    # Backward pass: apply the chain rule by hand, from the loss back to
    # each weight matrix.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T @ grad_y_pred
    grad_h_relu = grad_y_pred @ w2.T
    # ReLU passes no gradient where its input was negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = x.T @ grad_h

    # Plain gradient-descent step on both weight matrices.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 26711451.265976667
1 21393256.89459321
2 18417729.024671633
3 15714239.33957896
4 12759011.19823733
5 9730331.645010393
6 7051155.374207037
7 4942573.136024397
8 3434908.022678569
9 2411698.6524730762
10 1736187.5452568007
11 1290814.5374552938
12 993240.4241178567
13 788844.9384683988
14 643843.2653339806
15 537128.3616961685
16 455904.4511038303
17 392181.96362324106
18 340899.17659566866
19 298709.2135219119
20 263416.2580423041
21 233528.07886463858
22 207994.05071831855
23 185954.15296971885
24 166838.19109721488
25 150115.69501523697
26 135419.22983223337
27 122522.43404054674
28 111097.21004149289
29 100947.95061644295
30 91904.65990683803
31 83817.6343186279
32 76571.407577541
33 70070.02830536626
34 64218.767054005366
35 58940.28617750328
36 54167.23492453193
37 49847.37943528049
38 45927.949975485986
39 42372.16797663368
40 39136.7283890004
41 36186.27731196976
42 33491.6960247533
43 31027.97340521956
44 28772.012302034913
45 26703.794768295684
46 24805.70249013509
47 23061

436 0.0016687917440608986
437 0.0016103937948510165
438 0.0015540644207315505
439 0.0014997170386311539
440 0.0014472708155018207
441 0.0013966739220545032
442 0.0013478610111159724
443 0.0013007502872133718
444 0.0012552944462782994
445 0.0012114491315727482
446 0.0011691289928890288
447 0.0011283025552919752
448 0.0010889046240254339
449 0.0010508841403587346
450 0.0010142094245556956
451 0.0009788101125698676
452 0.0009446592022519673
453 0.0009117069865898169
454 0.0008798981240069844
455 0.0008492154966952158
456 0.000819604546231861
457 0.0007910240488567588
458 0.0007634567382728881
459 0.0007368503114828951
460 0.0007111673909488548
461 0.0006863895428321836
462 0.0006624817797112176
463 0.0006394046590291453
464 0.0006171348028907205
465 0.0005956520385995858
466 0.0005749128320285316
467 0.0005548971436981323
468 0.0005355844934506449
469 0.0005169500508109225
470 0.0004989611343414821
471 0.0004816007151845353
472 0.0004648517710732606
473 0.00044868262244254233
474 0.000433

In [None]:
import torch

# Batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random training inputs and targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Randomly initialized weights for the two linear layers (no bias terms).
w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.mm(w1)  # N * H
    h_relu = h.clamp(min=0)  # N * H
    y_pred = h_relu.mm(w2)  # N * D_out

    # Compute the loss with tensor ops (not NumPy) so everything stays in
    # torch; .item() extracts a plain Python float for printing.
    loss = (y_pred - y).pow(2).sum()
    print(it, loss.item())

    # Backward pass: manual chain rule.
    #     y = ax + b
    #     dy / dx = a
    #     dy / da = x
    grad_y_pred = 2.0 * (y_pred - y)
    # .t() is the 2-D transpose and mm the matrix product; dot() only
    # accepts 1-D tensors and would raise on these matrices.
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    # ReLU passes no gradient where its input was negative.
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # update weights of w1 and w2
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

In [3]:
import torch

# Three scalar leaf tensors that autograd is asked to track.
x = torch.tensor(1.0, requires_grad=True)
w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(3.0, requires_grad=True)

# Forward: y = w * x + b = 2 * 1 + 3
y = w * x + b

# Backward: populate .grad on every leaf with dy/d(leaf).
y.backward()

# Expected: dy/dw = x = 1, dy/dx = w = 2, dy/db = 1
print(w.grad, x.grad, b.grad, sep="\n")

tensor(1.)
tensor(2.)
tensor(1.)
