In [1]:
import torch

#### Tensor 是一个类似于Numpy的ndarray，唯一的区别Tensor可以在GPU上进行加速训练

In [2]:
x = torch.ones(2,2,requires_grad = True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


#### 构造一个未初始化的5*3 的矩阵：


In [3]:
torch.empty(5,3)

tensor([[2.5788e-09, 4.1304e-08, 1.6617e+22],
        [5.3367e-08, 1.6613e-07, 2.1008e+20],
        [1.7013e-04, 1.0570e+21, 1.3224e+22],
        [5.4667e+22, 1.3672e-05, 8.1726e+20],
        [4.3678e-05, 2.6435e-06, 0.0000e+00]])

#### 因为没有被初始化 所以构建的都是一些随机数

#### 构建一个随机初始化矩阵

In [4]:
x = torch.rand(5,3) #构建的矩阵都在0-1之间
x

tensor([[0.5206, 0.1800, 0.2630],
        [0.7142, 0.4689, 0.7405],
        [0.9030, 0.4611, 0.8944],
        [0.5284, 0.5886, 0.4217],
        [0.3416, 0.1060, 0.1795]])

#### 构建一个全部为0，类型为long的矩阵 

In [8]:
x = torch.zeros(5,3,dtype = torch.long)
x

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [9]:
x.dtype

torch.int64

#### 从数据直接构建tensor


In [10]:
x = torch.tensor([5.5,3])
x 

tensor([5.5000, 3.0000])

#### 也可以从已有的tensor中构建一个tensor。这些方法会重用原来的这些特征，例如，数据类型，除非是提供新的数据

In [14]:
x = x.new_ones(5,3)
x.dtype #重用了x原来的数据类型

torch.float32

In [15]:
x = torch.randn_like(x,dtype=torch.float) #产生了跟上一个形状相同的tensor
x 

tensor([[-0.5507, -0.8117,  0.7284],
        [-1.6397,  0.6817,  1.3135],
        [-1.1479,  1.4565, -0.1731],
        [ 1.1901,  2.0387, -0.7863],
        [-0.3298, -0.6135,  0.7787]])

#### 得到tensor的形状

In [17]:
x.shape

torch.Size([5, 3])

## 基本的运算

In [21]:
y = torch.rand(5,3)
y

tensor([[0.6233, 0.8947, 0.1430],
        [0.6617, 0.4435, 0.0874],
        [0.1702, 0.6965, 0.4901],
        [0.3424, 0.2567, 0.6223],
        [0.8773, 0.0601, 0.8111]])

In [22]:
x

tensor([[-0.5507, -0.8117,  0.7284],
        [-1.6397,  0.6817,  1.3135],
        [-1.1479,  1.4565, -0.1731],
        [ 1.1901,  2.0387, -0.7863],
        [-0.3298, -0.6135,  0.7787]])

In [23]:
x + y

tensor([[ 0.0726,  0.0831,  0.8714],
        [-0.9780,  1.1253,  1.4008],
        [-0.9777,  2.1530,  0.3170],
        [ 1.5325,  2.2954, -0.1640],
        [ 0.5475, -0.5535,  1.5898]])

In [24]:
torch.add(x,y)

tensor([[ 0.0726,  0.0831,  0.8714],
        [-0.9780,  1.1253,  1.4008],
        [-0.9777,  2.1530,  0.3170],
        [ 1.5325,  2.2954, -0.1640],
        [ 0.5475, -0.5535,  1.5898]])

In [26]:
result = torch.empty(5,3)
torch.add(x,y,out=result)

tensor([[ 0.0726,  0.0831,  0.8714],
        [-0.9780,  1.1253,  1.4008],
        [-0.9777,  2.1530,  0.3170],
        [ 1.5325,  2.2954, -0.1640],
        [ 0.5475, -0.5535,  1.5898]])

#### in-place 加法

In [27]:
y.add_(x)
y  #y 产生了变化

tensor([[ 0.0726,  0.0831,  0.8714],
        [-0.9780,  1.1253,  1.4008],
        [-0.9777,  2.1530,  0.3170],
        [ 1.5325,  2.2954, -0.1640],
        [ 0.5475, -0.5535,  1.5898]])

#### 各种类似numpy的indexing都可以在pytorch tensor上使用

In [28]:
x[:,1:]

tensor([[-0.8117,  0.7284],
        [ 0.6817,  1.3135],
        [ 1.4565, -0.1731],
        [ 2.0387, -0.7863],
        [-0.6135,  0.7787]])

#### resizing: 如果希望resize/reshape一个tensor,可以使用torch.view:

In [31]:
x = torch.rand(4,4)
y = x.view(16)
y  #y 变成一行16个的数组
z = x.view(8,2)
z   # z 变成8*2  矩阵

tensor([[0.4983, 0.3611],
        [0.6749, 0.1142],
        [0.6845, 0.9593],
        [0.9854, 0.3119],
        [0.5491, 0.5969],
        [0.2018, 0.5009],
        [0.2706, 0.6939],
        [0.9848, 0.2650]])

## numpy和torch的转换

#### 在Torch Tensor 和 Numpy array 之间相互转换非常容易
把Torch tensor转换 numpy array

In [33]:
a = torch.ones(5)
a

tensor([1., 1., 1., 1., 1.])

In [35]:
b = a.numpy()
b

array([1., 1., 1., 1., 1.], dtype=float32)

#### 改变numpy array里面的值

In [36]:
b[1] = 2
a 

tensor([1., 2., 1., 1., 1.])

#### 把numpy array 转变成 Torch tensor 

In [38]:
import numpy as np

In [40]:
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a,1,out=a)
a

array([2., 2., 2., 2., 2.])

In [41]:
b

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

#### cuda tensor
使用.to方法，Tensor可以转移到别的设备上去

In [44]:
torch.cuda.is_available()
    

False

## 热身，用numpy实现两层神经网络

#### 一个全连接的神经网络，一个隐藏层，没有bias,用来从x预测y,使用L2-loss
 - $h = w_1x + b_1$
 - $a = max(0,h)$
 - $y_{hat} = w_2a + b_2 $

这一实现完全使用numpy来计算前向神经网络，loss和反向传播。

 - forward pass
 - loss
 - backward pass

numpy ndarray是一个普通的n维array。它不知道关于深度学习或者梯度的知识，只是用来计算数学运算的数据结构

In [49]:
N,D_in,H,D_out = 64,1000,100,10  #64维的数据 1000层的输入 100是中间层 10是输出层

# 随机创建一些训练数据
x = np.random.randn(N,D_in)
y = np.random.randn(N,D_out)

w1 = np.random.randn(D_in,H)
w2 = np.random.randn(H,D_out)

learning_rate = 1e-6
for t in range(500):
    # forward pass
    h = x.dot(w1) # N*H
    h_relu = np.maximum(h,0) # N*H
    y_pred = h_relu.dot(w2) # N * D_out
    
    # compute loss 
    loss = np.square(y_pred - y).sum()
    print(t,loss)
    
    # backword pass 
    # computer the gradient
    grad_y_pred = 2.0*(y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h<0] = 0
    grad_w1 = x.T.dot(grad_h)
    
    #update weight of w1 and w2
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 36504339.138681434
1 35949663.97787457
2 38082329.89546281
3 35639241.24988638
4 26892047.607315607
5 15775396.030430816
6 7934941.234597689
7 3923012.319416119
8 2189426.7861144496
9 1430016.7063980596
10 1057365.9468906461
11 840297.3536702027
12 693424.1894314955
13 583928.9864209956
14 497650.6036430961
15 427433.6170698551
16 369361.8549814124
17 320841.28703885013
18 280017.0916126657
19 245447.47922297043
20 215949.40225287378
21 190657.31891412148
22 168864.00714111282
23 150010.7958113124
24 133612.1151428158
25 119296.79881233518
26 106793.20440647652
27 95814.6848345185
28 86151.53426018386
29 77620.25277674818
30 70061.08462130092
31 63353.23466509193
32 57383.403719194146
33 52060.980765674416
34 47307.147960038405
35 43050.93709731488
36 39232.70943887669
37 35802.2282229252
38 32715.289819326375
39 29933.22823767988
40 27421.602389779247
41 25150.076839476493
42 23093.569894191496
43 21228.36038354155
44 19533.605515188377
45 17992.418078416464
46 16591.335434518678
47

402 0.00022007423765389557
403 0.00021024017641365305
404 0.00020084601094368237
405 0.0001918773394575719
406 0.00018330640089571468
407 0.00017512440580106175
408 0.0001673072574711471
409 0.0001598418909147052
410 0.0001527088101854413
411 0.00014589655294161425
412 0.00013938752562670705
413 0.00013317093964512728
414 0.00012723386034244185
415 0.00012156460718295122
416 0.00011614663250122728
417 0.00011097167532028333
418 0.00010602732073449191
419 0.00010130518473431265
420 9.679259540148539e-05
421 9.24862811251911e-05
422 8.836871447764766e-05
423 8.443683063991418e-05
424 8.06804763123714e-05
425 7.709101245881192e-05
426 7.36607814295043e-05
427 7.038472718944876e-05
428 6.725511056135858e-05
429 6.426497596804871e-05
430 6.140784741925166e-05
431 5.8679940976154904e-05
432 5.60732374482131e-05
433 5.358197052252656e-05
434 5.1201733814445826e-05
435 4.8927940133492e-05
436 4.675512018608551e-05
437 4.467970907308357e-05
438 4.2696746540563186e-05
439 4.080204120135986e-05
4

#### 这次我们使用pytorch 中的nn这个库来构建网络。用pytorch autograd 来构建计算图和计算gradients,然后pytorch会自动帮我们计算gradient

In [50]:
import torch.nn as nn
N,D_in,H,D_out = 64,1000,100,10  #64维的数据 1000层的输入 100是中间层 10是输出层
# 随机训练一些数据
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)

model = torch.nn.Sequential( # 构建模型
    torch.nn.Linear(D_in,H), # w_1*x + b_1  线性层
    torch.nn.ReLU(), # 隐藏层
    torch.nn.Linear(H,D_out), # 一个线性层
)

loss_fn = nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for it in range(500):
    #Forward pass
    y_pred = model(x) # 定义模型
    
    # compute loss
    loss = loss_fn(y_pred,y) # computation graph  寻找损失函数
    print(it,loss.item())
    
    model.zero_grad() #每次对grad进行清0 
    
    #Backward pass 
    loss.backward() #返回隐藏层 继续迭代
    
    #update weights of w1 and w2  更新权重与阈值
    with torch.no_grad():
        for param in model.parameters(): #自身进行迭代
            param -= learning_rate * param.grad

0 708.3529052734375
1 707.78369140625
2 707.2152099609375
3 706.6475830078125
4 706.0809936523438
5 705.5150146484375
6 704.9498291015625
7 704.3856201171875
8 703.8226928710938
9 703.2606811523438
10 702.6993408203125
11 702.1386108398438
12 701.5784912109375
13 701.0191650390625
14 700.4606323242188
15 699.9027099609375
16 699.3457641601562
17 698.7896728515625
18 698.2343139648438
19 697.6795043945312
20 697.1258544921875
21 696.5729370117188
22 696.0213012695312
23 695.4711303710938
24 694.921875
25 694.3734130859375
26 693.82568359375
27 693.278076171875
28 692.7316284179688
29 692.1864624023438
30 691.6425170898438
31 691.0997924804688
32 690.5579833984375
33 690.0169677734375
34 689.4769287109375
35 688.9374389648438
36 688.398681640625
37 687.860595703125
38 687.3232421875
39 686.7870483398438
40 686.2529907226562
41 685.7197265625
42 685.1868896484375
43 684.6546630859375
44 684.123046875
45 683.5923461914062
46 683.0623168945312
47 682.5328369140625
48 682.0039672851562
49 68

470 510.54425048828125
471 510.22882080078125
472 509.9132995605469
473 509.5981750488281
474 509.2832336425781
475 508.96856689453125
476 508.6540832519531
477 508.3397521972656
478 508.0257873535156
479 507.71160888671875
480 507.39776611328125
481 507.0845031738281
482 506.771240234375
483 506.45855712890625
484 506.1472473144531
485 505.8358459472656
486 505.5245666503906
487 505.2135314941406
488 504.90264892578125
489 504.5924072265625
490 504.28265380859375
491 503.9732666015625
492 503.6639404296875
493 503.354736328125
494 503.0457763671875
495 502.7371520996094
496 502.42864990234375
497 502.1207580566406
498 501.8130187988281
499 501.5054626464844
