## 如何学好pytorch
 - 学好深度学习的基础知识
 - 学好PyTorch官方tutorial
 - 学习Github以及各种博客上的教程（别人创建好的list)
 - 阅读documentation,使用论坛https://discuss.pytorch.org/
 - 跑通以及学习开源PyTorch项目
 - 阅读深度学习模型paper,学习别人的模型实现
 - 通过阅读paper,自己实现模型
 - 自己创造模型（也可一些paper)

In [1]:
import torch 
import time
import numpy as np

In [2]:
# Time a large matrix multiplication on the CPU.
# print(torch.cuda.is_available())  # False on this machine (no CUDA device)

a = torch.randn(10000, 1000)
b = torch.randn(1000, 2000)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
t0 = time.perf_counter()
c = torch.matmul(a, b)
t1 = time.perf_counter()
print(a.device, t1 - t0, c.norm(2))

# GPU variant (requires a CUDA device): move the inputs, repeat the
# multiplication, and wait for the kernel to finish before reading the clock.
# device = torch.device('cuda')
# a = a.to(device)
# b = b.to(device)
# t0 = time.perf_counter()
# c = torch.matmul(a, b)
# torch.cuda.synchronize()
# t2 = time.perf_counter()
# print(a.device, t2 - t0, c.norm(2))

cpu 0.3857862949371338 tensor(140395.5156)


In [3]:
# Differentiate a simple formula with autograd.
from torch import autograd

# x is a plain constant; a, b, c are the leaves we differentiate with respect to.
x = torch.tensor(1.)
a, b, c = (torch.tensor(v, requires_grad=True) for v in (1., 2., 3.))

# Formula: y = a^2 * x + b * x + c
y = a.pow(2) * x + b * x + c
print('before: ', a.grad, b.grad, c.grad)
# autograd.grad returns the gradients as a tuple (dy/da, dy/db, dy/dc).
grads = autograd.grad(y, [a, b, c])
print('after :', *grads)


before:  None None None
after : tensor(2.) tensor(1.) tensor(1.)


# 安装

1. Anaconda
2. CUDA
        测试：nvcc -V
3. PyTorch安装
        官网： http://pytorch.org
4. pycharm

MNIST 

        each number owns 7000 images
        train/test 60k/10k

In [4]:
a=torch.randn(2,3) # random samples from the standard normal distribution
a

tensor([[ 0.6103, -0.1381,  0.5713],
        [ 0.9627,  1.7847,  0.3872]])

In [8]:
# Three ways to read the dimensions of `a` (defined in an earlier cell).
print(a.shape)  # the full shape
print(a.size(0))  # size of dim 0
print(a.shape[1])  # size of dim 1

torch.Size([2, 3])
2
3


In [17]:
# Build a (1, 2, 3) tensor of uniformly distributed random values and inspect it.
a = torch.rand(1, 2, 3)
print(a)
print(a[0])  # indexing dim 0 drops that dimension

# .shape and .size() report the same information.
print(f'a.shape: {a.shape}')
print(f'a.shape[0]: {a.shape[0]}')
print(f'a.size(0): {a.size(0)}')
print(list(a.shape))
print(a.numel())  # total number of elements: 1 * 2 * 3
print(a.dim())  # number of dimensions

tensor([[[0.6048, 0.1525, 0.4328],
         [0.2932, 0.4741, 0.8417]]])
tensor([[0.6048, 0.1525, 0.4328],
        [0.2932, 0.4741, 0.8417]])
a.shape: torch.Size([1, 2, 3])
a.shape[0]: 1
a.size(0): 1
[1, 2, 3]
6
3


## 创建tensor

In [30]:
# Create a tensor from a numpy array.
import numpy as np

a=np.array([2,3.3])
print(torch.from_numpy(a))

print('\n' + '-'*30 + '\n')

# torch.tensor vs torch.FloatTensor:
# tensor() (lowercase) takes the data itself, e.g. a list.
# FloatTensor() (capitalised) takes a shape (d1, d2); the contents are uninitialized.

print(torch.tensor([2., 3.2]))
print(torch.FloatTensor(2,3))

print('\n' + '-'*30 + '\n')

# Uninitialized tensors:
# overwrite them with your own data before use; the contents are not zeroed
# and can be arbitrary values, including nan or inf.
print(torch.empty(1))
print(torch.Tensor(2,3))
print(torch.IntTensor(2,3))
print(torch.FloatTensor(2,3))

tensor([2.0000, 3.3000], dtype=torch.float64)

------------------------------

tensor([2.0000, 3.2000])
tensor([[4.6359e-26, 3.0889e-41, 2.7201e+23],
        [2.1625e+00, 0.0000e+00, 0.0000e+00]])

------------------------------

tensor([4.4278e-26])
tensor([[4.4539e-26, 3.0889e-41, 2.7201e+23],
        [2.1625e+00, 0.0000e+00, 0.0000e+00]])
tensor([[ 358237424,      22043, -454521616],
        [     32744,          0,          0]], dtype=torch.int32)
tensor([[4.6359e-26, 3.0889e-41, 2.7201e+23],
        [2.1625e+00, 0.0000e+00, 0.0000e+00]])


### set default type

```python
torch.tensor([1.2,3]).type()  # 'torch.FloatTensor'
torch.set_default_tensor_type(torch.DoubleTensor)
```

### rand  rand_like randint 

In [36]:
# rand  rand_like randint 

# rand: uniform samples from the half-open interval [0, 1)
print(torch.rand(3,3))
a = torch.rand(3,3)

print('\n' + '-'*30 + '\n')

# rand_like: uniform samples with the same shape as `a`
print(torch.rand_like(a))

print('\n' + '-'*30 + '\n')

# randint(low, high, shape): random integers; `high` itself is excluded
torch.randint(1,10,[3,3])

tensor([[0.6990, 0.6400, 0.3875],
        [0.2574, 0.2591, 0.1052],
        [0.7650, 0.9496, 0.2416]])

------------------------------

tensor([[0.7429, 0.5448, 0.5964],
        [0.2916, 0.4956, 0.0968],
        [0.7257, 0.4174, 0.3393]])

------------------------------



tensor([[8, 2, 3],
        [6, 1, 3],
        [8, 8, 7]])

### full randn arange range

In [40]:
# torch.full(size, fill_value): a tensor of the given shape filled with one value.
# The fill value is written as a float (7.) so the results are floating-point
# tensors, matching the output shown below; current PyTorch infers the dtype
# from the fill value, so an integer 7 would give integer tensors instead.
matrix = torch.full([2, 3], 7.)
print(matrix)

scalar = torch.full([], 7.)  # empty shape -> 0-dim scalar
print(scalar)

vector = torch.full([1], 7.)  # shape [1] -> one-element vector
print(vector)

tensor([[7., 7., 7.],
        [7., 7., 7.]])
tensor(7.)
tensor([7.])


In [41]:
# randn: samples from the standard normal distribution N(0, 1).
print(torch.randn(3, 3))

# torch.normal with per-element mean and std: ten samples with mean 0 and
# std 1.0, 0.9, ..., 0.1. The mean tensor must be floating point, so the fill
# value is written as 0. rather than the integer 0 (which would produce an
# integer tensor that normal() cannot sample into).
samples = torch.normal(mean=torch.full([10], 0.), std=torch.arange(1, 0, -0.1))
samples

tensor([[-0.4870, -0.0583, -0.8997],
        [-1.0850,  1.5969, -1.7848],
        [ 0.3626,  1.8062,  0.7521]])


tensor([ 0.5630, -0.0539, -0.3411, -1.4008, -0.5734,  0.1770,  0.1998, -0.5150,
         0.1741,  0.0118])

In [44]:
# range arange
print(torch.arange(0,10)) # half-open interval [0, 10)
print(torch.arange(0,10,2)) # same interval with a step of 2

# torch.range is not recommended; use arange instead

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([0, 2, 4, 6, 8])


### linspace  logspace

In [45]:
# linspace(start, end, steps): `steps` evenly spaced points from start to end, both endpoints included

torch.linspace(0,10,steps=4)

tensor([ 0.0000,  3.3333,  6.6667, 10.0000])

In [48]:
# logspace(start, end, steps): `steps` points from 10^start to 10^end, evenly spaced in the exponent

torch.logspace(0,-1,steps=4)

tensor([1.0000, 0.4642, 0.2154, 0.1000])

### ones zeros eye对角

In [49]:
# ones() / ones_like(): tensor filled with 1
print(torch.ones(3,3))

# zeros(): tensor filled with 0
print(torch.zeros(3,3))

# eye(): 1 on the main diagonal and 0 elsewhere; the shape need not be square
print(torch.eye(3,4))

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.]])


### randperm

In [52]:
# Similar to random.shuffle.
# randperm(10) returns the ten indices 0..9 in random order (10 itself is excluded).
torch.randperm(10)

a=torch.rand(2,3)
print(a)
idx = torch.randperm(2)
print(idx)
print(a[idx])  # reorder the rows of `a` by the shuffled indices

tensor([[0.2334, 0.6470, 0.3559],
        [0.7644, 0.7731, 0.5610]])
tensor([1, 0])
tensor([[0.7644, 0.7731, 0.5610],
        [0.2334, 0.6470, 0.3559]])


### 索引和切片

In [10]:
# Slicing with `:` is omitted here.

# index_select(dim, index): pick the entries listed in `index` along dimension `dim`

a = torch.rand([4,3,28,28])
print("a.shape: " + str(a.shape))

# b = torch.tensor([0,2])
# a.index_select(0, b).shape   # the index argument must be a tensor

a.index_select(2,torch.arange(28)).shape
# torch.Size([4, 3, 28, 28])

a.index_select(2,torch.arange(8)).shape
# torch.Size([4, 3, 8, 28])

a.shape: torch.Size([4, 3, 28, 28])


torch.Size([4, 3, 8, 28])

In [16]:
# `...` (Ellipsis) stands for "all the dimensions not written out".
# The shapes below assume `a` is the (4, 3, 28, 28) tensor from the previous cell.

a[...].shape
# torch.Size([4, 3, 28, 28])

a[0,...].shape
# torch.Size([3, 28, 28])

a[:,1,...].shape
# torch.Size([4, 28, 28])

a[...,:2].shape
# torch.Size([4, 3, 28, 2])

a[0,...,::2].shape
# torch.Size([3, 28, 14])

torch.Size([3, 28, 14])

In [23]:
# masked_select(x, mask): x is the source tensor, mask is a boolean mask


x = torch.randn(3,4)

mask = x.ge(0.5)  # True where x >= 0.5
# Example from one run (x is random, so values differ every time):
# tensor([[False, False,  True, False],
#         [False,  True,  True, False],
#         [False, False, False, False]])

torch.masked_select(x, mask)  # 1-D tensor holding the selected elements
# Example from one run:
# tensor([0.7614, 1.0010, 0.6398, 1.0508, 1.0553])

torch.masked_select(x, mask).shape

torch.Size([6])

In [24]:
src = torch.tensor([[4,3,5],
                   [6,7,8]])
# take treats src (2, 3) as a flattened tensor of 6 elements and indexes into that
torch.take(src, torch.tensor([0,2]))

tensor([4, 5])

### 维度变换
      
      View/reshape
      Squeeze/unsqueeze　挤压
      Transpose/t/permute
      Expand/repeat　　broadcasting/memory copied

#### view reshape

In [29]:
# view and reshape give the same result here;
# view is the API that has been available since early versions.


# "logic bug" (as marked in the original notes): presumably the point is that
# flattening to (4, 784) drops the (1, 28, 28) layout, so it has to be
# remembered separately to restore the tensor later.
a=torch.rand(4,1,28,28)
a.view(4,784)

tensor([[0.4620, 0.8063, 0.2376,  ..., 0.8099, 0.2015, 0.0292],
        [0.5138, 0.7195, 0.0693,  ..., 0.5622, 0.9657, 0.1176],
        [0.4308, 0.5260, 0.1161,  ..., 0.4229, 0.2340, 0.3305],
        [0.9029, 0.3899, 0.9379,  ..., 0.4319, 0.2347, 0.8156]])

#### squeeze unsqueeze

In [12]:
# squeeze removes dimensions; unsqueeze inserts a new dimension of size 1

a = torch.rand([4,1,28,28])

# Non-negative index: the new dimension is inserted before that position

a.unsqueeze(0).shape
# torch.Size([1, 4, 1, 28, 28])

a.unsqueeze(2).shape
# torch.Size([4, 1, 1, 28, 28])

a.unsqueeze(4).shape
# torch.Size([4, 1, 28, 28, 1])

# Negative index: the new dimension is inserted after that position

a.unsqueeze(-1).shape # -1 refers to the last position

a.unsqueeze(-5).shape
# torch.Size([1, 4, 1, 28, 28])


# Example: give the 32-element vector b a shape that lines up with dim 1 of f
b = torch.rand(32)
f = torch.rand(4,32,14,14)
b = b.unsqueeze(1).unsqueeze(2).unsqueeze(0)
b.shape

torch.Size([1, 32, 1, 1])

In [15]:
# squeeze
# Removes dimensions whose size is 1.
# squeeze() with no argument drops every size-1 dimension.
# squeeze(N) drops only dimension N, and only if its size is 1.

b = torch.rand([1,32,1,1])

b.squeeze().shape
# torch.Size([32])

b.squeeze(0).shape
# torch.Size([32, 1, 1])

b.squeeze(-1).shape
# torch.Size([1, 32, 1])

torch.Size([1, 32, 1])

#### repeat expand

In [6]:

# repeat
b = torch.rand([1,32,1,1])
b.shape
# torch.Size([1, 32, 1, 1])

# The arguments to repeat are the number of repetitions along each dimension
b.repeat(4,32,1,1).shape
# torch.Size([4, 1024, 1, 1])

b.repeat(4,1,1,1).shape
# torch.Size([4, 32, 1, 1])


# expand
# Returns a new view of the tensor without allocating new memory; preferred

a = torch.rand(4,32,14,14)

# b is torch.Size([1, 32, 1, 1])

b.expand(4,32,14,14).shape
# torch.Size([4, 32, 14, 14])

b.expand(-1,32,-1,-1).shape # -1 keeps that dimension's size unchanged
# torch.Size([1, 32, 1, 1])


torch.Size([1, 32, 1, 1])

#### t transpose permute

In [25]:
# t(): matrix transpose; only applies to 2-D tensors.

a=torch.rand(3,4)
a.t().shape
# torch.Size([4, 3])

#  transpose 

a=torch.rand(4,3,32,32)   # B C H W

# Variant without contiguous(), left commented out:
# a1 = a.transpose(1,3).view(4,3*32*32).view(4,3,32,32)
# after transpose(1,3) the layout is 4, 32, 32, 3  (B W H C)

# contiguous() allocates new memory so that the data is laid out contiguously
# a1 views the flattened B-W-H-C data directly as (4, 3, 32, 32): the shape matches `a`, the element order does not
a1 = a.transpose(1,3).contiguous().view(4,3*32*32).view(4,3,32,32)

# a2 views it as (4, 32, 32, 3) first and then transposes back, which restores the original order
a2 = a.transpose(1,3).contiguous().view(4,3*32*32).view(4,32,32,3).transpose(1,3)

a1.shape # [4, 3, 32, 32]
a2.shape # [4, 3, 32, 32]

torch.all(torch.eq(a,a1)) # tensor(False)

torch.all(torch.eq(a,a2)) # tensor(True)

tensor(True)

In [31]:
a = torch.rand(4,3,28,28)

a.transpose(1,3).shape # 4, 28, 28, 3

b = torch.rand(4,3,28,32)
b.transpose(1,3).shape  # 4, 32, 28, 3
b.transpose(1,3).transpose(1,2).shape  # 4, 28, 32, 3

# permute reorders all dimensions in one call; same shape as the two transposes above
b.permute(0,2,3,1).shape # 4, 28, 32, 3

torch.Size([4, 28, 32, 3])

### broadcast 自动扩展

expand    without copying data

        Insert 1 dim ahead
        
        expand dims with size 1 to same size
        
               
for actual demanding 

memory consumption


**match from last dim**
        
        if current dim =1  expand to same
        if either has no dim, insert one dim and expand to same
        otherwise, NOT broadcasting-able

### 拼接

       cat 
       stack

In [37]:
# cat: concatenate along an existing dimension (here dim 0: 4 + 5 = 9)

a = torch.rand(4,32,8)
b = torch.rand(5,32,8)

torch.cat([a,b],dim=0).shape # torch.Size([9, 32, 8])


# stack: creates a new dimension; here both inputs have the same shape

a = torch.rand(32,8)
b = torch.rand(32,8)
torch.stack([a,b],dim=0).shape # torch.Size([2, 32, 8])

torch.Size([2, 32, 8])

### 拆分

    split
    chunk

In [52]:
# split: divide by length.
# A single int gives pieces of that length; a list gives one piece per entry.
c = torch.rand(3, 32, 8)
aa, bb, cc = torch.split(c, 1, dim=0)
a1, b1 = torch.split(c, [2, 1], dim=0)

# chunk: divide by number of pieces.
zz, xx = torch.chunk(c, 2, dim=0)

### 基本运算

torch.add(a,b) +

torch.sub(a,b) -

torch.mul(a,b) *

torch.div(a,b) /

torch.matmul()  矩阵相乘  torch.mm() @

a.pow() 次方

a.sqrt() 平方根

a.rsqrt() 平方根的倒数

torch.log(a) 默认e为底

Approximation：
        
        floor() ceil() trunc() frac()
        round() 近似
        
clamp : gradient clipping
        
        grad.clamp()
        
        

### 统计属性

norm

mean sum min max prod

argmax()  argmin 返回索引

dim  keepdim

topk kthvalue

eq返回tensor equal返回True or False 

### advanced operation

torch.where(condition,x,y) -> tensor

    out = x_i if condition_i else y_i


torch.gather(input, dim, index, out=None) -> Tensor 采集查表操作