In [1]:
import torch
import numpy
from torch.autograd import Variable

In [2]:
# Build an empty Variable and show that its runtime type is simply torch.Tensor
a = Variable()
for shown in (a, type(a)):
    print(shown)

tensor([])
<class 'torch.Tensor'>


In [3]:
# Wrap an existing tensor in a Variable: b_ is the source tensor, b the Variable built from it
b_ = torch.tensor(
    [[1, 2],
     [3, 4]]
)
b = Variable(b_)
print(b)

tensor([[1, 2],
        [3, 4]])


In [4]:
# Inspect the three core attributes of a Variable, in this order:
#   data    - the tensor values held by the variable
#   grad    - the gradient accumulated for the variable
#   grad_fn - the operation that produced the variable (recorded for backpropagation)
for field in ("data", "grad", "grad_fn"):
    print(getattr(b, field))

tensor([[1, 2],
        [3, 4]])
None
None


In [5]:
# Define three variables x, w and b holding 1, 10 and 5.
# w and b are created with requires_grad=True so gradients are computed for them;
# x keeps the default requires_grad=False.
x = Variable(torch.Tensor([1]))
w = Variable(torch.Tensor([10]), requires_grad=True)
b = Variable(torch.Tensor([5]), requires_grad=True)

# Show them in creation order
for leaf in (x, w, b):
    print(leaf)

tensor([1.])
tensor([10.], requires_grad=True)
tensor([5.], requires_grad=True)


In [16]:
# Gradients are accumulated across backward() calls rather than overwritten,
# so clear the gradients of w and b before this pass.
# Until the first backward() has run, .grad is None, so the reset must be guarded;
# otherwise this cell fails when executed on a fresh kernel.
for leaf in (w, b):
    if leaf.grad is not None:
        leaf.grad.zero_()

z = w * x          # z is created by the multiplication; it requires grad because w does
y = 3 * z + 2 * b  # y is created by this expression and likewise requires grad

# z is an intermediate (non-leaf) variable: autograd does not keep its .grad,
# so a hook is registered to print the gradient as it flows through z.
z.register_hook(print)

y.backward()  # backpropagate from y, filling in .grad on the leaf variables

# Only leaves created with requires_grad=True (w and b) end up with a gradient;
# y and z are non-leaf, and x does not require grad, so their .grad stays None.
print("y's grad:{}".format(y.grad))
print("z's grad:{}".format(z.grad))
print("b's grad:{}".format(b.grad))
print("w's grad:{}".format(w.grad))
print("x's grad:{}".format(x.grad))

tensor([3.])
y's grad:None
z's grad:None
b's grad:tensor([2.])
w's grad:tensor([3.])
x's grad:None


In [7]:
# grad_fn reveals how each variable was produced: y by an add, z by a mul
for node in (y, z):
    print(node.grad_fn)

<AddBackward0 object at 0x7f15702bc278>
<MulBackward0 object at 0x7f14e0e705c0>


In [19]:
# Clear the gradients accumulated by earlier backward() calls.
# .grad is None until a backward pass has populated it, so guard the reset.
for leaf in (w, b):
    if leaf.grad is not None:
        leaf.grad.zero_()

z = w * x
y = 3 * z + 2 * b
z.register_hook(print)  # print the gradient flowing through the intermediate variable z

# Pass a tensor of value 2 to backward(): the gradients of w, b and z are all scaled by 2.
# The argument can be read as the gradient handed to y by the operation that would
# consume y. It is a plain constant and does not itself need requires_grad.
y.backward(torch.tensor([2.]))

print("y's grad: {}".format(y.grad))
print("z's grad: {}".format(z.grad))
print("b's grad: {}".format(b.grad))
print("w's grad: {}".format(w.grad))
print("x's grad: {}".format(x.grad))

tensor([6.])
y's grad: None
z's grad: None
b's grad: tensor([4.])
w's grad: tensor([6.])
x's grad: None


In [23]:
# Autograd on a vector: c is a 1-D tensor with 3 elements and y2 is the
# element-wise double of c, so y2 has the same shape as c.
c = Variable(torch.randn(3), requires_grad=True)
print("c: {}".format(c))

y2 = 2 * c
print(y2)

# y2 is not a scalar, so backward() needs one upstream gradient per element of y2
upstream = torch.FloatTensor([1, 0.1, 0.01])
y2.backward(upstream)
print(c.grad)

c: tensor([-1.0005, -1.3656,  1.0616], requires_grad=True)
tensor([-2.0009, -2.7313,  2.1232], grad_fn=<MulBackward0>)
tensor([2.0000, 0.2000, 0.0200])


In [33]:
# Autograd on matrices: d is 2x3, f is 3x2, and y3 is their 2x2 matrix product.
d = Variable(torch.randn(2, 3), requires_grad=True)
print("d: {}".format(d))
f = Variable(torch.randn(3, 2), requires_grad=True)
print("f: {}".format(f))

y3 = torch.mm(d, f)
print("y3: {}".format(y3))

# The tensor given to backward() must have the same shape as y3; it acts as the
# gradient of y3 that is propagated back towards d and f.
upstream = torch.FloatTensor([[1, 1], [1, 1]])
y3.backward(upstream)

print("d's grad: {}".format(d.grad))
print("f's grad: {}".format(f.grad))

d: tensor([[-0.6421,  0.7586,  0.9024],
        [-0.2263,  1.1835,  0.1213]], requires_grad=True)
f: tensor([[-0.1451,  1.8430],
        [ 0.5539, -0.9989],
        [ 2.8473, -2.2800]], requires_grad=True)
y3: tensor([[ 3.0827, -3.9986],
        [ 1.0339, -1.8760]], grad_fn=<MmBackward>)
d's grad: tensor([[ 1.6979, -0.4450,  0.5673],
        [ 1.6979, -0.4450,  0.5673]])
f's grad: tensor([[-0.8684, -0.8684],
        [ 1.9421,  1.9421],
        [ 1.0237,  1.0237]])


In [34]:
# Move the variable onto the first CUDA GPU when one is available; otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

g = f.to(device)
print(g)

tensor([[-0.1451,  1.8430],
        [ 0.5539, -0.9989],
        [ 2.8473, -2.2800]], device='cuda:0', grad_fn=<CopyBackwards>)
