# 图像卷积

互相关运算

In [1]:
import sys
sys.path.append('..')

In [2]:
from d2l import mindspore as d2l
from mindspore import nn, ops, value_and_grad

def corr2d(X, K):  
    """计算二维互相关运算。"""
    h, w = K.shape
    Y = ops.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y

验证上述二维互相关运算的输出

In [3]:
X = d2l.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = d2l.tensor([[0.0, 1.0], [2.0, 3.0]])
corr2d(X, K)



Tensor(shape=[2, 2], dtype=Float32, value=
[[ 1.90000000e+01,  2.50000000e+01],
 [ 3.70000000e+01,  4.30000000e+01]])

实现二维卷积层

In [4]:
class Conv2D(nn.Cell):
    def __init__(self, kernel_size):
        super().__init__()
        self.weight = Parameter(d2l.normal((kernel_size), 0, 1))
        self.bias = Parameter(ops.zeros(1))

    def construct(self, x):
        return corr2d(x, self.weight) + self.bias

卷积层的一个简单应用：
检测图像中不同颜色的边缘

In [5]:
X = d2l.ones((6, 8))
X[:, 2:6] = 0
X



Tensor(shape=[6, 8], dtype=Float32, value=
[[ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],
 [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],
 [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],
 [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],
 [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],
 [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00]])

In [6]:
K = d2l.tensor([[1.0, -1.0]])

输出`Y`中的1代表从白色到黑色的边缘，-1代表从黑色到白色的边缘

In [7]:
Y = corr2d(X, K)
Y



Tensor(shape=[6, 7], dtype=Float32, value=
[[ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00]])

卷积核`K`只可以检测垂直边缘

In [8]:
corr2d(d2l.transpose(X), K)



Tensor(shape=[8, 5], dtype=Float32, value=
[[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
 ...
 [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
 [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

学习由`X`生成`Y`的卷积核

In [9]:
conv2d = nn.Conv2d(1,1, kernel_size=(1, 2), has_bias=False, pad_mode='valid')

X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))
lr = 3e-2

loss_fn = nn.MSELoss()
optim = nn.SGD(conv2d.trainable_params(), lr)

# 定义前向传播函数
def forward_fn(x, y):
    z = conv2d(x)
    loss = loss_fn(z, y).mean()
    return loss

# 获取梯度函数
grad_fn = value_and_grad(forward_fn, None, weights=conv2d.trainable_params())

# 定义模型单步训练
def train(X, Y, optim):
    loss, grads = grad_fn(X, Y)
    loss = ops.depend(loss, optim(grads))
    return loss
    
for i in range(10):
    loss = train(X, Y, optim)
    if (i + 1) % 2 == 0:
        print(f'batch {i+1}, loss {loss.asnumpy():.3f}')

batch 2, loss 0.272
batch 4, loss 0.263
batch 6, loss 0.254
batch 8, loss 0.245
batch 10, loss 0.237


所学的卷积核的权重张量

In [10]:
d2l.reshape(conv2d.weight, (1, 2))

Tensor(shape=[1, 2], dtype=Float32, value=
[[ 1.00198783e-01, -9.42235067e-02]])