In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import cv2

In [2]:
class conv2d(nn.Module):
    """Convolve every channel of an image batch with a fixed 3x3 identity kernel.

    The kernel is zero everywhere except for a 1 at its centre, and the
    convolution uses ``padding=1``, so the output has the same shape and values
    as the input.
    """

    def __init__(self):
        super().__init__()
        identity = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], np.float32)
        # A non-persistent buffer follows the module through .to()/.double()
        # (a plain tensor attribute would not) while staying out of state_dict().
        self.register_buffer(
            "kernel",
            torch.as_tensor(identity.reshape(1, 1, 3, 3)),
            persistent=False,
        )

    def forward(self, x):
        """Filter each channel of ``x`` independently with the identity kernel.

        Args:
            x: tensor laid out as (batch, channels, height, width).

        Returns:
            Tensor of the same shape as ``x``.
        """
        channels = x.shape[1]
        # One copy of the kernel per channel; groups=channels keeps the
        # channels separate instead of mixing them.
        weight = self.kernel.repeat(channels, 1, 1, 1)
        return F.conv2d(x, weight, padding=1, groups=channels)

In [3]:
class edge_detect(nn.Module):
    """Collapse a 3-channel image to one channel, then apply a 3x3 edge kernel.

    The channel weights (0.299, 0.587, 0.114) are applied to channels 0, 1, 2
    in that order; they match the usual R, G, B luma coefficients, so the
    input is presumably expected in RGB order (confirm against the caller).
    The edge kernel has 8 at its centre and -1 elsewhere, so its coefficients
    sum to zero and flat regions map to 0.
    """

    def __init__(self):
        super().__init__()
        edge_kernel = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], np.float32)
        gray_weights = np.array([0.299, 0.587, 0.114], np.float32)
        # Non-persistent buffers follow the module through .to()/.double()
        # (plain tensor attributes would not) while staying out of state_dict().
        self.register_buffer(
            "edge", torch.as_tensor(edge_kernel.reshape(1, 1, 3, 3)), persistent=False
        )
        self.register_buffer(
            "gray", torch.as_tensor(gray_weights.reshape(1, 3, 1, 1)), persistent=False
        )

    def forward(self, x):
        """Return the edge response of ``x``.

        Args:
            x: tensor laid out as (batch, 3, height, width).

        Returns:
            Tensor of shape (batch, 1, height, width); ``padding=1`` keeps the
            spatial size, with zeros assumed outside the image border.
        """
        # Weighted sum over the channel axis -> single-channel image.
        gray = torch.sum(x * self.gray, dim=1, keepdim=True)
        return F.conv2d(gray, self.edge, padding=1)

In [4]:
class Padding(nn.Module):
    """Demonstrate zero padding by running an identity 3x3 convolution.

    Each spatial dimension of the output is ``2 * padding - 2`` larger than
    the input; the original image sits in the middle of a zero border.
    """

    def __init__(self, padding=10):
        """
        Args:
            padding: amount of zero padding added to every side before the
                3x3 convolution. Defaults to 10.
        """
        super().__init__()
        self.padding = padding
        identity = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], np.float32)
        # A non-persistent buffer follows the module through .to()/.double()
        # (a plain tensor attribute would not) while staying out of state_dict().
        self.register_buffer(
            "kernel",
            torch.as_tensor(identity.reshape(1, 1, 3, 3)),
            persistent=False,
        )

    def forward(self, x):
        """Pad and filter each channel of ``x`` independently.

        Args:
            x: tensor laid out as (batch, channels, height, width).

        Returns:
            Tensor of shape (batch, channels, height + 2*padding - 2,
            width + 2*padding - 2).
        """
        channels = x.shape[1]
        # One copy of the kernel per channel; groups=channels keeps the
        # channels separate instead of mixing them.
        weight = self.kernel.repeat(channels, 1, 1, 1)
        return F.conv2d(x, weight, padding=self.padding, groups=channels)

In [5]:
class Stride(nn.Module):
    """Demonstrate strided convolution using an identity 3x3 kernel.

    No padding is applied, so the output samples the input at the centre of
    every 3x3 window, with consecutive windows ``stride`` pixels apart.
    """

    def __init__(self, stride=3):
        """
        Args:
            stride: step between consecutive kernel positions. Defaults to 3.
        """
        super().__init__()
        self.stride = stride
        identity = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], np.float32)
        # A non-persistent buffer follows the module through .to()/.double()
        # (a plain tensor attribute would not) while staying out of state_dict().
        self.register_buffer(
            "kernel",
            torch.as_tensor(identity.reshape(1, 1, 3, 3)),
            persistent=False,
        )

    def forward(self, x):
        """Subsample each channel of ``x`` independently.

        Args:
            x: tensor laid out as (batch, channels, height, width).

        Returns:
            Tensor of shape (batch, channels, (height - 3)//stride + 1,
            (width - 3)//stride + 1).
        """
        channels = x.shape[1]
        # One copy of the kernel per channel; groups=channels keeps the
        # channels separate instead of mixing them.
        weight = self.kernel.repeat(channels, 1, 1, 1)
        return F.conv2d(x, weight, stride=self.stride, groups=channels)

In [6]:
class Pooling(nn.Module):
    """Apply 2x2, stride-2 max pooling and average pooling to the same input."""

    def __init__(self):
        super().__init__()
        window, step = 2, 2
        self.Maxpool = nn.MaxPool2d(kernel_size=window, stride=step)
        self.Avgpool = nn.AvgPool2d(kernel_size=window, stride=step)

    def forward(self, x):
        """Return the pair ``(max_pooled, avg_pooled)`` computed from ``x``."""
        pooled = tuple(layer(x) for layer in (self.Maxpool, self.Avgpool))
        return pooled

In [7]:
image = cv2.imread("test.jpg")
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None, not by raising.
    raise FileNotFoundError("cv2.imread could not read 'test.jpg'")
# OpenCV decodes to BGR channel order; the grayscale weights and
# torchvision.utils.save_image used below treat channels as RGB.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# HWC uint8 -> CHW float32 scaled to [0, 1], then add a leading batch axis.
array = np.asarray(image_rgb, np.float32).transpose([2, 0, 1]) / 255.0
tensor = torch.as_tensor(np.expand_dims(array, axis=0))
print(image.shape)

(721, 1089, 3)


In [9]:
# Bind the instance to its own name: `conv2d = conv2d()` would overwrite the
# class with its instance and make this cell fail when run a second time.
identity_conv = conv2d()
output = identity_conv(tensor)
torchvision.utils.save_image(output[0], "conv2d.jpg")

In [10]:
# Edge-detect the loaded image and save the first (only) batch item.
edge = edge_detect()
output = edge(tensor)
torchvision.utils.save_image(output[0], "edge.jpg")

In [11]:
# Run the padding demo on the loaded image and save the first batch item.
pad = Padding()
output = pad(tensor)
torchvision.utils.save_image(output[0], "padding.jpg")

In [12]:
# Run the stride demo on the loaded image and save the first batch item.
stride = Stride()
output = stride(tensor)
torchvision.utils.save_image(output[0], "stride.jpg")

In [13]:
# Pool the loaded image both ways and save the first batch item of each result.
pool = Pooling()
max_pool, avg_pool = pool(tensor)
torchvision.utils.save_image(max_pool[0], "Max_pooling.jpg")
torchvision.utils.save_image(avg_pool[0], "Avg_pooling.jpg")

In [14]:
def im2col(input,
           filter_height: int,
           filter_width: int,
           stride: int,
           padding: int,
          ):
    """Unfold sliding filter windows of a batch into the rows of a 2-D matrix.

    Args:
        input: array laid out as (N, H, W, C).
        filter_height: window height.
        filter_width: window width.
        stride: step between consecutive windows along H and W.
        padding: number of zeros added on each side of H and W.

    Returns:
        float64 array of shape
        (N * out_height * out_width, filter_height * filter_width * C).
        Each row is one window, flattened in (filter row, filter column,
        channel) order.
    """
    batch, in_h, in_w, channels = input.shape

    # Number of window positions along each spatial axis.
    out_height = (in_h + 2*padding - filter_height)//stride + 1
    out_width = (in_w + 2*padding - filter_width)//stride + 1

    # Zero-pad the two spatial axes only; batch and channel axes are untouched.
    spatial_pad = (padding, padding)
    padded = np.pad(input,
                    [(0, 0), spatial_pad, spatial_pad, (0, 0)],
                    'constant')

    patches = np.zeros((batch,
                        filter_height,
                        filter_width,
                        out_height,
                        out_width,
                        channels,
                       ))
    row_span = stride*out_height
    col_span = stride*out_width
    # For every offset inside the window, copy the strided grid of pixels that
    # sit at that offset across all window positions.
    for dy, dx in np.ndindex(filter_height, filter_width):
        patches[:, dy, dx] = padded[:, dy:dy + row_span:stride, dx:dx + col_span:stride, :]

    # Bring the window-position axes forward, then flatten each window to a row.
    return patches.transpose(0, 3, 4, 1, 2, 5).reshape(batch*out_height*out_width, -1)

In [15]:
class Convolution(object):
    """NumPy 2-D convolution layer built on ``im2col`` (channels-last layout).

    Attributes:
        W: filter bank laid out as (filter count, filter height, filter width,
            channels).
        b: bias added to the result of the matrix product.
        stride: step between consecutive filter positions.
        padding: zeros added on each side of the spatial axes.
    """

    def __init__(self, W, b, stride: int = 1, padding: int = 0):
        self.W = W  # Weight
        self.b = b  # Bias
        self.stride = stride
        self.padding = padding

    def forward(self, x):
        """Convolve ``x`` with the stored filters.

        Args:
            x: array laid out as (N, H, W, C).

        Returns:
            Array of shape (N, out_height, out_width, filter count).

        Raises:
            ValueError: if the channel count of ``x`` differs from that of the
                filters.
        """
        # Filter Number, Filter Height, Filter Width, Filter Channels
        FN, FH, FW, FC = self.W.shape
        N, H, W, C = x.shape
        if C != FC:
            # Fail early with a readable message instead of an opaque
            # shape-alignment error from np.dot further down.
            raise ValueError(
                f"input has {C} channel(s) but the filters expect {FC} channel(s)"
            )
        out_height, out_width = self._shape(H, W, FH, FW)

        # Each row of `col` is one flattened window; each column of `col_w`
        # is one flattened filter, so the product yields one value per
        # (window, filter) pair.
        col = im2col(x, FH, FW, self.stride, self.padding)
        col_w = self.W.reshape(FN, -1).T
        out = np.dot(col, col_w) + self.b

        return out.reshape(N, out_height, out_width, -1)

    def _shape(self, H: int, W: int, FH: int, FW: int):
        """Return ``(out_height, out_width)`` for the given input/filter sizes.

        Uses integer floor division, the same formula ``im2col`` applies.
        """
        out_height = (H + 2*self.padding - FH) // self.stride + 1
        out_width = (W + 2*self.padding - FW) // self.stride + 1
        return out_height, out_width

In [16]:
image = cv2.imread("test.jpg")
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None, not by raising.
    raise FileNotFoundError("cv2.imread could not read 'test.jpg'")
# Keep only channel 0 (blue, since OpenCV decodes to BGR) and add batch and
# channel axes so the array is laid out as (1, H, W, 1).
image = image[np.newaxis, :, :, :1]
print(image.shape)

(1, 721, 1089, 1)


In [17]:
# 3x3 edge kernel: 8 at the centre, -1 for every neighbour.
kernel = np.array([[-1, -1, -1],
                   [-1, 8, -1],
                   [-1, -1, -1],])
# Reshape to (filter count, filter height, filter width, channels).
kernel = kernel.reshape(1, kernel.shape[0], kernel.shape[1], 1)
bias = np.zeros(1)

# Use a dedicated name so the `conv2d` class defined earlier is not rebound
# to an unrelated object.
edge_conv = Convolution(kernel, bias)
out = edge_conv.forward(image)

print(out.shape)

# The responses are float64 and can fall outside 0-255; clamp and convert
# explicitly instead of relying on the encoder's implicit conversion.
cv2.imwrite("out.jpg", np.clip(out[0], 0, 255).astype(np.uint8))

(1, 719, 1087, 1)


True

In [18]:
# 5x5 edge kernel: 24 at the centre, -1 for each of the 24 neighbours.
kernel = np.ones((1, 5, 5, 1)) * -1
kernel[:, 2, 2, :] = 24

bias = np.zeros(1)

# Use a dedicated name so the `conv2d` class defined earlier is not rebound
# to an unrelated object.
edge_conv_5x5 = Convolution(kernel, bias, stride=1)
out = edge_conv_5x5.forward(image)

print(out.shape)

# The responses are float64 and can fall outside 0-255; clamp and convert
# explicitly instead of relying on the encoder's implicit conversion.
cv2.imwrite("edge_manual.jpg", np.clip(out[0], 0, 255).astype(np.uint8))

(1, 717, 1085, 1)


True