# Problem 3
## 1D CNN forward and backward propagation with numpy to replicate PyTorch
If $y$ is the output after filtering $x$ with $w$ and

$N$ = Number of samples <br>
$C_{in}$ = Number of input channels = Kernel depth<br>
$C_{out}$ = Number of output channels = Number of filters<br>
$l_{in}$ = Input width <br>
$l_{out}$ = $\left\lfloor \frac{l_{in} - K}{s} \right\rfloor + 1$  = Output width<br>
$K$ = Kernel length <br>
$s$ = Stride <br>

$x.shape = (N,C_{in}, l_{in})$ <br>
$w.shape = (C_{out},C_{in},K)$ <br>
$y.shape = (N,C_{out}, l_{out})$ <br>

then $y(n,c,i)$ is the $i$-th output of the $c$-th output channel for the $n$-th sample (all indices start at 1):
$$ y(n,c,i) = \Big[ \sum_{k = 1}^{K} \sum_{j=1}^{C_{in}} w(c,j,k)\, x(n,j,s(i-1) + k)\Big] + b(c) $$

$$ \frac{\partial L}{\partial b(c)} = \sum_{n = 1}^{N} \sum_{i = 1}^{l_{out}} \frac{\partial L}{\partial y(n,c,i)} \frac{\partial  y(n,c,i)}{\partial b(c)} = \sum_{n = 1}^{N} \sum_{i = 1}^{l_{out}} \frac{\partial L}{\partial y(n,c,i)} $$

$$ \frac{\partial L}{\partial w(c^\prime,j^\prime, k^\prime)} =  \sum_{n = 1}^{N} \sum_{i = 1}^{l_{out}} \frac{\partial  L} {\partial  y(n,c^\prime,i)} \frac{\partial  y(n,c^\prime,i)}{\partial w(c^\prime,j^\prime, k^\prime)} = \sum_{n = 1}^{N} \sum_{i = 1}^{l_{out}} \frac{\partial  L} {\partial  y(n,c^\prime,i)}  x(n,j^\prime,s(i-1) + k^\prime) $$

$$ \frac{\partial L}{\partial x(n^\prime,j^\prime, m)} = \sum_{c = 1}^{C_{out}} \sum_{i = 1}^{l_{out}} \frac{\partial L}{\partial y(n^\prime,c, i)} \frac{\partial y(n^\prime,c, i)}{\partial x(n^\prime,j^\prime, m)} = \sum_{c = 1}^{C_{out}} \sum_{i = 1}^{l_{out}} \sum_{k = 1}^{K} \mathbb{1}\big[s(i-1)+k = m\big]\, \frac{\partial L}{\partial y(n^\prime,c, i)}\, w(c,j^{\prime},k) $$
where $\mathbb{1}[\cdot]$ is 1 when the condition holds and 0 otherwise, i.e. only the output positions $i$ and kernel taps $k$ that actually read input position $m$ contribute.

In [13]:
#%% layers.py
#%%
import numpy as np
#%%
class Conv1D:
    """1-D convolution layer (no padding, no dilation) implemented with NumPy.

    Shapes, with ``N`` samples and input width ``l_in``:

    * input       ``(N, in_channel, l_in)``
    * weights W   ``(out_channel, in_channel, kernal_size)``
    * bias b      ``(out_channel,)``
    * output      ``(N, out_channel, l_out)`` with
      ``l_out = (l_in - kernal_size) // stride + 1``

    The spelling ``kernal_size`` is kept because it is part of the public
    interface (constructor argument and attribute name).
    """

    def __init__(self, in_channel, out_channel, kernal_size, stride):
        """Create the layer and randomly initialise ``W`` and ``b``.

        Args:
            in_channel: number of input channels (kernel depth).
            out_channel: number of output channels (number of filters).
            kernal_size: kernel length.
            stride: step between consecutive kernel applications.

        Raises:
            ValueError: if any argument is smaller than 1.
        """
        if min(in_channel, out_channel, kernal_size, stride) < 1:
            raise ValueError(
                "in_channel, out_channel, kernal_size and stride must all be >= 1"
            )

        self.in_channel = in_channel
        self.out_channel = out_channel
        self.kernal_size = kernal_size
        self.stride = stride

        # Both parameters are drawn from U(-sqrt(k), sqrt(k)) with
        # k = 1 / (in_channel * kernal_size).
        self.k = 1/(self.in_channel*self.kernal_size)
        bound = np.sqrt(self.k)
        # W is drawn before b on purpose: keeping the draw order means a
        # seeded np.random stream produces the same parameters as before.
        self.W = np.random.uniform(
            -bound, bound, (self.out_channel, self.in_channel, self.kernal_size)
        )
        self.b = np.random.uniform(-bound, bound, self.out_channel)

        # Set by __call__; backward() needs the cached input.
        self.data = None

    def _tap_columns(self, k):
        """Return the slice of input positions read by kernel tap ``k``.

        Output position ``i`` reads input position ``stride * i + k``, so the
        slice has exactly ``out_width`` elements.
        """
        last = k + self.stride * (self.out_width - 1)
        return slice(k, last + 1, self.stride)

    def __call__(self, data):
        """Run the forward pass and cache the input for ``backward``.

        Args:
            data: array-like of shape ``(N, in_channel, l_in)``.

        Returns:
            Array of shape ``(N, out_channel, l_out)``; also stored in
            ``self.fprop``.

        Raises:
            ValueError: if ``data`` is not 3-D, its channel count differs from
                ``in_channel`` (which would otherwise broadcast silently), or
                it is narrower than the kernel.
        """
        data = np.asarray(data)
        if data.ndim != 3:
            raise ValueError(
                f"expected input of shape (N, C_in, l_in), got {data.shape}"
            )
        samples, channels, in_width = data.shape
        if channels != self.in_channel:
            raise ValueError(
                f"expected {self.in_channel} input channels, got {channels}"
            )
        if in_width < self.kernal_size:
            raise ValueError(
                f"input width {in_width} is smaller than kernel size "
                f"{self.kernal_size}"
            )

        self.data = data
        self.samples = samples
        self.in_width = in_width
        self.out_width = ((self.in_width-self.kernal_size)//self.stride)+1

        self.fprop = np.zeros((self.samples, self.out_channel, self.out_width))
        # One vectorised contraction per kernel tap instead of a Python loop
        # over every (sample, filter, position) triple.
        for k in range(self.kernal_size):
            window = self.data[:, :, self._tap_columns(k)]        # (N, C_in, l_out)
            self.fprop += np.einsum('oj,njl->nol', self.W[:, :, k], window)
        self.fprop += self.b[np.newaxis, :, np.newaxis]
        return self.fprop

    def backward(self, delta):
        """Compute gradients of the loss w.r.t. bias, weights and input.

        Args:
            delta: upstream gradient dL/dy, shape
                ``(N, out_channel, l_out)`` matching the last forward output.

        Returns:
            Tuple ``(db, dW, dX)`` with the shapes of ``b``, ``W`` and the
            cached input; the same arrays are stored on ``self``.

        Raises:
            RuntimeError: if called before a forward pass.
            ValueError: if ``delta`` does not match the forward output shape.
        """
        if getattr(self, "data", None) is None:
            raise RuntimeError("backward() called before the forward pass")

        delta = np.asarray(delta)
        expected = (self.samples, self.out_channel, self.out_width)
        if delta.shape != expected:
            raise ValueError(
                f"expected delta of shape {expected}, got {delta.shape}"
            )

        self.dW = np.zeros(self.W.shape)
        self.db = np.zeros(self.b.shape)
        self.dX = np.zeros(self.data.shape)

        # dL/db(c): sum of delta over samples and output positions.
        self.db += delta.sum(axis=(0, 2))

        for k in range(self.kernal_size):
            cols = self._tap_columns(k)
            # dL/dW(c, j, k) = sum_{n,i} delta[n, c, i] * x[n, j, stride*i + k]
            self.dW[:, :, k] = np.einsum('nol,njl->oj', delta, self.data[:, :, cols])
            # dL/dx(n, j, stride*i + k) += sum_c delta[n, c, i] * W[c, j, k].
            # Within one tap the strided positions are distinct, so the
            # in-place add on the slice accumulates correctly.
            self.dX[:, :, cols] += np.einsum('nol,oj->njl', delta, self.W[:, :, k])

        return self.db, self.dW, self.dX

In [14]:
#%% main_file.py
import numpy as np
# Torch Library
import torch
import torch.nn as nn
from torch.autograd import Variable 
# My Library
import layers as my
#%% Create Layers
# Seed NumPy before anything random happens: the custom layer's parameters,
# the input x1 and the upstream gradient delta all come from np.random, so
# the order of the statements below determines the exact values compared.
np.random.seed(10)
net1 = my.Conv1D(8,12,3,2)          # mylib: in=8, out=12, kernel=3, stride=2
net2 = torch.nn.Conv1d(8,12,3,2)    # torch: same configuration
#%% Initialize Layers
x1 = np.random.rand(3,8,20)                             # mylib: (N, C_in, l_in)
# torch.autograd.Variable is deprecated; a tensor created with
# requires_grad=True is the direct equivalent and still exposes x2.grad.
x2 = torch.tensor(x1, requires_grad=True)               # torch
# Copy the NumPy layer's parameters into the torch layer so both networks
# compute with identical weights and biases.
net2.weight = nn.Parameter(torch.tensor(net1.W))        # torch
net2.bias = nn.Parameter(torch.tensor(net1.b))          # torch
#%% Forward Propagation 
y_mylib = net1(x1)                                      # mylib
y_torch = net2(x2)                                      # torch
y_torch_np = y_torch.detach().numpy()                   # torch
#%%
# Random upstream gradient dL/dy with the same shape as the forward output.
b , c, w = y_mylib.shape
delta = np.random.randn(b,c,w) 
db_mylib, dW_mylib, dX_mylib = net1.backward(delta)
#%%
# Feed the same upstream gradient through autograd and read back the
# gradients w.r.t. weights, bias and input.
y_torch.backward(torch.tensor(delta))
dW_torch = net2.weight.grad.detach().numpy()
db_torch = net2.bias.grad.detach().numpy()
dX_torch = x2.grad.detach().numpy()
#%% Compare
def compare(x, y):
    """Print the largest element-wise absolute difference between x and y.

    Returns None; the value is only written to standard output.
    """
    largest_gap = abs(x - y).max()
    print(largest_gap)
#%%
# Print the largest absolute deviation between the NumPy layer and PyTorch,
# in this order: forward output, dX, dW, db.
for mine, reference in (
    (y_mylib, y_torch_np),
    (dX_mylib, dX_torch),
    (dW_mylib, dW_torch),
    (db_mylib, db_torch),
):
    compare(mine, reference)

2.220446049250313e-16
3.3306690738754696e-16
5.329070518200751e-15
1.7763568394002505e-15
