<a href="https://colab.research.google.com/github/menasiraziz/Convnet/blob/strided-convolution-derivative/convnet2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np

In [0]:
from numpy.lib.stride_tricks import as_strided


def strided_convolution3D(image, weight, stride):
    """Strided "valid" cross-correlation of a 4-D image batch with a filter bank.

    Parameters
    ----------
    image : 4-D array indexed (batch, channel, row, col).
    weight : 4-D array indexed (out_channel, channel, kernel_row, kernel_col).
    stride : int step between neighbouring windows, used on both spatial axes.

    Returns
    -------
    4-D array indexed (batch, out_channel, out_row, out_col), where every
    entry is the sum over channels and kernel positions of window * filter.
    """
    batch, channels, height, width = image.shape
    _, _, k_h, k_w = weight.shape
    s_batch, s_chan, s_row, s_col = image.strides

    out_h = (height - k_h) // stride + 1
    out_w = (width - k_w) // stride + 1

    # Zero-copy view of every kernel-sized window: the two extra trailing axes
    # walk inside a window, while axes 2 and 3 jump from window to window.
    patches = as_strided(
        image,
        shape=(batch, channels, out_h, out_w, k_h, k_w),
        strides=(s_batch, s_chan, s_row * stride, s_col * stride, s_row, s_col),
    )
    return np.einsum('mcopjk,ecjk->meop', patches, weight)

def mx_pool(img,s):
  """Locate the maximum of each s-by-s pooling window of a 4-D batch.

  The search runs in two stages over the row-major (C-order) flattening of
  ``img``: first the best element of every run of ``s`` horizontal
  neighbours, then the best of those winners across ``s`` stacked rows.

  img: 4-D array; the window arithmetic uses the size of the last axis.
       Assumes square images whose side is divisible by ``s`` -- TODO confirm.
  s:   pooling window size (and stride) along both spatial axes.

  Returns a 1-D array of indices into ``img.flatten()``, one per window.
  """
  _,_,_,n=img.shape
  flat=img.flatten()

  # Stage 1: winner inside every horizontal run of s consecutive elements.
  col_winner=flat.reshape(-1,s).argmax(axis=1)
  n_runs=col_winner.shape[0]
  run_max_idx=np.ravel_multi_index((np.arange(n_runs),col_winner),(n_runs,s))
  run_max=flat[run_max_idx]

  # Stage 2: stack s image rows of run maxima and pick the winning row
  # for every window column.
  stacked=run_max.reshape(-1,s,int(n/s))
  row_winner=stacked.argmax(axis=1)
  n_blocks,_,per_row=stacked.shape

  # Position of each winner inside run_max: block offset + row offset + column.
  block_ids=np.arange(n_blocks)[:,np.newaxis]
  col_ids=np.arange(per_row)
  picks=block_ids*n+per_row*row_winner+col_ids
  return run_max_idx[picks.ravel()]


import numpy as np
from numpy.lib.stride_tricks import as_strided


def strided_convolution3D1_g(image, weight, stride):
    """Per-filter strided correlation that keeps the filter's second axis.

    Axis 1 of ``image`` is paired with axis 0 of ``weight`` (it is not
    summed), so the result carries one map per (image channel, weight axis-1)
    pair.

    Parameters
    ----------
    image : 4-D array indexed (batch, e, row, col).
    weight : 4-D array indexed (e, c, kernel_row, kernel_col).
    stride : int step between neighbouring windows on both spatial axes.

    Returns
    -------
    5-D array indexed (batch, e, c, out_row, out_col).
    """
    batch, channels, height, width = image.shape
    _, _, k_h, k_w = weight.shape
    s_batch, s_chan, s_row, s_col = image.strides

    window_shape = (
        batch,
        channels,
        (height - k_h) // stride + 1,
        (width - k_w) // stride + 1,
        k_h,
        k_w,
    )
    window_strides = (s_batch, s_chan, s_row * stride, s_col * stride, s_row, s_col)
    patches = as_strided(image, shape=window_shape, strides=window_strides)

    # Only the kernel axes (j, k) are contracted; e is matched, c is kept.
    return np.einsum('meopjk,ecjk->mecop', patches, weight)

def strided_convolution3D_grad1(image, weight, stride):
    """Correlate each sample's windows with that same sample's kernel stack.

    Unlike a regular convolution, the leading axis of ``weight`` is the batch
    axis: sample ``m`` of ``image`` is only combined with ``weight[m]``.

    Parameters
    ----------
    image : 4-D array indexed (batch, c, row, col).
    weight : 4-D array indexed (batch, e, kernel_row, kernel_col).
    stride : int step between neighbouring windows on both spatial axes.

    Returns
    -------
    5-D array indexed (batch, e, c, out_row, out_col).
    """
    batch, channels, height, width = image.shape
    _, _, k_h, k_w = weight.shape
    s_batch, s_chan, s_row, s_col = image.strides

    rows_out = (height - k_h) // stride + 1
    cols_out = (width - k_w) // stride + 1

    patches = as_strided(
        image,
        shape=(batch, channels, rows_out, cols_out, k_h, k_w),
        strides=(s_batch, s_chan, s_row * stride, s_col * stride, s_row, s_col),
    )
    # Contract the kernel axes only; batch is matched, c and e are both kept.
    return np.einsum('mcopjk,mejk->mecop', patches, weight)


In [0]:
class ConvLayer:
  """Strided 2-D convolution (cross-correlation) layer without bias or padding.

  Attributes:
    filters: array indexed (out_ch, in_ch, kernel_row, kernel_col).
    stride:  int window step used on both spatial axes.
    out:     result of the most recent feedforward call.
    df:      gradient of the loss w.r.t. ``filters`` from the most recent
             grad call (same shape as ``filters``).
  """
  def __init__(self,in_ch=1,out_ch=1,kernel=(2,2),stride=1):
    """Draw standard-normal filters of shape (out_ch, in_ch, kernel[0], kernel[1])."""
    self.filters=np.random.randn(out_ch,in_ch,kernel[0],kernel[1])
    self.stride=stride

  def feedforward(self,x):
    """Convolve x (batch, in_ch, H, W) with the filters; cache and return the result."""
    self.out=strided_convolution3D(x,self.filters,self.stride)
    return self.out

  def grad(self,x,loss_grad):
    """Back-propagate ``loss_grad`` through the layer.

    With out[m,e,o,p] = sum_{c,j,k} x[m,c,s*o+j,s*p+k] * filters[e,c,j,k]:
      df[e,c,j,k]         = sum_{m,o,p} loss_grad[m,e,o,p] * x[m,c,s*o+j,s*p+k]
      dx[m,c,s*o+j,s*p+k] += sum_e loss_grad[m,e,o,p] * filters[e,c,j,k]

    Both sums are accumulated one kernel offset (j, k) at a time, which is
    valid for any stride and kernel size. The previous implementation
    hard-coded stride 2 / kernel 2 for the input gradient and used
    contiguous windows for ``df``, which is only correct for stride 1.

    Args:
      x:         layer input, indexed (batch, in_ch, H, W).
      loss_grad: gradient w.r.t. the layer output, indexed
                 (batch, out_ch, out_h, out_w).

    Returns:
      Gradient w.r.t. ``x`` with the same shape as ``x``. Input positions
      never covered by a window (when H - kernel is not a multiple of the
      stride) receive zero. ``self.df`` is updated as a side effect.
    """
    s=self.stride
    f_h,f_w=self.filters.shape[2:]
    g_h,g_w=loss_grad.shape[2:]
    self.df=np.zeros(self.filters.shape)
    dx=np.zeros(x.shape)
    for j in range(f_h):
      # Input rows touched by kernel row j: one per output row, s apart.
      rows=slice(j,j+s*(g_h-1)+1,s)
      for k in range(f_w):
        cols=slice(k,k+s*(g_w-1)+1,s)
        self.df[:,:,j,k]=np.einsum('mcop,meop->ec',x[:,:,rows,cols],loss_grad)
        dx[:,:,rows,cols]+=np.einsum('meop,ec->mcop',loss_grad,self.filters[:,:,j,k])
    return dx

class FC_Layer:
  """Fully connected linear layer without bias: out = x @ W.

  Attributes:
    W:   weight matrix of shape (i_dim, out_dim).
    dw:  gradient of the loss w.r.t. ``W``; zeros until grad is called.
    out: result of the most recent feedforward call.
  """
  def __init__(self,i_dim,out_dim):
    """Draw standard-normal weights and allocate a zero weight gradient."""
    self.W=np.random.randn(i_dim,out_dim)
    self.dw=np.zeros(self.W.shape)

  def feedforward(self,x):
    """Return x @ W for x of shape (batch, i_dim); the result is cached in ``out``."""
    self.out=np.dot(x,self.W)
    return self.out

  def grad(self,x,loss_grad):
    """Back-propagate ``loss_grad`` through the layer.

    Args:
      x:         layer input of shape (batch, i_dim).
      loss_grad: gradient w.r.t. the layer output, shape (batch, out_dim).

    Returns:
      Gradient w.r.t. ``x`` with shape (batch, i_dim, 1). The trailing unit
      axis is kept so existing callers that reshape the result keep working.
      Contributions are summed over the output units, so the value is the
      true input gradient for any ``out_dim``; for ``out_dim == 1`` it
      equals ``W * loss_grad[:, np.newaxis]``.
      ``self.dw`` is set to x.T @ loss_grad as a side effect.
    """
    self.dw=np.dot(x.T,loss_grad)
    return np.dot(loss_grad,self.W.T)[:,:,np.newaxis]

class Network:
  """Tiny conv -> fully-connected regression network used for gradient checks.

  Attributes:
    n:     side length of the square input images.
    in_ch: number of input channels.
    m:     batch size produced by gen_images.
    f:     side length of the square convolution kernel.
    cs:    convolution stride.
    o_ch:  number of convolution output channels.
    conv1: the ConvLayer.
    FC1:   the FC_Layer mapping the flattened conv output to one value.
  """
  def __init__(self):
    self.n=6
    self.in_ch=1
    self.m=1
    self.f=2
    self.cs=2
    self.o_ch=1

    self.conv1=ConvLayer(in_ch=self.in_ch,out_ch=self.o_ch,kernel=(self.f,self.f),stride=self.cs)
    cos=(self.n-self.f)//self.cs+1  # spatial side of the conv output
    # feedforward flattens every output channel, so the FC input size must
    # include o_ch (identical to cos*cos while o_ch == 1).
    self.FC1=FC_Layer(self.o_ch*cos*cos,1)

  def gen_images(self):
    """Return a standard-normal batch of shape (m, in_ch, n, n)."""
    return np.random.randn(self.m,self.in_ch,self.n,self.n)

  def feedforward(self,x):
    """Run conv1, flatten per sample, run FC1; returns shape (batch, 1)."""
    c1=self.conv1.feedforward(x)
    f1=self.FC1.feedforward(c1.reshape(c1.shape[0],-1))
    return f1

  def backpropagate(self,x,y,yhat):
    """Print and return the analytic gradient of the loss w.r.t. ``x``.

    Relies on ``self.conv1.out`` cached by a preceding feedforward(x) call.

    Args:
      x:    the input batch that produced ``yhat``.
      y:    targets, same shape as ``yhat``.
      yhat: network output for ``x``.

    Returns:
      The gradient returned by conv1.grad (previously it was only printed).
    """
    loss_grad=2*(yhat-y)  # derivative of sum((y-yhat)**2) w.r.t. yhat
    conv_out=self.conv1.out
    gradF1=self.FC1.grad(conv_out.reshape(loss_grad.shape[0],-1),loss_grad).reshape(conv_out.shape)
    print(gradF1.shape)
    gradC1=self.conv1.grad(x,gradF1)
    print(gradC1.shape)
    print(gradC1)
    return gradC1

  def loss(self,y,yhat):
    """Sum of squared errors between y and yhat."""
    return np.sum(np.square(y-yhat))

  def num_grad(self,x,y,eps=1e-4):
    """Central-difference estimate of the loss gradient w.r.t. every entry of x.

    Args:
      x:   input batch.
      y:   targets for the batch.
      eps: perturbation size; the default matches the former hard-coded 1e-4.

    Returns:
      Array with the shape of ``x`` holding
      (loss(x + eps*e_i) - loss(x - eps*e_i)) / (2*eps) for each entry i.
    """
    bump=np.zeros(x.shape)
    dw=np.zeros(x.shape)
    for idx in np.ndindex(*x.shape):
      bump[idx]=eps
      yhat1=self.feedforward(x+bump)
      yhat2=self.feedforward(x-bump)
      dw[idx]=(self.loss(y,yhat1)-self.loss(y,yhat2))/(2*eps)
      bump[idx]=0  # restore so only one entry is perturbed at a time
    return dw

In [70]:
# Gradient check: print the analytic input gradient from backpropagate and
# then the central-difference estimate from num_grad so they can be compared.
np.random.seed(100)  # fixed seed so filters, weights, image and target repeat
nn=Network()
img=nn.gen_images()*10  # scale the random image by 10
y=np.random.randn(img.shape[0],1)  # one random target per sample
yhat=nn.feedforward(img)  # also caches conv1.out, which backpropagate reads
nn.backpropagate(img,y,yhat)
print(nn.num_grad(img,y))

(1, 1, 3, 3)
(1, 1, 6, 6)
[[[[ 43.17498986  -8.45554628  22.62399162  -4.4307644    9.73120116
     -1.90579366]
   [-28.45084661   6.22879093 -14.9084393    3.26392929  -6.41252985
      1.40390577]
   [-47.07849926   9.22002369  -8.33721315   1.63279     11.21925156
     -2.19721883]
   [ 31.02312625  -6.79194437   5.49393928  -1.20279722  -7.39310435
      1.61858457]
   [-20.15172889   3.94658752  19.14580781  -3.7495843  -25.6763239
      5.02854421]
   [ 13.27930243  -2.90725966 -12.61643472   2.76213693  16.91982222
     -3.70428468]]]]
[[[[ 43.17498986  -8.45554628  22.62399162  -4.4307644    9.73120115
     -1.90579365]
   [-28.45084661   6.22879093 -14.9084393    3.26392929  -6.41252985
      1.40390577]
   [-47.07849926   9.22002369  -8.33721315   1.63279     11.21925156
     -2.19721883]
   [ 31.02312625  -6.79194437   5.49393928  -1.20279722  -7.39310435
      1.61858457]
   [-20.15172889   3.94658752  19.14580781  -3.7495843  -25.6763239
      5.02854421]
   [ 13.27930243

In [23]:
# Stand-alone look at FC_Layer.grad on a small random batch.
np.random.seed(0)
n=5  # input features
lay=FC_Layer(n,1)
m=3  # batch size
x=np.random.randn(m,n)
y=np.random.randn(m,1)
yhat=lay.feedforward(x)
loss_grad=2*(yhat-y)  # derivative of the squared error w.r.t. yhat
lay.grad(x,loss_grad).reshape(-1,n)  # one row of input gradients per sample

array([[ 5.63290227,  1.27776619,  3.12526747,  7.15553164,  5.9634124 ],
       [ 7.15809189,  1.62373984,  3.97147876,  9.09299515,  7.57809241],
       [-2.72464564, -0.61805796, -1.51169788, -3.4611444 , -2.88451403]])

In [0]:
# Display the current loss gradient for inspection.
loss_grad

array([[5.45294691],
       [4.98561353],
       [2.94279091]])

In [0]:
# Broadcast W against the per-sample loss gradient (a new axis is inserted
# after the batch axis) and flatten the product for inspection.
ll=lay.W*loss_grad[:,np.newaxis]
ll.flatten()

array([9.61928379, 2.18203601, 8.79488324, 1.99502919, 5.1912372 ,
       1.17757899])

In [0]:
# Scale W by the loss gradient of the first sample only.
lay.W*loss_grad[0]

array([[9.61928379],
       [2.18203601]])

In [0]:
# Test data for the padding experiments: m images of n x n holding 1..m*n*n.
m=10
n=32
a=np.arange(m*n*n).reshape(m,n,n)+1

In [0]:
# Zero-pad by one pixel on every side: allocate the larger zero array and
# copy the images into its interior.
b=np.zeros((m,n+2,+n+2))
b[:,1:-1,1:-1]=a

In [50]:
# IPython magic: time the interior slice assignment used for zero padding.
%timeit b[:,1:-1,1:-1]=a

100000 loops, best of 3: 16.5 µs per loop


In [62]:
# 1-D version of the zero-insertion trick: write the values to every second
# slot of a zero buffer of length 2*len(a)+1, leaving zeros between the
# values and at both ends.
m=1
n=4
a=np.arange(4)+1
b=np.zeros(9)
b[1:-1:2]=a
b

array([0., 1., 0., 2., 0., 3., 0., 4., 0.])

In [65]:
# 2-D version of the zero-insertion trick: each n x n image is spread over a
# (2n+1) x (2n+1) zero grid, occupying every second row and column.
m=1
n=4
a=np.arange(m*n*n).reshape(m,n,n)+1
b=np.zeros((m,2*n+1,2*n+1))
b[:,1:-1:2,1:-1:2]=a
b

array([[[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  2.,  0.,  3.,  0.,  4.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  5.,  0.,  6.,  0.,  7.,  0.,  8.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  9.,  0., 10.,  0., 11.,  0., 12.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0., 13.,  0., 14.,  0., 15.,  0., 16.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]]])

In [66]:
# Display the source array for comparison with the zero-interleaved result.
a

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12],
        [13, 14, 15, 16]]])