<a href="https://colab.research.google.com/github/menasiraziz/Convnet/blob/work/convnet2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np

In [0]:
from numpy.lib.stride_tricks import as_strided


def strided_convolution3D(image, weight, stride):
    """Cross-correlate a batch of multi-channel images with a bank of filters.

    image:  array of shape (batch, channels, height, width).
    weight: array of shape (filters, channels, f_h, f_w).
    stride: step between neighbouring windows, applied on both spatial axes.

    Returns an array of shape (batch, filters, out_h, out_w) with
    out_h = 1 + (height - f_h) // stride and out_w = 1 + (width - f_w) // stride.
    No padding is applied.
    """
    batch, channels, height, width = image.shape
    _, _, kernel_h, kernel_w = weight.shape
    out_h = 1 + (height - kernel_h) // stride
    out_w = 1 + (width - kernel_w) // stride

    step_batch, step_chan, step_row, step_col = image.strides
    # Build a view (no copy) in which the last two axes enumerate the pixels
    # of the window anchored at output position (o, p).
    patches = as_strided(
        image,
        shape=(batch, channels, out_h, out_w, kernel_h, kernel_w),
        strides=(step_batch, step_chan, step_row * stride, step_col * stride, step_row, step_col),
    )
    # Contract channels (c) and window pixels (j, k) against every filter (e).
    return np.einsum('mcopjk,ecjk->meop', patches, weight)

def mx_pool(img,s): # relies on the row-major (C) order of numpy arrays
  """Locate the maximum of every non-overlapping s-by-s window of ``img``.

  img: array of shape (m, ch, rows, cols); rows and cols must be multiples of s.
  s:   side length of the square pooling window (it is also the stride).

  Returns a 1-D integer array of positions into ``img.flatten()``, one per
  window, ordered by image, channel, window row and window column.  Ties are
  resolved by ``argmax``: first column inside a strip, then first row inside
  the window.

  Raises ValueError when s is not positive or does not divide both spatial
  sizes; without that check the reshapes below would silently group elements
  that belong to different rows or images.
  """
  m,ch,rows,cols=img.shape
  if s<1 or rows%s or cols%s:
    raise ValueError("pool size %r must be positive and divide the spatial dims %r"%(s,(rows,cols)))

  # Stage 1 (horizontal): cut every image row into strips of s pixels and
  # remember where each strip's maximum lives in the flattened image.
  strips=img.reshape(-1,s)
  strip_arg=np.argmax(strips,axis=1)
  strip_ids=np.arange(strip_arg.shape[0])
  flat_idx=strip_ids*s+strip_arg
  strip_max=strips[strip_ids,strip_arg]

  # Stage 2 (vertical): stack the s strip maxima that sit on top of each other
  # and pick the best row for every window column.
  per_row=cols//s # number of windows across one image row
  stacked=strip_max.reshape(-1,s,per_row)
  row_arg=stacked.argmax(axis=1)
  band=np.arange(stacked.shape[0])[:,np.newaxis] # one entry per band of s image rows
  col=np.arange(per_row)
  # Position of the winning strip inside strip_max / flat_idx; each band of
  # s rows holds s*per_row == cols strips.
  winners=band*cols+per_row*row_arg+col
  return flat_idx[winners.flatten()]


import numpy as np
from numpy.lib.stride_tricks import as_strided


def strided_convolution3D1_g(image, weight, stride):
    """Slide every filter slice over the matching channel of ``image``.

    image:  array of shape (m, e, height, width); its channel axis is paired
            with the FIRST axis of ``weight``.
    weight: array of shape (e, c, f_h, f_w).
    stride: step between neighbouring windows on both spatial axes.

    Returns an array of shape (m, e, c, out_h, out_w).  Nothing is summed
    over e here; only the window pixels (j, k) are contracted.
    """
    n_img, n_chan, height, width = image.shape
    _, _, k_h, k_w = weight.shape
    window_shape = (
        n_img,
        n_chan,
        1 + (height - k_h) // stride,
        1 + (width - k_w) // stride,
        k_h,
        k_w,
    )
    byte_steps = image.strides
    # Outer spatial axes jump by `stride` pixels, inner ones walk the window.
    window_strides = (
        byte_steps[0],
        byte_steps[1],
        byte_steps[2] * stride,
        byte_steps[3] * stride,
        byte_steps[2],
        byte_steps[3],
    )
    patches = as_strided(image, shape=window_shape, strides=window_strides)
    return np.einsum('meopjk,ecjk->mecop', patches, weight)

def strided_convolution3D_grad1(image, weight, stride):
    """Correlate each image with its own per-sample kernel stack.

    image:  array of shape (m, c, height, width).
    weight: array of shape (m, e, w_h, w_w); its first axis is paired with the
            batch axis of ``image``, so sample i is only combined with
            weight[i].
    stride: step between neighbouring windows on both spatial axes.

    Returns an array of shape (m, e, c, out_h, out_w) where
    out_h = 1 + (height - w_h) // stride and likewise for out_w.
    """
    samples, chans, height, width = image.shape
    _, _, win_h, win_w = weight.shape
    rows_out = 1 + (height - win_h) // stride
    cols_out = 1 + (width - win_w) // stride

    s0, s1, s2, s3 = image.strides
    # Zero-copy view whose trailing two axes run over the pixels of a window.
    patches = as_strided(
        image,
        shape=(samples, chans, rows_out, cols_out, win_h, win_w),
        strides=(s0, s1, s2 * stride, s3 * stride, s2, s3),
    )
    return np.einsum('mcopjk,mejk->mecop', patches, weight)


In [0]:
class ConvLayer:
  """2-D convolution layer (cross-correlation, no padding, no bias).

  filters has shape (out_ch, in_ch, kernel[0], kernel[1]) and is drawn from a
  standard normal distribution.
  """
  def __init__(self,in_ch=1,out_ch=1,kernel=(2,2),stride=1):
    self.filters=np.random.randn(out_ch,in_ch,kernel[0],kernel[1])
    self.stride=stride

  def feedforward(self,x):
    """Convolve x (batch, in_ch, h, w) with the filters; cache the result in self.out.

    Raises ValueError when x.shape[1] differs from the filters' channel count.
    """
    if(x.shape[1]!=self.filters.shape[1]):
      print(x.shape[1])
      print(self.filters.shape[1])
      raise ValueError("channels in input and output are not same")
    self.out=strided_convolution3D(x,self.filters,self.stride)
    return self.out

  def grad(self,x,loss_grad):
    """Back-propagate loss_grad, the gradient w.r.t. this layer's output.

    Stores the filter gradient, summed over the batch, in self.df and
    returns the gradient w.r.t. x.

    NOTE: both helper calls below use stride 1 regardless of self.stride, so
    the result only corresponds to feedforward when self.stride == 1.
    """
    self.df=strided_convolution3D_grad1(x,loss_grad,1).sum(axis=0)
    # Input gradient = "full" correlation of the 180-degree rotated output
    # gradient with the filters.  "Full" needs kernel_size-1 zeros on every
    # side of each spatial axis (a fixed pad of 1 is only right for 2x2 kernels).
    gg=np.rot90(loss_grad,2,axes=(2,3))
    pad_h=self.filters.shape[2]-1
    pad_w=self.filters.shape[3]-1
    gg1=np.pad(gg, ((0, 0),(0, 0),(pad_h, pad_h),(pad_w,pad_w)), 'constant', constant_values=(0))
    ooo=strided_convolution3D1_g(gg1,self.filters,1)
    # Undo the rotation and sum the contributions of all output channels.
    return np.rot90(ooo,2,axes=(3,4)).sum(axis=1)

class FC_Layer:
  """Bias-free fully connected layer computing ``out = x . W``.

  W  : (i_dim, out_dim) weights drawn from a standard normal distribution.
  dw : zero array shaped like W; no method of this class modifies it.
  """
  def __init__(self,i_dim,out_dim):
    weights=np.random.randn(i_dim,out_dim)
    self.W=weights
    self.dw=np.zeros(weights.shape)

  def feedforward(self,x):
    """Return np.dot(x, W) and keep a reference to it in self.out."""
    activation=np.dot(x,self.W)
    self.out=activation
    return activation

  def grad(self,x,loss_grad):
    """Return W scaled by every sample's upstream gradient.

    loss_grad gets a new axis at position 1 and is broadcast against W, so
    for loss_grad of shape (m, out_dim) the result has shape
    (m, i_dim, out_dim).  x is accepted but not used.
    """
    per_sample=loss_grad[:,np.newaxis]
    return np.multiply(self.W,per_sample)

class Network:
  """Toy conv -> conv -> fully-connected network for checking gradients numerically."""
  def __init__(self):
    self.n=5      # input images are n x n
    self.in_ch=2  # channels of the input images
    self.m=2      # number of images produced by gen_images
    self.f1=2     # side of conv1's square kernel
    self.f2=2     # side of conv2's square kernel
    self.cs=1     # stride handed to both conv layers
    self.o_ch=2   # output channels of conv1

    self.conv1=ConvLayer(in_ch=self.in_ch,out_ch=self.o_ch,kernel=(self.f1,self.f1),stride=self.cs)

    # conv2 consumes conv1's output, so its input channels follow o_ch.
    self.conv2=ConvLayer(in_ch=self.o_ch,out_ch=1,kernel=(self.f2,self.f2),stride=self.cs)

    # Side of conv2's output for stride 1 and no padding.
    cos=self.n-self.f1+1-self.f2+1

    self.FC1=FC_Layer(cos*cos,1)

  def gen_images(self):
    """Return m standard-normal images of shape (m, in_ch, n, n)."""
    return np.random.randn(self.m,self.in_ch,self.n,self.n)

  def feedforward(self,x):
    """Run conv1 -> conv2 -> FC1 and return the (batch, 1) prediction."""
    c1=self.conv1.feedforward(x)
    c2=self.conv2.feedforward(c1)
    f1=self.FC1.feedforward(c2.reshape(c2.shape[0],-1))
    return f1

  def backpropagate(self,x,y,yhat):
    """Back-propagate the squared-error loss down to the input images.

    Uses the activations cached by the last feedforward call.  Prints the
    shapes of the two intermediate gradients and the final gradient, and
    returns that final gradient (the value handed back by conv1.grad).
    """
    loss_grad=2*(yhat-y) # derivative of sum((y-yhat)**2) w.r.t. yhat
    gradF1=self.FC1.grad(self.conv2.out.reshape(loss_grad.shape[0],-1),loss_grad).reshape(self.conv2.out.shape)
    print(gradF1.shape)
    gradC1=self.conv2.grad(self.conv1.out,gradF1)
    print(gradC1.shape)
    gradC2=self.conv1.grad(x,gradC1)
    print(gradC2)
    return gradC2

  def loss(self,y,yhat):
    """Sum of squared errors between y and yhat."""
    return np.sum(np.square(y-yhat))

  def num_grad(self,x,y,eps=1e-4):
    """Central-difference estimate of d loss / d x, one element of x at a time.

    eps is the perturbation added to and subtracted from each element.
    Returns an array shaped like x.
    """
    ep=np.zeros(x.shape)
    dw=np.zeros(x.shape)
    for idx in np.ndindex(*x.shape):
      ep[idx]=eps
      yhat1=self.feedforward(x+ep)
      yhat2=self.feedforward(x-ep)
      dw[idx]=(self.loss(y,yhat1)-self.loss(y,yhat2))/(2*eps)
      ep[idx]=0 # reset so only one element is perturbed per step

    return dw

In [0]:
# Gradient check w.r.t. the input images: analytic back-propagation (printed)
# versus the central-difference estimate (shown as the cell result).
# Seeding makes the random filters, weights, images and target reproducible.
np.random.seed(100)
nn=Network()
# Scale the standard-normal images by 10.
img=nn.gen_images()*10
# One random target value per image.
y=np.random.randn(img.shape[0],1)
yhat=nn.feedforward(img)
nn.backpropagate(img,y,yhat)
nn.num_grad(img,y)

(2, 1, 3, 3)
(2, 2, 4, 4)
[[[[   5.2803453    11.0206044   -52.97564152  -30.45846987
      25.05535623]
   [  34.38736682 -121.81627972  195.19616791   84.20951284
      -4.37715752]
   [ -30.70732709  124.87614945   -4.52843334   -7.61742912
     -29.23420921]
   [ -29.90227119   18.09206879 -189.54489264 -100.60413848
       6.28783104]
   [  -3.98338212   16.62597898   72.53878344   36.95708494
       2.92936395]]

  [[ -12.43039288  -53.2927177    15.86903381  115.98102954
      60.07944476]
   [ -85.88490353  129.52220678  119.49321087 -111.53122486
     -80.26590254]
   [  25.84143401  123.83683525 -185.15255034  -74.86400971
     -25.4786504 ]
   [  -9.62560855  -55.21491631   -6.88767672  125.62680477
      91.07041943]
   [  -2.72172206   14.40285173   43.53304386  -74.09981528
     -44.64269   ]]]


 [[[  14.72393541   30.73031367 -147.71949178  -84.93166972
      69.86540196]
   [  95.88717013 -339.67760305  544.29314859  234.81332332
     -12.20544888]
   [ -85.62559361  3

array([[[[   5.2803453 ,   11.0206044 ,  -52.97564152,  -30.45846986,
            25.05535623],
         [  34.38736682, -121.81627972,  195.1961679 ,   84.20951284,
            -4.37715752],
         [ -30.70732709,  124.87614945,   -4.52843334,   -7.61742912,
           -29.23420921],
         [ -29.90227119,   18.09206879, -189.54489264, -100.60413848,
             6.28783104],
         [  -3.98338212,   16.62597898,   72.53878345,   36.95708494,
             2.92936395]],

        [[ -12.43039288,  -53.29271771,   15.86903381,  115.98102953,
            60.07944476],
         [ -85.88490353,  129.52220678,  119.49321087, -111.53122486,
           -80.26590254],
         [  25.84143401,  123.83683525, -185.15255033,  -74.86400971,
           -25.4786504 ],
         [  -9.62560855,  -55.21491631,   -6.88767673,  125.62680477,
            91.07041943],
         [  -2.72172206,   14.40285173,   43.53304386,  -74.09981528,
           -44.64269   ]]],


       [[[  14.72393541,   30.7303

In [0]:
# Exercise FC_Layer on its own: m random samples with n features each and a
# single output unit.
np.random.seed(0)
n=5
lay=FC_Layer(n,1)
m=3
x=np.random.randn(m,n)
y=np.random.randn(m,1)
yhat=lay.feedforward(x)
# Derivative of the squared error (yhat-y)**2 w.r.t. yhat.
loss_grad=2*(yhat-y)
# Flatten the layer's gradient to rows of n values for display.
lay.grad(x,loss_grad).reshape(-1,n)

array([[ 5.63290227,  1.27776619,  3.12526747,  7.15553164,  5.9634124 ],
       [ 7.15809189,  1.62373984,  3.97147876,  9.09299515,  7.57809241],
       [-2.72464564, -0.61805796, -1.51169788, -3.4611444 , -2.88451403]])

In [0]:
# Display loss_grad as the cell result.
loss_grad

array([[5.45294691],
       [4.98561353],
       [2.94279091]])

In [0]:
# Evaluate by hand the same expression FC_Layer.grad returns, then show it flattened.
ll=lay.W*loss_grad[:,np.newaxis]
ll.flatten()

array([9.61928379, 2.18203601, 8.79488324, 1.99502919, 5.1912372 ,
       1.17757899])

In [0]:
# Weights scaled by the first row of loss_grad only.
lay.W*loss_grad[0]

array([[9.61928379],
       [2.18203601]])

In [0]:
class ConvLayer:
  """2-D convolution layer (cross-correlation, no padding, no bias).

  filters has shape (out_ch, in_ch, kernel[0], kernel[1]) and is drawn from a
  standard normal distribution.
  """
  def __init__(self,in_ch=1,out_ch=1,kernel=(2,2),stride=1):
    self.filters=np.random.randn(out_ch,in_ch,kernel[0],kernel[1])
    self.stride=stride

  def feedforward(self,x):
    """Convolve x (batch, in_ch, h, w) with the filters; cache the result in self.out.

    Raises ValueError when x.shape[1] differs from the filters' channel count.
    """
    if(x.shape[1]!=self.filters.shape[1]):
      print(x.shape[1])
      print(self.filters.shape[1])
      raise ValueError("channels in input and output are not same")
    self.out=strided_convolution3D(x,self.filters,self.stride)
    return self.out

  def grad(self,x,loss_grad):
    """Back-propagate loss_grad, the gradient w.r.t. this layer's output.

    Stores the filter gradient, summed over the batch, in self.df and
    returns the gradient w.r.t. x.  Prints the shapes of the rotated
    gradient, the filters, x and the padded gradient as a debugging aid.

    NOTE: both helper calls below use stride 1 regardless of self.stride, so
    the result only corresponds to feedforward when self.stride == 1.
    """
    self.df=strided_convolution3D_grad1(x,loss_grad,1).sum(axis=0)
    gg=np.rot90(loss_grad,2,axes=(2,3))
    print(gg.shape)
    print(self.filters.shape)
    print(x.shape)
    # A "full" correlation needs kernel_size-1 zeros on every side, computed
    # per axis so that non-square kernels are padded correctly.
    pad_h=self.filters.shape[2]-1
    pad_w=self.filters.shape[3]-1
    gg1=np.pad(gg, ((0, 0),(0, 0),(pad_h, pad_h),(pad_w,pad_w)), 'constant', constant_values=(0))
    print(gg1.shape)
    ooo=strided_convolution3D1_g(gg1,self.filters,1)
    # Undo the rotation and sum the contributions of all output channels.
    return np.rot90(ooo,2,axes=(3,4)).sum(axis=1)

  def perturb_f(self,ep):
    """Add ep (broadcastable to filters) to the filters; used for finite differences."""
    self.filters=self.filters+ep

class FC_Layer:
  """Dense layer without bias: the forward pass is a single matrix product.

  W  : (i_dim, out_dim) standard-normal weights.
  dw : zeros with W's shape; it is allocated here and left untouched by the
       methods below.
  """
  def __init__(self,i_dim,out_dim):
    self.W=np.random.randn(i_dim,out_dim)
    shape_of_w=self.W.shape
    self.dw=np.zeros(shape_of_w)

  def feedforward(self,x):
    """Compute np.dot(x, W), remember it as self.out and return it."""
    result=np.dot(x,self.W)
    self.out=result
    return result

  def grad(self,x,loss_grad):
    """Broadcast-multiply W with loss_grad expanded at axis 1.

    With loss_grad of shape (m, out_dim) the product has shape
    (m, i_dim, out_dim).  The x argument is ignored.
    """
    expanded=loss_grad[:,np.newaxis]
    return np.multiply(self.W,expanded)

class Network:
  """Single conv layer followed by a fully-connected layer, for gradient checks.

  This variant has no second conv layer: FC1 reads conv1's output directly.
  """
  def __init__(self):
    self.n=10     # input images are n x n
    self.in_ch=1  # channels of the input images
    self.m=1      # number of images produced by gen_images
    self.f1=5     # side of conv1's square kernel
    self.f2=2     # not used by this variant
    self.cs=1     # stride handed to conv1
    self.o_ch=1   # output channels of conv1

    self.conv1=ConvLayer(in_ch=self.in_ch,out_ch=self.o_ch,kernel=(self.f1,self.f1),stride=self.cs)

    # Side of conv1's output for stride 1 and no padding.
    cos=self.n-self.f1+1

    self.FC1=FC_Layer(self.o_ch*cos*cos,1)

  def gen_images(self):
    """Return m standard-normal images of shape (m, in_ch, n, n)."""
    return np.random.randn(self.m,self.in_ch,self.n,self.n)

  def feedforward(self,x):
    """Run conv1 -> FC1 and return the (batch, 1) prediction."""
    c1=self.conv1.feedforward(x)
    f1=self.FC1.feedforward(c1.reshape(c1.shape[0],-1))
    return f1

  def backpropagate(self,x,y,yhat):
    """Back-propagate the squared-error loss through FC1 and conv1.

    Uses the activation cached by the last feedforward call.  conv1.grad
    stores the filter gradient in conv1.df; the value it returns (gradient
    w.r.t. x) is returned from here as well.
    """
    loss_grad=2*(yhat-y) # derivative of sum((y-yhat)**2) w.r.t. yhat
    gradF1=self.FC1.grad(self.conv1.out.reshape(loss_grad.shape[0],-1),loss_grad).reshape(self.conv1.out.shape)
    gradC1=self.conv1.grad(x,gradF1)
    return gradC1

  def loss(self,y,yhat):
    """Sum of squared errors between y and yhat."""
    return np.sum(np.square(y-yhat))

  def num_grad(self,x,y,eps=1e-4):
    """Central-difference estimate of d loss / d x, one element of x at a time.

    eps is the perturbation added to and subtracted from each element.
    Returns an array shaped like x.
    """
    ep=np.zeros(x.shape)
    dw=np.zeros(x.shape)
    for idx in np.ndindex(*x.shape):
      ep[idx]=eps
      yhat1=self.feedforward(x+ep)
      yhat2=self.feedforward(x-ep)
      dw[idx]=(self.loss(y,yhat1)-self.loss(y,yhat2))/(2*eps)
      ep[idx]=0 # reset so only one element is perturbed per step

    return dw


  def num_grad_df(self,x,y,shape,func,eps=1e-4):
    """Central-difference estimate of d loss / d parameters.

    shape: shape of the parameter array being probed.
    func:  callable that adds its argument to that parameter array in place
           on the layer (e.g. a layer's perturb_f).
    eps:   perturbation applied to one parameter at a time.

    Returns an array of the given shape.
    """
    ep=np.zeros(shape)
    dw=np.zeros(shape)
    for idx in np.ndindex(*shape):
      ep[idx]=eps
      func(ep)      # parameters at +eps
      yhat1=self.feedforward(x)
      func(-2*ep)   # parameters at -eps
      yhat2=self.feedforward(x)
      func(ep)      # back to the starting value
      dw[idx]=(self.loss(y,yhat1)-self.loss(y,yhat2))/(2*eps)
      ep[idx]=0

    return dw

In [41]:
# Gradient check w.r.t. conv1's filters: the analytic value stored in
# nn.conv1.df by backpropagate (printed) versus the finite-difference
# estimate from num_grad_df (shown as the cell result).
np.random.seed(100)
nn=Network()
# Scale the standard-normal images by 10.
img=nn.gen_images()*10
# One random target value per image.
y=np.random.randn(img.shape[0],1)
yhat=nn.feedforward(img)
nn.backpropagate(img,y,yhat)
#nn.num_grad(img,y)
print(nn.conv1.df)
nn.num_grad_df(img,y,nn.conv1.filters.shape,nn.conv1.perturb_f)

(1, 1, 6, 6)
(1, 1, 5, 5)
(1, 1, 10, 10)
(1, 1, 14, 14)
[[[[-23053.69936473  63146.05844429  12830.43335873   3979.68508413
    -14160.9395958 ]
   [ 15516.96692606 -13676.67067759  56553.33360765    751.82189775
     22403.46531044]
   [-69693.4555025  -23662.24816785 -28713.66433329  70401.07392722
    -19347.166547  ]
   [ -5970.00131025 -99294.5850287  -30632.01954132  16074.04770328
    -17880.10346259]
   [  2492.88399743 -22059.715852    21206.59547414 -65963.10665187
     31390.73986857]]]]


array([[[[-23053.69936468,  63146.05844396,  12830.43335883,
            3979.68508405, -14160.93959582],
         [ 15516.96692564, -13676.67067752,  56553.33360781,
             751.82189765,  22403.46531027],
         [-69693.45550213, -23662.24816789, -28713.66433326,
           70401.07392677, -19347.16654701],
         [ -5970.00131049, -99294.58502877, -30632.01954115,
           16074.04770337, -17880.10346303],
         [  2492.88399747, -22059.71585172,  21206.59547429,
          -65963.10665191,  31390.73986858]]]])

In [0]:
class ConvLayer:
  """2-D convolution layer (cross-correlation, no padding, no bias).

  filters has shape (out_ch, in_ch, kernel[0], kernel[1]) and is drawn from a
  standard normal distribution.
  """
  def __init__(self,in_ch=1,out_ch=1,kernel=(2,2),stride=1):
    self.filters=np.random.randn(out_ch,in_ch,kernel[0],kernel[1])
    self.stride=stride

  def feedforward(self,x):
    """Convolve x (batch, in_ch, h, w) with the filters; cache the result in self.out.

    Raises ValueError when x.shape[1] differs from the filters' channel count.
    """
    if(x.shape[1]!=self.filters.shape[1]):
      print(x.shape[1])
      print(self.filters.shape[1])
      raise ValueError("channels in input and output are not same")
    self.out=strided_convolution3D(x,self.filters,self.stride)
    return self.out

  def grad(self,x,loss_grad):
    """Back-propagate loss_grad, the gradient w.r.t. this layer's output.

    Stores the filter gradient, summed over the batch, in self.df and
    returns the gradient w.r.t. x.

    NOTE: both helper calls below use stride 1 regardless of self.stride, so
    the result only corresponds to feedforward when self.stride == 1.
    """
    self.df=strided_convolution3D_grad1(x,loss_grad,1).sum(axis=0)
    gg=np.rot90(loss_grad,2,axes=(2,3))
    # A "full" correlation needs kernel_size-1 zeros on every side, computed
    # per axis so that non-square kernels are padded correctly.
    pad_h=self.filters.shape[2]-1
    pad_w=self.filters.shape[3]-1
    gg1=np.pad(gg, ((0, 0),(0, 0),(pad_h, pad_h),(pad_w,pad_w)), 'constant', constant_values=(0))
    ooo=strided_convolution3D1_g(gg1,self.filters,1)
    # Undo the rotation and sum the contributions of all output channels.
    return np.rot90(ooo,2,axes=(3,4)).sum(axis=1)

  def perturb_f(self,ep):
    """Add ep (broadcastable to filters) to the filters; used for finite differences."""
    self.filters=self.filters+ep

class FC_Layer:
  """Linear layer with no bias term (``out = x . W``).

  W  : weights of shape (i_dim, out_dim), sampled from a standard normal.
  dw : array of zeros with the same shape as W; none of the methods here
       write to it.
  """
  def __init__(self,i_dim,out_dim):
    initial=np.random.randn(i_dim,out_dim)
    self.W=initial
    self.dw=np.zeros(initial.shape)

  def feedforward(self,x):
    """Store np.dot(x, W) in self.out and return it."""
    self.out=projected=np.dot(x,self.W)
    return projected

  def grad(self,x,loss_grad):
    """Return W multiplied, with broadcasting, by loss_grad[:, np.newaxis].

    For loss_grad of shape (m, out_dim) the result has shape
    (m, i_dim, out_dim).  x does not take part in the computation.
    """
    upstream=loss_grad[:,np.newaxis]
    return np.multiply(self.W,upstream)

class Network:
  """Toy conv -> conv -> fully-connected network for checking gradients numerically."""
  def __init__(self):
    self.n=6      # input images are n x n
    self.in_ch=1  # channels of the input images
    self.m=1      # number of images produced by gen_images
    self.f1=3     # side of conv1's square kernel
    self.f2=3     # side of conv2's square kernel
    self.cs=1     # stride handed to both conv layers
    self.o_ch=1   # output channels of conv1

    self.conv1=ConvLayer(in_ch=self.in_ch,out_ch=self.o_ch,kernel=(self.f1,self.f1),stride=self.cs)

    # conv2 consumes conv1's output, so its input channels follow o_ch.
    self.conv2=ConvLayer(in_ch=self.o_ch,out_ch=1,kernel=(self.f2,self.f2),stride=self.cs)

    # Side of conv2's output for stride 1 and no padding.
    cos=self.n-self.f1+1-self.f2+1

    self.FC1=FC_Layer(cos*cos,1)

  def gen_images(self):
    """Return m standard-normal images of shape (m, in_ch, n, n)."""
    return np.random.randn(self.m,self.in_ch,self.n,self.n)

  def feedforward(self,x):
    """Run conv1 -> conv2 -> FC1 and return the (batch, 1) prediction."""
    c1=self.conv1.feedforward(x)
    c2=self.conv2.feedforward(c1)
    f1=self.FC1.feedforward(c2.reshape(c2.shape[0],-1))
    return f1

  def backpropagate(self,x,y,yhat):
    """Back-propagate the squared-error loss down to the input images.

    Uses the activations cached by the last feedforward call.  Each conv
    layer's grad call stores its filter gradient in that layer's df.  Prints
    the shapes of the two intermediate gradients and returns the value
    handed back by conv1.grad.
    """
    loss_grad=2*(yhat-y) # derivative of sum((y-yhat)**2) w.r.t. yhat
    gradF1=self.FC1.grad(self.conv2.out.reshape(loss_grad.shape[0],-1),loss_grad).reshape(self.conv2.out.shape)
    print(gradF1.shape)
    gradC1=self.conv2.grad(self.conv1.out,gradF1)
    print(gradC1.shape)
    gradC2=self.conv1.grad(x,gradC1)
    return gradC2

  def loss(self,y,yhat):
    """Sum of squared errors between y and yhat."""
    return np.sum(np.square(y-yhat))

  def num_grad(self,x,y,eps=1e-4):
    """Central-difference estimate of d loss / d x, one element of x at a time.

    eps is the perturbation added to and subtracted from each element.
    Returns an array shaped like x.
    """
    ep=np.zeros(x.shape)
    dw=np.zeros(x.shape)
    for idx in np.ndindex(*x.shape):
      ep[idx]=eps
      yhat1=self.feedforward(x+ep)
      yhat2=self.feedforward(x-ep)
      dw[idx]=(self.loss(y,yhat1)-self.loss(y,yhat2))/(2*eps)
      ep[idx]=0 # reset so only one element is perturbed per step

    return dw

  def num_grad_df(self,x,y,shape,func,eps=1e-4):
    """Central-difference estimate of d loss / d parameters.

    shape: shape of the parameter array being probed.
    func:  callable that adds its argument to that parameter array in place
           on the layer (e.g. a layer's perturb_f).
    eps:   perturbation applied to one parameter at a time.

    Returns an array of the given shape.
    """
    ep=np.zeros(shape)
    dw=np.zeros(shape)
    for idx in np.ndindex(*shape):
      ep[idx]=eps
      func(ep)      # parameters at +eps
      yhat1=self.feedforward(x)
      func(-2*ep)   # parameters at -eps
      yhat2=self.feedforward(x)
      func(ep)      # back to the starting value
      dw[idx]=(self.loss(y,yhat1)-self.loss(y,yhat2))/(2*eps)
      ep[idx]=0

    return dw

In [61]:
# Gradient check w.r.t. the filters of both conv layers: for each layer print
# the analytic value stored in .df by backpropagate, followed by the
# finite-difference estimate from num_grad_df.
np.random.seed(100)
nn=Network()
# Scale the standard-normal images by 10.
img=nn.gen_images()*10
# One random target value per image.
y=np.random.randn(img.shape[0],1)
yhat=nn.feedforward(img)
nn.backpropagate(img,y,yhat)
#nn.num_grad(img,y)
print(nn.conv2.df)
print(nn.num_grad_df(img,y,nn.conv2.filters.shape,nn.conv2.perturb_f))
print(nn.conv1.df)
print(nn.num_grad_df(img,y,nn.conv1.filters.shape,nn.conv1.perturb_f))

(1, 1, 2, 2)
(1, 1, 4, 4)
[[[[  2211.04785945  16869.28227808  13289.61044976]
   [  -614.9298863    4876.60000057   5230.20057548]
   [  -405.75367881 -13608.63117761    -77.14906936]]]]
[[[[  2211.04785944  16869.28227807  13289.61044975]
   [  -614.92988631   4876.60000057   5230.20057547]
   [  -405.7536788  -13608.63117762    -77.14906938]]]]
[[[[-2095.97884206 -1286.06365248 10066.93160815]
   [-1420.71161289 -2554.1986827  -3192.43862478]
   [  417.42078628 -2609.38690358  1346.92323653]]]]
[[[[-2095.97884206 -1286.06365247 10066.93160812]
   [-1420.71161291 -2554.1986827  -3192.43862481]
   [  417.4207863  -2609.38690352  1346.92323656]]]]
