In [1]:
from scipy import signal
from scipy import misc
import matplotlib.pyplot as plt
import numpy as np

In [2]:

def conv_forward_naive(x, w):
  """
  A naive implementation of the forward pass for a single-channel
  convolutional layer (stride 1, no padding, no bias).

  Every one of the N input images is cross-correlated with the filter: the
  filter is slid over the image and, at each position, the elementwise product
  of the filter and the covered window is summed into one output value.

  Input:
  - x: Input data of shape (N, H, W)
  - w: Filter weights of shape (1, HH, WW). The leading axis is only broadcast
    against each window; if it is longer than 1 the filter planes are summed
    together into the single output map.

  Returns a tuple of:
  - out: Output data, of shape (N, H', W') where H' and W' are given by
    H' = 1 + H - HH
    W' = 1 + W - WW
  - cache: (x, w)
  """
  (N, H, W) = x.shape
  (_, HH, WW) = w.shape
  H_out = 1 + H - HH
  W_out = 1 + W - WW
  out = np.zeros((N, H_out, W_out))

  # range (not xrange) so the function runs under both Python 2 and Python 3.
  for n in range(N):
    for h_prime in range(H_out):
      for w_prime in range(W_out):
        # Index with n so each output map is built from its own image only;
        # slicing x[:, ...] here would mix every sample into every output.
        window = x[n, h_prime:h_prime + HH, w_prime:w_prime + WW]
        out[n, h_prime, w_prime] = np.sum(window * w)

  cache = (x, w)
  return out, cache



In [3]:
def conv_backward_naive(dout, cache):
  """
  A naive implementation of the backward pass for a single-channel
  convolutional layer (stride 1, no padding, no bias).

  Inputs:
  - dout: Upstream derivatives, of shape (N, H', W')
  - cache: A tuple of (x, w) as returned by conv_forward_naive, with x of
    shape (N, H, W) and w of shape (1, HH, WW)

  Returns a tuple of:
  - dx: Gradient with respect to x, same shape as x
  - dw: Gradient with respect to w, same shape as w
  """
  (x, w) = cache
  (N, H, W) = x.shape
  (_, HH, WW) = w.shape
  (_, H_out, W_out) = dout.shape

  dx = np.zeros_like(x)
  dw = np.zeros_like(w)

  # conv_forward_naive reduces `window * w` with one full np.sum, so an input
  # pixel is weighted by the filter planes added together. For the usual
  # leading axis of length 1 this is simply w[0].
  w_plane = w.sum(axis=0)

  # range (not xrange) so the function runs under both Python 2 and Python 3.
  for n in range(N):
    for h_prime in range(H_out):
      for w_prime in range(W_out):
        h2 = h_prime + HH
        w2 = w_prime + WW
        grad = dout[n, h_prime, w_prime]
        # Route the gradient of output (n, h', w') to sample n only.
        dx[n, h_prime:h2, w_prime:w2] += w_plane * grad
        # The (HH, WW) window broadcasts across dw's leading axis, so this
        # also works when the batch holds more than one image.
        dw += x[n, h_prime:h2, w_prime:w2] * grad

  return dx, dw

In [4]:
# Build the template-matching fixture: a zero-mean image, a zero-mean 60x60
# patch cut out of it, and then additive random noise on the image.
# NOTE(review): misc.lena() may be missing from newer SciPy releases — confirm
# the installed version (or substitute another test image) before re-running.
lena = misc.lena() - misc.lena().mean()
template = np.copy(lena[235:295, 310:370]) # right eye
template -= template.mean()
# The noise is drawn without a fixed seed, so the numbers printed further down
# will differ from run to run.
lena = lena + np.random.randn(*lena.shape) * 50 # add noise
# Reference result: SciPy's 'valid' 2-D cross-correlation of image and template.
corr = signal.correlate2d(lena, template, mode='valid')
# (row, column) of the strongest response, stored as y and x.
y, x = np.unravel_index(np.argmax(corr), corr.shape) # find the match


In [5]:
# Shapes of the reference correlation map, the noisy image and the template.
print(corr.shape)
print(lena.shape)
print(template.shape)

(453, 453)
(512, 512)
(60, 60)


In [6]:
# Side length of a 'valid' correlation of a 512-wide image with a 60-wide template.
print(512 - 60 + 1)

453


In [7]:
# Prepend a length-1 axis so the 2-D image and template have the 3-D shapes
# that conv_forward_naive unpacks.
lena2 = lena[np.newaxis, ...]
template2 = template[np.newaxis, ...]
print(lena2.shape)
l, m, n = lena2.shape
conv_out, cache = conv_forward_naive(lena2, template2)



(1, 512, 512)


In [8]:
print(conv_out.shape)
# Drop the length-1 leading axis so the result is 2-D like `corr`.
conv_out2 = np.squeeze(conv_out)

(1, 453, 453)


In [9]:
def rel_error(x, y):
  """ Return the largest elementwise relative difference between x and y. """
  abs_diff = np.abs(x - y)
  # Floor the denominator at 1e-8 so entries where both inputs are zero do not
  # divide by zero.
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [10]:
# Show the top-left 10x10 corner of the naive layer's output next to SciPy's
# result, then the maximum relative error over the full maps.
print(conv_out2[:10, :10])
print('-----------')
print(corr[:10, :10])
print('---------------')
print(rel_error(conv_out2, corr))

[[-480401.57256889 -436248.90566    -388871.62802411 -370360.69097143
  -372486.59803868 -241334.53455049  -41280.80354578  123635.8191214
   331355.52148494  481922.16116683]
 [-476909.61717919 -444741.98225504 -413120.7413048  -419635.6100475
  -397855.22031456 -253504.07849345  -64202.6906014   118341.86552739
   326676.94287553  487561.5107435 ]
 [-476537.49482989 -457112.2460218  -402310.69508413 -419309.62615936
  -411542.40758553 -259458.62341122  -15136.19159726  128095.02133442
   348657.03395869  485389.1272171 ]
 [-402925.73634128 -376496.86934182 -324365.16136255 -315563.69097824
  -330845.38807606 -212936.64935419   53911.57454726  166620.28758812
   359231.29567563  536493.07843933]
 [-446685.96044137 -430219.5684079  -370377.6620443  -353907.76145991
  -380684.62137758 -232908.56093996   27195.81204897  143780.96910806
   316358.34175948  492351.29289124]
 [-414588.231846   -383679.7291077  -324672.49760907 -325603.9306898
  -359161.63642517 -253522.9205785    24338.3420

In [11]:
# Four side-by-side panels: noisy image, template, SciPy correlation map and
# the naive layer's output.
fig, axes_row = plt.subplots(1, 4)
ax_orig, ax_template, ax_corr, ax_convlayer = axes_row
panels = (
    (ax_orig, lena, 'Original'),
    (ax_template, template, 'Template'),
    (ax_corr, corr, 'Cross-correlation'),
    (ax_convlayer, conv_out2, 'Convolutional Layer'),
)
for panel_ax, panel_image, panel_title in panels:
    panel_ax.imshow(panel_image, cmap='gray')
    panel_ax.set_title(panel_title)
    panel_ax.set_axis_off()
# Red dot at (x, y) on the first panel; presumably the match location found
# from `corr` when the cells are run in order.
ax_orig.plot(x, y, 'ro')
fig.show()

In [12]:
# Check the double flip on a tiny 2x2x2 array: flipping axes 1 and 2 reverses
# the rows and columns inside each 2x2 slice while keeping the order of the
# slices along axis 0.
A = np.arange(8).reshape((2,2,2))
print A,"\n----"
print np.flip(np.flip(A,1),2)

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]] 
----
[[[3 2]
  [1 0]]

 [[7 6]
  [5 4]]]


In [13]:
# Random upstream gradient with the same shape as the layer output.
dout = np.random.random_sample(size=conv_out.shape)
print(dout.shape)
print(dout[0, :10, :10])

(1, 453, 453)
[[ 0.23305565  0.33496059  0.37639126  0.98530804  0.76722511  0.37761017
   0.86186562  0.07289429  0.21504176  0.0378794 ]
 [ 0.48343761  0.14660391  0.58575196  0.89256536  0.4845017   0.69243072
   0.96884453  0.83000976  0.36392475  0.75796343]
 [ 0.50197595  0.26081202  0.5227257   0.98155913  0.30252903  0.74239983
   0.91327359  0.32018061  0.89703791  0.4876661 ]
 [ 0.6034152   0.39760012  0.69441794  0.33494449  0.32950506  0.14053118
   0.33853425  0.88460892  0.92028107  0.76954243]
 [ 0.63727464  0.47043977  0.52733388  0.20556574  0.50470286  0.61764912
   0.701715    0.73443601  0.69180491  0.93747095]
 [ 0.99222774  0.8349225   0.29466109  0.02733135  0.65037103  0.45439769
   0.28407654  0.82080386  0.13878077  0.29192673]
 [ 0.35101028  0.88437044  0.531705    0.6698293   0.48501709  0.3200846
   0.08114581  0.67190271  0.94504966  0.84180359]
 [ 0.25757435  0.09401498  0.26395157  0.3864333   0.61303328  0.96801753
   0.50459511  0.49998336  0.18185923 

In [14]:
# Push the random upstream gradient back through the naive layer.
dLena, dTemplate = conv_backward_naive(dout, cache)
dLenaSqueezed = np.squeeze(dLena)

In [15]:
# Confirm the cache holds the same values that were passed to the forward pass.
# NOTE: this rebinds `x`, which an earlier cell used for the column of the
# best match.
x, w = cache
print(rel_error(template2, w))
print(rel_error(lena2, x))


0.0
0.0


In [16]:
# Reference gradients computed with SciPy, to compare against the naive
# backward pass.
# Template reversed along both spatial axes (axes 1 and 2).
flip_template2 = np.flip(np.flip(template2,1),2)
print lena2.shape,flip_template2.shape
# Input gradient: 'full' cross-correlation of the upstream gradient with the
# reversed template, treating everything outside the boundary as zero.
dLena_cross = signal.correlate2d(dout.squeeze(), flip_template2.squeeze(), boundary='fill', mode='full', fillvalue=0)
# Filter gradient: 'valid' cross-correlation of `x` with the upstream gradient.
# Presumably `x` is the cached input image here — it is rebound from `cache`
# in an earlier cell when the notebook is run in order.
dTemplate_cross =  signal.correlate2d(x.squeeze(), dout.squeeze(), mode='valid')


(1, 512, 512) (1, 60, 60)


In [17]:
# Shapes of the SciPy gradients next to the naive ones.
print dLena_cross.shape,dLenaSqueezed.shape
print dTemplate_cross.shape,dTemplate.shape
# The last two entries of the flipped template's last row should equal the
# first two entries of the original template's first row.
print flip_template2[0,-1,-1], flip_template2[0,-1,-2]
print template2[0,0,0],template2[0,0,1]

(512, 512) (512, 512)
(60, 60) (1, 60, 60)
75.5697222222 76.5697222222
75.5697222222 76.5697222222


In [18]:
# Input gradient: maximum relative error between SciPy and the naive backward
# pass, followed by the top-left 10x10 corner of each.
print rel_error(dLena_cross,dLenaSqueezed)
print dLena_cross[:10,:10],"\n------------\n",dLenaSqueezed[:10,:10]

print "======================"
# Filter gradient: the same comparison for dTemplate.
print rel_error(dTemplate.squeeze(),dTemplate_cross)
print dTemplate.squeeze()[:10,:10],"\n------------\n",dTemplate_cross[:10,:10]

0.0
[[   17.61195036    43.15788465    72.40273984   148.84073      208.49931739
    238.80274479   309.18927032   315.83577729   332.24847493
    343.89671446]
 [   53.9121407     91.38453564   166.17036218   312.34592348
    409.57089007   495.51796751   642.98062193   710.82735948
    760.26731819   833.39796333]
 [   92.0620535    151.12228469   265.35162734   491.01049057
    611.78788496   755.44047845   976.65265756  1069.99026208
   1191.05539892  1301.83630532]
 [  138.61030978   228.30471928   396.67785582   651.34017844
    797.22410876   947.78640543  1195.38812208  1355.29386448
   1545.54391691  1718.35777939]
 [  187.67149029   313.75868149   521.80318826   796.62136041
    974.26168487  1171.54283524  1469.41968188  1686.48770088
   1933.79591064  2174.56364499]
 [  263.3607249    453.78598683   686.42064094   964.72282293
   1190.95231213  1417.56808572  1735.9563338   2025.72996523
   2272.76207067  2529.60002987]
 [  289.83919492   549.47535377   820.89277923  1149.9

In [19]:
# Check that `x`, with its length-1 axis removed, still matches the noisy image.
print(rel_error(lena, np.squeeze(x)))
print(x)

0.0
[[[ 107.13206929  -15.70538513    4.40817845 ...,   16.93980258
     78.42212502  -47.0964331 ]
  [  93.94120317   34.50840037   -4.15232258 ...,  129.05323562
     41.55790951  -45.17695469]
  [  25.99810041   89.07012457   72.50647415 ...,   21.09100135
     66.83900414  -18.72084458]
  ..., 
  [ -88.68256442 -128.07535799  -96.34801464 ...,  -50.25151191
    -39.83370089  -72.60511688]
  [ -61.4735109    -1.48368096  -12.10855022 ...,   25.74741399
    -23.7431099   -11.59738917]
  [-149.21462036 -156.29655769  -54.66906157 ...,   29.71933207
    -29.9184338    -8.31780508]]]


In [20]:
# `x` carries an extra leading axis compared with the 2-D `lena`.
print x.shape,lena.shape

(1, 512, 512) (512, 512)
