# lets make a convolutional network 

In [1]:
# som helper functions
def same_dim(x,dim):
    assert len(x.shape)==dim, \
    ''' they are suppose to have the same
    dimension but first one is {0} and the second
    one is {1} '''.format(len(x.shape),dim)

def assert_same_shape(x,x_grad):
    assert x.shape == x_grad.shape ,\
    ''' they are suppose to have the same shape 
    but the first one is {0} and the second one 
    is {1}'''.format(tuple(x.shape),tuple(x_grad.shape))

In [2]:
import numpy as np
from numpy import ndarray
from __future__ import division

In [3]:
a=[12,34,45,56,90]

In [4]:
new=[0]
for i in a:
    new.append(i)
new.append(0)
new

[0, 12, 34, 45, 56, 90, 0]

In [5]:
np.repeat([0],5)

array([0, 0, 0, 0, 0])

In [129]:
def _1d_pad(arr:np.ndarray,num:int)->np.ndarray:
    temp=np.array([0])
    temp=temp.flatten()
    temp=np.repeat(temp,num)
    return np.concatenate([temp,arr,temp])

In [7]:
te=np.array([1,2,3,4,5,6,7,8,9])
_1d_pad(te,1)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0])

In [8]:
def _1d_conv(inp:np.ndarray,param:np.ndarray):
    '''going to convolove the input'''
    # check is they are the same dimension
    same_dim(inp,1)
    same_dim(param,1)
    
    #pad the input
    param_len=param.shape[0]
    pading_el=param_len//2
    input_padded=_1d_pad(inp,pading_el)

    #lets convolve the input and make output
    out=np.zeros(inp.shape[0])
    for i in range(out.shape[0]):
        for j in range(param_len):
            out[i]+=param[j]*input_padded[i+j]
            
    assert_same_shape(inp,out)
    return out

In [9]:
param=np.array([1,2,3])
x=_1d_conv(te,param)
print(x)
print(len(x))
print(len(te))

[ 8. 14. 20. 26. 32. 38. 44. 50. 26.]
9
9


In [10]:
def sum_conv(inp:np.ndarray,param:np.ndarray):
    out=_1d_conv(inp,param)
    sum=np.sum(out)
    return sum

In [11]:
input_1d = np.array([1,2,3,4,5])
param_1d = np.array([1,1,1])
sum_conv(input_1d,param_1d)

39.0

# backpropagation

In [12]:
input_1d = np.array([1,2,3,4,5])
param_1d_2 = np.array([2,1,1])
sum_conv(input_1d,param_1d_2) - sum_conv(input_1d,param_1d)

10.0

In [13]:
# calculating the input gradient

def param_grad(inp:np.ndarray,
               param:np.ndarray,
              output_grad:np.ndarray=None)->np.ndarray:
    #lets pad our input
    param_len=param.shape[0]
    embed_num=param_len//2
    input_pad=_1d_pad(inp,embed_num)
    
    if output_grad is None:
        output_grad=np.ones_like(inp)
    else:
        assert_same_shape(inp,output_grad)
        
    param_grad=np.zeros_like(param)
    
    for i in range(inp.shape[0]):
        for j in range(param.shape[0]):
            param_grad[j]+=input_pad[i+j]*output_grad[i]

    
    assert_same_shape(param,param_grad)
    return param_grad


In [14]:
def _input_grad_1d(inp:np.ndarray,
                  param:np.ndarray,
                  output_grad:np.ndarray=None)->np.ndarray:
    
    
    param_len=param.shape[0]
    emb_num=param_len//2
    
    if output_grad is None:
        output_grad=np.ones_like(inp)
    else:
        assert_same_shape(output_grad,inp)

    input_grad=np.zeros_like(output_grad)
    output_pad=_1d_pad(output_grad,emb_num)

    for i in range(inp.shape[0]):
        for j in range(param.shape[0]):
            input_grad[i] += output_pad[i+param_len-j-1]*param[j]
            
    assert_same_shape(input_grad,output_grad)
    return input_grad


# example

In [15]:
input_1d = np.array([10,11,12,13])
param_1d_2 = np.array([2,1])
print("input gradient is: ", _input_grad_1d(input_1d,param_1d))
#print("parameter gradient is ",param_grad(input_1d,param_1d_2))

input gradient is:  [2 3 3 2]


In [16]:
_input_grad_1d(np.array([90,23,43]),np.array([9,3,4]))

array([12, 16,  7])

# batches

In [17]:
t=np.array([[12,34,45],[90,94,99]])
t.shape

(2, 3)

In [18]:
def _pad_1d_batch(inp:np.ndarray,
                  num:int):
    outs=[_1d_pad(objs,num) for objs in inp]
    return outs

In [19]:
#compute the 1d batche
input_1d_batch = np.array([[0,1,2,3,4,5,6], 
                           [1,2,3,4,5,6,7]])

In [20]:
_pad_1d_batch(input_1d_batch,3)

[array([0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0]),
 array([0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0])]

# forward 

In [21]:
def _1d_batch_conv(inp:np.ndarray,
                  param:np.ndarray)->np.ndarray:
    outs=[_1d_conv(objs,param) for objs in inp]
    return np.stack(outs)

In [22]:
params=np.array([12,23,34])
_1d_batch_conv(input_1d_batch,param_1d)

array([[ 1.,  3.,  6.,  9., 12., 15., 11.],
       [ 3.,  6.,  9., 12., 15., 18., 13.]])

# gradient

### input gradient

In [23]:
def input_grad_1d_batch(inp:np.ndarray,
              param:np.ndarray)->np.ndarray:
    out=_1d_batch_conv(inp,param)
    output_grad=np.ones_like(out)
    
    batch_size=output_grad.shape[0]
    grad=[_input_grad_1d(inp[i],param,output_grad[i]) for i in range(batch_size)]
    return np.stack(grad)

In [24]:
input_grad_1d_batch(input_1d_batch,param_1d_2)

array([[2., 3., 3., 3., 3., 3., 3.],
       [2., 3., 3., 3., 3., 3., 3.]])

### parameter gradient

In [25]:
def param_grad_1d_batch(inp:np.ndarray,
                       param:np.ndarray,
                       )->np.ndarray:
    ''' this calcualtes the parameter gradient of 1d input batch'''
    output_grad=np.ones_like(inp)
    input_pad=_pad_1d_batch(inp,1)

    param_grad=np.zeros_like(param)
    
    for i in range(inp.shape[0]):
        for j in range(inp.shape[1]):
            for k in range(param.shape[0]):
                param_grad[k]+=input_pad[i][k+j]*output_grad[i][j]
    return param_grad

In [26]:
param_grad_1d_batch(input_1d_batch,param_1d_2)

array([36, 49])

# 2D COMPUTATIONS

In [27]:
np.random.randn(1,28,28)

array([[[-2.20896567, -0.00964138, -0.50867913,  0.57006757,
         -1.60685506,  0.49517295, -0.0534404 , -0.58421673,
          2.51417974,  2.41071086, -0.53003636, -0.75183636,
         -0.82532866,  0.29222244,  0.05816759,  0.2196481 ,
          0.46517556, -1.4033292 , -0.22187631, -1.18604524,
          0.00716199, -0.52933361, -0.95646104,  2.19221808,
         -0.5740061 ,  0.95870688,  1.04538888, -0.33396164],
        [ 0.38738271,  1.72816799, -0.05012132,  0.15076129,
          1.5820012 , -0.64993349,  1.79538607, -0.11774779,
          0.9878857 , -1.80067174,  1.05004005, -2.57346564,
         -0.89130738, -0.67647186,  1.1241367 , -0.01928825,
          0.38804259, -1.67276991,  0.0542456 , -0.10847661,
          2.04692904,  0.31848372, -0.59276674,  0.33933378,
          0.15518875,  1.53750836, -0.09675682, -1.38007286],
        [-1.89436067, -1.1993474 ,  0.30647703, -0.50215363,
         -1.3328232 ,  2.17870037, -1.16345952, -1.77546397,
         -0.82699276, 

In [28]:
imgs_2d_batch = np.random.randn(3, 28, 28)
param_2d = np.random.randn(3, 3)

In [29]:
def _pad_2d_obj(inp:np.ndarray,
               num:int)->np.ndarray:
    ''' this paddes the 2d inputs that came from 3d'''
    input_pad=_pad_1d_batch(inp,num)
    zeros=np.zeros((num,inp.shape[0]+num*2))
    return np.concatenate([zeros,input_pad,zeros])

In [30]:
def _pad_2d(inp:np.ndarray,
           num:int)->np.ndarray:
    ''' the input is 3d'''
    outs=[_pad_2d_obj(obj,num) for obj in inp]
    return np.stack(outs)

In [31]:
_pad_2d(imgs_2d_batch,1)

array([[[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        , -0.35019564, -0.71613171, ..., -1.02727293,
         -0.9303927 ,  0.        ],
        [ 0.        , -0.86897765, -0.60739455, ...,  1.40532534,
         -1.63100755,  0.        ],
        ...,
        [ 0.        ,  1.62972478, -2.20169887, ..., -0.89887595,
         -0.09243871,  0.        ],
        [ 0.        ,  1.01666333, -0.24836335, ...,  1.02709533,
          0.84572472,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]],

       [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        , -1.99411222, -0.8872034 , ...,  0.97000352,
         -0.25159189,  0.        ],
        [ 0.        ,  1.23420701, -0.52190442, ..., -0.60902808,
         -0.08239903,  0.        ],
        ...,
        [ 0.        , -1.01565345, -0.15391019, ..., -

# compute output

In [32]:
def _compute_output_obs_2d(obj:np.ndarray,
                           param:np.ndarray)->np.ndarray:
    ''' this computes the convolution of the input and the filter'''
    
    pad_len=param.shape[0]//2
    paded_input=_pad_2d_obj(obj,pad_len)
    out=np.zeros_like(obj)
    for o_w in range(out.shape[0]):
        for o_h in range(out.shape[1]):
            for p_w in range(param.shape[0]):
                for p_h in range(param.shape[1]):
                    out[o_w][o_h]+=param[p_w][p_h]*paded_input[o_w+p_w][o_h+p_h]
    return out

In [33]:
def _compute_output_2d(inp:np.ndarray,
                       param:np.ndarray)->np.ndarray:
    ''' takes batch input'''
    same_dim(inp,3)
    outs=[_compute_output_obs_2d(obj,param) for obj in inp]
    return np.stack(outs)

In [34]:
_compute_output_2d(imgs_2d_batch,param_2d).shape

(3, 28, 28)

# gradient

In [35]:
def _comput_grads_obs_2d(input_obs:np.ndarray,
                         output_grad:np.ndarray,
                         param:np.ndarray)->np.ndarray:
    
    param_size=param.shape[0]
    output_pad=_pad_2d_obj(output_grad,param_size//2)
    input_grad=np.zeros_like(input_obs)

    for i_w in range(input_obs.shape[0]):
        for i_h in range(input_obs.shape[1]):
            for p_w in range(param_size):
                for p_h in range(param_size):
                    input_grad[i_w][i_h]+=output_pad[i_w+param_size-p_w-1][i_h+param_size-p_h-1]*param[p_w][p_h]
    return input_grad


def _comput_grads_2d(inp:np.ndarray,
                    output_grad:np.ndarray,
                    param:np.ndarray
                   )->np.ndarray:
    grads=[_comput_grads_obs_2d(inp[i],output_grad[i],param) for i in range(output_grad.shape[0])]
    return np.stack(grads)


def param_grad(inp:np.ndarray,
              output_grad:np.ndarray,
              param:np.ndarray)->np.ndarray:
    
    ''' calculates the parameter gradient'''
    
    param_size=inp.shape[0]
    inp_pad=_pad_2d_obj(inp,param_size//2)
    param_grad=np.zeros_like(param)
    img_size=output_grad.shape[1:]
    
    for i in range(inp.shape[0]):
        for i_w in range(img_size[0]):
            for i_h in range(img_size[1]):
                for p_w in range(param_size):
                    for p_h in range(param_size):
                        param_grad[p_w][p_h]+=inp_pad[i][i_w+p_w][i_h+p_h]*output_grad[i][i_w][i_h]
    return param_grad



def _param_grad_2d(inp: ndarray,
                   output_grad: ndarray, 
                   param: ndarray) -> ndarray:

    param_size = param.shape[0]
    inp_pad = _pad_2d(inp, param_size // 2)

    param_grad = np.zeros_like(param)
    img_shape = output_grad.shape[1:]
    
    for i in range(inp.shape[0]):
        for o_w in range(img_shape[0]):
            for o_h in range(img_shape[1]):
                for p_w in range(param_size):
                    for p_h in range(param_size):
                        param_grad[p_w][p_h] += inp_pad[i][o_w+p_w][o_h+p_h] \
                        * output_grad[i][o_w][o_h]
    return param_grad

In [36]:
imgs_2d_batch.shape

(3, 28, 28)

In [37]:
img_grads=_comput_grads_2d(
                                imgs_2d_batch,
                                np.ones_like(imgs_2d_batch),
                                param_2d
)
img_param_grad=_param_grad_2d(imgs_2d_batch,
                             np.ones_like(imgs_2d_batch),
                             param_2d)

In [38]:
img_grads.shape,img_param_grad.shape

((3, 28, 28), (3, 3))

# with channels + 2D 

In [40]:
def _pad_2d_channels(inp:np.ndarray,
                    num:int)->np.ndarray:
    '''input has dimension [num_channels,image_width,image_height]'''
    return np.stack([_pad_2d_obj(channel,num) for channel in inp])
    
def _pad_conv_input(inp:np.ndarray,
                   num:np.ndarray)->np.ndarray:
    '''the input to these function is [num_batches,num_channel,image_width,image_height'''
    return np.stack([_pad_2d_channels(objs,num) for objs in inp])

# forward 

In [105]:
def _compute_output_obs(obs:np.ndarray,
                       param:np.ndarray)->np.ndarray:
    
    '''obs:the input/obs shape is num_channe,img_width,img_height
    
     param: input_channel,output_channel,filter_width,filter_height
     
     '''
    
    same_dim(obs,3)
    same_dim(param,4)
    
    param_size=param.shape[2]
    pad_num=param_size//2
    
    padded_obs=_pad_2d_channels(obs,pad_num)
    img_size=obs.shape[1]

    input_channel=param.shape[0]
    output_channel=param.shape[1]
    
    out=np.zeros((output_channel,)+obs.shape[1:])  #out output channel + width and height of th image
    
    for c_i in range(input_channel):
        for c_o in range(output_channel):
            for i_w in range(img_size):   #here we are setting the value to img_size we incinuating that the image is square....i think
                for i_h in range(img_size):#here we are setting the value to img_size we incinuating that the image is square....i think
                    for p_w in range(param_size):    
                        for p_h in range(param_size):
                            out[c_o][i_w][i_h] += param[c_i][c_o][p_w][p_h] * padded_obs[c_i][i_w+p_w][i_h+p_h]
    return out


def _output(inp:np.ndarray,
            param:np.ndarray)->np.ndarray:
    
    ''' here the input has the shape 
        inp:[batch size,channels,img_width,img_height]
        param:[input_channels,output_channels,filter width,filter height]
        '''
    out=[_compute_output_obs(obs,param) for obs in inp]
    return np.stack(out)

# backward 

In [122]:
def _compute_gradient_obs_channel(inp_obs:np.ndarray,
                                  output_grad_obs:np.ndarray,
                                  param:np.ndarray,
                                 )->np.ndarray:
    ''' the inp size is:
    
    inp:[input_channel,image_width,image_height]
    
    output_grad:[output_channel,image_width,image_height]
    
    param:[input channel,output channel,im_width,img_height]
    '''
    param_size=param.shape[2]
    paded_output_grads=_pad_2d_channels(output_grad_obs,param_size//2)
    input_grad=np.zeros_like(inp_obs)
    
    img_size=inp_obs.shape[1]
    in_chan=inp_obs.shape[0]
    out_chan=param.shape[1]
    
    for c_in in range(in_chan):
        for c_out in range(out_chan):
            for i_w in range(inp_obs.shape[1]):
                for i_h in range(inp_obs.shape[2]):
                    for p_w in range(param_size):  # we can also type the input width and input height here
                        for p_h in range(param_size):#we can also type the input width and input height here
                            input_grad[c_in][i_w][i_h]+=paded_output_grads[c_out][i_w+param_size-p_w-1][i_h+param_size-p_h-1]*param[c_in][c_out][p_w][p_h]
    return input_grad

def input_grad(inp:np.ndarray,
               output_grad:np.ndarray,
               param:np.ndarray)->np.ndarray:
    
    grads=[_compute_gradient_obs_channel(inp[i],output_grad[i],param) for i in range(output_grad.shape[0])]
    return np.stack(grads)

In [133]:
def param_grad(inp:np.ndarray,
              output_grad:np.ndarray,
              param:np.ndarray)->np.ndarray:
    
    ''' the inp size is:
    inp:[batch_size,input_channel,image_width,image_height]
    
    output_grad:[batch_size,output_channel,image_width,image_height]
    param:[input channel,output channel,im_width,img_height]'''
    
    param_grad=np.zeros_like(param)
    param_size=param.shape[2]

    in_channel=inp.shape[1]
    out_channel=output_grad.shape[1]
    paded_inp=_pad_conv_input(inp,param_size//2)
    img_shape=output_grad.shape[2:]  #this could be inp[1:]

    for i in range(inp.shape[0]):
        for c_in in range(in_channel):
            for c_out in range(out_channel):
                for o_w in range(img_shape[0]):
                    for o_h in range(img_shape[1]):
                        for p_w in range(param_size):
                            for p_h in range(param_size):
                                param_grad[c_in][c_out][p_w][p_h]+=paded_inp[i][c_in][o_w+p_w][o_h+p_h]*output_grad[i][c_out][o_w][o_h]
                                
    return param_grad

In [84]:
# this is for the forward
def _compute_output_sum(imgs:np.ndarray,param:np.ndarray):
    return _output(imgs,param).sum()

# testing gradient

In [85]:
cifar_imgs = np.random.randn(10, 3, 32, 32)
cifar_param = np.random.randn(3, 16, 5, 5)

## input gradient

In [89]:
cifar_imgs_2=cifar_imgs.copy()
cifar_imgs_2[3][1][2][19]+=1

In [106]:
_compute_output_sum(cifar_imgs_2,cifar_param) - _compute_output_sum(cifar_imgs,cifar_param)

14.349635131151445

In [123]:
input_grad(cifar_imgs,
            np.ones((10, 16, 32, 32)),
            cifar_param)[3][1][2][19]

14.349635131150292

# param gradient

In [124]:
cifar_param_2 = cifar_param.copy()
cifar_param_2[0][8][0][2] += 1

In [125]:
_compute_output_sum(cifar_imgs_2,cifar_param) - _compute_output_sum(cifar_imgs,cifar_param)

14.349635131151445

In [134]:
param_grad(cifar_imgs,
            np.ones((10, 16, 32, 32)),
            cifar_param)[0][8][0][2]

-154.5101645255271