In [1]:
import copy, os, gc, tqdm
import numpy as np
import pickle as pkl
from time import time, sleep

from IPython import display
from collections import Counter
from itertools import product, chain, combinations


#matplotlib
import matplotlib
import matplotlib as mp
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
from matplotlib.colors import colorConverter
%matplotlib inline

# rcParams reference: https://matplotlib.org/users/customizing.html
matplotlib.rcParams['legend.markerscale'] = 1.5     # the relative size of legend markers vs. original
matplotlib.rcParams['legend.handletextpad'] = 0.01
matplotlib.rcParams['legend.labelspacing'] = 0.4    # the vertical space between the legend entries in fraction of fontsize
matplotlib.rcParams['legend.borderpad'] = 0.5       # border whitespace in fontsize units
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['font.family'] = 'serif'
matplotlib.rcParams['font.serif'] = 'Times New Roman'
matplotlib.rcParams['axes.labelsize'] = 20
matplotlib.rcParams['axes.titlesize'] = 20
# Configuring latex fonts
matplotlib.rc('font', **{'family':'serif'})
matplotlib.rc('text', usetex=True)
# 'text.latex.unicode' was deprecated in matplotlib 3.0 and removed in later releases, where setting
# it raises; only set it on versions that still define it.
if 'text.latex.unicode' in matplotlib.rcParams:
    matplotlib.rc('text.latex', unicode=True)
# Every rc('text.latex', preamble=...) call REPLACES the previous preamble, so three separate calls
# would leave only the last package active. Pass all packages in a single preamble instead.
matplotlib.rc('text.latex', preamble='\n'.join([
    r'\usepackage[utf8]{inputenc}',
    r'\usepackage[english]{babel}',
    r'\usepackage{amsmath}',
]))


import sys
# Root of the project, relative to this notebook; the `ml` package and the datasets live under it.
library_path = '../../'
# Register the root on the import path once, so re-running the cell does not add duplicates.
if sys.path.count(library_path) == 0:
    sys.path.append(library_path)

# Dataset locations are plain string concatenations onto the project root.
CIFAR10_DIR = ''.join((library_path, 'datasets/CIFAR10'))
MNIST_DIR   = ''.join((library_path, 'datasets/MNIST'))

from ml.utils import get_CIFAR10_data, get_MNIST_data
from ml.neural_network import Layer, Sequential, Model, Solver
from ml.neural_network.layers import Dense, Dropout, SoftMax, BatchNormalization
from ml.neural_network.nonlinearities import Tanh, ReLU, LeakyReLU, ELU, SoftPlus
from ml.neural_network.criterions import MSECriterion, MulticlassLogLoss
from ml.neural_network.regularizers import *
from ml.neural_network.initializers import *
from ml.neural_network.optimizers import *
from ml.neural_network.decorators import *
from ml.neural_network.gradients import *

In [2]:
def plot_solver_history(history, axarr=None, figsize=(10, 14), colors=None, markers=None):
    """
    Plots a training history on three stacked panels: per-iteration training loss,
    per-epoch train/val loss and per-epoch train/val accuracy.

    Inputs:
    - history:  mapping that is indexed with the keys 'loss_history', 'train_loss_history',
                'val_loss_history', 'train_acc_history' and 'val_acc_history'; each value is
                passed directly to Axes.plot
    - axarr:    indexable collection of at least three axes; if None a new 3x1 figure is created
    - figsize:  figure size used only when axarr is None
    - colors:   dict with keys 'train' and 'val' giving line colors (default: red / blue)
    - markers:  dict with keys 'train' and 'val' giving marker styles (default: 'o' / '^')

    Returns axarr (the axes that were drawn on). As a side effect calls plt.tight_layout()
    and plt.show().
    """
    if axarr is None:
        _, axarr = plt.subplots(3, 1, figsize=figsize)
    if colors is None:
        colors = {'train': 'r', 'val': 'b'}
    if markers is None:
        markers = {'train': 'o', 'val': '^'}
    plot_kwargs = {'markeredgecolor': 'k', 'ms': 5, 'alpha': 0.7}

    # Panel 0: raw training loss, one point per iteration.
    ax = axarr[0]
    ax.plot(history['loss_history'], color=colors['train'])
    ax.set_yscale('log')
    ax.set_xlabel('iteration')
    ax.set_ylabel('loss')
    ax.set_title('Training loss vs iteration')
    ax.grid(linestyle='--', alpha=0.5)

    # Panels 1 and 2 share the same layout and differ only in the history key, labels,
    # y-scale and legend position: (axes, key suffix, y label, title, log y-scale, legend location).
    epoch_panels = [
        (axarr[1], 'loss', 'loss',     'Loss vs epoch',     True,  'upper right'),
        (axarr[2], 'acc',  'accuracy', 'Accuracy vs epoch', False, 'lower right'),
    ]
    for ax, metric, ylabel, title, log_scale, legend_loc in epoch_panels:
        for label in ['train', 'val']:
            data = history['{}_{}_history'.format(label, metric)]
            ax.plot(data, marker=markers[label], color=colors[label], label=label, **plot_kwargs)
        if log_scale:
            ax.set_yscale('log')
        ax.set_xlabel('epoch')
        # The accuracy panel used to be labelled 'training loss'; each panel now gets its own label.
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.grid(linestyle='--', alpha=0.5)
        ax.legend(loc=legend_loc, fontsize=16)

    plt.tight_layout()
    plt.show()
    return axarr

<a id='toc'></a>
# Table of Contents
* [2. Convolutional Networks](#cnn)
    * [2.1 Convolution](#cnn_conv)
        * [2.1.1 Convolution: Naive forward pass](#cnn_conv_forw)
        * [2.1.2 Aside: Image processing via convolutions](#cnn_conv_aside)
        * [2.1.3 Convolution: Naive backward pass](#cnn_conv_back)
    * [2.2 Max pooling](#max_pooling)
        * [2.2.1 Max pooling: Naive forward](#cnn_max_pooling_forw)
        * [2.2.2 Max pooling: Naive backward](#cnn_max_pooling_back)
    * [2.3 Fast layers](#cnn_fast)
    * [2.4 Convolutional "sandwich" layers](#cnn_sand)
    * [2.5 Spatial Batch Normalization](#cnn_spatial)
        * [2.5.1 Spatial batch normalization: forward](#cnn_spatial_forw)
        * [2.5.2 Spatial batch normalization: backward](#cnn_spatial_back)
    * [2.6 Three Layer ConvNet](#cnn_three)
        * [2.6.1 Sanity check loss](#cnn_three_sanity)
        * [2.6.2 Gradient check](#cnn_three_grad)
        * [2.6.3 Overfit small data](#cnn_three_overfit)
        * [2.6.4 Train net](#cnn_three_train)
        * [2.6.5 Visualize filters](#cnn_three_visualize)

<a id='cnn'></a>
# 2. Convolutional Networks  [[toc](#toc)]
So far we have worked with deep fully-connected networks, using them to explore different optimization strategies and network architectures. Fully-connected networks are a good testbed for experimentation because they are very computationally efficient, but in practice all state-of-the-art results use convolutional networks instead.

First you will implement several layer types that are used in convolutional networks. You will then use these layers to train a convolutional network on the CIFAR-10 dataset.

<a id='cnn_conv'></a>
## 2.1 Convolution [[toc](#toc)]

In [8]:
# Build a Dense layer with 3 units, initialize it for a (10, 2) input and show what initialize() returns.
dense = Dense(3)
print(dense.initialize({'input_shape': (10, 2)}))
print(dense)
# Dump the parameters and their gradient buffers in the order: W, grad_W, b, grad_b.
for attr_name in ('W', 'grad_W', 'b', 'grad_b'):
    print(getattr(dense, attr_name))

# Run the project's gradient checker on the initialized layer.
grad_checker = GradientsChecker()
grad_checker.eval_gradients(dense)

{'seed': 1, 'grad_clip': inf, 'names': {'Dense': 1}, 'debug': False, 'dtype': <class 'numpy.float64'>, 'input_shape': (10, 3)}
Dense(2->3)
[[ 0.44112583 -0.3463194  -0.01227563]
 [ 0.71643002  0.50088484 -1.14158192]]
[[ 0.  0.  0.]
 [ 0.  0.  0.]]
[ 0.  0.  0.]
[ 0.  0.  0.]
grad_Dense0:W error = 3.612133055137527e-12
grad_Dense0:b error = 3.0713911555640427e-12
grad_X error = 3.545303923963912e-10


In [55]:
def find_1d_padding(array_size, filter_size, stride):
    """
    Searches for a (left, right) padding of a 1D array such that the filter tiles the padded
    array exactly with the given stride.

    Inputs:
    - array_size:   size of the array, must be positive
    - filter_size:  size of the filter, must be positive
    - stride:       stride of the convolution (cross correlation), must be positive

    Returns a tuple of:
    - (left_pad, right_pad): tuple of ints, total padding to put on each side of the array
    - output_size: int, equal to
            (array_size + left_pad + right_pad - filter_size) // stride + 1

    Raises AssertionError if an argument is not positive or if no padding is found within
    the searched range.
    """
    assert array_size > 0
    assert filter_size > 0
    assert stride > 0

    # An array shorter than the filter is first grown to exactly the filter size;
    # when the deficit is odd the extra cell goes to the left side.
    deficit = filter_size - array_size
    if deficit > 0:
        base_left = (deficit + 1) // 2
        base_right = deficit - base_left
        assert base_left >= base_right
        array_size = array_size + base_left + base_right
    else:
        base_left, base_right = 0, 0

    # Candidates are visited by increasing left padding, with the right padding never
    # exceeding the left one; the first candidate that passes the check wins.
    candidates = ((extra_left, extra_right)
                  for extra_left in range(filter_size)
                  for extra_right in range(extra_left + 1))
    for extra_left, extra_right in candidates:
        if not check_padding(array_size, filter_size, stride, (extra_left, extra_right)):
            continue
        covered = array_size + extra_left + extra_right - filter_size
        return (base_left + extra_left, base_right + extra_right), 1 + covered // stride
    assert False, 'Appropriate padding not found'


def find_2d_padding(image_size, filter_size, stride):
    """
    This function finds padding for 2D arrays by finding 1D padding along height and along
    width independently (see find_1d_padding).

    Inputs:
    - image_size:   tuple (H, W) of image height and width, or a single number used for both
    - filter_size:  tuple (HH, WW) of filter height and width, or a single number used for both
    - stride:       tuple (stride_h, stride_w), or a single number used for both

    Returns a tuple of:
    - (H_pad, W_pad): tuple of tuples
        - H_pad is a tuple (upp_pad, low_pad) for height padding
        - W_pad is a tuple (left_pad, right_pad) for width padding
    - (H_output, W_output): tuple of ints
        - H_output is the output height of convolution
        - W_output is the output width of convolution
    """
    # Imported locally: the notebook imports `numbers` only in a later cell, so on a fresh
    # kernel a module-level lookup here would raise NameError.
    import numbers

    def _as_pair(value):
        # A scalar applies to both dimensions; anything else is assumed to already be a pair.
        if isinstance(value, numbers.Number):
            return (value, value)
        return value

    input_h, input_w   = _as_pair(image_size)
    filter_h, filter_w = _as_pair(filter_size)
    stride_h, stride_w = _as_pair(stride)
    pad_h, output_h = find_1d_padding(input_h, filter_h, stride_h)   # (upp_pad, low_pad)
    pad_w, output_w = find_1d_padding(input_w, filter_w, stride_w)   # (left_pad, right_pad)
    return (pad_h, pad_w), (output_h, output_w)

def parse_padding(padding):
    """
    Normalizes a padding specification into a list of 2-tuples.

    Inputs:
    - padding: an int, a tuple of exactly 2 elements, or a list whose items are ints
        or tuples of exactly 2 elements

    Returns a list with one tuple per entry: an int `p` becomes `(p, p)`, a 2-tuple is
    kept unchanged. A bare int or a bare 2-tuple is treated as a one-entry list.

    Raises AssertionError for tuples whose length is not 2, for a top-level value that is
    not an int, tuple or list, and for list items of any other type.
    """
    def _as_pair(entry):
        # ints are expanded symmetrically, 2-tuples are passed through untouched
        if isinstance(entry, int):
            return (entry, entry)
        if isinstance(entry, tuple):
            assert len(entry) == 2
            return entry
        assert False, 'Unknown padding type "{}"'.format(type(entry).__name__)

    # Wrap a single specification so that the rest of the code only deals with lists.
    if isinstance(padding, (int, tuple)):
        if isinstance(padding, tuple):
            assert len(padding) == 2
        padding = [padding]
    assert isinstance(padding, list), 'Padding must be provided in a list.'
    return [_as_pair(entry) for entry in padding]
  
def check_padding(array_size, filter_size, stride, pad):
    """
    Tells whether a padding lets the filter cover the padded array exactly.

    Inputs:
    - array_size: int; array size
    - filter_size: int; size of filter
    - stride: int; stride of convolution (cross-correlation)
    - pad: int or tuple of 2 ints; an int is used for both the left and the right side,
        a tuple is read as (left_pad, right_pad). It is normalized with parse_padding and
        must describe exactly one dimension.

    Returns True if (array_size + left_pad + right_pad - filter_size) is divisible by
    stride, otherwise False.
    """
    pairs = parse_padding(pad)
    assert len(pairs) == 1
    pad_before, pad_after = pairs[0]
    # A non-zero remainder means the last filter position would overshoot the padded array.
    remainder = (array_size + pad_before + pad_after - filter_size) % stride
    return bool(remainder == 0)

In [57]:
# 1D case: array of 5, filter of 3, stride 2.
print(find_1d_padding(array_size=5, filter_size=3, stride=2))
# 2D case: 5x5 image, 2x2 filter, stride 2 along both dimensions.
print(find_2d_padding(image_size=(5, 5), filter_size=(2, 2), stride=(2, 2)))

((0, 0), 2)
(((1, 0), (1, 0)), (3, 3))


In [59]:
import numbers

class Convolution(Layer):
    """
    2D convolution layer (work in progress).

    Construction and the `_initialize_*` hooks (seed, input shape, weights, bias, padding and
    output shape) are implemented; `update_output`, `update_grad_input` and
    `update_grad_params` are still empty stubs.

    Inputs:
    - n_filters:    number of filters; becomes the channel dimension of the output shape
    - filter_size:  number or tuple (filter_h, filter_w); a number is used for both dimensions
    - stride:       number or tuple (stride_h, stride_w); a number is used for both dimensions
    - padding:      None or ((upp_pad, low_pad), (left_pad, right_pad)); when None it is found
                    with find_2d_padding during initialization
    - use_bias:     stored on the instance; no method of this class consults it yet
    - W_init:       passed to get_kernel_initializer as `init`
    - b_init:       passed to get_bias_initializer as `init`
    - W_reg, b_reg: stored on the instance; no method of this class uses them yet
    - name:         forwarded to Layer.__init__
    """
    def __init__(self, n_filters, filter_size=(3, 3), stride=(1, 1), padding=None, use_bias=True,
                 W_init=None, b_init=None, W_reg=None, b_reg=None, name=None):
        super().__init__(name=name)
        assert isinstance(n_filters, numbers.Number)
        self.n_filters = n_filters
        # Scalars are expanded to (height, width) pairs.
        if isinstance(filter_size, numbers.Number):
            filter_size = (filter_size, filter_size)
        if isinstance(stride, numbers.Number):
            stride = (stride, stride)
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        # Kept so the flag is not silently dropped; the bias is still always created.
        self.use_bias = use_bias
        self.W_init = W_init
        self.b_init = b_init
        self.W_reg = W_reg
        self.b_reg = b_reg

    def _initialize_seed(self, params):
        """Creates the layer's random generator from params['seed'] (0 if absent) and increments the seed in params."""
        self.seed = params.setdefault('seed', 0)
        self.generator = np.random.RandomState(self.seed)
        params['seed'] += 1
        return params

    def _initialize_input_shape(self, params):
        """Stores params['input_shape'], which must have exactly 4 entries (n_samples, n_channels, input_h, input_w)."""
        assert 'input_shape' in params
        input_shape = params['input_shape']
        assert len(input_shape) == 4
        self.input_shape = input_shape
        # Returned for consistency with the other _initialize_* hooks, which all return params.
        return params

    def _initialize_params(self, params):
        """Initializes weights, bias and padding, in that order."""
        # These are bound methods: `self` must not be passed a second time.
        self._initialize_W(params)
        self._initialize_b(params)
        # This is a hack: padding is resolved here because _initialize_output_shape reads self.padding.
        self._initialize_padding(params)
        return params

    def _initialize_W(self, params):
        """Creates self.W with shape (n_filters, n_channels, filter_h, filter_w)."""
        n_samples, n_channels, input_h, input_w = self.input_shape
        filter_h, filter_w = self.filter_size
        W_shape = (self.n_filters, n_channels, filter_h, filter_w)
        # NOTE(review): self.dtype is not set in this class -- presumably provided by Layer; confirm.
        self.W_initializer = get_kernel_initializer(init=self.W_init, generator=self.generator, dtype=self.dtype)
        self.W = self.W_initializer(W_shape)
        return params

    def _initialize_b(self, params):
        """Creates self.b with shape (n_filters,)."""
        self.b_initializer = get_bias_initializer(init=self.b_init, dtype=self.dtype)
        self.b = self.b_initializer((self.n_filters,))
        return params

    def _initialize_padding(self, params):
        """Finds the padding when none was given and asserts that it fits the stride in both dimensions."""
        _, _, input_h, input_w = self.input_shape
        stride_h, stride_w = self.stride
        filter_h, filter_w = self.filter_size
        if self.padding is None:
            self.padding, _ = find_2d_padding((input_h, input_w), self.filter_size, self.stride)
        (upp_pad, low_pad), (left_pad, right_pad) = self.padding
        assert (input_h + upp_pad + low_pad - filter_h) % stride_h == 0
        assert (input_w + left_pad + right_pad - filter_w) % stride_w == 0
        return params

    def _initialize_output_shape(self, params):
        """Sets self.output_shape to (n_samples, n_filters, output_h, output_w)."""
        n_samples, n_channels, input_h, input_w = self.input_shape
        stride_h, stride_w = self.stride
        filter_h, filter_w = self.filter_size
        (upp_pad, low_pad), (left_pad, right_pad) = self.padding
        output_h = (input_h + upp_pad + low_pad - filter_h) // stride_h + 1
        output_w = (input_w + left_pad + right_pad - filter_w) // stride_w + 1
        self.output_shape = (n_samples, self.n_filters, output_h, output_w)
        return params

    # TODO: the forward and backward passes below are not implemented yet.
    def update_output(self, input):
        pass

    def update_grad_input(self, input, grad_output):
        pass

    def update_grad_params(self, input, grad_output):
        pass

NameError: name 'Layer' is not defined

<a id='cnn_conv_forw'></a>
### 2.1.1 Convolution: Naive forward pass  [[toc](#toc)]
The core of a convolutional network is the convolution operation. In the file `ml/neural_network/cs231n/second/conv_layers_naive.py`, implement the forward pass for the convolution layer in the function `conv_forward_naive`. 

You don't have to worry too much about efficiency at this point; just write the code in whatever way you find most clear.

You can test your implementation by running the following:

In [None]:
# Deterministic fixture: evenly spaced values reshaped to the input and filter shapes,
# plus one bias value per filter.
x_shape = (2, 3, 4, 4)
w_shape = (3, 3, 4, 4)
x = np.linspace(-0.1, 0.5, num=np.prod(x_shape)).reshape(x_shape)
w = np.linspace(-0.2, 0.3, num=np.prod(w_shape)).reshape(w_shape)
b = np.linspace(-0.1, 0.2, num=3)

conv_param = {'stride': 2, 'pad': 1}
# NOTE(review): the result of this conv_forward(..., method='2d') call is overwritten by the next
# line, so only conv_forward_naive is compared against correct_out below -- confirm this is intended.
out, _ = conv_forward(x, w, b, conv_param, method='2d')
out, _ = conv_forward_naive(x, w, b, conv_param)
# Reference output for the fixture above.
correct_out = np.array([[[[-0.08759809, -0.10987781],
                           [-0.18387192, -0.2109216 ]],
                          [[ 0.21027089,  0.21661097],
                           [ 0.22847626,  0.23004637]],
                          [[ 0.50813986,  0.54309974],
                           [ 0.64082444,  0.67101435]]],
                         [[[-0.98053589, -1.03143541],
                           [-1.19128892, -1.24695841]],
                          [[ 0.69108355,  0.66880383],
                           [ 0.59480972,  0.56776003]],
                          [[ 2.36270298,  2.36904306],
                           [ 2.38090835,  2.38247847]]]])

# Compare your output to ours; difference should be around 2e-8
print('Testing conv_forward_naive')
print('difference: ', rel_error(out, correct_out))

<a id='cnn_conv_aside'></a>
### 2.1.2 Aside: Image processing via convolutions [[toc](#toc)]
As a fun way to both check your implementation and gain a better understanding of the type of operation that convolutional layers can perform, we will set up an input containing two images and manually set up filters that perform common image processing operations (grayscale conversion and edge detection). The convolution forward pass will apply these operations to each of the input images. We can then visualize the results as a sanity check. 

In [None]:
# scipy.misc.imread / imresize are no longer available in recent SciPy releases. Keep using
# them where they exist, otherwise fall back to stand-ins with the same names.
try:
    from scipy.misc import imread, imresize
except ImportError:
    from scipy import ndimage

    # NOTE(review): reading JPEG files through matplotlib presumably relies on Pillow -- confirm.
    imread = plt.imread

    def imresize(image, size):
        """Fallback resize of an H x W (x C) array to size=(new_h, new_w) with linear interpolation."""
        zoom_factors = [size[0] / image.shape[0], size[1] / image.shape[1]]
        zoom_factors += [1] * (image.ndim - 2)   # leave any trailing (channel) axes untouched
        return ndimage.zoom(image, zoom_factors, order=1)

kitten, puppy = imread('kitten.jpg'), imread('puppy.jpg')
# kitten is wide, and puppy is already square
d = kitten.shape[1] - kitten.shape[0]
# Center crop to a square. Slicing by explicit start/stop (instead of `d//2:-d//2`) also
# works when d == 0, where the old slice `0:0` produced an empty image.
crop_left = max(d, 0) // 2
kitten_cropped = kitten[:, crop_left:crop_left + kitten.shape[0], :]

img_size = 200   # Make this smaller if it runs too slow
# Both images are resized to img_size x img_size and moved to channels-first layout.
x = np.zeros((2, 3, img_size, img_size))
x[0, :, :, :] = imresize(puppy, (img_size, img_size)).transpose((2, 0, 1))
x[1, :, :, :] = imresize(kitten_cropped, (img_size, img_size)).transpose((2, 0, 1))

# Set up a convolutional weights holding 2 filters, each 3x3
w = np.zeros((2, 3, 3, 3))

# The first filter converts the image to grayscale.
# Set up the red, green, and blue channels of the filter.
w[0, 0, :, :] = [[0, 0, 0], [0, 0.3, 0], [0, 0, 0]]
w[0, 1, :, :] = [[0, 0, 0], [0, 0.6, 0], [0, 0, 0]]
w[0, 2, :, :] = [[0, 0, 0], [0, 0.1, 0], [0, 0, 0]]

# Second filter detects horizontal edges in the blue channel.
w[1, 2, :, :] = [[1, 2, 1], [0, 0, 0], [-1, -2, -1]]

# Vector of biases. We don't need any bias for the grayscale
# filter, but for the edge detection filter we want to add 128
# to each output so that nothing is negative.
b = np.array([0, 128])

# Compute the result of convolving each input in x with each filter in w,
# offsetting by b, and storing the results in out.
out, _ = conv_forward_naive(x, w, b, {'stride': 1, 'pad': 1})

def imshow_noax(img, normalize=True):
    """
    Tiny helper to show images as uint8 and remove axis labels.

    Inputs:
    - img:        array-like image; it is cast to uint8 before being shown
    - normalize:  if True the values are linearly rescaled so that min -> 0 and max -> 255;
                  an image with a single constant value is shown as all zeros
    """
    if normalize:
        img_max, img_min = np.max(img), np.min(img)
        value_range = img_max - img_min
        if value_range == 0:
            # Constant image: rescaling would compute 0/0 and produce NaNs before the uint8 cast.
            img = np.zeros_like(img, dtype=float)
        else:
            img = 255.0 * (img - img_min) / value_range
    plt.imshow(img.astype('uint8'))
    plt.gca().axis('off')

# Show the original images and the results of the conv operation.
# One row per input image: original (shown as is), grayscale output, edge output (both normalized).
# Only the first row carries titles.
panels = [
    (puppy,          False, 'Original image'),
    (out[0, 0],      True,  'Grayscale'),
    (out[0, 1],      True,  'Edges'),
    (kitten_cropped, False, None),
    (out[1, 0],      True,  None),
    (out[1, 1],      True,  None),
]
for position, (panel_img, panel_normalize, panel_title) in enumerate(panels, start=1):
    plt.subplot(2, 3, position)
    imshow_noax(panel_img, normalize=panel_normalize)
    if panel_title is not None:
        plt.title(panel_title)
plt.show()

In [None]:
<a id='cnn_conv_back'></a>
### 2.1.3 Convolution: Naive backward pass [[toc](#toc)]
Implement the backward pass for the convolution operation in the function `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need to worry too much about computational efficiency.

When you are done, run the following to check your backward pass with a numeric gradient check.

In [None]:
# Seed the global RNG so the random fixture below is the same on every run.
np.random.seed(231)
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
# Upstream gradient; it is passed to both the numerical and the analytic backward pass below.
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

# Numerical gradients: each lambda varies one of x / w / b (its argument shadows the global of
# the same name) while the other two are taken from the enclosing scope.
dx_num = eval_numerical_gradient_array(lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

# Analytic gradients from the implementation under test.
out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-8
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))