In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

<b>Загрузка и предобработка исходных данных</b>

In [None]:
# Load the MNIST train and test splits from CSV files (paths are relative to the notebook location).
data = pd.read_csv("../../Практические материалы/Lab 1. Linear and logistic regression/mnist/mnist_train.csv")
data_test = pd.read_csv("../../Практические материалы/Lab 1. Linear and logistic regression/mnist/mnist_test.csv")

In [None]:
# Preview the first rows of the training frame.
data.head()

In [None]:
# Keep every column except the first one as the feature matrix
# (presumably column 0 is the `label` column used for the targets below - TODO confirm).
X = data.iloc[:,1:].values
X_test = data_test.iloc[:,1:].values

In [None]:
# Standardize with a single scalar mean/std taken over all training values;
# the test set is scaled with the training statistics.
mean, std = X.mean(), X.std()

X, X_test = ((features - mean) / std for features in (X, X_test))

In [None]:
m = len(X)
n = 10

# One-hot encode the labels: row i gets a 1 in the column given by its label.
# A single fancy-indexed assignment replaces the per-row Python loop.
Y = np.zeros([m,n])
Y[np.arange(m),data.label.values[:m]] = 1

In [None]:
# Display the one-hot label matrix.
Y

In [None]:
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + e^(-z))."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
def tanh(z):
    """Element-wise hyperbolic tangent.

    Delegates to ``np.tanh``: the explicit (e^z - e^-z)/(e^z + e^-z) formula
    overflows to inf/inf = NaN for large |z|, whereas ``np.tanh`` saturates
    at +/-1.
    """
    return np.tanh(z)

In [None]:
def relu(z):
    """Element-wise rectified linear unit: max(0, z).

    Uses ``np.maximum`` (element-wise maximum of two operands); ``np.max(0, z)``
    would interpret ``z`` as the ``axis`` argument of a reduction.
    """
    return np.maximum(0, z)

In [None]:
def softmax(z):
    """Row-wise softmax of a 2-D array of logits.

    The row maximum is subtracted before exponentiation. This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing
    (which would produce inf/inf = NaN) for large logits.
    """
    z = np.asarray(z)
    shifted = z - np.max(z, axis = 1, keepdims = True)
    exps = np.exp(shifted)
    return exps/np.sum(exps, axis = 1, keepdims = True)

In [None]:
def conv(z, filters, padding = 1):
    """Stride-1 cross-correlation of a batch of feature maps with a filter bank.

    Parameters
    ----------
    z : array indexed as [row, col, channel, sample].
    filters : array indexed as [row, col, channel, filter].
    padding : number of zero rows/columns added on every side of each map.

    Returns
    -------
    Array indexed as [row, col, filter, sample] with
    rows = height + 2*padding - (filter_height - 1) and
    cols = width + 2*padding - (filter_width - 1).
    """
    height = z.shape[0]
    width = z.shape[1]
    depth = z.shape[2]
    samples = z.shape[3]
    n = filters.shape[0]
    m = filters.shape[1]
    l = filters.shape[2]
    n_filters = filters.shape[3]

    if padding > 0:
        z_pad = np.zeros([height+padding*2,width+padding*2,depth,samples])
        z_pad[padding:-padding,padding:-padding,:,:] = z[:,:,:,:]
    else:
        z_pad = z

    output_h = height + padding * 2 - (n - 1)
    # The output width depends on the filter WIDTH (m), not its height (n).
    output_w = width + padding * 2 - (m - 1)
    result = np.zeros([output_h,output_w,n_filters,samples])

    for s in range(samples):
        for k in range(n_filters):
            for i in range(output_h):
                for j in range(output_w):
                    result[i,j,k,s] = np.sum(z_pad[i:i+n,j:j+m,0:l,s]*filters[:,:,:,k])

    return result

In [None]:
def max_pool(z, pool_size):
    """Non-overlapping max pooling over the first two axes.

    Parameters
    ----------
    z : array indexed as [row, col, channel, sample].
    pool_size : side of the square pooling window (also the stride).

    Returns
    -------
    Array indexed as [row, col, channel, sample] whose first two dimensions
    are ceil(height / pool_size) and ceil(width / pool_size).

    When a dimension is not a multiple of ``pool_size`` the input is padded
    at the bottom/right. The padding is ``-inf`` rather than 0 so that it can
    never be selected as the maximum of a window with negative activations.
    """
    height = z.shape[0]
    width = z.shape[1]
    depth = z.shape[2]
    samples = z.shape[3]

    # Rows/columns needed to reach the next multiple of pool_size.
    h2 = (pool_size - height%pool_size)%pool_size
    w2 = (pool_size - width%pool_size)%pool_size

    z_pad = np.full([height+h2,width+w2,depth,samples], -np.inf)
    z_pad[:height,:width,:,:] = z

    result = np.zeros([(height+h2)//pool_size,(width+w2)//pool_size,depth,samples])

    for s in range(samples):
        for k in range(depth):
            for i in range(0,height+h2,pool_size):
                for j in range(0,width+w2,pool_size):
                    tmp = z_pad[i:i+pool_size,j:j+pool_size,k,s]
                    result[i//pool_size,j//pool_size,k,s] = np.max(tmp)

    return result

In [None]:
def avg_pool(z, pool_size):
    """Non-overlapping average pooling over the first two axes.

    ``z`` is indexed as [row, col, channel, sample]. If a spatial dimension is
    not a multiple of ``pool_size`` the input is zero-padded at the
    bottom/right, and those zeros take part in the window mean.
    """
    rows, cols, channels, batch = z.shape[0], z.shape[1], z.shape[2], z.shape[3]

    # Amount of padding needed to reach the next multiple of pool_size.
    extra_rows = -rows % pool_size
    extra_cols = -cols % pool_size

    padded = np.zeros([rows + extra_rows, cols + extra_cols, channels, batch])
    padded[:rows, :cols, :, :] = z[:, :, :, :]

    out_rows = (rows + extra_rows) // pool_size
    out_cols = (cols + extra_cols) // pool_size
    pooled = np.zeros([out_rows, out_cols, channels, batch])

    for sample, channel, r, c in np.ndindex(batch, channels, out_rows, out_cols):
        top = r * pool_size
        left = c * pool_size
        window = padded[top:top + pool_size, left:left + pool_size, channel, sample]
        pooled[r, c, channel, sample] = np.mean(window)

    return pooled

In [None]:
def flatten(z):
    """Turn a [row, col, channel, sample] array into a 2-D float array.

    Row ``s`` of the result holds ``z[:, :, :, s]`` flattened in C order.
    """
    n_samples = z.shape[3]
    n_features = z.shape[0] * z.shape[1] * z.shape[2]

    # Bring the sample axis to the front, then collapse the remaining axes.
    samples_first = np.transpose(z, (3, 0, 1, 2))
    return np.array(samples_first.reshape(n_samples, n_features), dtype=np.float64)

In [None]:
def d_conv(filters,f_prev,sigma,padding = 1):
    """Backward pass of ``conv``.

    Parameters
    ----------
    filters : filter bank indexed as [row, col, channel, filter].
    f_prev : input that was fed to the convolution, [row, col, channel, sample].
    sigma : gradient w.r.t. the convolution output, [row, col, filter, sample].
    padding : zero padding that was used in the forward pass.

    Returns
    -------
    (d_filters, sigma_new): gradient w.r.t. ``filters`` (summed over samples
    and positions) and gradient w.r.t. ``f_prev`` (padding border removed).
    """
    out_rows, out_cols = sigma.shape[0], sigma.shape[1]
    n_samples = sigma.shape[3]
    k_rows, k_cols, k_depth = filters.shape[0], filters.shape[1], filters.shape[2]
    n_kernels = filters.shape[3]

    grad_filters = np.zeros_like(filters)

    padded_shape = [
        f_prev.shape[0] + padding * 2,
        f_prev.shape[1] + padding * 2,
        f_prev.shape[2],
        f_prev.shape[3],
    ]
    padded_input = np.zeros(padded_shape)
    if padding > 0:
        padded_input[padding:-padding, padding:-padding, :, :] = f_prev[:, :, :, :]
    else:
        padded_input = f_prev

    grad_padded = np.zeros_like(padded_input)

    # Every output position contributes to the filter gradient (input patch * delta)
    # and scatters the filter, scaled by delta, back onto the input patch.
    for sample, kernel, row, col in np.ndindex(n_samples, n_kernels, out_rows, out_cols):
        delta = sigma[row, col, kernel, sample]
        patch_rows = slice(row, row + k_rows)
        patch_cols = slice(col, col + k_cols)
        grad_filters[:, :, :, kernel] += padded_input[patch_rows, patch_cols, 0:k_depth, sample] * delta
        grad_padded[patch_rows, patch_cols, 0:k_depth, sample] += filters[:, :, :, kernel] * delta

    if padding > 0:
        grad_padded = grad_padded[padding:-padding, padding:-padding, :, :]

    return grad_filters, grad_padded

In [None]:
def d_flat(z,size):
    """Inverse of ``flatten``: reshape each row of ``z`` back into a volume.

    ``size`` is the target shape [row, col, channel, sample]; row ``s`` of
    ``z`` becomes ``result[:, :, :, s]``.
    """
    n_rows, n_cols, n_channels = size[0], size[1], size[2]
    n_samples = size[3]

    volume = np.zeros(size)
    for sample in range(n_samples):
        volume[..., sample] = z[sample, :].reshape([n_rows, n_cols, n_channels])

    return volume

In [None]:
def d_max_pool(sigma,f_prev,pool_size):
    """Backward pass of ``max_pool``.

    Parameters
    ----------
    sigma : gradient w.r.t. the pooled output, [row, col, channel, sample].
    f_prev : input that was fed to the pooling layer, same axis order.
    pool_size : side of the square pooling window.

    Returns
    -------
    Array of shape (sigma rows * pool_size, sigma cols * pool_size, channels,
    samples) that is zero everywhere except at the arg-max position of each
    window, which receives the corresponding entry of ``sigma``.
    """
    result = np.zeros([sigma.shape[0]*pool_size,sigma.shape[1]*pool_size,sigma.shape[2],sigma.shape[3]])

    for s in range(sigma.shape[3]):
        for k in range(sigma.shape[2]):
            for i in range(0,sigma.shape[0]*pool_size,pool_size):
                for j in range(0,sigma.shape[1]*pool_size,pool_size):
                    tmp = f_prev[i:i+pool_size,j:j+pool_size,k,s]
                    # The window is narrower than pool_size at the right/bottom
                    # edge when f_prev is not a multiple of pool_size, so the
                    # flat arg-max must be unravelled with the window's real shape.
                    row_off, col_off = np.unravel_index(np.argmax(tmp), tmp.shape)

                    result[i+row_off,j+col_off,k,s] = sigma[i//pool_size,j//pool_size,k,s]

    return result

In [None]:
def d_avg_pool(sigma,f_prev,pool_size):
    """Backward pass of ``avg_pool``.

    Each entry of ``sigma`` ([row, col, channel, sample]) is spread uniformly
    over its ``pool_size`` x ``pool_size`` window, i.e. copied to every cell
    of the window and divided by ``pool_size**2``. ``f_prev`` is accepted for
    signature symmetry with ``d_max_pool`` and is not used.
    """
    # Repeat along rows, then along columns, to upsample by pool_size.
    stretched_rows = np.repeat(sigma, pool_size, axis=0)
    upsampled = np.repeat(stretched_rows, pool_size, axis=1)

    return upsampled.astype(np.float64)/pool_size**2

In [None]:
def d_softmax(z,sigma):
    """Backward pass of ``softmax``.

    ``z`` holds softmax outputs, one sample per row; ``sigma`` is the gradient
    w.r.t. those outputs. For every row the softmax Jacobian
    J[i, j] = z_i * (1 - z_j) if i == j else -z_i * z_j is built and the
    row of ``sigma`` is multiplied by it.
    """
    n_samples, n_classes = z.shape[0], z.shape[1]

    grad = np.zeros_like(sigma)
    for row in range(n_samples):
        probs = z[row, :]

        jacobian = np.zeros([n_classes, n_classes])
        jacobian[:, :] = -np.outer(probs, probs)
        np.fill_diagonal(jacobian, probs * (1 - probs))

        grad[row, :] = np.matmul(sigma[row:row+1, :], jacobian).flatten()

    return grad

In [None]:
def forward(X,filters,W,B,pool_size):
    """Forward pass of the network; returns every intermediate activation.

    Layer order: conv(padding=2) -> tanh -> avg_pool -> conv -> tanh ->
    avg_pool -> conv -> tanh -> flatten -> dense+tanh -> dense+softmax.
    The returned tuple keeps that order, so index 10 (or -1) is the softmax output.
    """
    conv1 = conv(X, filters[0], padding=2)
    act1 = tanh(conv1)
    pool1 = avg_pool(act1, pool_size)

    conv2 = conv(pool1, filters[1], padding=0)
    act2 = tanh(conv2)
    pool2 = avg_pool(act2, pool_size)

    conv3 = conv(pool2, filters[2], padding=0)
    act3 = tanh(conv3)
    flat = flatten(act3)

    hidden = tanh(np.matmul(flat, W[0]) + B[0])
    probs = softmax(np.matmul(hidden, W[1]) + B[1])

    return (conv1, act1, pool1, conv2, act2, pool2, conv3, act3, flat, hidden, probs)

In [None]:
def backprop(X,F,W,Y,filters,pool_size):
    """Backward pass matching ``forward``.

    Parameters
    ----------
    X : input batch that was passed to ``forward``.
    F : tuple of activations returned by ``forward`` (F[10] is the softmax output).
    W : list of the two dense weight matrices.
    Y : targets with the same shape as F[10].
    filters : list of the three convolution filter banks.
    pool_size : pooling window used in ``forward``.

    Returns
    -------
    Tuple (dW2, dB2, dW1, dB1, d_filters3, d_filters2, d_filters1).
    """
    # Derivative of -(Y*log(F) + (1-Y)*log(1-F)) with respect to the output F[10].
    sigma = (F[10] - Y)/(F[10]*(1-F[10]))
    # Propagate through the softmax Jacobian.
    sigma = d_softmax(F[10],sigma)
    
    # Output dense layer: its input was F[9].
    dW2 = np.matmul(F[9].T,sigma)
    dB2 = np.sum(sigma, axis = 0)
    
    # Back through W[1], then through the tanh that produced F[9] (tanh' = 1 - tanh^2).
    sigma = np.matmul(sigma,W[1].T)
    sigma = sigma*(1-F[9]*F[9])
    
    # Hidden dense layer: its input was the flattened features F[8].
    dW1 = np.matmul(F[8].T,sigma)
    dB1 = np.sum(sigma, axis = 0)
    
    # Back through W[0], undo the flatten, then through the tanh that produced F[7].
    sigma = np.matmul(sigma,W[0].T)
    sigma = d_flat(sigma,F[7].shape)
    sigma = sigma*(1-F[7]*F[7])
    
    # Third convolution (input F[5]), then the second average pooling.
    d_filters3,sigma = d_conv(filters[2],F[5],sigma,padding=0)
    sigma = d_avg_pool(sigma,F[4],pool_size)
    
    # tanh that produced F[4].
    sigma = sigma*(1-F[4]*F[4])
    
    # Second convolution (input F[2]), then the first average pooling.
    d_filters2,sigma = d_conv(filters[1],F[2],sigma,padding=0)
    sigma = d_avg_pool(sigma,F[1],pool_size) 
    
    # tanh that produced F[1], then the first convolution (input X);
    # the gradient with respect to X is computed by d_conv but not returned.
    sigma = sigma*(1-F[1]*F[1])
    d_filters1,sigma = d_conv(filters[0],X,sigma,padding=2)
    
    return (dW2,dB2,dW1,dB1,d_filters3,d_filters2,d_filters1)    

In [None]:
# Randomly initialised parameters: three 5x5 filter banks and two dense layers.
filters1 = np.random.randn(5,5,1,6)
filters2 = np.random.randn(5,5,6,16)
filters3 = np.random.randn(5,5,16,120)

filters = [filters1,filters2,filters3]

W1 = np.random.randn(120,84)
B1 = np.random.randn(1,84)
W2 = np.random.randn(84,10)
B2 = np.random.randn(1,10)

W = [W1,W2]
B = [B1,B2]

pool_size = 2

batch_size = 100

m = len(X)

alpha = 0.00001

# One pass over the training set with plain mini-batch gradient descent.
for i in range(0,m,batch_size):
    # Rows i..i+batch_size as 28x28 images laid out [row, col, channel, sample];
    # slicing also copes with a shorter final batch.
    X_batch = X[i:i+batch_size,:].reshape([-1,28,28]).transpose(1,2,0)[:,:,np.newaxis,:]

    Y_batch = Y[i:i+batch_size,:]
    F = forward(X_batch,filters,W,B,pool_size)
    J = -np.sum(Y_batch*np.log(F[-1])+(1-Y_batch)*np.log(1-F[-1]))
    print(J)
    grads = backprop(X_batch,F,W,Y_batch,filters,pool_size)

    # Update IN PLACE. forward/backprop read the parameters through the lists
    # W, B and filters, which hold references to these arrays; rebinding the
    # names (W2 = W2 - ...) would leave the lists pointing at the initial,
    # never-updated arrays.
    W2 -= alpha*grads[0]
    B2 -= alpha*grads[1]
    W1 -= alpha*grads[2]
    B1 -= alpha*grads[3]
    filters3 -= alpha*grads[4]
    filters2 -= alpha*grads[5]
    filters1 -= alpha*grads[6]

In [None]:
# Analytic gradients on the last mini-batch. backprop needs the 4-D batch
# tensor and targets that match F, so use X_batch / Y_batch (the raw 2-D X and
# the full Y do not match the batch-sized activations). F is recomputed so the
# activations correspond to the current parameters.
F = forward(X_batch,filters,W,B,pool_size)
grads = backprop(X_batch,F,W,Y_batch,filters,pool_size)

In [None]:
# Finite-difference check of d(loss)/d(filters[0][3,3,0,0]) on the last mini-batch.
# The perturbation is applied through the `filters` list because that is the
# object forward() actually reads.
eps = 0.0000001
F = forward(X_batch,filters,W,B,pool_size)
J1 = -np.sum(Y_batch*np.log(F[-1])+(1-Y_batch)*np.log(1-F[-1]))
filters[0][3,3,0,0] += eps
F = forward(X_batch,filters,W,B,pool_size)
J2 = -np.sum(Y_batch*np.log(F[-1])+(1-Y_batch)*np.log(1-F[-1]))
filters[0][3,3,0,0] -= eps
print((J2-J1)/eps)

In [None]:
# Analytic gradient for the same first-layer filter entry (grads[6] is d_filters1),
# to compare with the finite-difference estimate.
grads[6][3,3,0,0]

In [None]:
# Check the dimensions of the test feature matrix.
X_test.shape

In [None]:
# First 100 test rows as 28x28 images laid out [row, col, channel, sample].
X_tst = np.zeros([28,28,1,100])
for i, pixels in enumerate(X_test[:100]):
    X_tst[:,:,0,i] = pixels.reshape(28,28)

In [None]:
# Forward pass of the hand-written network on the test batch.
F = forward(X_tst,filters,W,B,pool_size)

In [None]:
# Predicted class = column index of the largest output in each row of the last activation.
pred = np.argmax(F[-1],axis=1)

In [None]:
from keras import Sequential
from keras.layers import Conv2D, Flatten, AveragePooling2D, Dense
from keras.optimizers import Adam, SGD

In [None]:
# Keras counterpart of the hand-written network: three tanh convolutions with
# average pooling after the first two, followed by two dense layers.
model = Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), activation='tanh', padding='same', input_shape=(28,28,1)),
    AveragePooling2D(),
    Conv2D(filters=16, kernel_size=(5, 5), activation='tanh', padding='valid'),
    AveragePooling2D(),
    Conv2D(filters=120, kernel_size=(5, 5), activation='tanh', padding='valid'),
    Flatten(),
    Dense(units=84, activation='tanh'),
    Dense(units=10, activation='softmax'),
])

opt = Adam()

model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['categorical_crossentropy','accuracy'],
)

model.summary()

In [None]:
# Reshape every training row into a 28x28x1 image (Keras channels-last layout).
# The sample count is taken from X instead of a hard-coded 60000, and a single
# reshape replaces the per-row copy loop; astype returns an independent float copy.
X_train = X.reshape([-1,28,28,1]).astype(np.float64)

In [None]:
# Train the Keras model on the reshaped training images and one-hot targets,
# with batch_size = 1 for 10 epochs.
model.fit(X_train, Y, batch_size = 1, epochs=10)

In [None]:
Доделать производные других слоев

Обучить 1 итерацию и сравнить ошибку с Keras для разных размеров батча

batch = 1

Сделать batch_normalization

Сделать dropout