# Stacking and back propagation on Mnist with multiple sigmoids

### import data

In [1]:
from sklearn.datasets import fetch_mldata
import numpy as np
from sklearn.utils import shuffle
mnist = fetch_mldata('MNIST original', data_home=".")
mnist.data, mnist.target = shuffle(mnist.data, mnist.target)
X = mnist.data[0:5000]
s = np.ones(X.shape[0])
X_1 = np.hstack((X, s.reshape(len(s),1)))
y_float = mnist.target[0:5000]
y_int = y_float.astype(np.int64)
y = np.eye(10)[y_int]
N = X.shape[1]

### define function

In [2]:
def linear_output_z1(x, w1):
    z1 = np.dot(x, w1)
    return z1

def logistic_output_z2(x, w2):
    z2 = sigmoid(np.dot(x, w2))
    return z2

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

### define gradient

In [3]:
def logistic_output(x, z1, z2, theta1, theta2, theta3):
    zf = (z1 * theta1) + (z2 * theta2) + np.array(np.dot(x, theta3))
    return zf

def gradient_c_theta1(z1, zf, y):
    gradient = np.matmul(np.transpose(z1), (sigmoid(zf)-y)) / N
    return gradient

def gradient_c_theta2(z2, zf, y):
    gradient = np.matmul(np.transpose(z2), (sigmoid(zf)-y)) / N
    return gradient

def gradient_c_theta3(x, zf, y):
    gradient = np.matmul(np.transpose(x), (sigmoid(zf)-y)) / N
    return gradient

def gradient_c_w1(x, theta1, zf, y):
    gradient = np.matmul(np.transpose(x), np.transpose(np.matmul(theta1, np.transpose(np.array((sigmoid(zf) - y)))))) / N
    return gradient

def gradient_c_w2(x, w2, theta2, zf, y):
    elemsig = sigmoid(np.dot(x, w2)) # [5000, 1]
    elem1=np.transpose(np.array(sigmoid(zf) - y)) # [10,5000]
    elem2=np.matmul(theta2, elem1) # [1, 5000]
    elem3=np.matmul(np.transpose(elemsig), np.transpose(elem2)) # [1,1]
    elem4 = (elemsig* (1 - elem3)) # [5000,1]
    gradient= np.matmul(np.transpose(x), elem4)/ N # [785,1]

    return gradient


### define gradient descent 

In [74]:
def gradient_descent(x, y, int_w1, int_w2, int_theta1, int_theta2, int_theta3, lowtrain, uptrain, iterations):

    place_w1=int_w1
    place_w2=int_w2
    place_theta1=int_theta1
    place_theta2=int_theta2
    place_theta3=int_theta3

    for i in range(iterations):
        trainrate=np.random.uniform(lowtrain,uptrain,1)
        z1 = linear_output_z1(x, place_w1)
        z2 = logistic_output_z2(x, place_w2)
        z2=np.dot(x,place_w2)
        zf = logistic_output(x, z1, z2, place_theta1, place_theta2, place_theta3)
        p = sigmoid(zf)

        gradient_theta1 = gradient_c_theta1(z1, zf, y)
        gradient_theta2 = gradient_c_theta2(z2, zf, y)
        gradient_theta3 = gradient_c_theta3(x, zf, y)
        gradient_w1 = gradient_c_w1(x, place_theta1, zf, y)
        gradient_w2 = gradient_c_w2(x, place_w2, place_theta2, zf, y)
        
            
        place_w1 = place_w1 - (trainrate * gradient_w1)
        place_w2 = place_w2 - (trainrate * gradient_w2)
        place_theta1 = place_theta1 - trainrate*gradient_theta1
        place_theta2 = place_theta2 - trainrate*gradient_theta2
        place_theta3 = place_theta3 - trainrate*gradient_theta3

    
    return  p, place_theta1, place_theta2, place_theta3, place_w1, place_w2


### initialization

In [127]:
N_linear = 1
M_logistic = 1

# int_w1, int_w2 = np.random.uniform(-1,1,(X.shape[1]+1,N_linear)), np.random.uniform(-1,1,(X.shape[1]+1, M_logistic))
# int_theta1, int_theta2, int_theta3 = np.random.uniform(-1,1,(N_linear,10)), np.random.uniform(-1,1,(M_logistic,10)),np.random.uniform(-1,1,(X.shape[1] + 1,10))
int_w1 = result[4]
int_w2 = result[5]
int_theta1 = result[1]
int_theta2 = result[2]
int_theta3 = result[3]

result = gradient_descent(X_1, y, int_w1, int_w2, int_theta1, int_theta2, int_theta3, 0.00000001, 0.0000001, 1000)

  # Remove the CWD from sys.path while we load stuff.


In [137]:
result[1]

array([[ 0.05423276,  0.09086229, -0.30802961, -0.44445979, -0.41052024,
        -0.04401048, -0.32951466,  0.48090006, -0.42377873, -0.77507225]])

In [138]:
result[2]

array([[-10.33797128,  -6.89970327,  -4.72162053, -10.55489808,
         -3.32143013, -10.84287175,  -5.27814836, -10.02382195,
         -4.22391811,  -2.68999257]])

In [139]:
result[3]

array([[ 0.53169173,  0.19318105,  0.67536041, ..., -0.37086208,
         0.16278701,  0.30953829],
       [ 0.56601556, -0.35782477, -0.83012092, ...,  0.98235626,
        -0.81739907, -0.78477274],
       [-0.02743481, -0.84095702,  0.46333047, ..., -0.76175268,
         0.40764109,  0.93778363],
       ..., 
       [ 0.67137672,  0.92356931,  0.18667619, ...,  0.42905763,
         0.80128187, -0.17947101],
       [-0.83029191,  0.16242591,  0.4616667 , ...,  0.75930326,
        -0.84364951,  0.48989206],
       [-0.83288963, -0.35011157,  0.28178261, ..., -0.9259662 ,
         0.87142509, -0.71700001]])

In [120]:
result[1]

array([[ 0.08068534,  0.07487679, -0.31806324, -0.44227333, -0.41058022,
        -0.03343121, -0.31973432,  0.47069851, -0.44189256, -0.77063395]])

In [121]:
result[2]

array([[ 0.06547867, -3.57284835, -0.77638974, -0.52041   ,  0.20548366,
        -4.3363253 , -1.72088078,  0.09071168, -0.36075607, -2.91048008]])

In [125]:
int_theta2

array([[ 0.29472873,  0.69212392,  0.19725828,  0.62747261, -0.68238557,
        -0.51494253, -0.9280679 , -0.62327089,  0.40216484,  0.14923645]])

In [122]:
result[3]

array([[ 0.53169173,  0.19318105,  0.67536041, ..., -0.37086208,
         0.16278701,  0.30953829],
       [ 0.56601556, -0.35782477, -0.83012092, ...,  0.98235626,
        -0.81739907, -0.78477274],
       [-0.02743481, -0.84095702,  0.46333047, ..., -0.76175268,
         0.40764109,  0.93778363],
       ..., 
       [ 0.67137672,  0.92356931,  0.18667619, ...,  0.42905763,
         0.80128187, -0.17947101],
       [-0.83029191,  0.16242591,  0.4616667 , ...,  0.75930326,
        -0.84364951,  0.48989206],
       [-0.83288335, -0.35012391,  0.28178581, ..., -0.92596953,
         0.87143226, -0.71699999]])

In [126]:
int_theta3

array([[ 0.53169173,  0.19318105,  0.67536041, ..., -0.37086208,
         0.16278701,  0.30953829],
       [ 0.56601556, -0.35782477, -0.83012092, ...,  0.98235626,
        -0.81739907, -0.78477274],
       [-0.02743481, -0.84095702,  0.46333047, ..., -0.76175268,
         0.40764109,  0.93778363],
       ..., 
       [ 0.67137672,  0.92356931,  0.18667619, ...,  0.42905763,
         0.80128187, -0.17947101],
       [-0.83029191,  0.16242591,  0.4616667 , ...,  0.75930326,
        -0.84364951,  0.48989206],
       [-0.83281339, -0.35022602,  0.2818387 , ..., -0.92600112,
         0.87151344, -0.71698915]])