# Stacking and back propagation on Mnist APPROXIMATE

### import data

In [117]:
from sklearn.datasets import fetch_mldata
import numpy as np
mnist = fetch_mldata('MNIST original', data_home=".")
X = mnist.data
s = np.ones(X.shape[0])
X_1 = np.hstack((X, s.reshape(len(s),1)))
y_float = mnist.target
y_int = y_float.astype(np.int64)
y = np.eye(10)[y_int]
N = X.shape[0]

### define function

In [222]:
def linear_output_z1(x, w1):
    z1 = np.dot(x, w1) 
    return z1

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

### define gradient descent

In [223]:
def cross_entropy(theta, w1, w2, x, y):
    x_1 = np.hstack((X, s.reshape(len(s),1)))
    z1 = linear_output_z1(x_1, w1)
    z2 = softmax(np.dot(x_1, np.transpose(w2)))
    Z = np.c_[x, z1, z2, s]
    zf = np.matmul(theta, np.transpose(Z))
    p = softmax(zf)
    ce =  np.multiply(np.transpose(y), np.log(p))
    return z2

In [224]:
hoge = np.dot(X_1, np.transpose(initial_w2)) 

In [225]:
softmax(hoge[0])

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])

In [226]:
softmax(hoge)[0]

  


array([  0.,  nan,   0.,  nan,   0.,   0.,  nan,  nan,  nan,   0.])

### initialization

In [227]:
initial_theta = np.random.uniform(-1,1,(10, X.shape[1]+12))
initial_w1 = np.random.uniform(-1,1, X.shape[1]+1)
initial_w2 = np.random.uniform(-1,1,(10, X.shape[1]+1))

In [228]:
cross_entropy(initial_theta, initial_w1, initial_w2, X, y)

  


array([[ nan,  nan,   0., ...,  nan,  nan,  nan],
       [ nan,  nan,   0., ...,  nan,  nan,  nan],
       [ nan,  nan,   0., ...,  nan,  nan,  nan],
       ..., 
       [ nan,  nan,   0., ...,  nan,  nan,  nan],
       [ nan,  nan,   0., ...,  nan,  nan,  nan],
       [ nan,  nan,   0., ...,  nan,  nan,  nan]])

In [229]:
hoge = np.dot(X_1, np.transpose(initial_w2)) 

In [230]:
softmax(y)

array([[  3.32059453e-05,   1.19710436e-05,   1.21935175e-05, ...,
          1.21165962e-05,   1.22358175e-05,   1.22016982e-05],
       [  3.32059453e-05,   1.19710436e-05,   1.21935175e-05, ...,
          1.21165962e-05,   1.22358175e-05,   1.22016982e-05],
       [  3.32059453e-05,   1.19710436e-05,   1.21935175e-05, ...,
          1.21165962e-05,   1.22358175e-05,   1.22016982e-05],
       ..., 
       [  1.22157846e-05,   1.19710436e-05,   1.21935175e-05, ...,
          1.21165962e-05,   1.22358175e-05,   3.31676546e-05],
       [  1.22157846e-05,   1.19710436e-05,   1.21935175e-05, ...,
          1.21165962e-05,   1.22358175e-05,   3.31676546e-05],
       [  1.22157846e-05,   1.19710436e-05,   1.21935175e-05, ...,
          1.21165962e-05,   1.22358175e-05,   3.31676546e-05]])

In [247]:
softmax(hoge[10:12])

  


array([[ nan,  nan,  nan,  nan,  nan,   1.,  nan,  nan,  nan,  nan],
       [ nan,  nan,  nan,  nan,  nan,   0.,  nan,  nan,  nan,  nan]])

In [249]:
new_x = np.cbrt(X)

In [252]:
new_X_1 = np.hstack((new_x, s.reshape(len(s),1)))


In [253]:
hoge = np.dot(new_X_1, np.transpose(initial_w2)) 

In [258]:
softmax(hoge[0])

array([  4.99052414e-72,   7.27273071e-99,   4.58130618e-08,
         5.01339011e-34,   1.32243317e-64,   2.54914066e-24,
         2.54250574e-17,   5.72639181e-40,   9.99999954e-01,
         7.14560320e-64])

In [259]:
hoge.shape

(70000, 10)

In [264]:
softmax(hoge)[0]

array([  1.50997954e-61,   5.77233935e-79,   3.41523929e-57,
         1.35535404e-48,   4.73393248e-59,   9.10685899e-48,
         2.73984166e-34,   1.51452242e-44,   9.41136048e-18,
         1.06905854e-76])

In [None]:
v