In [14]:
import numpy as np
import struct
 
# Directory holding the four raw MNIST IDX files used by this notebook.
MNIST_RAW_DIR = '/root/autodl-tmp/ML/data/MNIST/raw/'

# Training split: images and their labels.
train_images_idx3_ubyte_file = MNIST_RAW_DIR + 'train-images-idx3-ubyte'
train_labels_idx1_ubyte_file = MNIST_RAW_DIR + 'train-labels-idx1-ubyte'

# Test ("t10k") split: images and their labels.
test_images_idx3_ubyte_file = MNIST_RAW_DIR + 't10k-images-idx3-ubyte'
test_labels_idx1_ubyte_file = MNIST_RAW_DIR + 't10k-labels-idx1-ubyte'

In [15]:
def decode_idx3_ubyte(idx3_ubyte_file):
    """Decode an IDX3 image file into a float array.

    The file is read as a 16-byte header of four big-endian unsigned 32-bit
    integers (magic number, image count, rows, columns) followed by one
    unsigned byte per pixel.

    Args:
        idx3_ubyte_file: Path of the IDX3 file to read.

    Returns:
        A writable float64 ``np.ndarray`` of shape
        ``(num_images, num_rows, num_cols)`` holding the raw byte values
        (0-255, not normalised).

    Raises:
        OSError: If the file cannot be opened.
        struct.error: If the file is shorter than the 16-byte header.
        ValueError: If the file holds fewer pixel bytes than the header
            announces.
    """
    # The context manager closes the handle as soon as the bytes are read.
    with open(idx3_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    fmt_header = '>IIII'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    print ("magic:%d, count: %d, size: %d*%d" % (magic_number, num_images, num_rows, num_cols))

    # Decode every pixel in one pass instead of unpacking image by image
    # through Python tuples. frombuffer yields a read-only view of the bytes;
    # astype makes the writable float64 copy that is returned.
    pixels = np.frombuffer(
        bin_data,
        dtype=np.uint8,
        count=num_images * num_rows * num_cols,
        offset=struct.calcsize(fmt_header),
    )
    images = pixels.astype(np.float64).reshape((num_images, num_rows, num_cols))

    # Same milestone lines (one per 10000 images) as before, so the printed
    # log of the notebook does not change.
    for done in range(10000, num_images + 1, 10000):
        print("done %d" % done + "pictures")
    return images

In [16]:
def decode_idx1_ubyte(idx1_ubyte_file):
    """Decode an IDX1 label file into a float array.

    The file is read as an 8-byte header of two big-endian unsigned 32-bit
    integers (magic number, item count) followed by one unsigned byte per
    label.

    Args:
        idx1_ubyte_file: Path of the IDX1 file to read.

    Returns:
        A writable float64 ``np.ndarray`` of shape ``(num_images,)`` holding
        the label values.

    Raises:
        OSError: If the file cannot be opened.
        struct.error: If the file is shorter than the 8-byte header.
        ValueError: If the file holds fewer label bytes than the header
            announces.
    """
    # The context manager closes the handle as soon as the bytes are read.
    with open(idx1_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # Unsigned fields, matching the header format used by decode_idx3_ubyte.
    fmt_header = '>II'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print("magic:%d, num_images: %d zhang" % (magic_number, num_images))

    # One vectorised read replaces the per-byte unpack loop; astype returns a
    # writable float64 copy (frombuffer alone is a read-only view).
    labels = np.frombuffer(
        bin_data,
        dtype=np.uint8,
        count=num_images,
        offset=struct.calcsize(fmt_header),
    ).astype(np.float64)

    # Same milestone lines (one per 10000 labels) as before, so the printed
    # log of the notebook does not change.
    for done in range(10000, num_images + 1, 10000):
        print("done %d" % done + "zhang")
    return labels

In [17]:
def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    """Decode the training image file.

    Args:
        idx_ubyte_file: Path of the IDX3 file; defaults to the module-level
            training image path.

    Returns:
        Whatever ``decode_idx3_ubyte`` returns for that file.
    """
    images = decode_idx3_ubyte(idx_ubyte_file)
    return images
 
 
def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    """Decode the training label file.

    Args:
        idx_ubyte_file: Path of the IDX1 file; defaults to the module-level
            training label path.

    Returns:
        Whatever ``decode_idx1_ubyte`` returns for that file.
    """
    labels = decode_idx1_ubyte(idx_ubyte_file)
    return labels
 
 
def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    """Decode the test image file.

    Args:
        idx_ubyte_file: Path of the IDX3 file; defaults to the module-level
            test image path.

    Returns:
        Whatever ``decode_idx3_ubyte`` returns for that file.
    """
    images = decode_idx3_ubyte(idx_ubyte_file)
    return images
 
 
def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    """Decode the test label file.

    Args:
        idx_ubyte_file: Path of the IDX1 file; defaults to the module-level
            test label path.

    Returns:
        Whatever ``decode_idx1_ubyte`` returns for that file.
    """
    labels = decode_idx1_ubyte(idx_ubyte_file)
    return labels

In [18]:
def narmalize_data(ima):
    """Min-max scale an array to the range [0, 1].

    The minimum and maximum are taken over the whole array, and every element
    is mapped to ``(value - min) / (max - min)``.

    The input is left untouched and a new float64 array is returned. Writing
    the result back into the input would truncate it for integer arrays and
    would silently overwrite the caller's data.

    Args:
        ima: Array-like of numbers with at least one element.

    Returns:
        A new float64 ``np.ndarray`` with the same shape as ``ima``. If all
        elements are equal (max == min) an all-zero array is returned instead
        of the NaNs a 0/0 division would produce.

    Raises:
        ValueError: If ``ima`` is empty (raised by ``np.min``).
    """
    values = np.asarray(ima, dtype=np.float64)
    a_min = np.min(values)
    span = np.max(values) - a_min
    if span == 0:
        # Constant input (e.g. a blank image): avoid dividing zero by zero.
        return np.zeros_like(values)
    return (values - a_min) / span
def initialize_with_zeros(n_x,n_h,n_y):
    """Create the initial parameters of a two-layer network.

    Despite the name, only the biases start at zero. Each weight matrix is
    drawn from a uniform distribution on ``[-limit, limit)`` with
    ``limit = sqrt(6) / sqrt(fan_in + fan_out)``. NumPy's global random
    generator is re-seeded with 2 on every call, so repeated calls return the
    same weights.

    Args:
        n_x: Number of input units.
        n_h: Number of hidden units.
        n_y: Number of output units.

    Returns:
        Dict with keys ``"W1"`` (n_h, n_x), ``"b1"`` (n_h, 1),
        ``"W2"`` (n_y, n_h) and ``"b2"`` (n_y, 1).
    """
    np.random.seed(2)

    def _uniform_weights(fan_out, fan_in):
        # Symmetric uniform draw whose bound depends on both layer sizes.
        limit = np.sqrt(6) / np.sqrt(fan_out + fan_in)
        return np.random.uniform(-limit, limit, size=(fan_out, fan_in))

    # W1 must be drawn before W2 so the random stream is consumed in the
    # same order on every call.
    hidden_weights = _uniform_weights(n_h, n_x)
    hidden_bias = np.zeros((n_h, 1))
    output_weights = _uniform_weights(n_y, n_h)
    output_bias = np.zeros((n_y, 1))

    expected_shapes = (
        (hidden_weights, (n_h, n_x)),
        (hidden_bias, (n_h, 1)),
        (output_weights, (n_y, n_h)),
        (output_bias, (n_y, 1)),
    )
    for array, shape in expected_shapes:
        assert array.shape == shape

    return {
        "W1": hidden_weights,
        "b1": hidden_bias,
        "W2": output_weights,
        "b2": output_bias,
    }

In [19]:
def forward_propagation(X,parameters):
    """Run the forward pass of the two-layer network.

    The hidden layer applies ``tanh`` to ``W1·X + b1``; the output layer
    applies ``sigmoid`` to ``W2·A1 + b2``.

    Args:
        X: Input array; its first dimension must match the columns of
            ``parameters["W1"]``.
        parameters: Dict providing ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.

    Returns:
        Tuple ``(A2, cache)`` where ``A2`` is the output activation and
        ``cache`` is a dict with the intermediate values ``"Z1"``, ``"A1"``,
        ``"Z2"`` and ``"A2"``.
    """
    hidden_pre = np.dot(parameters["W1"], X) + parameters["b1"]
    hidden_act = np.tanh(hidden_pre)

    output_pre = np.dot(parameters["W2"], hidden_act) + parameters["b2"]
    output_act = sigmoid(output_pre)

    # Intermediate values are kept for the backward pass.
    cache = dict(Z1=hidden_pre, A1=hidden_act, Z2=output_pre, A2=output_act)
    return output_act, cache
 
def costloss(A2,Y,parameters):
    """Cross-entropy cost between predictions and targets.

    For every column (example) the element-wise binary cross-entropy
    ``-(Y*log(A2) + (1-Y)*log(1-A2))`` is summed over the rows and divided by
    the number of rows. The leading minus sign makes this a quantity to
    minimise; it is the loss whose gradient with respect to the sigmoid
    pre-activation is ``A2 - Y``, the value used in ``back_propagation``.

    Args:
        A2: Predicted activations, expected in (0, 1).
        Y: Target array with the same shape as ``A2``.
        parameters: Unused; kept so existing callers keep working.

    Returns:
        Array of shape ``(1, A2.shape[1])`` with one cost value per column.
    """
    # Small offset so log() never receives exactly zero.
    eps=0.00000000001
    log_likelihood=np.multiply(np.log(A2+eps),Y) + np.multiply(np.log(1-A2+eps),(1-Y))
    # Negate: cross-entropy is the negative log-likelihood.
    cost=-np.sum(log_likelihood,axis=0,keepdims=True)/A2.shape[0]
    return cost

In [22]:
def back_propagation(parameters,cache,X,Y):
    """Compute the gradients of the two-layer network.

    Gradients are summed (not averaged) over the columns of ``X``.

    Args:
        parameters: Dict providing the output-layer weights ``"W2"``.
        cache: Dict providing the activations ``"A1"`` and ``"A2"``.
        X: Input array that produced ``cache``.
        Y: Target array with the same shape as ``cache["A2"]``.

    Returns:
        Dict with the gradients ``"dW1"``, ``"db1"``, ``"dW2"`` and ``"db2"``.
    """
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    # Output layer: error term taken directly as prediction minus target.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T)
    db2 = np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer: 1 - A1**2 is the derivative of tanh expressed through
    # its own output A1.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T)
    db1 = np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads
 
def update_para(parameters, grads, learning_rate ):
    """Apply one gradient-descent step.

    Each of ``W1``, ``b1``, ``W2`` and ``b2`` is replaced by
    ``value - learning_rate * gradient``, where the gradient is read from
    ``grads`` under the same name prefixed with ``"d"``.

    Args:
        parameters: Dict providing ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.
        grads: Dict providing ``"dW1"``, ``"db1"``, ``"dW2"`` and ``"db2"``.
        learning_rate: Step size multiplied into every gradient.

    Returns:
        A new dict with the four updated parameters; the input dict and its
        arrays are not modified.
    """
    names = ("W1", "b1", "W2", "b2")
    return {
        name: parameters[name] - learning_rate * grads["d" + name]
        for name in names
    }

In [23]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Computing ``exp(-x)`` directly overflows (with a RuntimeWarning) for
    large negative ``x``. Here the exponential is always taken of a
    non-positive number, so it stays within (0, 1]:

    * for ``x >= 0``: ``1 / (1 + exp(-x))``
    * for ``x <  0``: ``exp(x) / (1 + exp(x))``, the same value rewritten

    Args:
        x: Scalar or array of real numbers.

    Returns:
        The element-wise sigmoid, with the same shape as ``x``.
    """
    decay = np.exp(-np.abs(x))
    s = np.where(np.greater_equal(x, 0), 1 / (1 + decay), decay / (1 + decay))
    # np.where always returns an ndarray; indexing with () turns a 0-d result
    # back into a scalar and leaves higher-dimensional arrays as they are.
    return s[()]
def image2vector(image):
    """Flatten an image into a column vector.

    Args:
        image: Array-like of any shape, e.g. a 28x28 image.

    Returns:
        An array of shape ``(image.size, 1)`` with the elements in row-major
        order. For a 28x28 input this is ``(784, 1)``.
    """
    # -1 lets numpy derive the length, so the function is not tied to images
    # with exactly 784 pixels.
    v=np.reshape(image,(-1,1))
    return v
def softmax(x):
    """Return the index of the largest element of ``x``.

    Note: despite its name this does not compute softmax probabilities; it
    returns ``np.argmax`` over the flattened input, i.e. the position of the
    highest score.

    Args:
        x: Array-like of scores.

    Returns:
        Integer index (into the flattened array) of the first maximum.
    """
    return np.argmax(x)

In [28]:
def run(num_train=50000, num_test=10000, n_h=32, learning_rate=0.001):
    """Train the two-layer network one example at a time and print accuracy.

    Loads the four data files, trains on the first ``num_train`` training
    images with one gradient step per image, classifies the first
    ``num_test`` test images and prints the fraction classified correctly.

    Args:
        num_train: Number of training examples to use, taken from the start
            of the training set.
        num_test: Number of test examples to evaluate, taken from the start
            of the test set. Must be greater than zero.
        n_h: Number of hidden units.
        learning_rate: Step size for the first 1001 updates; later updates
            use ``learning_rate * 0.999``.

    Returns:
        None. The accuracy is printed.
    """
    train_images = load_train_images()
    train_labels = load_train_labels()
    test_images = load_test_images()
    test_labels = load_test_labels()

    n_x = 28 * 28   # pixels per image
    n_y = 10        # one output unit per digit class
    parameters = initialize_with_zeros(n_x, n_h, n_y)

    for i in range(num_train):
        # The 0.999 factor is applied once, not compounded per step: the rate
        # is `learning_rate` up to i == 1000 and `learning_rate * 0.999`
        # afterwards. This is deliberately left as a constant rate; a factor
        # compounded on every step would shrink the rate to practically zero
        # within a few thousand updates.
        step_size = learning_rate * 0.999 if i > 1000 else learning_rate

        # One-hot column vector for the target digit.
        target = np.zeros((n_y, 1))
        target[int(train_labels[i])] = 1

        features = narmalize_data(image2vector(train_images[i]))

        _, cache = forward_propagation(features, parameters)
        grads = back_propagation(parameters, cache, features, target)
        parameters = update_para(parameters, grads, learning_rate=step_size)

    correct = 0
    for i in range(num_test):
        features = narmalize_data(image2vector(test_images[i]))
        scores, _ = forward_propagation(features, parameters)
        # softmax() returns the index of the highest score (see its code).
        if softmax(scores) == int(test_labels[i]):
            correct += 1
    print(correct / num_test)

In [30]:
# Notebook entry point: load the data, train the network and print the
# fraction of test images classified correctly.
run()

magic:2051, count: 60000, size: 28*28
done 10000pictures
done 20000pictures
done 30000pictures
done 40000pictures
done 50000pictures
done 60000pictures
magic:2049, num_images: 60000 zhang
done 10000zhang
done 20000zhang
done 30000zhang
done 40000zhang
done 50000zhang
done 60000zhang
magic:2051, count: 10000, size: 28*28
done 10000pictures
magic:2049, num_images: 10000 zhang
done 10000zhang
0.9119
