In [None]:
import numpy as np
import h5py
import time
import copy
from random import randint
import torch
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
class NN():
    '''Mini-batch gradient-descent trainer driven entirely by user-supplied callables.

    The class stores the training data as torch tensors shaped
    (batch_num, batch_size, ...) and a list of parameters ``theta``; the model,
    its loss and its gradients are provided by the caller as plain functions.
    '''
    def __init__(self,input_size,output_size,batch_size,data_x,data_y,loss_function,grad,init,f):
        '''input_size: number of features per sample (last axis of the stored x)
        output_size: forwarded unchanged as the 4th argument of every grad function
        batch_size: how many elements in one batch (positive integer)
        data_x: samples; the first axis is the sample axis
        data_y: one target value per sample
        loss_function: f(np.array,np.array)->double
        grad: a list of grad function, each of which f(x,y,theta,output_size)->np.array
        init: initial theta, which is a list of all parameters. (i.e. [C,b1,W,b2])
              The list is stored as-is (not copied) and forward() updates its
              entries with ``-=``, so the caller's objects are modified.
        f: the fitting function. f(x,theta)=pred_y

        Samples that do not fill a whole batch (the last
        ``len(data_x) % batch_size`` ones) are dropped.

        Raises ValueError if batch_size < 1, if there are fewer samples than one
        batch, or if data_y does not hold exactly one value per sample.
        '''
        if batch_size<1:
            raise ValueError('batch_size must be a positive integer')
        self.outsize=output_size
        self.insize=input_size
        tensor_x=torch.FloatTensor(data_x)
        # Flatten y so that (N,), (N,1) and (1,N) inputs are all accepted.
        tensor_y=torch.FloatTensor(data_y).reshape(-1)
        sample_num=int(tensor_x.size()[0])
        if int(tensor_y.size()[0])!=sample_num:
            raise ValueError('data_y must contain exactly one value per sample of data_x')
        batch_num=sample_num//batch_size
        if batch_num==0:
            raise ValueError('need at least batch_size samples to form one batch')
        # Keep only whole batches; without this the reshape below fails whenever
        # sample_num is not a multiple of batch_size.
        used=batch_num*batch_size
        self.x=tensor_x[:used].reshape(batch_num,batch_size,input_size)
        self.y=tensor_y[:used].reshape(batch_num,batch_size,1)
        self.loss=0.0
        self.lossf=loss_function
        self.theta=init
        self.alpha=0.1
        self.grad=grad
        self.f=f

    def adjust_rate(self,alpha):
        '''Set the learning rate used by forward().'''
        self.alpha=alpha

    def forward(self):
        '''Run one pass over all batches, then reshuffle the samples.

        For every batch, each gradient function is evaluated on every sample
        with the parameters as they were at the start of that batch; the
        per-sample gradients are averaged and each parameter is updated with
        ``theta[k] -= alpha * mean_gradient``.  After the last batch the samples
        are randomly permuted (torch.randperm) and re-split into batches.
        '''
        batch_num=self.x.size()[0]
        batch_size=self.x.size()[1]
        theta_num=len(self.theta)
        outsize=self.outsize

        #Update parameters
        for i in range(batch_num):
            grad_list=[[] for k in range(theta_num)]

            for j in range(batch_size):
                x=self.x[i,j,:].numpy()
                y=self.y[i,j,:].numpy()[0]

                for k in range(theta_num):
                    grad_list[k].append(self.grad[k](x,y,self.theta,outsize))

            # All gradients of this batch are collected before any parameter
            # changes, so every grad function saw the same theta.
            for k in range(theta_num):
                self.theta[k]-=self.alpha*np.mean(grad_list[k],axis=0)

        #Shuffle
        total=batch_num*batch_size
        new_index=torch.randperm(total)
        new_x=self.x.reshape(total,self.insize)[new_index,:]
        new_y=self.y.reshape(total,1)[new_index,:]
        self.x=new_x.reshape(batch_num,batch_size,self.insize)
        self.y=new_y.reshape(batch_num,batch_size,1)

    def predict(self,data_x):
        '''Return a list with f(x,theta) for every row x of data_x.

        data_x is converted with torch.FloatTensor and each row is handed to
        the fitting function as a numpy array.
        '''
        tensor_x=torch.FloatTensor(data_x)
        sample_num=int(tensor_x.size()[0])
        return [self.f(tensor_x[i,:].numpy(),self.theta) for i in range(sample_num)]

    def get_error(self):
        '''Return the mean loss over all stored samples and cache it in self.loss.'''
        batch_num=self.x.size()[0]
        batch_size=self.x.size()[1]
        data_x=self.x.reshape(batch_num*batch_size,self.insize)
        data_y=self.y.reshape((batch_num*batch_size,1))
        y_list=self.predict(data_x)

        error_list=[]
        for i in range(len(y_list)):
            y_real=data_y[i,:].numpy()[0]
            error_list.append(self.lossf(y_list[i],y_real))

        self.loss=np.mean(error_list)
        return self.loss

    def save(self):
        '''Return the current parameter list (the same object, not a copy).'''
        return self.theta

In [None]:
#load MNIST data
#The context manager closes the HDF5 file even if one of the reads below raises.
with h5py.File('MNISTdata.hdf5', 'r') as MNIST_data:
    x_train = np.float32(MNIST_data['x_train'][:])
    #Only column 0 of the stored label datasets is read.
    y_train = np.int32(np.array(MNIST_data['y_train'][:,0]))
    x_test = np.float32(MNIST_data['x_test'][:])
    y_test = np.int32(np.array(MNIST_data['y_test'][:,0]))

In [None]:
#model dimensions: inputs per sample (28*28) and number of outputs
num_inputs, num_outputs = 28*28, 10

In [None]:
#Helper functions for the logistic (softmax) regression model

def vec_transpose(x):
    '''Wrap x in one extra leading axis and return the transpose of the result.

    For a 1-D x of length n this gives an (n,1) column.
    '''
    wrapped=np.array([x])
    return np.transpose(wrapped)

def normalize(x):
    '''Convert x to a float tensor and standardise it.

    The mean and standard deviation are taken over all elements (torch.mean and
    torch.std with no axis), and the result is (values-mean)/std.
    '''
    values=torch.FloatTensor(x)
    centre=torch.mean(values)
    spread=torch.std(values)
    return (values-centre)/spread

def cross_entropy_error(x,y):
    '''Return -log(x[int(y)]): the negative log of the entry of x selected by y.

    y is cast to int before it is used as an index.
    '''
    label=int(y)
    picked=x[label]
    return -np.log(picked)

def softmax(z):
    '''Return exp(z)/sum(exp(z)), computed in a numerically stable way.

    The maximum of z is subtracted before exponentiating. This leaves the
    result unchanged mathematically (the common factor cancels) but keeps
    np.exp from overflowing to inf, which previously produced nan for large
    entries of z.

    z: array-like of scores; the sum runs over all elements.
    Returns an array of the same shape whose entries sum to 1.
    '''
    z=np.asarray(z)
    if z.size==0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(z)
    ez=np.exp(z-np.max(z))
    return ez/np.sum(ez)

def grad_C(x,y,theta,output_size):
    '''Gradient term for theta[0] on a single sample (x,y).

    Builds a one-hot vector of length output_size with a 1 at index int(y),
    subtracts softmax(theta[0].x) from it, and returns the negated product of
    that difference (as a column) with x (as a row).
    '''
    one_hot=np.zeros(output_size)
    one_hot[int(y)]=1
    probs=softmax(np.dot(theta[0],x))
    residual_col=vec_transpose(one_hot-probs)
    x_row=np.array([x])
    return -np.dot(residual_col,x_row)

def f_1(x,theta):
    '''Fitting function: softmax applied to the product of theta[0] and x.'''
    logits=np.dot(theta[0],x)
    return softmax(logits)

