In [3]:
import numpy as np

In [4]:

# Dataset dimensions shared across cells; data_loader overwrites them
# through `global` declarations once a file has been read.
features = classes = samples = 0
def modify_y(classes,y):
    """One-hot encode 1-based integer class labels.

    Args:
        classes: Number of classes, i.e. the width of each encoded row.
        y: Sequence or array of labels; label ``k`` sets column ``k - 1``.

    Returns:
        Integer array of shape (len(y), classes) with exactly one 1 per row.

    Raises:
        IndexError: If a label falls outside the range addressable with
            ``classes`` columns.
    """
    # Encode the argument that was passed in rather than a notebook global,
    # so the function works for any label vector.
    labels = np.asarray(y).astype(int).ravel()
    one_hot = np.zeros((labels.size, classes), dtype=int)
    one_hot[np.arange(labels.size), labels - 1] = 1
    return one_hot


def data_loader(filename,isTrainData):
    """Read a whitespace-delimited dataset from ./Dataset/ and standardize it.

    Every non-blank line holds the feature values followed by an integer
    class label in the last column.

    Args:
        filename: Name of the file inside the ``./Dataset/`` directory.
        isTrainData: When truthy, the module-level ``samples`` and
            ``classes`` globals are updated from this file.

    Returns:
        Tuple ``(x, y)``: ``x`` is a float array with one row per sample and
        one column per feature, standardized column-wise; ``y`` is the array
        of integer labels.

    Side effects:
        Sets the global ``features`` to the feature count of the last data
        line read, regardless of ``isTrainData``.
    """
    global features
    global classes
    global samples

    listx = []
    listy = []

    # The context manager closes the file even when a line fails to parse.
    with open('./Dataset/'+filename,"r") as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            features = len(fields)-1
            listx.append([float(value) for value in fields[:features]])
            listy.append(int(fields[features]))

    if isTrainData:
        samples = len(listx)
        classes = len(set(listy))

    # convert into numpy array
    x = np.array(listx)
    y = np.array(listy)

    # NOTE(review): each file is standardized with its own mean/std, so a
    # test file is not scaled with the training statistics.
    std = x.std(axis = 0)
    # A constant column has zero spread; divide by 1 there so it becomes
    # all zeros instead of NaN.
    x = (x - x.mean(axis = 0)) / np.where(std == 0, 1.0, std)

    return x,y


In [5]:
# Load the training split. A truthy second argument makes data_loader update
# the module-level classes/samples globals that the lines below read.
train_x,train_y =  data_loader("trainNN.txt",1)
# data_loader returns one row per sample; transpose to one column per sample,
# the layout Layer.forward multiplies its weight matrix against.
train_x = train_x.T
# One-hot encode the labels, then transpose to the same column-per-sample layout.
train_y  = modify_y(classes,train_y).T
print("Features: ",str(features), " Classes: ",str(classes)," Samples: ",str(samples))


Features:  4  Classes:  4  Samples:  500


In [6]:
# Load the evaluation split; passing 0 leaves the classes/samples globals as
# set by the training load.
test_x,test_y =  data_loader("testNN.txt",0)
# Column-per-sample layout, like train_x. test_y stays as raw integer labels
# because Network.test compares it against predicted class numbers.
test_x = test_x.T

In [7]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^-z), applied element-wise."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def derivative_sigmoid(z):
    """Derivative of the logistic function at z: s * (1 - s) with s = sigmoid(z)."""
    activation = sigmoid(z)
    return activation * (1 - activation)

def error(target,output):
    """Half the sum of squared differences between output and target."""
    residual = output - target
    squared_total = np.sum(residual ** 2)
    return squared_total / 2.0

def derivative_error(target,output):
    """Return the element-wise difference ``target - output``.

    NOTE(review): Layer.backward in this notebook uses ``output - target``
    for the last layer, i.e. the opposite sign. This helper is not called
    anywhere in the visible cells, so confirm the intended sign before use.
    """
    difference = target - output
    return difference
    
def softmax(x):
    """Softmax along axis 0.

    Args:
        x: Array of scores. For a 2-D input each column is normalized
            independently, so every column of the result sums to 1.

    Returns:
        Array of the same shape as ``x``.
    """
    # Shift by the maximum along the same axis that is normalized. Shifting
    # by the global maximum lets a column far below it underflow to all
    # zeros, which then divides 0 by 0.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0)


In [8]:
class Layer:
    """Fully connected layer with a sigmoid activation.

    Data is laid out one sample per column: ``forward`` computes
    ``weight @ X + bias`` with ``weight`` of shape (out_nodes, in_nodes) and
    ``bias`` of shape (out_nodes, 1).
    """

    def __init__(self,in_nodes,out_nodes):
        """Create the layer with deterministic initial parameters.

        Args:
            in_nodes: Number of inputs to the layer.
            out_nodes: Number of units in the layer.
        """
        # Private generators seeded with 1 and 0 yield the same values as
        # seeding the global generator, but leave the caller's global
        # np.random state untouched.
        self.weight = np.random.RandomState(1).randn(out_nodes,in_nodes)
        self.bias = np.random.RandomState(0).randn(out_nodes,1)

        # Values cached by forward().
        self.Z = None
        self.A = None
        self.prev_A = None

        # Gradients filled in by backward().
        self.dZ = None
        self.dA = None
        self.dW = None
        self.db = None

    def forward(self,X):
        """Compute and cache the layer activation for input ``X``.

        Args:
            X: Array with in_nodes rows, one column per sample.

        Returns:
            Sigmoid activation with out_nodes rows.
        """
        self.prev_A = X.copy()

        self.Z = np.matmul(self.weight,X)+self.bias
        self.A = sigmoid(self.Z)
        return self.A


    def backward(self,delta,isLast ):
        """Compute parameter gradients from the values cached by ``forward``.

        Args:
            delta: For the last layer, the target array; otherwise the
                gradient with respect to this layer's activation.
            isLast: Truthy when this is the output layer.

        Returns:
            Gradient with respect to the layer input, to be passed to the
            preceding layer.
        """
        if isLast:
            # Gradient of the half squared error with respect to the output.
            self.dA = self.A - delta
        else:
            self.dA = delta.copy()

        # sigmoid'(Z) == A * (1 - A); reuse the activation cached by forward.
        self.dZ = self.dA * (self.A * (1.0 - self.A))

        # Gradients are summed over the samples (columns), not averaged.
        self.dW = np.matmul(self.dZ, self.prev_A.T)
        self.db =  np.sum(self.dZ, axis=1, keepdims=True)

        # Chain rule through Z = W @ X + b: the upstream gradient is W.T @ dZ.
        # Using dA here would drop this layer's activation derivative.
        return np.matmul(self.weight.T, self.dZ)

    def update_weight(self,learning_rate):
        """Apply one gradient-descent step using the stored dW and db."""
        self.weight = self.weight  - learning_rate * self.dW
        self.bias = self.bias - learning_rate * self.db
        



In [9]:
class Network:
    """Feed-forward stack of ``Layer`` objects trained by full-batch gradient descent."""

    def __init__(self,features, classes, size_layers):
        """Build one Layer per consecutive pair of sizes in ``size_layers``.

        ``features`` and ``classes`` are accepted but not read here; the
        layer sizes come entirely from ``size_layers``.
        """
        self.layers = [
            Layer(size_layers[pos - 1], size_layers[pos])
            for pos in range(1, len(size_layers))
        ]

    def train(self,X,Y,epoch =1000, learning_rate = 0.1):
        """Run ``epoch`` passes of forward, backward and parameter update.

        Args:
            X: Inputs, one column per sample.
            Y: Targets handed to the last layer's ``backward``.
            epoch: Number of full-batch iterations.
            learning_rate: Step size passed to every layer's ``update_weight``.
        """
        for _ in range(epoch):
            # Forward pass: each layer caches what its backward pass needs.
            activation = X
            for layer in self.layers:
                activation = layer.forward(activation)

            # Backward pass from the output layer to the first layer. The
            # last layer receives the targets, the others the gradient
            # returned by the layer after them.
            signal = Y
            for layer in self.layers[::-1]:
                signal = layer.backward(signal, layer is self.layers[-1])

            for layer in self.layers:
                layer.update_weight(learning_rate)

    def decide(self, X):
        """Return the 1-based index of the strongest output unit for every column of ``X``."""
        activation = X
        for layer in self.layers:
            activation = layer.forward(activation)

        return np.asarray([np.argmax(column) + 1 for column in activation.T])

    def test(self,X,Y):
        """Return the percentage of samples whose prediction equals the label.

        Every misclassified sample is printed as: 1-based position, input
        column, expected label, predicted label.
        """
        y_hat = self.decide(X)
        total = len(Y)
        misses = 0
        for idx in range(total):
            if y_hat[idx] != Y[idx]:
                misses += 1
                print(idx + 1, X.T[idx], Y[idx], y_hat[idx])
        return (total - misses) / total * 100.0


In [11]:
# Two hidden layers of 3 units each between the input and output sizes.
network = Network(features,classes, [features,3,3,classes])
# 2000 full-batch epochs with a learning rate of 0.01.
network.train(train_x,train_y,2000,.01)
# Percentage of test samples whose predicted class equals the label;
# Network.test also prints every misclassified sample.
accuracy = network.test(test_x,test_y)
print(accuracy)

100.0


In [None]:
# Grid search over the network shape: i hidden layers, each j units wide.
# Results are collected as strings in a set, so the final print is unordered.
result = set()
for i in range(1, 10):
    for j in range(1, 50):
        # NOTE(review): Layer draws its weights and biases from its own fixed
        # seeds, so this seed does not change the initial parameters.
        np.random.seed(i * j)
        layer_structure = [features] + [j] * i + [classes]
        network = Network(features, classes, layer_structure)
        # Wider networks get more epochs: 1000 plus 50 for every unit beyond the first.
        network.train(train_x, train_y, 1000 + 50 * (j - 1), .01)
        accuracy = network.test(test_x, test_y)
        current_result = [i, layer_structure, accuracy]
        result.add(str(current_result))
        print(i, layer_structure, accuracy)
print('Final result:')
print(result)