In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score
import math

In [34]:
class Sigmoid():
    def __call__(self,x):
        return 1 / (1+np.exp(-x))
    def gradient(self,x):
        return self.__call__(x)*(1 - self.__call__(x))
class Softmax():
    def __call__(self,x):
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)
    def gradient(self,x):
        p = self.__call__(x)
        return p * (1-p)
class CrossEntropy():
    def __init__(self): pass
    
    def loss(self,y,p):
        p = np.clip(p,1e-15,1-1e+15)
        return -y*np.log(p) - (1-y)*np.log(1-p)
    
    def gradient(self,y,p):
        p = np.clip(p,1e-15,1-1e+15)
        return -(y/p) + (1-y)/(1-p)

In [52]:
class dnn():
    def __init__(self,n_hidden,n_iter=300,learning_rate=0.1):
        self.n_hidden = n_hidden # hidden neurons
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()
    
    def _init_weights(self,x,y):
        n_sample, n_feature = x.shape
        _, n_outputs = y.shape
        
        #hidden layer
        limit = 1 / math.sqrt(n_feature)
        self.w = np.random.uniform(-limit,limit, (n_feature, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        
        #output layer
        limit = 1 / math.sqrt(self.n_hidden)
        self.v = np.random.uniform(-limit,limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))
        
    def fit(self, x, y):
        
        self._init_weights(x,y)
        
        for i in range(self.n_iter):
            # forward pass
#             print(x.dot(self.w).shape,self.w0.shape)
            hidden_input = x.dot(self.w) + self.w0
            hidden_output = self.hidden_activation(hidden_input)
            
            output_layer_input = hidden_output.dot(self.v) + self.v0
            y_pred = self.output_activation(output_layer_input)
            
            #backward pass
            grad_wrt_ouput = self.loss.gradient(y, y_pred)*self.output_activation.gradient(output_layer_input)
            grad_v = hidden_output.T.dot(grad_wrt_ouput)
            grad_v0 = np.sum(grad_wrt_ouput, axis=0, keepdims=True)
            
            grad_wrt_hidden = grad_wrt_ouput.dot(self.v.T)*self.hidden_activation.gradient(hidden_input)
            grad_w = x.T.dot(grad_wrt_hidden)
            grad_w0 = np.sum(grad_wrt_hidden, axis=0, keepdims=True)
            
            self.v -= self.learning_rate*grad_v
            self.v0 -= self.learning_rate*grad_v0
            self.w -= self.learning_rate*grad_w
            self.w0 -= self.learning_rate*grad_w0
    
    def predict(self, x):
        hidden_input = x.dot(self.w) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.v) + self.v0
        y_pred = self.output_activation(output_layer_input)
        
        return y_pred

def main():
    df = pd.read_csv("iris.csv")
    y = df['Species']

    # y_to_categlorical
    le = LabelEncoder().fit_transform(y)
    y_to_cate = OneHotEncoder().fit_transform(le.reshape(-1,1))
    y = np.array(y_to_cate.toarray())


    x = df.drop(['Species','Id'],axis=1)
    
    x = np.array(x)
    x_train,x_test,y_train,y_test = train_test_split(x,y,
                                                     test_size = 0.2,
                                                     random_state = 22)
    
#     print(x_train.shape,y_train.shape,x_test.shape,y_test.shape)


    clf = dnn(n_hidden=32,n_iter=1300,learning_rate=0.01)
    clf.fit(x_train,y_train)

    y_pred = np.argmax(clf.predict(x_test),axis=1)
    y_test = np.argmax(y_test,axis=1)

    print(accuracy_score(y_pred,y_test))
        
if __name__ == "__main__":
    main()
    

0.4666666666666667
