In [5]:
import numpy as np
import random
#import pandas as pd
#import pickle
#import matplotlib.pyplot as plt

class nn:
    """Small fully connected feed-forward network trained with mini-batch SGD.

    Every weight matrix has shape ``(fan_in + 1, fan_out)``; the extra last
    row holds the bias, which is applied by appending a constant ``1`` to the
    activations of the previous layer.

    Parameters
    ----------
    batchSize : int
        Number of randomly drawn samples accumulated before each weight update.
    feature_layer : int
        Number of input features.
    hidden_layer : list of int, optional
        Width of every hidden layer (empty / ``None`` for no hidden layer).
    output_layer : int
        Number of output neurons.
    eta : float
        Initial learning rate; decayed as ``eta / sqrt(epoch)`` while training.
    activationFunctions : dict, optional
        Maps a 1-based layer index (1 = first layer after the input) to one of
        ``'relu'``, ``'sigmoid'``, ``'linear'`` or ``'tanh'``. Layers that are
        not listed use the sigmoid.
    costFunction : str
        ``'rms'`` (mean squared error) or ``'bce'`` (binary cross entropy).
    seed : int, optional
        Seeds both the instance generator used for sampling in ``train`` and
        numpy's global generator used to draw the initial weights.

    Raises
    ------
    ValueError
        If an unknown activation or cost function name is given.
    """

    # Keeps probabilities away from exactly 0 and 1 so that the cross entropy
    # and its derivative stay finite.
    _EPS = 1e-7

    def __init__(self, batchSize=100, feature_layer=None, hidden_layer=None, output_layer=None, eta=0.00001, activationFunctions=None, costFunction='rms', seed=None):
        self.random_generator = np.random.RandomState(seed)
        # None instead of list()/dict() defaults: avoids sharing one mutable
        # default object between all instances.
        hidden_layer = [] if hidden_layer is None else list(hidden_layer)
        if activationFunctions is None:
            activationFunctions = {}

        available_activations = {
            'relu': (self.ReLU, self.ReLUDerivative),
            'sigmoid': (self.sigmoid, self.sigmoidDerivative),
            'linear': (self.linear, self.linearDerivative),
            'tanh': (self.tanh, self.tanhDerivative),
        }
        n_weight_layers = len(hidden_layer) + 1
        l = [self.sigmoid] * n_weight_layers
        dl = [self.sigmoidDerivative] * n_weight_layers
        for layer_index, name in activationFunctions.items():
            key = str.lower(name)
            if key not in available_activations:
                raise ValueError('only "relu", "sigmoid", "linear" and "tanh" are valid options for activationFunction parameters')
            # layer indices are 1-based in the public API
            l[layer_index - 1], dl[layer_index - 1] = available_activations[key]

        available_costs = {
            'rms': (self.RMS, self.RMSderivative),
            'bce': (self.BCE, self.BCEderivative),
        }
        cost_key = str.lower(costFunction)
        if cost_key not in available_costs:
            raise ValueError('only "rms" and "bce" are valid options for costFunction parameters')
        self.costFunction_, self.costFunctionDerivative_ = available_costs[cost_key]

        self.activationFunction_ = l
        self.activationDerivative_ = dl
        self.batchSize_ = batchSize
        self.layers = [feature_layer] + hidden_layer + [output_layer]
        self.eta = eta
        self.init_eta_ = self.eta
        self.weights_ = list()
        self.deltaW_ = list()
        self.activations_ = list()
        self.neuronInputs_ = list()
        self.validationAccuracy_ = -1
        self.validation_set_ = None

        # "is not None" so that seed=0 is honoured as well.
        if seed is not None:
            np.random.seed(seed)

        # initialize weights and biases
        for fan_in, fan_out in zip(self.layers[:-1], self.layers[1:]):
            weights = np.random.normal(size=(fan_in + 1, fan_out), scale=1 / np.sqrt(fan_in))
            # the last row is the bias: start it at zero
            weights[-1] = 0.0
            self.weights_.append(weights)
            self.deltaW_.append(np.zeros((fan_in + 1, fan_out)))
            self.activations_.append(np.zeros(fan_in))
            self.neuronInputs_.append(np.zeros(fan_in))

    def resetDeltaW(self):
        """Reset the accumulated weight updates of every layer to zero."""
        self.deltaW_ = [np.zeros(w.shape) for w in self.weights_]

    def sigmoid(self, l):
        """Return the logistic sigmoid of ``l`` as a new array.

        Evaluated through ``exp(-|l|)`` so that large magnitudes cannot
        overflow. The argument is left untouched.
        """
        z = np.asarray(l, dtype=float)
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def sigmoidDerivative(self, x):
        """Sigmoid derivative expressed through the sigmoid *output* ``x``."""
        return x * (1.0 - x)

    def ReLU(self, x):
        """Return ``max(x, 0)`` element-wise."""
        return x * (x > 0)

    def ReLUDerivative(self, l):
        """ReLU derivative evaluated on ``l``: 1 where ``l > 0``, else 0.

        Returns a new array. Back-propagation calls this with the layer's
        activations, where an inactive unit is exactly 0, so 0 must map to a
        zero gradient.
        """
        return (np.asarray(l) > 0).astype(float)

    def linear(self, l):
        """Identity activation."""
        return l

    def linearDerivative(self, l):
        """Derivative of the identity activation: ones shaped like ``l``."""
        return np.ones(np.shape(l))

    def tanh(self, l):
        """Hyperbolic tangent activation."""
        return np.tanh(l)

    def tanhDerivative(self, l):
        """tanh derivative expressed through the tanh *output* ``l``."""
        return 1 - l ** 2

    def RMS(self, x, y):
        """Return the mean squared difference between output ``x`` and target ``y``.

        Despite the name no square root is taken.
        """
        return np.average((y - x) ** 2)

    def RMSderivative(self, x, y):
        """Gradient of ``0.5 * sum((x - y) ** 2)`` with respect to output ``x``.

        The sign matters: ``gradientDescent`` subtracts the accumulated
        updates, so this must point uphill (``x - y``), not downhill.
        """
        return x - y

    # binary cross entropy
    def BCE(self, l, y):
        """Return the mean binary cross entropy of predictions ``l``.

        An entry of ``y`` equal to 1 marks the positive class; any other
        value is treated as the negative class. Predictions are clipped to
        ``[_EPS, 1 - _EPS]`` so the logarithms stay finite.
        """
        p = np.clip(np.asarray(l, dtype=float), self._EPS, 1.0 - self._EPS)
        positive = np.asarray(y) == 1
        return np.mean(np.where(positive, -np.log(p), -np.log(1.0 - p)))

    def BCEderivative(self, l, y):
        """Derivative of the binary cross entropy with respect to ``l``.

        ``-1 / l`` for positive targets and ``1 / (1 - l)`` otherwise, with
        ``l`` clipped to ``[_EPS, 1 - _EPS]`` to avoid division by zero.
        """
        p = np.clip(np.asarray(l, dtype=float), self._EPS, 1.0 - self._EPS)
        positive = np.asarray(y) == 1
        return np.where(positive, -1.0 / p, 1.0 / (1.0 - p))

    def feedForward(self, inputs):
        """Propagate one sample through the network and return the output.

        Also stores the per-layer activations in ``self.activations_`` and the
        per-layer pre-activation inputs in ``self.neuronInputs_`` (entry 0 of
        both is the raw input) for use by ``backPropogation``.
        """
        layer_output = [inputs]
        layer_input = [inputs]
        # activations from the first layer is the input
        activations = inputs
        for weights, activation in zip(self.weights_, self.activationFunction_):
            # append the constant 1 that multiplies the bias row
            neuron_input = np.dot(np.append(activations, 1), weights)
            neuron_output = activation(neuron_input)
            activations = neuron_output
            layer_output.append(neuron_output)
            layer_input.append(neuron_input)
        self.activations_ = layer_output
        self.neuronInputs_ = layer_input
        return activations

    # adaptive learning
    def updateLearningRate(self, epoch):
        """Decay the learning rate to ``init_eta_ / sqrt(epoch)`` (epoch >= 1)."""
        self.eta = self.init_eta_ / np.sqrt(epoch)

    def converge(self):
        """Convergence check; currently a stub that always returns False.

        ``train`` fills ``self.validation_set_`` so that a validation based
        stopping criterion can be implemented here.
        """
        return False

    def train(self, inputs, targets, epochs=4999, verbose=True):
        """Train the network with mini-batch stochastic gradient descent.

        Parameters
        ----------
        inputs : indexable of feature vectors
        targets : indexable of targets, same length as ``inputs``
        epochs : int
            Number of passes; each pass runs ``len(targets) // batchSize_``
            batches of samples drawn at random with replacement.
        verbose : bool
            Print the summed batch error and the learning rate while training.

        Raises
        ------
        ValueError
            If ``inputs`` is empty or its length differs from ``targets``.
        """
        n_samples = len(inputs)
        if n_samples == 0:
            raise ValueError('inputs must contain at least one sample')
        if len(targets) != n_samples:
            raise ValueError('inputs and targets must have the same length')

        # creating a validation set which will be used in checking convergence
        picks = self.random_generator.randint(0, n_samples, size=4000)
        self.validation_set_ = [(inputs[i], targets[i]) for i in picks]

        batches_per_epoch = len(targets) // self.batchSize_
        for epoch in range(1, epochs + 1):
            self.updateLearningRate(epoch)
            for _ in range(batches_per_epoch):
                sumError = 0
                # after each batch reset the deltaW to 0s
                self.resetDeltaW()
                for _ in range(self.batchSize_):
                    # sampling through the instance generator makes a seeded
                    # run reproducible
                    index = self.random_generator.randint(0, n_samples)
                    o = self.feedForward(inputs[index])
                    error = self.costFunctionDerivative_(o, targets[index])
                    self.backPropogation(error)
                    sumError += self.costFunction_(o, targets[index])
                self.gradientDescent()
                if verbose:
                    print('epoch: {}  RMS: {}'.format(epoch, sumError))
            if verbose:
                print('learning rate used: {}'.format(self.eta))

    def backPropogation(self, error):
        """Accumulate the weight gradients of the most recent ``feedForward``.

        ``error`` is the derivative of the cost with respect to the network
        output. The per-layer gradients are added to ``self.deltaW_``.
        """
        # moving backwards
        for i in range(len(self.weights_) - 1, -1, -1):
            delta = error * self.activationDerivative_[i](self.activations_[i + 1])
            delta = np.reshape(delta, self.layers[i + 1])
            # layer input with the constant 1 that feeds the bias row
            act = np.append(self.activations_[i], 1)
            # calculating deltaW for weights and bias of a layer
            self.deltaW_[i] += np.outer(act, delta)
            # error for the previous layer; the bias row is skipped because
            # the bias has no incoming connections to propagate into
            error = np.dot(self.weights_[i][:-1], delta)

    def gradientDescent(self):
        """Apply the batch-averaged accumulated gradients, scaled by ``eta``."""
        for i in range(len(self.weights_)):
            self.weights_[i] -= (self.deltaW_[i] / self.batchSize_) * self.eta
            # self.weights_[i]+=self.deltaW_[i]*self.eta



In [6]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# load the Boston dataset
# NOTE: scikit-learn discourages the use of this dataset and removed
# load_boston in version 1.2; fetch_california_housing is the suggested
# alternative when upgrading.
boston = load_boston()
X, y = boston.data, boston.target

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# scale the features; the scaler is fitted on the training split only so that
# no statistics of the test split leak into training
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# The target is an unbounded regression value, so the output layer (layer 2)
# must be linear as well: the default sigmoid is limited to (0, 1) and can
# never reach the target range.
mod = nn(
    feature_layer=X_train.shape[1],
    hidden_layer=[1],
    costFunction='rms',
    output_layer=1,
    batchSize=10,
    activationFunctions={1: 'linear', 2: 'linear'},
)

mod.train(X_train, y_train)




    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

epoch: 1  RMS: 6776.448647001736
epoch: 1  RMS: 3624.8684372346097
epoch: 1  RMS: 7304.711574217838
epoch: 1  RMS: 6098.846602721142
epoch: 1  RMS: 3799.934223327687
epoch: 1  RMS: 5823.942652937927
epoch: 1  RMS: 9135.154347780124
epoch: 1  RMS: 4225.479798412024
epoch: 1  RMS: 4586.654293957227
epoch: 1  RMS: 8591.346367899148
epoch: 1  RMS: 5596.957012365182
epoch: 1  RMS: 6637.676660434901
epoch: 1  RMS: 6983.833671292716
epoch: 1  RMS: 7363.824857207891
epoch: 1  RMS: 6017.59868366625
epoch: 1  RMS: 5277.3314304298165
epoch: 1  RMS: 4563.566127351795
epoch: 1  RMS: 7335.547076847417
epoch: 1  RMS: 4442.236659534693
epoch: 1  RMS: 5221.328063477124
epoch: 1  RMS: 8403.37945866264
epoch: 1  RMS: 3373.5850214278807
epoch: 1  RMS: 3389.8834207002487
epoch: 1  RMS: 5155.428362821312
epoch: 1  RMS: 7882.13796082745
epoch: 1  RMS: 5549.6062505633945
epoch: 1  RMS: 8505.367416372601
epoch: 1  RMS: 10304.363889684995
epoch: 1  RMS: 3581.4189524658464
epoch: 1  RMS: 5134.22353389111
epoch: 