Import Modules

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# The MLP

In [17]:

class MLP:
    """Chain-shaped perceptron with one scalar weight per layer transition.

    The constructor takes one neuron count per layer, but the network keeps
    exactly one scalar weight between consecutive layers; the counts are only
    used to scale the 'LeCun' initialisation.

    Attributes set by the methods below:
        weights     current weights, one per layer transition
        W_Hist      list of weight snapshots (one per weight update)
        LastChange  previous weight change per weight (momentum term)
        activation  layer outputs of the most recent forward pass
        deltas      backpropagated error terms of the most recent sample
    """

    def __init__(self, *neurons):
        """Store the number of layers and the neuron count of each layer."""
        self.layers = len(neurons)
        self.neuPerL = list(neurons)

    def setWeights(self, init):
        """Draw fresh weights and reset the weight history and momentum state.

        init: 'SND' (standard normal), 'Uniform' (uniform on [0, 1)) or
              'LeCun' (normal with std 1/sqrt(neurons in the source layer)).
        Raises ValueError for any other value.
        """
        n_weights = self.layers - 1
        # Scalar draws (no size argument) avoid converting 1-element arrays
        # to float, which recent NumPy versions deprecate.
        if init == 'SND':
            self.weights = [float(np.random.randn()) for _ in range(n_weights)]
        elif init == 'Uniform':
            self.weights = [float(np.random.rand()) for _ in range(n_weights)]
        elif init == 'LeCun':
            self.weights = [float(np.random.normal(0, 1 / np.sqrt(self.neuPerL[l])))
                            for l in range(n_weights)]
        else:
            raise ValueError("unknown weight initialisation: %r "
                             "(expected 'SND', 'Uniform' or 'LeCun')" % (init,))
        # Store a copy: self.weights is mutated in place during training, so
        # keeping the list itself would make every history entry identical.
        self.W_Hist = [list(self.weights)]
        # One momentum slot per weight (previously hard-coded to two).
        self.LastChange = [0] * n_weights

    def tanh(self, x):
        """Scaled hyperbolic tangent activation: 1.7159 * tanh(2x/3)."""
        return 1.7159 * np.tanh(2/3 * x)

    def der(self, x):
        """Derivative of the scaled tanh activation with respect to x."""
        return 1.14393 * 1/np.cosh(2*x/3)**2

    def forward(self, inp):
        """Propagate `inp` through the chain and store all layer outputs.

        After the call self.activation[0] is the input and
        self.activation[-1] is the network output.
        """
        self.activation = [inp]
        for l in range(self.layers - 1):
            self.activation.append(self.tanh(np.dot(self.activation[-1], self.weights[l])))

    def _backprop(self, target):
        """Return one error term per weight for the latest forward pass."""
        n_weights = len(self.weights)
        deltas = [None] * n_weights
        # Output layer: error times activation derivative at its net input.
        deltas[-1] = (self.activation[-1] - target) * self.der(np.dot(self.activation[-2], self.weights[-1]))
        # Hidden layers, back to front. Each delta uses the delta of the layer
        # directly after it (the original indexing picked the wrong one for
        # networks with more than three layers).
        for l in range(n_weights - 2, -1, -1):
            net = np.dot(self.activation[l], self.weights[l])
            deltas[l] = self.der(net) * self.weights[l + 1] * deltas[l + 1]
        return deltas

    def _commit(self, changes):
        """Subtract `changes` from the weights and record momentum and history."""
        for l, change in enumerate(changes):
            self.weights[l] = self.weights[l] - change
            self.LastChange[l] = change
        # Snapshot, not alias, so earlier history entries stay intact.
        self.W_Hist.append(list(self.weights))

    def sgd(self, LR, epochs, mom, data, targ):
        """Train with stochastic gradient descent (one update per sample).

        LR is multiplied by 0.99 after every epoch. Returns an array of
        length epochs+1 holding the accuracy from test() before training
        and after each epoch.
        """
        self.LR = LR
        self.mom = mom
        p = np.zeros([epochs + 1])
        p[0] = self.test(data, targ)
        for epoch in range(epochs):
            for sample, target in zip(data, targ):
                self.forward(sample)
                self.deltas = self._backprop(target)
                self.adaptWeights()
            self.LR = 0.99 * self.LR  # decaying learning rate
            p[epoch + 1] = self.test(data, targ)
        return p

    def batch_gd(self, LR, epochs, mom, data, targets):
        """Train with batch gradient descent (one update per epoch).

        The per-sample gradients (delta * input activation) are averaged over
        the whole data set before the weights are changed. Averaging the
        errors and activations first, as the earlier version did, lets errors
        of opposite sign cancel and is not the gradient of the loss.

        LR is multiplied by 0.99 after every epoch. Returns an array of
        length epochs+1 holding the accuracy from test() before training
        and after each epoch.
        """
        self.mom = mom
        self.LR = LR
        p = np.zeros([epochs + 1])
        p[0] = self.test(data, targets)  # same convention as sgd()
        n_samples = len(data)
        for epoch in range(epochs):
            grad_sum = [0.0] * len(self.weights)
            for sample, target in zip(data, targets):
                self.forward(sample)
                self.deltas = self._backprop(target)
                for l, delta in enumerate(self.deltas):
                    grad_sum[l] += delta * self.activation[l]
            changes = [self.LR * (g / n_samples) + self.mom * last
                       for g, last in zip(grad_sum, self.LastChange)]
            self._commit(changes)
            p[epoch + 1] = self.test(data, targets)
            self.LR = 0.99 * self.LR  # decaying learning rate
        return p

    def adaptWeights(self):
        """Apply one momentum update from self.deltas and self.activation."""
        changes = [self.LR * self.deltas[l] * self.activation[l] + self.mom * self.LastChange[l]
                   for l in range(len(self.weights))]
        self._commit(changes)

    def test(self, data, targ):
        """Return the percentage of samples whose rounded output equals the target.

        NOTE(review): the activation ranges over (-1.7159, 1.7159), so outputs
        beyond +/-1.5 round to +/-2 and are counted as wrong -- confirm that
        this is the intended accuracy measure.
        """
        correct = 0
        for sample, target in zip(data, targ):
            self.forward(sample)
            correct += 1 if round(self.activation[-1]) == target else 0
        return 100*correct/len(data)

    def getMSE(self, data, label):
        """Return the mean squared error of the network output over `data`."""
        errors = []
        for ind, sample in enumerate(data):
            self.forward(sample)
            errors.append((self.activation[-1] - label[ind])**2)
        return np.mean(errors)




Generate and Normalize the Training Data

In [3]:
# Two 1-D Gaussian classes with a fixed seed so the samples are reproducible.
np.random.seed(1)
sampleSize = 30
cats = np.random.normal(loc=25, scale=5, size=sampleSize)
dogs = np.random.normal(loc=45, scale=15, size=sampleSize)

# Pool both classes and standardise to zero mean and unit standard deviation.
data = np.concatenate((cats, dogs))
data = (data - data.mean()) / data.std()

# Targets follow the sample order: -1 for every cat, +1 for every dog.
t_c = -np.ones(sampleSize)
t_d = np.ones(sampleSize)
targets = np.concatenate((t_c, t_d))

Set Model Hyperparameters

In [15]:
# Training schedule shared by every run.
epochs = 200     # passes over the training data
LR = 0.2         # initial learning rate
momentum = 0.1   # momentum coefficient used by the "with momentum" runs

# Weight initialisation schemes to compare.
weight_init = [
    'Uniform',
    'SND',
    'LeCun',
]

In [None]:
# Call this function to evaluate the performance of the network.
# Pass the momentum coefficient to use; 0 trains without momentum.
def evaluate(mom):
    """Train and plot one SGD and one batch network per initialisation scheme.

    For every entry of the module-level `weight_init` list, a fresh
    MLP(1, 1, 1) is trained with sgd() and another with batch_gd(), both using
    the module-level LR, epochs, data and targets. The accuracy curves
    returned by the trainers are drawn into the current figure and shown.

    mom: momentum coefficient forwarded to both trainers.
    """
    n_inits = len(weight_init)
    # Rows 0..n_inits-1 hold the SGD curves, the remaining rows the batch curves.
    perf = np.zeros([2 * n_inits, epochs + 1])
    epoch_axis = np.arange(perf.shape[1])
    for ind, w in enumerate(weight_init):
        # SGD
        net = MLP(1, 1, 1)
        net.setWeights(w)
        perf[ind, :] = net.sgd(LR, epochs, mom, data, targets)
        # A single line needs a string label; a list would be shown as its
        # repr in the legend and is deprecated by newer matplotlib.
        plt.plot(epoch_axis, perf[ind, :], label='SGD ' + w)

        # Batch
        net2 = MLP(1, 1, 1)
        net2.setWeights(w)
        perf[ind + n_inits, :] = net2.batch_gd(LR, epochs, mom, data, targets)
        plt.plot(epoch_axis, perf[ind + n_inits, :], label='Batch ' + w)
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.17), ncol=3, fancybox=True, shadow=True)
    plt.show()

    

def plotErrorSurface(density, net1Hist, net2Hist):
    """Plot the MSE over a grid of both weights and overlay two weight paths.

    density:  number of grid points per weight axis (each axis spans [-4, 4]).
    net1Hist: sequence of weight pairs, drawn in black as 'stochastic'.
    net2Hist: sequence of weight pairs, drawn in green as 'batch'.

    The error is evaluated on the module-level `data` and `targets`. Two
    figures are shown: a 3-D surface and a filled contour plot with the paths.
    """
    probe = MLP(1, 1, 1)
    # Only needed to allocate the weight list; both values are overwritten below.
    probe.setWeights('LeCun')

    axis = np.linspace(-4, 4, density)
    W1, W2 = np.meshgrid(axis, axis)
    allMse = np.zeros(np.shape(W1))
    for i, j in np.ndindex(density, density):
        probe.weights[0] = W1[i, j]
        probe.weights[1] = W2[i, j]
        allMse[i, j] = probe.getMSE(data, targets)

    # Figure 1: error surface in 3-D.
    surfaceFig = plt.figure()
    ax3d = surfaceFig.add_subplot(111, projection='3d')
    surface = ax3d.plot_surface(W1, W2, allMse, cmap=plt.cm.coolwarm)
    plt.colorbar(surface)
    plt.show()

    # Split every history into its first-weight and second-weight sequence.
    trajectories = [
        ([step[0] for step in hist], [step[1] for step in hist], colour, name)
        for hist, colour, name in (
            (net1Hist, 'black', 'stochastic'),
            (net2Hist, 'green', 'batch'),
        )
    ]

    # Figure 2: filled contours with both weight paths on top.
    plt.figure()
    filled = plt.contourf(W1, W2, allMse)
    for firstW, secondW, colour, name in trajectories:
        plt.plot(firstW, secondW, c=colour, label=name)
    plt.colorbar(filled)
    plt.legend()
    plt.show()


In [21]:
# Fixed seed so the weight initialisations of this run are reproducible.
np.random.seed(2)
evaluate(mom=0)  # momentum coefficient 0: plain gradient descent

In [22]:
# Fixed seed so the weight initialisations of this run are reproducible.
np.random.seed(3)
evaluate(mom=momentum)  # same comparison, now with the configured momentum

In [None]:
# Train one network per optimiser, each from its own LeCun initialisation.
mlp1 = MLP(1, 1, 1)
mlp1.setWeights('LeCun')
mlp2 = MLP(1, 1, 1)
mlp2.setWeights('LeCun')
# The third argument of sgd()/batch_gd() is the momentum coefficient. The
# previously used name `decay` is not defined anywhere and raised NameError.
mlp1.sgd(LR, epochs, momentum, data, targets)
mlp2.batch_gd(LR, epochs, momentum, data, targets)

# Show both weight trajectories on the error surface (200 x 200 grid).
plotErrorSurface(200, mlp1.W_Hist, mlp2.W_Hist)