In [32]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import numpy as np

#### NetzA

<img src="./img/netzA.png" width="800">

<img src="./img/netzAa.png" width="601">


In [91]:
import math
class NetzA:
    def __init__(self,param):
        self.param = param
   
    def calc(self, x1, x2, y, lr):
        w1, w2, b = self.param   
        # forward
        z = w1 * x1 + w2 * x2 + b   
        a = 1/(1+math.exp(-1.0*z))
        y_hat = a
        
        print("\nParameter:")
        print("w1={:6.4f}, w2={:6.4f}, b={:6.4f}".format(w1,w2,b))
        print("z={:6.4f}, a={:6.4f}".format(z,a))
        
        # backward
        db = (y_hat - y)*a*(1-a)    
        dw1 = x1 * db
        dw2 = x2 * db
        
        # optimize
        w1 -= lr * dw1               
        w2 -= lr * dw2
        b  -= lr * db
                
        print("\nGradienten:")
        print("dw1={:6.4f}, dw2={:6.4f}, db={:6.4f}".format(dw1,dw2,db))
         
        print("\nUpdated Parameter, learningrate =", lr)
        print("w1={:6.4f}, w2={:6.4f}, b={:6.4f}".format(w1,w2,b))
        print("----------------")
        
        self.param = [w1, w2, b]

In [92]:
param = [-1, 1, -1.5]
netz = NetzA(param)
netz.calc(1,0,y=1,lr=0.1)



Parameter:
w1=-1.0000, w2=1.0000, b=-1.5000
z=-2.5000, a=0.0759

Gradienten:
dw1=-0.0648, dw2=-0.0000, db=-0.0648

Updated Parameter, learningrate = 0.1
w1=-0.9935, w2=1.0000, b=-1.4935
----------------


#### NetzA mit Pytorch

In [93]:
class NetA(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2,1)
 
    def forward(self, t):
        t = self.fc1(t)
        t = torch.sigmoid(t)
        return t

def loss_fn(y, y_hat):
    return 0.5*(y-y_hat)**2

w1, w2, b = [-1., 1., -1.5]

net = NetA()
net.fc1.weight.data = torch.tensor([w1, w2]) 
net.fc1.bias.data = torch.tensor([b]) 

y = torch.tensor([1]).reshape(1,1)
x = torch.tensor([1.0,0]).reshape(1,1,2)

lr = 0.1

print("\nParameter:")
for p in net.parameters():
    print(p)
    
loss = loss_fn(net(x), y)
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
loss.backward()

print("\nGradienten:")
for p in net.parameters():
    print(p.grad)

optimizer.step()
print("\nUpdated Parameter, learningrate =", lr)
for p in net.parameters():
    print(p)



Parameter:
Parameter containing:
tensor([-1.,  1.], requires_grad=True)
Parameter containing:
tensor([-1.5000], requires_grad=True)

Gradienten:
tensor([-0.0648,  0.0000])
tensor([-0.0648])

Updated Parameter, learningrate = 0.1
Parameter containing:
tensor([-0.9935,  1.0000], requires_grad=True)
Parameter containing:
tensor([-1.4935], requires_grad=True)


In [94]:
loss.type()
print(net.fc1.weight.data)
 

tensor([-0.9935,  1.0000])


#### NetzB

<img src="./img/netzB.png" width="900">

<img src="./img/netzBa.png" width="701">

In [95]:
import math
class NetzB:
    def __init__(self,param):
        self.param = param
   
    def calc(self, x1, x2, y, lr):
        w11, w12, b1, w21, w22, b2, w31, w32, b3 = self.param
        
        # forward
        z1 = w11 * x1 + w12 * x2 + b1      
        a1 = z1 if z1 > 0 else 0 
        
        z2 = w21 * x1 + w22 * x2 + b2 
        a2 = z2 if z2 > 0 else 0 
        
        z3 = w31 * a1 + w32 * a2 + b3
        a3 = 1/(1+math.exp(-1.0*z3))
        y_hat = a3
        
        print("\nForward:")
        print("w11={:6.4f}, w12={:6.4f}, b1={:6.4f}".format(w11,w12,b1))
        print("w21={:6.4f}, w22={:6.4f}, b2={:6.4f}".format(w21,w22,b2))
        print("w31={:6.4f}, w32={:6.4f}, b3={:6.4f}".format(w31,w32,b3))
        
        print("z1={:6.4f}, a1={:6.4f}".format(z1,a1))
        print("z2={:6.4f}, a2={:6.4f}".format(z2,a2))
        print("z3={:6.4f}, a3={:6.4f}".format(z3,a3))
        
        # backward
        db3 = (y_hat-y)*a3*(1-a3)              
        dw31 = a1 * db3
        dw32 = a2 * db3
        
        if a1 > 0:
            db1 = db3 * w31  
            dw11 = db1 * x1 
            dw12 = db1 * x2
        else:
            db1 = dw11 = dw12 = 0
            
        if a2 > 0:
            db2 = db3 * w32  
            dw21 = db2 * x1 
            dw22 = db2 * x2
        else:
            db2 = dw21 = dw22 = 0
           
        # optimize
        b1 -= lr * db1                    
        w11 -= lr * dw11
        w12 -= lr * dw12
             
        b2 -= lr * db2
        w21 -= lr * dw21
        w22 -= lr * dw22
            
        b3 -= lr * db3
        w31 -= lr * dw31
        w32 -= lr * dw32
        
        print("\nGradienten:")
        print("dw11={:6.4f}, dw12={:6.4f}, db1={:6.4f}".format(dw11,dw12,db1))
        print("dw21={:6.4f}, dw22={:6.4f}, db2={:6.4f}".format(dw21,dw22,db2))
        print("dw31={:6.4f}, dw32={:6.4f}, db3={:6.4f}".format(dw31,dw32,db3))

        print("\nUpdated Parameter:")
        print("w11={:6.4f}, w12={:6.4f}, b1={:6.4f}".format(w11,w12,b1))
        print("w21={:6.4f}, w22={:6.4f}, b2={:6.4f}".format(w21,w22,b2))
        print("w31={:6.4f}, w32={:6.4f}, b3={:6.4f}".format(w31,w32,b3))
    

        
        self.param = [w11, w12, b1, w21, w22, b2, w31, w32, b3]
 

In [96]:
param = [-3, 2, 5, 3, -1, -1, -1, 5, -1]
netz = NetzB(param)
netz.calc(1,2,y=1,lr=0.3)



Forward:
w11=-3.0000, w12=2.0000, b1=5.0000
w21=3.0000, w22=-1.0000, b2=-1.0000
w31=-1.0000, w32=5.0000, b3=-1.0000
z1=6.0000, a1=6.0000
z2=0.0000, a2=0.0000
z3=-7.0000, a3=0.0009

Gradienten:
dw11=0.0009, dw12=0.0018, db1=0.0009
dw21=0.0000, dw22=0.0000, db2=0.0000
dw31=-0.0055, dw32=-0.0000, db3=-0.0009

Updated Parameter:
w11=-3.0003, w12=1.9995, b1=4.9997
w21=3.0000, w22=-1.0000, b2=-1.0000
w31=-0.9984, w32=5.0000, b3=-0.9997


#### NetzB mit Pytorch

In [98]:
class NetB(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2,2)
        self.fc2 = nn.Linear(2,1)
 
    def forward(self, t):
        t = self.fc1(t)
        t = torch.relu(t)
        t = self.fc2(t)
        t = torch.sigmoid(t)
        return t

def loss_fn(y, y_hat):
    return 0.5*(y-y_hat)**2

w11, w12, b1, w21, w22, b2, w31, w32, b3 = [-3, 2, 5, 3, -1, -1, -1, 5, -1]

net = NetB()
net.fc1.weight.data = torch.tensor([[w11, w12], [w21, w22]]).float()
net.fc1.bias.data = torch.tensor([b1, b2]).float()

net.fc2.weight.data = torch.tensor([w31, w32]).float()
net.fc2.bias.data = torch.tensor([b3]).float()

y = torch.tensor([1]).reshape(1,1)
x = torch.tensor([1.0,2]).reshape(1,1,2)

lr = 0.3

print("\nAusgangslage:")
for p in net.parameters():
    print(p)
    
loss = loss_fn(net(x), y)
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
loss.backward()

print("\nGradienten:")
for p in net.parameters():
    print(p.grad)

optimizer.step()
print("\nUpdated Parameter, learningrate =", lr)
for p in net.parameters():
    print(p)


Ausgangslage:
Parameter containing:
tensor([[-3.,  2.],
        [ 3., -1.]], requires_grad=True)
Parameter containing:
tensor([ 5., -1.], requires_grad=True)
Parameter containing:
tensor([-1.,  5.], requires_grad=True)
Parameter containing:
tensor([-1.], requires_grad=True)

Gradienten:
tensor([[0.0009, 0.0018],
        [0.0000, 0.0000]])
tensor([0.0009, 0.0000])
tensor([-0.0055,  0.0000])
tensor([-0.0009])

Updated Parameter, learningrate = 0.3
Parameter containing:
tensor([[-3.0003,  1.9995],
        [ 3.0000, -1.0000]], requires_grad=True)
Parameter containing:
tensor([ 4.9997, -1.0000], requires_grad=True)
Parameter containing:
tensor([-0.9984,  5.0000], requires_grad=True)
Parameter containing:
tensor([-0.9997], requires_grad=True)


#### NetzC

<img src="./img/netzCa.png" width="601">

<img src="./img/netzC.png" width="701">

In [99]:
class NetC(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2,2)
        self.fc2 = nn.Linear(2,2)
        self.fc3 = nn.Linear(2,1)
 
    def forward(self, t):
        t = self.fc1(t)
        t = torch.relu(t)
        t = self.fc2(t)
        t = torch.relu(t)
        t = self.fc3(t)
        t = torch.sigmoid(t)
        return t

#### Training für die OR-Funktion

In [105]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2,1)
 
    def forward(self, t):
        t = self.fc1(t)
        t = torch.sigmoid(t)
        return t

def loss_fn(y, y_hat):
    return 0.5*(y-y_hat)**2


X  = [[0,0],[0,1],[1,0],[1,1.]]
Y = [0,1,1,1]

X = torch.Tensor(X).reshape(4,1,1,2)   # nr, kanäle, width, height
Y = torch.Tensor(Y).reshape(4,1,1)

net = Net()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

 