# Lab 8-2: xor nn

In [3]:
import torch

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
if device == 'cuda':
    torch.cuda.manual_seed_all(777)
else:
    torch.manual_seed(777)

# Backpropagation

In [5]:
def sigmoid(x):
    # sigmoid function
    return 1.0 / (1.0 + torch.exp(-x))
    # return torch.div(torch.tensor(1), torch.add(torch.tensor(1.0), torch.exp(-x)))

In [6]:
def sigmoid_prime(x):
    # derivative sigmoid function
    return sigmoid(x) * (1 - sigmoid(x))

In [13]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)
# nn layers
w1 = torch.Tensor(2, 2).to(device)
b1 = torch.Tensor(2).to(device)
w2 = torch.Tensor(2, 1).to(device)
b2 = torch.Tensor(1).to(device)
learning_rate = 1
for step in range(10001):
    # forward
    l1 = torch.add(torch.matmul(X, w1), b1)
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1, w2), b2)
    Y_pred = sigmoid(l2)
    
    # binary_cross_entropy loss
    cost = -torch.mean(Y * torch.log(Y_pred) + (1 - Y) * torch.log(1 - Y_pred))
    
    # Back prop (chain rule)
    # Loss derivative
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)
    
    # Layer 2
    d_l2 =  d_Y_pred * sigmoid_prime(l2)
    d_b2 = d_l2
    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_b2)
    
    # Layer 1
    d_a1 = torch.matmul(d_b2, torch.transpose(w2, 0, 1))
    d_l1 =  d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_b1)
    
    # Weight update
    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * torch.mean(d_b1, 0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * torch.mean(d_b2, 0)
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.8132616877555847
100 0.5142272114753723
200 0.03853989392518997
300 0.017614422366023064
400 0.011352245695888996
500 0.00835411436855793
600 0.006600319407880306
700 0.005451038479804993
800 0.0046404218301177025
900 0.004038388840854168
1000 0.0035737543366849422
1100 0.003204474225640297
1200 0.00290397135540843
1300 0.002654673997312784
1400 0.002444619545713067
1500 0.0022652132902294397
1600 0.0021102330647408962
1700 0.0019749589264392853
1800 0.0018559297313913703
1900 0.001750390394590795
2000 0.0016561250668019056
2100 0.00157148705329746
2200 0.0014950251206755638
2300 0.0014256321592256427
2400 0.0013623657869175076
2500 0.0013044481165707111
2600 0.001251266454346478
2700 0.0012021776055917144
2800 0.001156838028691709
2900 0.001114754006266594
3000 0.001075612148270011
3100 0.001039113150909543
3200 0.0010050033451989293
3300 0.0009730585152283311
3400 0.0009430844220332801
3500 0.0009148719254881144
3600 0.0008883163682185113
3700 0.0008632834069430828
3800 0.0008395

# Code: xor-nn

In [21]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)
# nn layers
linear1 = torch.nn.Linear(2, 2, bias=True)
linear2 = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()
# model
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)
# define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)

    # cost/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()

    if step % 100 == 0:
        print(step, cost.item())

0 0.8614465594291687
100 0.6930066347122192
200 0.6925849318504333
300 0.6894930005073547
400 0.6601967811584473
500 0.5637608170509338
600 0.47203925251960754
700 0.3573346734046936
800 0.11411625891923904
900 0.05688459426164627
1000 0.036910295486450195
1100 0.02710331603884697
1200 0.02133595198392868
1300 0.017556410282850266
1400 0.01489521935582161
1500 0.012923291884362698
1600 0.01140536367893219
1700 0.010201824828982353
1800 0.00922483392059803
1900 0.008416290394961834
2000 0.007736333180218935
2100 0.007156721316277981
2200 0.006656938698142767
2300 0.006221615709364414
2400 0.005839183926582336
2500 0.005500460043549538
2600 0.0051985206082463264
2700 0.004927643574774265
2800 0.004683340899646282
2900 0.004461850970983505
3000 0.0042602247558534145
3100 0.004075816832482815
3200 0.0039065685123205185
3300 0.003750707022845745
3400 0.003606699872761965
3500 0.0034732415806502104
3600 0.0033491759095340967
3700 0.0032336628064513206
3800 0.0031257416121661663
3900 0.003024

In [22]:
# Accuracy computation
# True if hypothesis>0.5 else False
with torch.no_grad():
    hypothesis = model(X)
    predicted = (hypothesis > 0.5).float()
    accuracy = (predicted == Y).float().mean()
    print('\nHypothesis: ', hypothesis.detach().cpu().numpy(), '\nCorrect: ', predicted.detach().cpu().numpy(), '\nAccuracy: ', accuracy.item())


Hypothesis:  [[1.3017117e-03]
 [9.9909627e-01]
 [9.9909198e-01]
 [9.3076332e-04]] 
Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:  1.0
