In [3]:
import torch

# .to(device) : GPU를 사용하게 하는 코드 
device = 'cuda' if torch.cuda.is_available() else 'cpu'

X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

# nn layers
w1 = torch.Tensor(2, 2).to(device) #2개에서 2개로 가는 weight
b1 = torch.Tensor(2).to(device) # 2 bias
w2 = torch.Tensor(2, 1).to(device) #2개에서 1개로 가는 weight
b2 = torch.Tensor(1).to(device)

def sigmoid(x):
    # sigmoid function
    return 1.0 / (1.0 + torch.exp(-x))
    # return torch.div(torch.tensor(1), torch.add(torch.tensor(1.0), torch.exp(-x)))

def sigmoid_prime(x): #sigmoid_prime 미분함수
    # derivative of the sigmoid function
    return sigmoid(x) * (1 - sigmoid(x))

for step in range(10001):
    # forward
    l1 = torch.add(torch.matmul(X, w1), b1) # weight*X + bias
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1, w2), b2)
    Y_pred = sigmoid(l2)
    cost = -torch.mean(Y * torch.log(Y_pred) + (1 - Y) * torch.log(1 - Y_pred))

    # Back prop (chain rule)
    # Loss derivative
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7) # b를 미분한 값 # 1e-7은 0으로 나누어지는 경우를 막아주기 위함

    # Layer 2
    d_l2 = d_Y_pred * sigmoid_prime(l2) 
    d_b2 = d_l2
    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_b2) # w에 대한 미분 #transpose swap함 ex) 10,5 => 5,10
    
    # Layer 1
    d_a1 = torch.matmul(d_b2, torch.transpose(w2, 0, 1)) 
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_b1)

    learning_rate = 1

    # Weight update
    w1 = w1 - learning_rate * d_w1 # 기존의 weight에 미분값을 learning_rate에 곱해서 빼줌
    b1 = b1 - learning_rate * torch.mean(d_b1, 0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * torch.mean(d_b2, 0)
    if step % 100 == 0:
      print(step, cost.item())

0 0.6932968497276306
100 0.6931382417678833
200 0.6930323243141174
300 0.6917732357978821
400 0.38876402378082275
500 0.0358712375164032
600 0.01722850650548935
700 0.01122412458062172
800 0.00829465501010418
900 0.0065671238116919994
1000 0.005430174991488457
1100 0.004626153968274593
1200 0.004028014373034239
1300 0.0035657971166074276
1400 0.003198168007656932
1500 0.00289878505282104
1600 0.0026503729168325663
1700 0.0024409075267612934
1800 0.002261972986161709
1900 0.0021073315292596817
2000 0.0019723824225366116
2100 0.0018536190036684275
2200 0.0017482263501733541
2300 0.001654153224080801
2400 0.0015696330228820443
2500 0.001493304269388318
2600 0.0014239996671676636
2700 0.0013608515728265047
2800 0.0013030229602009058
2900 0.0012498851865530014
3000 0.0012008855119347572
3100 0.0011555601377040148
3200 0.001113565289415419
3300 0.0010744676692411304
3400 0.001038028160110116
3500 0.0010039479238912463
3600 0.0009720476227812469
3700 0.0009420881397090852
3800 0.0009139500325

In [4]:
# Code:xor-nn

X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

# nn layers MLP(muti linear perceptron)
linear1 = torch.nn.Linear(2, 2, bias=True)
linear2 = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

# define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    # cost/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())


0 0.6949105262756348
100 0.6928117275238037
200 0.6915610432624817
300 0.683072566986084
400 0.5224964618682861
500 0.15488702058792114
600 0.06879375874996185
700 0.04215976968407631
800 0.02996267005801201
900 0.023098690435290337
1000 0.018734430894255638
1100 0.015728149563074112
1200 0.01353730633854866
1300 0.011872634291648865
1400 0.010566464625298977
1500 0.009515144862234592
1600 0.008651325479149818
1700 0.0079293018206954
1800 0.007317027077078819
1900 0.0067914011888206005
2000 0.0063354140147566795
2100 0.005936122499406338
2200 0.005583610851317644
2300 0.005270235240459442
2400 0.0049898019060492516
2500 0.004737417213618755
2600 0.0045091076754033566
2700 0.00430157920345664
2800 0.0041121249087154865
2900 0.003938550129532814
3000 0.003778901882469654
3100 0.0036316034384071827
3200 0.0034952133428305387
3300 0.0033686356619000435
3400 0.003250834997743368
3500 0.003140941495075822
3600 0.0030381898395717144
3700 0.0029418901540338993
3800 0.002851443365216255
3900 0.

In [6]:
#Code: xor-nn-wide-deep

X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

# nn layers
linear1 = torch.nn.Linear(2, 10, bias=True)
linear2 = torch.nn.Linear(10, 10, bias=True)
linear3 = torch.nn.Linear(10, 10, bias=True)
linear4 = torch.nn.Linear(10, 1, bias=True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid).to(device)

# define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    # cost/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())

0 0.6953924298286438
100 0.6931321620941162
200 0.6931297779083252
300 0.6931272745132446
400 0.6931246519088745
500 0.6931217908859253
600 0.6931189298629761
700 0.6931157112121582
800 0.6931122541427612
900 0.6931085586547852
1000 0.6931045055389404
1100 0.693100094795227
1200 0.6930952072143555
1300 0.6930897235870361
1400 0.693083643913269
1500 0.6930768489837646
1600 0.6930692195892334
1700 0.6930602192878723
1800 0.6930500268936157
1900 0.6930381059646606
2000 0.6930240988731384
2100 0.6930074691772461
2200 0.6929874420166016
2300 0.6929630041122437
2400 0.6929327249526978
2500 0.692894458770752
2600 0.6928451657295227
2700 0.6927798986434937
2800 0.6926906704902649
2900 0.6925640106201172
3000 0.6923754215240479
3100 0.6920760273933411
3200 0.6915575265884399
3300 0.6905418634414673
3400 0.6881334781646729
3500 0.6801938414573669
3600 0.6327389478683472
3700 0.5360207557678223
3800 0.02153431624174118
3900 0.0070287808775901794
4000 0.003920086659491062
4100 0.002649389673024416