# Backpropagation

In [10]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [12]:
# parameters
learning_rate = 0.5
batch_size = 10

In [13]:
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [14]:

# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [15]:
# nn Layers
w1 = torch.nn.Parameter(torch.Tensor(784, 30)).to(device)
b1 = torch.nn.Parameter(torch.Tensor(30)).to(device)
w2 = torch.nn.Parameter(torch.Tensor(30, 10)).to(device)
b2 = torch.nn.Parameter(torch.Tensor(10)).to(device)

#nn.linear 2개 사용한 것과 같음

In [17]:
torch.nn.init.normal_(w1)
torch.nn.init.normal_(b1)
torch.nn.init.normal_(w2)
torch.nn.init.normal_(b2)

Parameter containing:
tensor([ 1.0955,  0.6607,  0.0665,  0.6106,  0.8472, -0.1985,  0.2002,  0.4387,
        -0.5780, -0.4606], requires_grad=True)

In [7]:
def sigmoid(x):
    #sigmoid function
    return 1.0 / (1.0 + torch.exp(-x))

def sigmoid_prime(x):  #derivative of the sigmoid function
    return sigmoid(x) * (1 - sigmoid(x))

In [30]:
X_test = mnist_test.test_data.view(-1, 28*28).float().to(device)[:1000]
Y_test = mnist_test.test_labels.to(device)[:1000]
i = 0
while not i == 10000:
    for X, Y in data_loader:
        i += 1
        
        # forward
        X = X.view(-1, 28 * 28).to(device)
        Y = torch.zeros((batch_size, 10)).scatter_(1, Y.unsqueeze(1), 1).to(device) #one-hot으로 바꿈
        l1 = torch.add(torch.matmul(X, w1), b1)
        a1 = sigmoid(l1)
        l2 = torch.add(torch.matmul(a1, w2), b2)
        y_pred = sigmoid(l2)
        # Error = 1/2(Y - y_pred)^2 -> 미분하면 y_pred -Y 
        diff = y_pred - Y
        
        # Back prop (chain rule)
        d_l2 = diff * sigmoid_prime(l2)
        d_b2 = d_l2
        d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)
        
        d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))
        d_l1 = d_a1 * sigmoid_prime(l1)
        d_b1 = d_l1
        d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)

        w1 = w1 - learning_rate * d_w1
        b1 = b1 - learning_rate * torch.mean(d_b1, 0)
        w2 = w2 - learning_rate * d_w2
        b2 = b2 - learning_rate * torch.mean(d_b2, 0)

        if i % 1000 == 0:
            l1 = torch.add(torch.matmul(X_test, w1), b1)
            a1 = sigmoid(l1)
            l2 = torch.add(torch.matmul(a1, w2), b2)
            y_pred = sigmoid(l2)
            acct_mat = torch.argmax(y_pred, 1) == Y_test
            
            acct_res = acct_mat.sum()
            print(acct_res.item())

        if i == 10000:
            break

    

929
923
928
938
928
931
935
929
934
932


## xor- nn

In [18]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [19]:
#nn layers
linear1 = torch.nn.Linear(2, 2, bias=True)  # MLP
linear2 = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

# define cost/Loss * optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    # cost/Loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())

0 0.698509693145752
100 0.6929240226745605
200 0.6923138499259949
300 0.6899295449256897
400 0.6721627712249756
500 0.5749703049659729
600 0.3096465766429901
700 0.09439206123352051
800 0.05063152313232422
900 0.033998824656009674
1000 0.02542046457529068
1100 0.020230667665600777
1200 0.016768211498856544
1300 0.014300236478447914
1400 0.012455277144908905
1500 0.01102551631629467
1600 0.009885938838124275
1700 0.008956958539783955
1800 0.008185474202036858
1900 0.007534815929830074
2000 0.006978826597332954
2100 0.006498412694782019
2200 0.00607915548607707
2300 0.005710202734917402
2400 0.005383028648793697
2500 0.005090941675007343
2600 0.004828638397157192
2700 0.0045917825773358345
2800 0.004376884084194899
2900 0.004181038588285446
3000 0.004001826513558626
3100 0.003837218740954995
3200 0.003685502801090479
3300 0.003545236773788929
3400 0.0034151594154536724
3500 0.003294220194220543
3600 0.00318150338716805
3700 0.0030761698726564646
3800 0.002977558644488454
3900 0.002884995

## xor-nn-wide-deep



In [20]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [25]:
#nn layers
linear1 = torch.nn.Linear(2, 10, bias=True)  # MLP
linear2 = torch.nn.Linear(10, 10, bias=True)
linear3 = torch.nn.Linear(10, 10, bias=True)
linear4 = torch.nn.Linear(10, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid).to(device)

# define cost/Loss * optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)

for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    # cost/Loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())

0 0.7792053818702698
100 0.6931605339050293
200 0.6931594610214233
300 0.6931584477424622
400 0.693157434463501
500 0.6931564807891846
600 0.6931555271148682
700 0.6931545734405518
800 0.6931536197662354
900 0.6931527853012085
1000 0.6931518912315369
1100 0.6931511163711548
1200 0.6931502223014832
1300 0.6931493878364563
1400 0.6931484937667847
1500 0.6931477189064026
1600 0.6931468844413757
1700 0.6931460499763489
1800 0.6931451559066772
1900 0.6931443214416504
2000 0.693143367767334
2100 0.6931425333023071
2200 0.6931415796279907
2300 0.6931406855583191
2400 0.6931396722793579
2500 0.693138599395752
2600 0.6931375861167908
2700 0.6931365728378296
2800 0.6931354403495789
2900 0.6931343078613281
3000 0.6931330561637878
3100 0.6931318044662476
3200 0.6931304335594177
3300 0.6931289434432983
3400 0.6931275129318237
3500 0.69312584400177
3600 0.6931241750717163
3700 0.6931222677230835
3800 0.6931203603744507
3900 0.6931183338165283
4000 0.6931160092353821
4100 0.6931135654449463
4200 0.69