## Neuron

In [1]:
import torch

In [2]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fix the RNG seeds for reproducibility.
torch.manual_seed(777)
if device == 'cuda':
    # Seed the CUDA generators on every GPU as well.
    torch.cuda.manual_seed_all(777)

In [9]:
# XOR truth table: each row of X is an input pair, the matching row of Y
# is its 0/1 target. Both are float32 tensors created on `device`.
X = torch.tensor(
    [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]],
    dtype=torch.float32,
    device=device,
)
Y = torch.tensor(
    [[0],
     [1],
     [1],
     [0]],
    dtype=torch.float32,
    device=device,
)

In [10]:
# Building blocks of the single-layer model: a sigmoid activation and one
# affine layer mapping the 2 inputs to 1 output (with a bias term).
sigmoid = torch.nn.Sigmoid()
linear = torch.nn.Linear(in_features=2, out_features=1, bias=True)

In [11]:
# Chain linear -> sigmoid into one module, then move its parameters to `device`.
model = torch.nn.Sequential(linear, sigmoid)
model = model.to(device)

In [12]:
# Plain SGD over all model parameters, paired with binary cross-entropy
# on the sigmoid outputs.
optimizer = torch.optim.SGD(model.parameters(), lr=1)
criterion = torch.nn.BCELoss().to(device)

In [13]:
# Full-batch gradient descent: 1000 updates, logging the loss every 100 steps.
for step in range(1, 1001):
    # Forward pass and cost/loss function.
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)

    # Clear gradients from the previous step, backpropagate, then update.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if not step % 100:
        print(step, cost.item())

100 0.6931473016738892
200 0.6931471824645996
300 0.6931471228599548
400 0.6931471824645996
500 0.6931471824645996
600 0.6931471824645996
700 0.6931471824645996
800 0.6931471824645996
900 0.6931471824645996
1000 0.6931471824645996


In [14]:
# Evaluate on the training inputs with gradient tracking disabled; tensors
# produced under no_grad carry no autograd history, so they can be converted
# to NumPy directly.
with torch.no_grad():
    hypothesis = model(X)
    # Threshold the sigmoid output at 0.5 to obtain hard 0/1 predictions.
    predicted = hypothesis.gt(0.5).float()
    # Fraction of predictions that equal the targets.
    accuracy = predicted.eq(Y).float().mean()
    # NOTE: the 'Correct: ' label shows the thresholded predictions, not Y.
    print(
        '\nHypothesis: ', hypothesis.cpu().numpy(),
        '\nCorrect: ', predicted.cpu().numpy(),
        '\nAccuracy: ', accuracy.item(),
    )


Hypothesis:  [[0.5]
 [0.5]
 [0.5]
 [0.5]] 
Correct:  [[0.]
 [0.]
 [0.]
 [0.]] 
Accuracy:  0.5


In [16]:
# Two-layer network: 2 inputs -> 2 hidden units -> 1 output, with a sigmoid
# after each affine layer (the same stateless Sigmoid module is reused).
sigmoid = torch.nn.Sigmoid()
linear1 = torch.nn.Linear(in_features=2, out_features=2, bias=True)
linear2 = torch.nn.Linear(in_features=2, out_features=1, bias=True)

model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid)
model = model.to(device)

# modified learning rate from 0.1 to 1
optimizer = torch.optim.SGD(model.parameters(), lr=1)
criterion = torch.nn.BCELoss().to(device)

In [17]:
# Train the two-layer model on the full batch for 1000 updates,
# reporting the loss once every 100 steps.
for step in range(1, 1000 + 1):
    hypothesis = model(X)

    # cost/loss function
    cost = criterion(hypothesis, Y)

    # Reset stale gradients before backpropagating the new loss.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if step % 100:
        continue
    print(step, cost.item())

100 0.6918689012527466
200 0.6841354370117188
300 0.6265922784805298
400 0.4906574487686157
500 0.41159769892692566
600 0.3829862177371979
700 0.37076425552368164
800 0.364362895488739
900 0.3605150580406189
1000 0.35797661542892456


In [18]:
# Score the trained two-layer model on X without building an autograd graph;
# under no_grad the results need no .detach() before conversion to NumPy.
with torch.no_grad():
    hypothesis = model(X)
    # Outputs above 0.5 count as class 1, everything else as class 0.
    predicted = hypothesis.gt(0.5).float()
    # Mean of the element-wise matches against Y.
    accuracy = predicted.eq(Y).float().mean()
    # NOTE: the 'Correct: ' label shows the thresholded predictions, not Y.
    print(
        '\nHypothesis: ', hypothesis.cpu().numpy(),
        '\nCorrect: ', predicted.cpu().numpy(),
        '\nAccuracy: ', accuracy.item(),
    )


Hypothesis:  [[0.01046313]
 [0.4975911 ]
 [0.9842061 ]
 [0.50707996]] 
Correct:  [[0.]
 [0.]
 [1.]
 [1.]] 
Accuracy:  0.5


In [31]:
# Hyperparameters for the MNIST run below.
batch_size = 100        # samples per DataLoader batch
training_epochs = 15    # full passes over the training loader
learning_rate = 1e-3    # step size handed to the Adam optimizer

In [19]:
# Layers of a 784 -> 256 -> 256 -> 10 multilayer perceptron. 784 matches the
# 28 * 28 flattening applied to each image in the training loop.
relu = torch.nn.ReLU()
linear1 = torch.nn.Linear(in_features=784, out_features=256, bias=True)
linear2 = torch.nn.Linear(in_features=256, out_features=256, bias=True)
linear3 = torch.nn.Linear(in_features=256, out_features=10, bias=True)

In [20]:
# Overwrite each layer's weight matrix in place with draws from a standard
# normal distribution (mean 0, std 1); the biases keep their default init.
torch.nn.init.normal_(linear1.weight, mean=0.0, std=1.0)
torch.nn.init.normal_(linear2.weight, mean=0.0, std=1.0)
# Last expression of the cell: its return value (linear3.weight) is displayed.
torch.nn.init.normal_(linear3.weight, mean=0.0, std=1.0)

Parameter containing:
tensor([[ 0.6112, -0.1613, -0.8438,  ..., -1.9287, -1.2689, -0.3246],
        [-0.6433,  0.0267, -0.5696,  ..., -0.9983,  1.0426,  1.6787],
        [-0.9363, -1.1257, -0.0134,  ..., -1.5749, -1.7829,  0.9579],
        ...,
        [-0.6495,  0.2433, -1.3107,  ..., -2.3015, -0.0464,  0.9989],
        [-1.1110,  0.0055,  3.2523,  ...,  0.0344,  1.8057,  0.6736],
        [ 0.3127,  0.7949,  1.1529,  ..., -0.3934, -0.4496, -0.4538]],
       requires_grad=True)

In [21]:
# Stack the three affine layers with ReLU between them. The last layer has
# no activation, so the model emits raw scores.
model = torch.nn.Sequential(
    linear1, relu,
    linear2, relu,
    linear3,
)
model = model.to(device)

In [24]:
# Adam over every model parameter, paired with cross-entropy computed on
# the model's raw output scores.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss().to(device)

In [26]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [27]:
# Prefer CUDA whenever PyTorch reports an available GPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# for reproducibility: seed Python's RNG and PyTorch's CPU generator,
# plus the generators of all GPUs when running on CUDA.
random.seed(111)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [32]:
# MNIST train and test splits, stored under MNIST_data/ (downloaded when
# missing) and converted to tensors by the ToTensor transform.
mnist_train = dsets.MNIST(
    root='MNIST_data/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = dsets.MNIST(
    root='MNIST_data/',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Shuffled mini-batches over the training split; drop_last=True discards a
# final batch that would be smaller than batch_size.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [33]:
# Train the MNIST classifier for `training_epochs` passes over `data_loader`,
# printing the mean per-batch loss after each epoch.
total_batch = len(data_loader)  # number of batches per epoch
for epoch in range(training_epochs):
    # Running mean of the batch losses, kept as a plain Python float.
    avg_cost = 0.0

    for X, Y in data_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        # Clear old gradients, run the forward pass, backpropagate, update.
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        # Accumulate the scalar value, not the tensor: `cost` still carries
        # autograd history, and summing the tensors themselves would keep
        # every batch's graph referenced until the end of the epoch.
        avg_cost += cost.item() / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 157.098037720
Epoch: 0002 cost = 38.048709869
Epoch: 0003 cost = 23.522697449
Epoch: 0004 cost = 16.428840637
Epoch: 0005 cost = 12.003909111
Epoch: 0006 cost = 8.852062225
Epoch: 0007 cost = 6.621397495
Epoch: 0008 cost = 5.022294044
Epoch: 0009 cost = 3.792726517
Epoch: 0010 cost = 2.738862276
Epoch: 0011 cost = 2.056757212
Epoch: 0012 cost = 1.563541651
Epoch: 0013 cost = 1.161724329
Epoch: 0014 cost = 0.946739972
Epoch: 0015 cost = 0.730698824
Learning finished
