In [2]:
from sklearn.datasets import load_iris
import torch

In [3]:
# load iris data set
x, t = load_iris(return_X_y=True)

# check size
x.shape, t.shape

((150, 4), (150,))

In [4]:
# check type
type(x), type(t)

(numpy.ndarray, numpy.ndarray)

In [5]:
# check data type
x.dtype, t.dtype

(dtype('float64'), dtype('int32'))

In [6]:
x = torch.tensor(x, dtype=torch.float32)
t = torch.tensor(t, dtype=torch.int64)

# check type
type(x), type(t)

(torch.Tensor, torch.Tensor)

In [7]:
# check data type
x.dtype, t.dtype

(torch.float32, torch.int64)

In [8]:
from torch.utils.data import TensorDataset

# concat input values and target values, and convert to a Dataset object
dataset = TensorDataset(x, t)
dataset

<torch.utils.data.dataset.TensorDataset at 0x13f0141d4e0>

In [9]:
# check type
type(dataset)

torch.utils.data.dataset.TensorDataset

In [10]:
# values of sample 1
dataset[0]

(tensor([5.1000, 3.5000, 1.4000, 0.2000]), tensor(0))

In [11]:
# check type
type(dataset[0])

tuple

In [12]:
# input value of sample 1
dataset[0][0]

tensor([5.1000, 3.5000, 1.4000, 0.2000])

In [13]:
# target value of sample 1
dataset[0][1]

tensor(0)

In [14]:
# number of samples
len(dataset)

150

In [15]:
# number of samples for each subset
# train : val : test = 60% : 20% : 20%
n_train = int(len(dataset) * 0.6)
n_val = int(len(dataset) * 0.2)
# test takes the remainder, so the three sizes always sum to len(dataset)
# even when int() truncates the train / val sizes
n_test = len(dataset) - n_train - n_val

# check the number of samples in each subset
n_train, n_val, n_test

(90, 30, 30)

In [16]:
from torch.utils.data import random_split

In [17]:
# fix the seed so that the random split is reproducible
torch.manual_seed(0)

# split the dataset into train / val / test subsets of the requested sizes
train, val, test = random_split(dataset, [n_train, n_val, n_test])

# check the number of samples in each subset
len(train), len(val), len(test)

(90, 30, 30)

In [18]:
# batch size
batch_size = 10

In [19]:
from torch.utils.data import DataLoader

# shuffle -> True only for train data
train_loader = DataLoader(train, batch_size, shuffle=True)
val_loader = DataLoader(val, batch_size)
test_loader = DataLoader(test, batch_size)

In [20]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small fully connected classifier: 4 inputs -> 4 hidden units (ReLU) -> 3 outputs."""

    def __init__(self):
        super().__init__()
        # layers are registered in the order in which forward() applies them
        self.fc1 = nn.Linear(4, 4)
        self.fc2 = nn.Linear(4, 3)

    def forward(self, x):
        """Apply fc1, ReLU and fc2 to ``x`` and return the raw output of fc2."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [21]:
# fix the random seed so that the initial weights are reproducible
torch.manual_seed(0)

# create an instance of the network
net = Net()

# check network
net

Net(
  (fc1): Linear(in_features=4, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=3, bias=True)
)

In [22]:
criterion = nn.CrossEntropyLoss()
criterion

CrossEntropyLoss()

In [23]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.1
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [24]:
# get samples of batch size
batch = next(iter(train_loader))
batch

[tensor([[5.4000, 3.9000, 1.7000, 0.4000],
         [4.6000, 3.6000, 1.0000, 0.2000],
         [6.5000, 3.0000, 5.5000, 1.8000],
         [6.9000, 3.1000, 5.4000, 2.1000],
         [6.3000, 2.5000, 4.9000, 1.5000],
         [7.1000, 3.0000, 5.9000, 2.1000],
         [5.8000, 2.7000, 4.1000, 1.0000],
         [7.0000, 3.2000, 4.7000, 1.4000],
         [6.7000, 3.0000, 5.0000, 1.7000],
         [7.2000, 3.6000, 6.1000, 2.5000]]),
 tensor([0, 0, 2, 2, 1, 2, 1, 1, 1, 2])]

In [25]:
# split input values and target values
x, t = batch

# check input values
x

tensor([[5.4000, 3.9000, 1.7000, 0.4000],
        [4.6000, 3.6000, 1.0000, 0.2000],
        [6.5000, 3.0000, 5.5000, 1.8000],
        [6.9000, 3.1000, 5.4000, 2.1000],
        [6.3000, 2.5000, 4.9000, 1.5000],
        [7.1000, 3.0000, 5.9000, 2.1000],
        [5.8000, 2.7000, 4.1000, 1.0000],
        [7.0000, 3.2000, 4.7000, 1.4000],
        [6.7000, 3.0000, 5.0000, 1.7000],
        [7.2000, 3.6000, 6.1000, 2.5000]])

In [26]:
# check target values
t

tensor([0, 0, 2, 2, 1, 2, 1, 1, 1, 2])

In [27]:
# weight of fully connected layer fc1
net.fc1.weight

Parameter containing:
tensor([[-0.0037,  0.2682, -0.4115, -0.3680],
        [-0.1926,  0.1341, -0.0099,  0.3964],
        [-0.0444,  0.1323, -0.1511, -0.0983],
        [-0.4777, -0.3311, -0.2061,  0.0185]], requires_grad=True)

In [28]:
# bias of fully connected layer fc1
net.fc1.bias

Parameter containing:
tensor([ 0.1977,  0.3000, -0.3390, -0.2177], requires_grad=True)

In [29]:
# weight of fully connected layer fc2
net.fc2.weight

Parameter containing:
tensor([[ 0.1816,  0.4152, -0.1029,  0.3742],
        [-0.0806,  0.0529,  0.4527, -0.4638],
        [-0.3148, -0.1266, -0.1949,  0.4320]], requires_grad=True)

In [30]:
# bias of fully connected layer fc2
net.fc2.bias

Parameter containing:
tensor([-0.3241, -0.2302, -0.3493], requires_grad=True)

In [31]:
# calculate predicted values
y = net.forward(x)
y

tensor([[-0.2557, -0.2605, -0.4679],
        [-0.2041, -0.2834, -0.5574],
        [-0.2786, -0.2244, -0.3632],
        [-0.2552, -0.2214, -0.3703],
        [-0.3241, -0.2302, -0.3493],
        [-0.2788, -0.2244, -0.3631],
        [-0.3241, -0.2302, -0.3493],
        [-0.3241, -0.2302, -0.3493],
        [-0.3090, -0.2282, -0.3539],
        [-0.1884, -0.2129, -0.3907]], grad_fn=<AddmmBackward0>)

In [32]:
y = net(x)
y

tensor([[-0.2557, -0.2605, -0.4679],
        [-0.2041, -0.2834, -0.5574],
        [-0.2786, -0.2244, -0.3632],
        [-0.2552, -0.2214, -0.3703],
        [-0.3241, -0.2302, -0.3493],
        [-0.2788, -0.2244, -0.3631],
        [-0.3241, -0.2302, -0.3493],
        [-0.3241, -0.2302, -0.3493],
        [-0.3090, -0.2282, -0.3539],
        [-0.1884, -0.2129, -0.3907]], grad_fn=<AddmmBackward0>)

In [33]:
# calculate loss function
# use call method of criterion
loss = criterion(y, t)
loss

tensor(1.0882, grad_fn=<NllLossBackward0>)

In [34]:
# check grads before loss.backward() has been called
# NOTE: this cell shows no output, which is consistent with .grad still being
# None at this point (a notebook only echoes the last expression of a cell,
# and a None result is not displayed)
net.fc1.weight.grad
net.fc1.bias.grad
net.fc2.weight.grad
net.fc2.bias.grad

In [35]:
loss.backward()

In [36]:
net.fc1.weight.grad

tensor([[-0.2311, -0.1731, -0.0627, -0.0139],
        [ 0.7327,  0.3358,  0.6025,  0.2229],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000]])

In [37]:
net.fc1.bias.grad

tensor([-0.0461,  0.1060,  0.0000,  0.0000])

In [38]:
net.fc2.weight.grad

tensor([[-0.0652,  0.0259,  0.0000,  0.0000],
        [ 0.0366,  0.0227,  0.0000,  0.0000],
        [ 0.0285, -0.0486,  0.0000,  0.0000]])

In [39]:
net.fc2.bias.grad

tensor([ 0.1415, -0.0452, -0.0963])

In [40]:
# update parameters with information of grads
optimizer.step()

In [41]:
net.fc1.weight

Parameter containing:
tensor([[ 0.0194,  0.2855, -0.4053, -0.3666],
        [-0.2659,  0.1005, -0.0702,  0.3742],
        [-0.0444,  0.1323, -0.1511, -0.0983],
        [-0.4777, -0.3311, -0.2061,  0.0185]], requires_grad=True)

In [42]:
net.fc1.bias

Parameter containing:
tensor([ 0.2023,  0.2894, -0.3390, -0.2177], requires_grad=True)

In [43]:
net.fc2.weight

Parameter containing:
tensor([[ 0.1881,  0.4126, -0.1029,  0.3742],
        [-0.0843,  0.0506,  0.4527, -0.4638],
        [-0.3176, -0.1217, -0.1949,  0.4320]], requires_grad=True)

In [44]:
net.fc2.bias

Parameter containing:
tensor([-0.3382, -0.2256, -0.3397], requires_grad=True)

In [45]:
torch.cuda.is_available()

False

In [46]:
# number of passes over the training data
max_epoch = 1

# seed the RNG so the network starts from the same weights on every run
torch.manual_seed(0)

# use the first GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# build a fresh network and move its parameters to the selected device
net = Net().to(device)

# plain SGD over the parameters of the new network
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

for epoch in range(max_epoch):
    for batch in train_loader:
        # unpack one mini-batch and move inputs and targets to the device
        x, t = (part.to(device) for part in batch)

        # clear the gradients accumulated by the previous step
        optimizer.zero_grad()

        # forward pass, then loss between predictions and targets
        y = net(x)
        loss = criterion(y, t)

        # .item() turns the 0-dim loss tensor into a Python float for printing
        print('loss: ', loss.item())

        # backward pass, then one parameter update
        loss.backward()
        optimizer.step()


loss:  1.0881630182266235
loss:  1.0393922328948975
loss:  1.002811312675476
loss:  1.0250868797302246
loss:  1.0088638067245483
loss:  0.9351975321769714
loss:  0.8939588665962219
loss:  0.9765418171882629
loss:  0.9651519060134888


In [47]:
# get index num of max value in each row
y_label = torch.argmax(y, dim=1)

# show result
y_label

tensor([0, 1, 0, 1, 1, 1, 0, 1, 0, 0])

In [48]:
# target value
t

tensor([1, 1, 0, 2, 1, 2, 0, 2, 1, 1])

In [49]:
y_label == t

tensor([False,  True,  True, False,  True, False,  True, False, False, False])

In [50]:
# sum of True
(y_label == t).sum()

tensor(4)

In [51]:
# int => float
(y_label == t).sum() * 1.0

tensor(4.)

In [53]:
# accuracy
acc = (y_label == t).sum() * 1.0 / len(t)
acc

tensor(0.4000)

In [57]:
# network initialization
torch.manual_seed(0)

# instantiation of network and transfer to device
net = Net().to(device)

# select optimization technique
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

# number of passes over the training data
max_epoch = 10

for epoch in range(max_epoch):
    for batch in train_loader:
        # get one mini-batch and transfer it to the device used for training
        x, t = batch
        x = x.to(device)
        # .to() returns a new tensor, so the result must be assigned back;
        # the targets have to live on the same device as the predictions,
        # otherwise the loss and the comparison below fail on a GPU
        t = t.to(device)

        # initialization of grads
        optimizer.zero_grad()

        # calculate prediction value
        y = net(x)

        # calculate loss value by target value and prediction value
        loss = criterion(y, t)

        # calculate accuracy of this mini-batch
        y_label = torch.argmax(y, dim=1)
        acc = (y_label == t).sum() * 1.0 / len(t)
        print('accuracy: ', acc)

        # calculate grad of each parameter
        loss.backward()

        # update parameters with grads
        optimizer.step()

accuracy:  tensor(0.6000)
accuracy:  tensor(0.8000)
accuracy:  tensor(0.6000)
accuracy:  tensor(0.6000)
accuracy:  tensor(0.6000)
accuracy:  tensor(0.5000)
accuracy:  tensor(0.4000)
accuracy:  tensor(0.2000)
accuracy:  tensor(0.4000)
accuracy:  tensor(0.7000)
accuracy:  tensor(0.2000)
accuracy:  tensor(0.8000)
accuracy:  tensor(0.5000)
accuracy:  tensor(0.6000)
accuracy:  tensor(0.6000)
accuracy:  tensor(0.4000)
accuracy:  tensor(0.8000)
accuracy:  tensor(0.2000)
accuracy:  tensor(0.5000)
accuracy:  tensor(1.)
accuracy:  tensor(0.7000)
accuracy:  tensor(0.9000)
accuracy:  tensor(0.7000)
accuracy:  tensor(0.6000)
accuracy:  tensor(0.5000)
accuracy:  tensor(0.7000)
accuracy:  tensor(0.7000)
accuracy:  tensor(0.8000)
accuracy:  tensor(0.6000)
accuracy:  tensor(1.)
accuracy:  tensor(0.6000)
accuracy:  tensor(0.7000)
accuracy:  tensor(0.4000)
accuracy:  tensor(0.8000)
accuracy:  tensor(0.6000)
accuracy:  tensor(0.9000)
accuracy:  tensor(0.7000)
accuracy:  tensor(0.7000)
accuracy:  tensor(0.

In [58]:
def calc_acc(data_loader, model=None, target_device=None):
    """Return the classification accuracy over all samples of ``data_loader``.

    The accuracy is computed as (number of correct predictions) / (number of
    samples), so a final mini-batch that is smaller than the others is weighted
    by its real size instead of counting as much as a full batch.

    Args:
        data_loader: iterable yielding ``(x, t)`` mini-batches, where ``x`` is
            fed to the model and ``t`` holds the target class indices.
        model: callable mapping ``x`` to per-class scores. Defaults to the
            notebook-level ``net``.
        target_device: device the mini-batches are moved to before the forward
            pass. Defaults to the notebook-level ``device``.

    Returns:
        A 0-dim float tensor with the accuracy, or NaN when ``data_loader``
        yields no samples.
    """
    # fall back to the notebook globals so existing calc_acc(loader) calls keep working
    if model is None:
        model = net
    if target_device is None:
        target_device = device

    n_correct = 0
    n_total = 0

    # evaluation only: no graph needs to be recorded for these forward passes
    with torch.no_grad():
        for x, t in data_loader:
            x = x.to(target_device)
            t = t.to(target_device)

            # predicted class = index of the largest score in each row
            y_label = torch.argmax(model(x), dim=1)

            n_correct += (y_label == t).sum().item()
            n_total += len(t)

    # an empty loader has no defined accuracy
    if n_total == 0:
        return torch.tensor(float('nan'))

    return torch.tensor(n_correct / n_total)

# check with val_data
val_acc = calc_acc(val_loader)
val_acc

tensor(0.7667)

In [59]:
# check with test_data
test_acc = calc_acc(test_loader)
test_acc

tensor(0.6667)