In [1]:
from sklearn.datasets import load_iris

# return_X_y=True yields the (features, targets) pair directly.
x, t = load_iris(return_X_y=True)
print(f"{x.shape} {t.shape}")

(150, 4) (150,)


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data

In [3]:
# torch.as_tensor accepts both the NumPy arrays coming from load_iris and
# tensors that were already converted, so re-running this cell does not emit
# the copy-construct UserWarning that torch.tensor(<Tensor>) produces.
x = torch.as_tensor(x, dtype=torch.float32)
t = torch.as_tensor(t, dtype=torch.int64)

print(type(x), type(t), x.dtype, t.dtype)

<class 'torch.Tensor'> <class 'torch.Tensor'> torch.float32 torch.int64


In [5]:
# Pair every feature row with its target so they are indexed together.
dataset = torch.utils.data.TensorDataset(x, t)
print(dataset)
# A single item is an (input, target) tuple ...
print(dataset[0])
# ... whose two members are printed here side by side.
print(*dataset[0])
print(len(dataset))

<torch.utils.data.dataset.TensorDataset object at 0x12d926fd0>
(tensor([5.1000, 3.5000, 1.4000, 0.2000]), tensor(0))
tensor([5.1000, 3.5000, 1.4000, 0.2000]) tensor(0)
150


In [6]:
# 60 % / 20 % of the samples for train / validation; the test set takes
# whatever is left so the three sizes always sum to len(dataset).
n_train, n_val = (int(len(dataset) * fraction) for fraction in (0.6, 0.2))
n_test = len(dataset) - (n_train + n_val)
print(n_train, n_val, n_test)

90 30 30


In [7]:
# Fix the global RNG so the random partition below is reproducible.
torch.manual_seed(0)

train, val, test = torch.utils.data.random_split(
    dataset, lengths=[n_train, n_val, n_test]
)
print(*map(len, (train, val, test)))

90 30 30


In [8]:
batch_size = 10

# Only the training loader reshuffles; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

In [9]:
class Net(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    The layer widths are configurable; the defaults reproduce the original
    fixed 4 -> 4 -> 3 layout, so ``Net()`` behaves exactly as before.

    Args:
        in_features: Size of each input sample.
        hidden_features: Number of units in the hidden layer.
        out_features: Number of output scores per sample.
    """

    def __init__(self, in_features=4, hidden_features=4, out_features=3):
        super().__init__()
        # Layers are created in the same order as before (fc1, then fc2) so
        # that seeded initialisation yields the same weights.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return the raw output scores for ``x``.

        No softmax is applied here; the output of the second linear layer is
        returned as is.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [10]:
# Re-seed right before construction so the initial weights are reproducible.
torch.manual_seed(0)

net = Net()
print(net)

Net(
  (fc1): Linear(in_features=4, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=3, bias=True)
)


In [12]:
# Loss that is later applied to the network outputs and the int64 targets.
criterion = nn.CrossEntropyLoss()
print(criterion)

CrossEntropyLoss()


In [13]:
# SGD over every parameter of `net` with a fixed learning rate of 0.1.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.1
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [14]:
# Pull a single mini-batch from the training loader to step through by hand.
batch = next(iter(train_loader))
print(batch)

[tensor([[6.4000, 3.2000, 5.3000, 2.3000],
        [6.4000, 2.7000, 5.3000, 1.9000],
        [5.9000, 3.0000, 4.2000, 1.5000],
        [6.9000, 3.1000, 5.4000, 2.1000],
        [5.5000, 2.4000, 3.8000, 1.1000],
        [5.6000, 2.7000, 4.2000, 1.3000],
        [5.8000, 2.7000, 5.1000, 1.9000],
        [6.9000, 3.1000, 4.9000, 1.5000],
        [6.3000, 2.3000, 4.4000, 1.3000],
        [7.2000, 3.0000, 5.8000, 1.6000]]), tensor([2, 2, 1, 2, 1, 1, 2, 1, 1, 2])]


In [15]:
# A batch is an [inputs, targets] pair.
# NOTE: this rebinds x and t, which previously held the full dataset tensors;
# from here on they refer to one mini-batch only.
x, t = batch
print(x)
print(t)

tensor([[6.4000, 3.2000, 5.3000, 2.3000],
        [6.4000, 2.7000, 5.3000, 1.9000],
        [5.9000, 3.0000, 4.2000, 1.5000],
        [6.9000, 3.1000, 5.4000, 2.1000],
        [5.5000, 2.4000, 3.8000, 1.1000],
        [5.6000, 2.7000, 4.2000, 1.3000],
        [5.8000, 2.7000, 5.1000, 1.9000],
        [6.9000, 3.1000, 4.9000, 1.5000],
        [6.3000, 2.3000, 4.4000, 1.3000],
        [7.2000, 3.0000, 5.8000, 1.6000]])
tensor([2, 2, 1, 2, 1, 1, 2, 1, 1, 2])


In [16]:
# Show the freshly initialised weights and biases of both layers, one per line.
print(
    net.fc1.weight,
    net.fc1.bias,
    net.fc2.weight,
    net.fc2.bias,
    sep='\n',
)

Parameter containing:
tensor([[-0.0037,  0.2682, -0.4115, -0.3680],
        [-0.1926,  0.1341, -0.0099,  0.3964],
        [-0.0444,  0.1323, -0.1511, -0.0983],
        [-0.4777, -0.3311, -0.2061,  0.0185]], requires_grad=True)
Parameter containing:
tensor([ 0.1977,  0.3000, -0.3390, -0.2177], requires_grad=True)
Parameter containing:
tensor([[ 0.1816,  0.4152, -0.1029,  0.3742],
        [-0.0806,  0.0529,  0.4527, -0.4638],
        [-0.3148, -0.1266, -0.1949,  0.4320]], requires_grad=True)
Parameter containing:
tensor([-0.3241, -0.2302, -0.3493], requires_grad=True)


In [17]:
# Invoke forward() explicitly on the mini-batch to show what the network
# computes: one row of three scores per sample.
y = net.forward(x)
print(y)

tensor([[-0.1763, -0.2113, -0.3944],
        [-0.2700, -0.2233, -0.3658],
        [-0.2746, -0.2239, -0.3644],
        [-0.2552, -0.2214, -0.3703],
        [-0.3241, -0.2302, -0.3493],
        [-0.3003, -0.2271, -0.3566],
        [-0.2212, -0.2171, -0.3807],
        [-0.3241, -0.2302, -0.3493],
        [-0.3241, -0.2302, -0.3493],
        [-0.3241, -0.2302, -0.3493]], grad_fn=<AddmmBackward>)


In [18]:
# Calling the module itself is the usual way to run the forward pass; the
# printed values are identical to those from net.forward(x).
y = net(x)
print(y)

tensor([[-0.1763, -0.2113, -0.3944],
        [-0.2700, -0.2233, -0.3658],
        [-0.2746, -0.2239, -0.3644],
        [-0.2552, -0.2214, -0.3703],
        [-0.3241, -0.2302, -0.3493],
        [-0.3003, -0.2271, -0.3566],
        [-0.2212, -0.2171, -0.3807],
        [-0.3241, -0.2302, -0.3493],
        [-0.3241, -0.2302, -0.3493],
        [-0.3241, -0.2302, -0.3493]], grad_fn=<AddmmBackward>)


In [19]:
# Cross-entropy between the batch's predicted scores y and its targets t.
loss = criterion(y, t)
print(loss)

tensor(1.1118, grad_fn=<NllLossBackward>)


In [20]:
# No backward pass has run yet, so every .grad is still None.
print(
    net.fc1.weight.grad,
    net.fc1.bias.grad,
    net.fc2.weight.grad,
    net.fc2.bias.grad,
    sep='\n',
)

None
None
None
None


In [21]:
# Backpropagate from the loss; this populates .grad on each parameter.
loss.backward()

In [22]:
# After backward() each parameter carries a gradient tensor of its own shape.
print(
    net.fc1.weight.grad,
    net.fc1.bias.grad,
    net.fc2.weight.grad,
    net.fc2.bias.grad,
    sep='\n',
)

tensor([[0.0000, 0.0000, 0.0000, 0.0000],
        [0.7165, 0.3319, 0.5857, 0.2248],
        [0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000]])
tensor([0.0000, 0.1137, 0.0000, 0.0000])
tensor([[ 0.0000,  0.0375,  0.0000,  0.0000],
        [ 0.0000,  0.0202,  0.0000,  0.0000],
        [ 0.0000, -0.0577,  0.0000,  0.0000]])
tensor([ 0.3361, -0.1451, -0.1909])


In [23]:
# Apply one SGD update using the gradients computed by backward().
optimizer.step()

In [24]:
# Parameters after the single optimizer step, one tensor per line.
print(
    net.fc1.weight,
    net.fc1.bias,
    net.fc2.weight,
    net.fc2.bias,
    sep='\n',
)

Parameter containing:
tensor([[-0.0037,  0.2682, -0.4115, -0.3680],
        [-0.2642,  0.1009, -0.0685,  0.3740],
        [-0.0444,  0.1323, -0.1511, -0.0983],
        [-0.4777, -0.3311, -0.2061,  0.0185]], requires_grad=True)
Parameter containing:
tensor([ 0.1977,  0.2886, -0.3390, -0.2177], requires_grad=True)
Parameter containing:
tensor([[ 0.1816,  0.4114, -0.1029,  0.3742],
        [-0.0806,  0.0509,  0.4527, -0.4638],
        [-0.3148, -0.1208, -0.1949,  0.4320]], requires_grad=True)
Parameter containing:
tensor([-0.3577, -0.2157, -0.3302], requires_grad=True)


In [25]:
# Train on the first GPU when one is available, otherwise on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Every batch below is moved to `device`, so the model has to live there as
# well; without this the forward pass fails with a device mismatch as soon as
# CUDA is available. Module.to() modifies the module in place, so `optimizer`
# keeps referring to the same parameters.
net.to(device)

max_epoch = 1

for epoch in range(max_epoch):
    for batch in train_loader:
        # Unpack the mini-batch and place it on the training device.
        x, t = batch
        x = x.to(device)
        t = t.to(device)

        # backward() accumulates into .grad, so clear the previous step first.
        optimizer.zero_grad()

        # Forward pass and loss for this mini-batch.
        y = net(x)
        loss = criterion(y, t)
        print('loss: ', loss.item())

        # Backward pass followed by one parameter update.
        loss.backward()
        optimizer.step()

cpu
loss:  1.0713388919830322
loss:  1.0183022022247314
loss:  1.0249149799346924
loss:  1.0236347913742065
loss:  0.9283109903335571
loss:  1.0791465044021606
loss:  0.9329780340194702
loss:  0.9698535203933716
loss:  0.9219404458999634


In [26]:
# Gradients left on the parameters by the last mini-batch of the loop.
print(
    net.fc1.weight.grad,
    net.fc1.bias.grad,
    net.fc2.weight.grad,
    net.fc2.bias.grad,
    sep='\n',
)

tensor([[-0.2115, -0.1763, -0.0029,  0.0160],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000]])
tensor([-0.0469,  0.0000,  0.0000,  0.0000])
tensor([[-0.2135,  0.0000,  0.0000,  0.0000],
        [ 0.0884,  0.0000,  0.0000,  0.0000],
        [ 0.1251,  0.0000,  0.0000,  0.0000]])
tensor([ 0.0920,  0.0332, -0.1252])
