In [70]:
import torch
import torch.nn as nn
torch.manual_seed(0)

<torch._C.Generator at 0x11c0de3d0>

### Linear model

In [71]:
# 3 nodes -> 2 nodes (fully-connected layer)
fc = nn.Linear(3,2)
fc

Linear(in_features=3, out_features=2, bias=True)

In [72]:
fc.weight

Parameter containing:
tensor([[-0.0043,  0.3097, -0.4752],
        [-0.4249, -0.2224,  0.1548]], requires_grad=True)

In [73]:
fc.bias

Parameter containing:
tensor([-0.0114,  0.4578], requires_grad=True)

### Use a tensor as input data

In [74]:
x = torch.tensor([[1,2,3]],dtype=torch.float)
x.dtype

torch.float32

In [75]:
u = fc(x)
u

tensor([[-0.8219,  0.0526]], grad_fn=<AddmmBackward0>)

### Nonlinear activation

In [76]:
import torch.nn.functional as F

In [77]:
# ReLU
h = F.relu(u)
h

tensor([[0.0000, 0.0526]], grad_fn=<ReluBackward0>)

### Loss function

In [78]:
# Ground-truth targets as a (2, 1) column of floats.
t = torch.tensor([[1], [3]], dtype=torch.float32)
# Predictions with the same shape; each one is off by exactly 1.
y = torch.tensor([[2], [4]], dtype=torch.float32)
# Mean squared error: ((2 - 1)^2 + (4 - 3)^2) / 2 = 1.
F.mse_loss(input=y, target=t)

tensor(1.)

## Train with real data

In [79]:
from sklearn.datasets import load_iris

In [80]:
iris = load_iris()

In [81]:
x = iris['data']
t = iris['target']
type(x), type(t)

(numpy.ndarray, numpy.ndarray)

In [82]:
x = torch.tensor(x,dtype=torch.float32)
t = torch.tensor(t,dtype=torch.int64)
x.shape, t.shape

(torch.Size([150, 4]), torch.Size([150]))

### Dataloader

<img src="1.png" width=500>

In [83]:
dataset = torch.utils.data.TensorDataset(x,t)
dataset[0]

(tensor([5.1000, 3.5000, 1.4000, 0.2000]), tensor(0))

### Dataset split

In [84]:
# Split sizes for train : val : test = 60 : 20 : 20.
# The test split takes whatever is left over, so the three sizes
# always add up to len(dataset) even when the fractions truncate.
n_train = int(0.6 * len(dataset))
n_val = int(0.2 * len(dataset))
n_test = len(dataset) - (n_train + n_val)

In [85]:
train, val, test = torch.utils.data.random_split(dataset, [n_train, n_val, n_test])

### Minibatch
<img src="2.png" width=500>

In [86]:
batch_size = 10
# Training batches are shuffled, and a trailing incomplete batch is dropped.
train_loader = torch.utils.data.DataLoader(
    train, batch_size=batch_size, shuffle=True, drop_last=True
)
# Validation and test loaders use the defaults: no shuffling, no dropped batch.
val_loader = torch.utils.data.DataLoader(val, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size)

In [87]:
x, t = next(iter(train_loader))
print(x, t)

tensor([[5.1000, 3.7000, 1.5000, 0.4000],
        [7.2000, 3.2000, 6.0000, 1.8000],
        [4.8000, 3.4000, 1.6000, 0.2000],
        [4.4000, 3.0000, 1.3000, 0.2000],
        [5.4000, 3.0000, 4.5000, 1.5000],
        [6.8000, 3.2000, 5.9000, 2.3000],
        [5.1000, 3.3000, 1.7000, 0.5000],
        [7.7000, 2.8000, 6.7000, 2.0000],
        [6.3000, 2.8000, 5.1000, 1.5000],
        [5.8000, 2.7000, 4.1000, 1.0000]]) tensor([0, 2, 0, 0, 1, 2, 0, 2, 2, 1])


### Define NN model
<img src="3.png" width=500>

In [102]:
# 4 -> 4 -> 3 
import torch.nn as nn
class Net(nn.Module):
    """Two-layer fully-connected classifier: 4 inputs -> 4 hidden units -> 3 outputs.

    The forward pass returns the raw output of the second linear layer;
    no softmax is applied here.
    """

    def __init__(self, *args, **kwargs):
        """Create both linear layers; extra arguments are forwarded to nn.Module."""
        super().__init__(*args, **kwargs)
        # Layers are created in this order so parameter initialisation is unchanged.
        self.fc1 = nn.Linear(in_features=4, out_features=4)
        self.fc2 = nn.Linear(in_features=4, out_features=3)

    def forward(self, x):
        """Apply fc1, then ReLU, then fc2 to `x` and return the result."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [89]:
torch.manual_seed(0)
net = Net()
net

Net(
  (fc1): Linear(in_features=4, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=3, bias=True)
)

In [90]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

In [91]:
batch = next(iter(train_loader))
x, t = batch
batch

[tensor([[6.7000, 3.1000, 5.6000, 2.4000],
         [6.2000, 2.2000, 4.5000, 1.5000],
         [6.4000, 3.2000, 5.3000, 2.3000],
         [6.2000, 3.4000, 5.4000, 2.3000],
         [5.0000, 3.5000, 1.3000, 0.3000],
         [5.5000, 2.4000, 3.7000, 1.0000],
         [6.4000, 3.2000, 4.5000, 1.5000],
         [5.0000, 3.0000, 1.6000, 0.2000],
         [4.9000, 2.5000, 4.5000, 1.7000],
         [5.4000, 3.4000, 1.5000, 0.4000]]),
 tensor([2, 1, 2, 2, 0, 1, 1, 0, 2, 0])]

In [92]:
# Forward pass: calling the module dispatches to Net.forward through
# nn.Module.__call__.
y = net(x)
# cross_entropy takes the raw network outputs and the integer class labels.
loss = F.cross_entropy(y, t)
loss

tensor(1.1148, grad_fn=<NllLossBackward0>)

In [93]:
print(net.fc1.weight.grad) # -> None

None


In [94]:
loss.backward()

In [95]:
print(net.fc1.weight.grad)

tensor([[-0.3655, -0.2347, -0.1046, -0.0213],
        [ 0.6653,  0.3353,  0.5655,  0.2341],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000]])


In [96]:
optimizer.step()

### GPU
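CUDA is not available on Mac, so use the MPS (Metal Performance Shaders) backend instead, falling back to the CPU when MPS is unavailable.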

In [97]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [98]:
net.to(device)
x = x.to(device)
t = t.to(device)

### In loop

In [124]:
max_epoch = 1

# Resolve the device first so the cell does not rely on a value left over
# from an earlier cell when the model is moved onto it.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Fresh model and optimizer for this training run.
net = Net().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

for epoch in range(max_epoch):

    for x, t in train_loader:

        # Move the mini-batch to the same device as the model.
        x = x.to(device)
        t = t.to(device)

        # Forward pass and loss on the raw network outputs.
        y_pred = net(x)
        loss = F.cross_entropy(y_pred, t)

        # Batch accuracy: fraction of samples whose highest-scoring class matches the label.
        y_pred_label = torch.argmax(y_pred, dim=1)
        accuracy = (y_pred_label == t).sum().float() / len(t)
        print(f'accuracy: {accuracy:.2f}')

        # Clear old gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

accuracy: 0.30
accuracy: 0.50
accuracy: 0.90
accuracy: 0.50
accuracy: 0.30
accuracy: 0.40
accuracy: 0.60
accuracy: 0.60
accuracy: 0.70


### Evaluation metrics

In [None]:
# Compute accuracy on a single mini-batch drawn from the training loader.
x, t = next(iter(train_loader))
x = x.to(device)
t = t.to(device)
# No gradients are needed to compute a metric.
with torch.no_grad():
    y_pred = net(x)

y_pred_label = torch.argmax(y_pred, dim=1)
# Tensor.sum() reduces in one call; the builtin sum() would iterate over the
# tensor element by element. This also matches the training loop.
accuracy = (y_pred_label == t).sum().float() / len(t)
accuracy

tensor(0.3000, device='mps:0')

### Whole program

tensor(0.0477, grad_fn=<DivBackward0>)


In [130]:
# Run the trained network on the first test batch and show outputs next to labels.
for batch in test_loader:
    x, t = batch
    x = x.to(device)
    t = t.to(device)
    # Call the module rather than .forward() so nn.Module.__call__ (and any
    # registered hooks) runs.
    y_pred = net(x)
    print(y_pred, t)
    break  # only the first batch is inspected
y_pred

tensor([[ 1.3932,  0.3338, -0.6462],
        [-0.8548,  0.2410,  0.3896],
        [-0.3207,  0.0399,  0.1365],
        [-0.6582,  0.0810,  0.2937],
        [ 1.1375,  0.1803, -0.5328],
        [-0.1489, -0.0412,  0.0546],
        [-0.5277, -0.0252,  0.2301],
        [-0.7388,  0.1466,  0.3330],
        [-0.6743,  0.0941,  0.3016],
        [ 1.1537,  0.1501, -0.5412]], device='mps:0',
       grad_fn=<LinearBackward0>) tensor([0, 2, 1, 2, 0, 1, 2, 2, 2, 0], device='mps:0')


tensor([[ 1.3932,  0.3338, -0.6462],
        [-0.8548,  0.2410,  0.3896],
        [-0.3207,  0.0399,  0.1365],
        [-0.6582,  0.0810,  0.2937],
        [ 1.1375,  0.1803, -0.5328],
        [-0.1489, -0.0412,  0.0546],
        [-0.5277, -0.0252,  0.2301],
        [-0.7388,  0.1466,  0.3330],
        [-0.6743,  0.0941,  0.3016],
        [ 1.1537,  0.1501, -0.5412]], device='mps:0',
       grad_fn=<LinearBackward0>)

In [126]:
import numpy as np
a= [1,2,3]
np.mean(a)

np.float64(2.0)