# Lecture 16. PyTorch Basics

> Eunmi Kim    
 계산과학 프로그래밍 및 실습


---


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch

In [2]:
# Print the installed PyTorch version so readers can compare it with their own environment.
print("Torch version: {}".format(torch.__version__))

Torch version: 2.1.0+cu118


## 1. Tensors

### 1.1 Tensor 생성

In [3]:
# A tensor is the array type used by PyTorch.
# Directly from data
data = [[1, 2], [3, 4]]
x_data = torch.tensor(data)
print(x_data)

tensor([[1, 2],
        [3, 4]])


In [4]:
# From a NumPy array (and vice versa)
# torch.tensor is used much like np.array
np_array = np.array(data)
x_np = torch.tensor(np_array)
print(x_np)
# Convert back to NumPy; as the last expression of the cell, the array is displayed.
np.array(x_np)

tensor([[1, 2],
        [3, 4]])


array([[1, 2],
       [3, 4]])

In [5]:
# Create tensors filled with zeros, ones and uniform random values.
shape = (2, 3)
zeros_tensor = torch.zeros(shape)
print("Zeros Tensor:\n", zeros_tensor)

x_ones = torch.ones_like(x_data) # retains the properties of x_data
print("\n Ones Tensor: \n", x_ones)

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print("\n Random Tensor: \n", x_rand)

Zeros Tensor:
 tensor([[0., 0., 0.],
        [0., 0., 0.]])

 Ones Tensor: 
 tensor([[1, 1],
        [1, 1]])

 Random Tensor: 
 tensor([[0.2883, 0.7178],
        [0.2758, 0.1301]])


In [None]:
# shape, type, device
t1 = torch.rand(4, 3)

print("Shape of t1: ", t1.shape)
print("Datatype of t1: ", t1.dtype)
print("Device t1 is stored on: ", t1.device) # where the tensor is stored

Shape of t1:  torch.Size([4, 3])
Size of t1:  <built-in method size of Tensor object at 0x7aa0005992b0>
Datatype of t1:  torch.float32
Device t1 is stored on:  cpu


In [None]:
# Use the GPU: move t1 to CUDA when a CUDA device is available,
# otherwise t1 stays where it is.
if torch.cuda.is_available():
    t1 = t1.to('cuda')
print("Device t1 is stored on: ", t1.device)

Device t1 is stored on:  cuda:0


In [None]:
# type casting
# Start from a 64-bit integer tensor ...
lt = torch.tensor([1, 2, 3, 4], dtype=torch.long)
print(lt)
# ... and cast it to a floating-point tensor with .float().
lt_as_float = lt.float()
print(lt_as_float)

tensor([1, 2, 3, 4])
tensor([1., 2., 3., 4.])


### 1.2 Tensor Operations

NumPy와 비슷하다.

In [None]:
# Build a 4x3 float32 matrix and show NumPy-style indexing.
rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
t2 = torch.tensor(rows, dtype=torch.float32)
first_row, first_column = t2[0], t2[:, 0]
print('First row: ', first_row)
print('First column: ', first_column)

First row:  tensor([1., 2., 3.])
First column:  tensor([ 1.,  4.,  7., 10.])


In [None]:
# Slice assignment: set every entry of column 1 to 0 (this modifies t2 itself).
t2[:, 1] = 0
print(t2)

tensor([[ 1.,  0.,  3.],
        [ 4.,  0.,  6.],
        [ 7.,  0.,  9.],
        [10.,  0., 12.]])


In [None]:
# multiplication
# Element-wise multiplication needs both operands on the same device.
# If the GPU cell above moved t1 to CUDA while t2 is still on the CPU, PyTorch
# raises a RuntimeError. The error is the point of this cell, so it is caught
# and printed instead of aborting a "Run All".
try:
    x = t1 * t2
    print(x)
except RuntimeError as err:
    print('RuntimeError:', err)

RuntimeError: ignored

In [None]:
# Pick CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using {} device'.format(device))

# Move t2 to the selected device.
t2 = t2.to(device)

Using cuda device


In [None]:
# matrix multiplication (y1, y2, y3 will have the same value)
y1 = t1 @ t2.T
y2 = t1.matmul(t2.T)
y3 = torch.matmul(t1, t2.T)
# All operands are on the same device, and all three forms give the same result.
print(y1, y2, y3, sep='\n')

tensor([[ 2.4529,  6.0683,  9.6836, 13.2989],
        [ 0.5882,  1.4042,  2.2203,  3.0364],
        [ 0.6441,  2.4146,  4.1851,  5.9557],
        [ 1.9146,  5.3327,  8.7508, 12.1689]], device='cuda:0')
tensor([[ 2.4529,  6.0683,  9.6836, 13.2989],
        [ 0.5882,  1.4042,  2.2203,  3.0364],
        [ 0.6441,  2.4146,  4.1851,  5.9557],
        [ 1.9146,  5.3327,  8.7508, 12.1689]], device='cuda:0')
tensor([[ 2.4529,  6.0683,  9.6836, 13.2989],
        [ 0.5882,  1.4042,  2.2203,  3.0364],
        [ 0.6441,  2.4146,  4.1851,  5.9557],
        [ 1.9146,  5.3327,  8.7508, 12.1689]], device='cuda:0')


In [None]:
# mean (reductions take an optional dim argument)
print(t2.mean()) # default: mean over all elements
# dim=0 reduces over the rows, giving one value per column.
print(t2.mean(dim=0))
# dim=1 reduces over the columns, giving one value per row.
print(t2.mean(dim=1))
# dim=-1 is the last dimension, which for this 2-D tensor is the same as dim=1.
print(t2.mean(dim=-1))

tensor(4.3333, device='cuda:0')
tensor([5.5000, 0.0000, 7.5000], device='cuda:0')
tensor([1.3333, 3.3333, 5.3333, 7.3333], device='cuda:0')
tensor([1.3333, 3.3333, 5.3333, 7.3333], device='cuda:0')


In [None]:
# max, argmax
# Without a dim argument, max() returns the single largest element.
print(t2.max())

tensor(12., device='cuda:0')


In [None]:
# With a dim argument, max() returns a named tuple with fields `values` and `indices`.
print(t2.max(dim=0))  # return two values: max and argmax

torch.return_types.max(
values=tensor([10.,  0., 12.], device='cuda:0'),
indices=tensor([3, 0, 3], device='cuda:0'))


In [None]:
# max(dim=0) yields a (values, indices) pair; unpack it once instead of indexing twice.
col_max, col_argmax = t2.max(dim=0)
print('Max: ', col_max)
print('Argmax: ', col_argmax)
# argmax(dim=0) gives the indices directly.
print('Argmax: ', t2.argmax(dim=0))

Max:  tensor([10.,  0., 12.], device='cuda:0')
Argmax:  tensor([3, 0, 3], device='cuda:0')
Argmax:  tensor([3, 0, 3], device='cuda:0')


In [None]:
# view (reshape)
t = torch.arange(6)  # tensor([0, 1, 2, 3, 4, 5])
print(t.shape)

# view is similar to np.reshape; -1 means "infer this dimension from the others".
#   (2, -1): fix the first dimension to 2 and infer the second
#   (-1, 3): fix the second dimension to 3 and infer the first
for target_shape in [(2, -1), (-1, 3)]:
    t1 = t.view(*target_shape)
    print(t1, t1.shape, sep='\t')

torch.Size([6])
tensor([[0, 1, 2],
        [3, 4, 5]])	torch.Size([2, 3])
tensor([[0, 1, 2],
        [3, 4, 5]])	torch.Size([2, 3])


In [None]:
# concatenate, stack
x = torch.FloatTensor([[1, 2], [3, 4], [5, 6]])
y = torch.FloatTensor([[7, 8], [9, 10], [11, 12]])

# Without dim, cat joins along dimension 0 (rows are appended).
print(torch.cat([x, y]))
# dim=1 joins along the columns instead.
print(torch.cat([x, y], dim=1))

tensor([[ 1.,  2.],
        [ 3.,  4.],
        [ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.],
        [11., 12.]])
tensor([[ 1.,  2.,  7.,  8.],
        [ 3.,  4.,  9., 10.],
        [ 5.,  6., 11., 12.]])


In [None]:
# stack joins the tensors along a new leading dimension: two (3, 2) tensors become (2, 3, 2).
z = torch.stack([x, y])
print(z, z.shape, sep='\n')

tensor([[[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.]],

        [[ 7.,  8.],
         [ 9., 10.],
         [11., 12.]]])
torch.Size([2, 3, 2])


In [None]:
# in-place operation
x = torch.FloatTensor([[1, 2], [3, 4]])

# mul returns a new tensor ...
print('2*x=', x.mul(2.))  # 2*x
print('x=', x) # ... so x is unchanged

2*x= tensor([[2., 4.],
        [6., 8.]])
x= tensor([[1., 2.],
        [3., 4.]])


In [None]:
# NOTE: this cell is not idempotent; every re-run doubles x again.
print('2*x=', x.mul_(2.))    # x = x.mul(2.)
# The trailing underscore marks the in-place variant, so x itself is changed.
print('x=', x)

2*x= tensor([[ 4.,  8.],
        [12., 16.]])
x= tensor([[ 4.,  8.],
        [12., 16.]])


### 1.3 autograd

In [None]:
# requires_grad=True makes autograd record operations on w;
# the result y carries a grad_fn, as the printed output shows.
w = torch.tensor(3., requires_grad=True)
y = 2 * w
print(y)

tensor(6., grad_fn=<MulBackward0>)


In [None]:
# Backpropagate from y; the gradient dy/dw = 2 is stored in w.grad.
y.backward()
print(w.grad)

tensor(2.)


In [None]:
# Gradients accumulate: a second backward pass adds to w.grad instead of
# replacing it, so after the previous cell's 2 this prints 4.
z = 2 * w
z.backward()
print(w.grad)

tensor(4.)


## 2. 선형 회귀 (Lecture 9)

### 2.1 Data

In [None]:
# Inputs: 20 evenly spaced points 0, 0.5, ..., 9.5.
x = torch.arange(0, 10, 0.5)
# Targets: one observed value per input point.
y = torch.tensor([-1.52129006, -1.27491772, -0.96940479, -0.40476183,  0.98989321,
                  0.34758961,  1.39646606,  1.50900255,  2.59080334,  2.77800334,
                  3.45137288,  4.7229806 ,  4.7882944 ,  4.96923468,  5.73584226,
                  6.1385543 ,  7.17248654,  8.29641142,  7.59675138,  8.72590778])

In [None]:
# Plot the raw data points as star markers.
plt.plot(x, y, '*')
plt.show()

### 2.2 One Layer Network Training

In [None]:
# Fix the seed so that the random initialisation, and therefore the losses
# printed below, are the same on every run.
torch.manual_seed(0)

# initial weight and bias
W = torch.randn(1, requires_grad=True)
b = torch.randn(1, requires_grad=True)

# set up the optimizer
optimizer = torch.optim.SGD([W, b], lr=0.03)


loss_list = []

# training
for i in range(200):
    optimizer.zero_grad()  # reset the gradients accumulated in W.grad / b.grad

    # forward pass
    pred_y = W * x + b
    loss = torch.mean((pred_y - y)**2)   # MSE loss
    loss_list.append(loss.item())

    if (i+1) % 20 == 0:
        # .item() turns the 0-dim loss tensor into a Python float explicitly
        # instead of relying on implicit tensor-to-scalar conversion.
        print('iteration %3d  loss=%.5f' %(i+1, loss.item()))

    # backward pass (compute the gradients)
    loss.backward()

    # weight update
    optimizer.step()

print('='*30)
# W and b are one-element tensors that require grad; extract plain floats for formatting.
print('linear regression:  y = %.3f x + %.3f' %(W.item(), b.item()))

In [None]:
# Plot the loss recorded at every training iteration.
plt.plot(loss_list)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.show()

In [None]:
# Plot the data points together with the fitted line.
plt.plot(x, y, '*')

# Evaluate the line y = W x + b on a finer grid; .item() extracts plain floats from W and b.
xx = torch.arange(0, 10, 0.1)
plt.plot(xx, W.item() * xx + b.item())
plt.show()

In [None]:
# model: a single linear layer with one input feature and one output
model = torch.nn.Linear(1, 1, bias=True)

# loss and optimizer
lr = 0.03

# Mean-squared-error loss, minimised with plain SGD over the model's parameters.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
# training
epochs = 200

train_loss_list = []

# The model and the loss are fed column vectors of shape (N, 1).
# The reshape does not change between epochs, so do it once outside the loop.
x_col = x.view(-1, 1)
y_col = y.view(-1, 1)

for epoch in range(epochs):
    optimizer.zero_grad()  # reset accumulated gradients

    # forward: call the module itself rather than .forward so registered hooks run
    y_pred = model(x_col)
    loss = criterion(y_pred, y_col)
    train_loss_list.append(loss.item())

    if (epoch+1) % 20 == 0:
        print("Epoch %5d: loss %.4f" % (epoch+1, loss.item()))

    # backward
    loss.backward()

    # weight update
    optimizer.step()

In [None]:
# Display the trained parameters of the model.
list(model.parameters())

## 3. 다중 분류 (Lecture 12)

### 3.1 Data

스탠포드대학교 CS231n의 분류 예시 데이터

In [None]:
# data generation: K spiral arms, N points each, in D dimensions
np.random.seed(0)
N = 100 # number of points per class
D = 2   # dimensionality
K = 3   # number of classes
X = np.zeros((N*K,D))
y = np.zeros(N*K, dtype='uint8')

# The radius profile is the same for every arm, so build it once.
radius = np.linspace(0.0, 1, N)
for label in range(K):
    rows = slice(N*label, N*(label+1))
    # The angle sweeps a different range per class, plus Gaussian noise.
    theta = np.linspace(label*4, (label+1)*4, N) + np.random.randn(N)*0.2
    X[rows] = np.column_stack((radius*np.sin(theta), radius*np.cos(theta)))
    y[rows] = label

# Scatter plot of the points, coloured by class.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.tab10_r)
ax.set_xlim([-1, 1])
ax.set_ylim([-1, 1])
plt.show()

In [None]:
# Convert to torch tensors (inputs as a float tensor, class labels as a long tensor).
# The labels stay as integer class indices; no one-hot encoding is done for
# PyTorch's cross-entropy loss.

x_train = torch.FloatTensor(X)
y_train = torch.LongTensor(y)

print('x shape: ', x_train.shape)
print('y shape: ', y_train.shape)

### 3.2 Sequential Model

In [None]:
# layers: 2 input features -> 100 hidden units -> 3 class scores
linear1 = torch.nn.Linear(2, 100, bias=True)
linear2 = torch.nn.Linear(100, 3)
relu = torch.nn.ReLU()

# model: Sequential applies the layers in the order given
model = torch.nn.Sequential(linear1, relu, linear2)

# loss and optimizer
lr = 0.1

criterion = torch.nn.CrossEntropyLoss()   # Softmax is internally computed.
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
# Print the layer structure of the model.
print(model)

Sequential(
  (0): Linear(in_features=2, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=3, bias=True)
)


In [None]:
# training
epochs = 1000

train_loss_list = []


for epoch in range(epochs):
    optimizer.zero_grad()  # reset accumulated gradients

    # forward: call the module itself rather than .forward so registered hooks run
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    train_loss_list.append(loss.item())

    if (epoch+1) % 100 == 0:
        # .item() converts the 0-dim loss tensor to a Python float explicitly
        print("Epoch %5d: loss %.4f" % (epoch+1, loss.item()))

    # backward
    loss.backward()

    # weight update
    optimizer.step()

NameError: ignored

In [None]:
# Score two new points. This is inference only, so gradient tracking is
# switched off, and the module is called directly (not via .forward) so
# that registered hooks run.
with torch.no_grad():
    y_pred = model(torch.FloatTensor([[-1, 0.5],[0.5, 1]]))
print(y_pred)

In [None]:
# The predicted class of each row is the index of its largest score.
y_pred.argmax(dim=1)

### 3.3 Two Layer Class

In [None]:
class TwoLayerNet(torch.nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    ``forward`` returns raw class scores; no softmax is applied here.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the two linear layers and the ReLU activation.

        Args:
            input_dim: number of input features.
            hidden_dim: number of hidden units.
            output_dim: number of output scores (one per class).
        """
        super().__init__()

        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.linear2 = torch.nn.Linear(hidden_dim, output_dim)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the class scores for ``x``."""
        x = self.relu(self.linear1(x))
        x = self.linear2(x)
        return x

    def predict(self, x):
        """Return the index of the highest score for each sample.

        The argmax is taken over the last dimension, so both a batch of
        shape (N, input_dim) and a single sample of shape (input_dim,) work.
        Gradient tracking is disabled because the result is only used for
        evaluation.
        """
        with torch.no_grad():
            scores = self(x)
        return scores.argmax(dim=-1)

    def accuracy(self, x, t):
        """Return the fraction of samples whose prediction equals the label.

        Args:
            x: input data.
            t: ground-truth class labels.

        Returns:
            A 0-dim float tensor.
        """
        y = self.predict(x)
        # Tensor reduction instead of Python's builtin sum, which iterates element by element.
        return (y == t).float().mean()

In [None]:
# model
# The input size is taken from the last dimension of the training inputs.
input_dim = x_train.shape[-1]
hidden_dim = 100
output_dim = 3  # number of classes

model = TwoLayerNet(input_dim, hidden_dim, output_dim)

# loss and optimizer
lr = 0.1

# Cross-entropy loss on the model outputs, minimised with plain SGD.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
# training
epochs = 5000

train_loss_list = []
train_acc_list = []


for epoch in range(epochs):
    optimizer.zero_grad()  # reset accumulated gradients

    # forward: call the module itself rather than .forward so registered hooks run
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    train_loss_list.append(loss.item())

    # Keep the accuracy history as plain floats (not tensors) so it can be
    # formatted and plotted directly.
    train_acc = float(model.accuracy(x_train, y_train))
    train_acc_list.append(train_acc)

    if (epoch+1) % 500 == 0:
        print("Epoch %5d: loss %.4f,  accuracy %.4f" % (epoch+1, loss.item(), train_acc))

    # backward
    loss.backward()

    # weight update
    optimizer.step()

In [None]:
# plot loss and accuracy side by side
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))

ax_loss.plot(train_loss_list)
ax_loss.set_title('Loss')

ax_acc.plot(train_acc_list)
ax_acc.set_title('Accuracy')

plt.show()

In [None]:
# plot the resulting classifier
# Build a 100x100 grid over [-1.1, 1.1] x [-1.1, 1.1], predict a class for every
# grid point, and reshape the predictions back to the grid shape.
xx, yy = np.meshgrid(np.linspace(-1.1, 1.1, 100), np.linspace(-1.1, 1.1, 100))
Z = model.predict(torch.FloatTensor(np.column_stack((xx.flatten(), yy.flatten()))))
Z = Z.reshape(xx.shape)

# Filled contours show the predicted regions; the training points are drawn on top.
plt.figure(figsize=(6, 6))
plt.contourf(xx, yy, Z, cmap=plt.cm.Pastel1, alpha=0.5)
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.tab10_r)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.show()

In [None]:
# Pass the single point as a batch of shape (1, 2), matching the (N, 2)
# inputs used everywhere else in this notebook.
model.predict(torch.FloatTensor([[0.5, -1]]))