In [1]:
import torch

In [2]:
# Show the installed PyTorch version string (the bare expression is displayed as the cell output).
torch.__version__

'1.7.1+cu110'

# Scalar, Vector, Matrix, Tensor

## Scalar

In [3]:
# A one-element float tensor standing in for a scalar.
# Note: built from a list, so its shape is (1,), not a 0-dim tensor.
scalar1 = torch.tensor([1.0])
scalar1

tensor([1.])

In [4]:
# Second one-element float tensor; print() writes its repr to stdout.
scalar2 = torch.tensor([3.0])
print(scalar2)

tensor([3.])


In [7]:
# Addition two ways: the `+` operator and torch.add produce the same tensor.
scalar1+scalar2, torch.add(scalar1, scalar2)

(tensor([4.]), tensor([4.]))

In [9]:
# Subtraction two ways: the `-` operator and torch.sub produce the same tensor.
scalar1-scalar2, torch.sub(scalar1, scalar2)

(tensor([-2.]), tensor([-2.]))

In [8]:
# Multiplication two ways: the `*` operator and torch.mul produce the same tensor.
scalar1*scalar2, torch.mul(scalar1, scalar2)

(tensor([3.]), tensor([3.]))

In [10]:
# Division two ways: the `/` operator and torch.div produce the same tensor for these float inputs.
scalar1/scalar2, torch.div(scalar1, scalar2)

(tensor([0.3333]), tensor([0.3333]))

## Vector

In [11]:
# Two 1-D float tensors of length 3 used by the vector arithmetic cells.
vector1 = torch.tensor([1.0, 2.0, 3.0])
vector2 = torch.tensor([4.0, 5.0, 6.0])

In [13]:
# Element-wise addition: each position is summed independently; `+` and torch.add agree.
vector1+vector2, torch.add(vector1, vector2)

(tensor([5., 7., 9.]), tensor([5., 7., 9.]))

In [14]:
# Element-wise subtraction; `-` and torch.sub agree.
vector1-vector2, torch.sub(vector1, vector2)

(tensor([-3., -3., -3.]), tensor([-3., -3., -3.]))

In [15]:
# Element-wise multiplication (not a dot product); `*` and torch.mul agree.
vector1*vector2, torch.mul(vector1, vector2)

(tensor([ 4., 10., 18.]), tensor([ 4., 10., 18.]))

In [16]:
# Element-wise division; `/` and torch.div agree for these float inputs.
vector1/vector2, torch.div(vector1, vector2)

(tensor([0.2500, 0.4000, 0.5000]), tensor([0.2500, 0.4000, 0.5000]))

In [17]:
# Dot (inner) product of the two 1-D tensors: 1*4 + 2*5 + 3*6; the result is a 0-dim tensor.
torch.dot(vector1, vector2)

tensor(32.)

## Matrix

In [20]:
# Two 2x2 float matrices; the trailing tuple displays both as the cell output.
matrix1 = torch.tensor([[1.0, 2.0],
                        [3.0, 4.0]])
matrix2 = torch.tensor([[5.0, 6.0],
                        [7.0, 8.0]])
matrix1, matrix2

(tensor([[1., 2.],
         [3., 4.]]),
 tensor([[5., 6.],
         [7., 8.]]))

In [21]:
# Element-wise sum of the two matrices.
sum_matrix = torch.add(matrix1, matrix2)
print(sum_matrix)

tensor([[ 6.,  8.],
        [10., 12.]])


In [22]:
# Element-wise difference of the two matrices.
sub_matrix = torch.sub(matrix1, matrix2)
print(sub_matrix)

tensor([[-4., -4.],
        [-4., -4.]])


In [23]:
# Element-wise (Hadamard) product: entries are multiplied position by position.
# This is NOT matrix multiplication; see the matmul cell for that.
mul_matrix = torch.mul(matrix1, matrix2)
print(mul_matrix)

tensor([[ 5., 12.],
        [21., 32.]])


In [25]:
# Element-wise quotient of the two float matrices.
div_matrix = torch.div(matrix1, matrix2)
print(div_matrix)

tensor([[0.2000, 0.3333],
        [0.4286, 0.5000]])


In [26]:
# True matrix product (rows of matrix1 times columns of matrix2),
# written with the `@` operator.
matmul_matrix = matrix1 @ matrix2
print(matmul_matrix)

tensor([[19., 22.],
        [43., 50.]])


## Tensor

In [30]:
# Two 3-D float tensors of shape (2, 2, 2): each is a stack of two 2x2 matrices.
tensor1 = torch.tensor([
    [[1.0, 2.0], [3.0, 4.0]],
    [[5.0, 6.0], [7.0, 8.0]],
])
tensor2 = torch.tensor([
    [[9.0, 10.0], [11.0, 12.0]],
    [[13.0, 14.0], [15.0, 16.0]],
])
tensor1, tensor2

(tensor([[[1., 2.],
          [3., 4.]],
 
         [[5., 6.],
          [7., 8.]]]),
 tensor([[[ 9., 10.],
          [11., 12.]],
 
         [[13., 14.],
          [15., 16.]]]))

In [31]:
# Element-wise addition of the two 3-D tensors; the result keeps the (2, 2, 2) shape.
torch.add(tensor1, tensor2)

tensor([[[10., 12.],
         [14., 16.]],

        [[18., 20.],
         [22., 24.]]])

In [32]:
# Element-wise subtraction of the two 3-D tensors.
torch.sub(tensor1, tensor2)

tensor([[[-8., -8.],
         [-8., -8.]],

        [[-8., -8.],
         [-8., -8.]]])

In [33]:
# Element-wise multiplication of the two 3-D tensors (not a matrix product).
torch.mul(tensor1, tensor2)

tensor([[[  9.,  20.],
         [ 33.,  48.]],

        [[ 65.,  84.],
         [105., 128.]]])

In [34]:
# Element-wise division of the two 3-D float tensors.
torch.div(tensor1, tensor2)

tensor([[[0.1111, 0.2000],
         [0.2727, 0.3333]],

        [[0.3846, 0.4286],
         [0.4667, 0.5000]]])

In [35]:
# Batched matrix multiplication: the leading dimension is treated as a batch, so
# tensor1[i] is matrix-multiplied with tensor2[i] for each i.
torch.matmul(tensor1, tensor2)

tensor([[[ 31.,  34.],
         [ 71.,  78.]],

        [[155., 166.],
         [211., 226.]]])

# Autograd

Autograd로 기울기(gradient)를 자동으로 계산하고, 역전파(Back Propagation)를 이용해 파라미터를 업데이트한다.

In [39]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

DEVICE

device(type='cuda')

In [40]:
# Sizes for the two-layer network trained below.
BATCH_SIZE = 64       # rows of the input x and the target y
INPUT_SIZE = 1_000    # columns of x / rows of w1
HIDDEN_SIZE = 100     # columns of w1 / rows of w2
OUTPUT_SIZE = 10      # columns of w2 and of y

In [51]:
# Seed PyTorch's global RNG before drawing any random tensors so that x, y, w1
# and w2 (and therefore the losses printed by the training loop) are repeatable
# across "Restart Kernel -> Run All" on the same device and PyTorch build.
# Without a seed every run starts from different data and weights.
torch.manual_seed(0)

# Input batch and regression target: fixed data, so autograd does not track them.
x = torch.randn(BATCH_SIZE, INPUT_SIZE,
                device=DEVICE, dtype=torch.float, requires_grad=False)
y = torch.randn(BATCH_SIZE, OUTPUT_SIZE,
                device=DEVICE, dtype=torch.float, requires_grad=False)

# Trainable weights of the two layers: requires_grad=True makes autograd
# populate w1.grad / w2.grad when backward() is called on the loss.
w1 = torch.randn(INPUT_SIZE, HIDDEN_SIZE,
                 device=DEVICE, dtype=torch.float, requires_grad=True)
w2 = torch.randn(HIDDEN_SIZE, OUTPUT_SIZE,
                 device=DEVICE, dtype=torch.float, requires_grad=True)

In [52]:
# Print the shape of every tensor to confirm the layer dimensions line up.
print(*(tensor.shape for tensor in (x, y, w1, w2)))

torch.Size([64, 1000]) torch.Size([64, 10]) torch.Size([1000, 100]) torch.Size([100, 10])


In [53]:
# Plain gradient descent on a two-layer network, using autograd for the gradients.
learning_rate = 1e-6
for t in range(1, 501):
    # Forward pass: linear layer -> clamp negatives to 0 (ReLU) -> linear layer.
    hidden = x.mm(w1).clamp(min=0)
    y_pred = hidden.mm(w2)

    # Sum of squared errors over the whole batch.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 0:
        print("Iteration : ", t, "\t", "Loss : ", loss.item())

    # Backward pass: accumulates d(loss)/dw into w1.grad and w2.grad.
    loss.backward()

    # Update the parameters inside no_grad so the update itself is not
    # recorded by autograd.
    with torch.no_grad():
        for weight in (w1, w2):
            weight.sub_(learning_rate * weight.grad)
            # Gradients accumulate across backward() calls, so clear them
            # before the next iteration.
            weight.grad.zero_()

Iteration :  100 	 Loss :  513.5570068359375
Iteration :  200 	 Loss :  3.1462719440460205
Iteration :  300 	 Loss :  0.06644710153341293
Iteration :  400 	 Loss :  0.06912317126989365
Iteration :  500 	 Loss :  0.059134259819984436
