In [2]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Print whether CUDA is available to this PyTorch installation.
print(torch.cuda.is_available())

False


In [5]:
# Sample a 2x3 tensor of random values (torch.randn) for the examples below.
data = torch.randn((2, 3))

In [None]:
# Reductions over every element of `data`; each result is printed as a tensor.
print(data.sum())
print(data.mean())
print(data.var())
print(data.std())
print()
# .item() pulls the plain Python number out of a single-value tensor.
print(data.sum().item())
print(data.mean().item())
print(data.var().item())
print(data.std().item())

* pytorch 조건문
  

In [6]:
# Element-wise conditional: take 1.0 where data > 0, otherwise 0.0.
print(
    torch.where(
        data > 0,
        torch.ones(data.size()),
        torch.zeros(data.size()),
    )
)

tensor([[1., 0., 1.],
        [0., 1., 0.]])


* numpy 호환

In [10]:
# Round trip: torch tensor -> NumPy array -> torch tensor.
a = torch.ones(5)
b = a.numpy()
c = torch.from_numpy(b)

# Show each stage of the conversion.
print(f"torch 5행 1열 : {a}")
print(f"torch->numpy 변환 5행 1열 : {b}")
print(f"numpy->torch 변환 5행 1열 : {c}")

torch 5행 1열 : tensor([1., 1., 1., 1., 1.])
torch->numpy 변환 5행 1열 : [1. 1. 1. 1. 1.]
numpy->torch 변환 5행 1열 : tensor([1., 1., 1., 1., 1.])


* gpu의 사용

In [14]:
# First line printed: whether CUDA is available.
# Second line printed: how many CUDA devices are reported.
print(torch.cuda.is_available(), torch.cuda.device_count(), sep="\n")
# print(torch.cuda.current_device())  # TODO: check later

False
0


In [12]:
# Pick the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cpu


In [16]:
# Place the tensor on whichever device was selected above
# (CUDA if it is available, otherwise the CPU).
x = torch.tensor([1, 1], device=device)
print(x)
# For later reference:
# https://m.blog.naver.com/ptm0228/222048480521
# A device can also be chosen explicitly with
# x.cuda()
# x.cpu()

tensor([1, 1])


In [None]:
# Demonstration: both operands of an arithmetic op must live on the same device.
# The failure is the point of this cell, so it is caught and displayed instead of
# aborting a "Run All"; without a CUDA device the demo is skipped, because the
# .cuda() call itself would fail for an unrelated reason.
if torch.cuda.is_available():
    x = torch.tensor([1, 1]).cuda()
    y = torch.tensor([1, 1]).cpu()
    try:
        print(x + y)
    except RuntimeError as err:
        # Expected path: x is on the GPU and y is on the CPU.
        print(f"RuntimeError: {err}")
else:
    print("CUDA is not available; skipping the cross-device addition demo.")

In [18]:
# Two input tensors that autograd should track.
a = torch.tensor([2.0], requires_grad=True)
b = torch.tensor([1.0], requires_grad=True)

# Small graph built on top of them: e = (a + b) + (b + 1).
c = a + b
d = b + 1
e = c + d

# One tensor per line, in definition order.
print(a, b, c, d, e, sep="\n")

tensor([2.], requires_grad=True)
tensor([1.], requires_grad=True)
tensor([3.], grad_fn=<AddBackward0>)
tensor([2.], grad_fn=<AddBackward0>)
tensor([5.], grad_fn=<AddBackward0>)


In [19]:
# Inspect `a`, one attribute per line: its values (.data), its gradient (.grad)
# and the op that produced it (.grad_fn).
print(a.data, a.grad, a.grad_fn, sep="\n")

tensor([2.])
None
None


In [20]:
# Gradients are only computed once backward() is called on the result.
# retain_graph=True asks autograd to keep the graph after this call.
e.backward(retain_graph=True)

In [None]:
# Show .grad for every tensor in the graph, one per line.
print(a.grad)
print(b.grad)
print(c.grad)
print(d.grad)
print(e.grad)
# Expected output, in the same order as the prints above
# (only a and b hold a gradient; c, d and e print None):
# tensor([1.])
# tensor([2.])
# None
# None
# None

In [23]:
# Rebuild the graph; this time d hangs off c:  e = c + (c + 1), with c = a + b.
a = torch.tensor([2.0], requires_grad=True)
b = torch.tensor([1.0], requires_grad=True)
c = a + b
d = c + 1
e = c + d

# Ask autograd to keep .grad on the intermediate tensors as well.
for node in (c, d, e):
    node.retain_grad()

e.backward(retain_graph=True)

# One gradient per line, in definition order.
print(a.grad, b.grad, c.grad, d.grad, e.grad, sep="\n")

tensor([2.])
tensor([2.])
tensor([2.])
tensor([1.])
tensor([1.])


In [24]:
# Gradients are accumulated (summed) into .grad on every backward() call, so
# they have to be reset with .grad.zero_() once a backward pass has been used.
for node in (a, b, c, d, e):
    node.grad.zero_()

# Every gradient should now read as zero.
print(a.grad, b.grad, c.grad, d.grad, e.grad, sep="\n")

tensor([0.])
tensor([0.])
tensor([0.])
tensor([0.])
tensor([0.])


In [25]:
# Rarely needed in practice: switch requires_grad off to stop gradient computation.
a.requires_grad_(False)

# Rebind `a` to the result of detach() and confirm it does not require grad.
a = a.detach()
print(a.requires_grad)

False


In [26]:
import torch.nn as nn
import torch.optim as optim


In [40]:
# Building a neural network.
# Networks are usually written as a class.

class Net(nn.Module):  # must inherit from nn.Module
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        in_features: Size of each input sample. Defaults to 100.
        hidden_features: Output size of the first linear layer. Defaults to 100.
        out_features: Size of each output sample. Defaults to 10.

    Calling ``Net()`` with no arguments builds the same 100 -> 100 -> 10
    network as before.
    """

    def __init__(self, in_features=100, hidden_features=100, out_features=10):
        super().__init__()  # required base-class initialisation for nn.Module
        self.fc1 = nn.Linear(in_features=in_features, out_features=hidden_features)
        self.fc1_act = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_features, out_features=out_features)

    def forward(self, x):
        """Apply fc1, the ReLU activation and fc2 in sequence and return the result."""
        hidden = self.fc1_act(self.fc1(x))
        return self.fc2(hidden)

In [41]:
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=100, out_features=100, bias=True)
  (fc1_act): ReLU()
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


In [42]:
# Collect the network's parameter tensors into a list.
params = list(net.parameters())
# print(params)
# Printed, one per line:
#   - the count = number of layers (currently 2) x (weight + bias = 2 per layer)
#   - the first parameter tensor
#   - the size of that tensor
print(len(params), params[0], params[0].shape, sep="\n")

4
Parameter containing:
tensor([[-0.0178, -0.0774,  0.0715,  ...,  0.0563, -0.0517,  0.0817],
        [-0.0192,  0.0492,  0.0698,  ..., -0.0926,  0.0926, -0.0624],
        [-0.0692, -0.0939, -0.0616,  ...,  0.0123, -0.0827, -0.0470],
        ...,
        [ 0.0741,  0.0673,  0.0804,  ..., -0.0495, -0.0031, -0.0048],
        [-0.0955,  0.0500, -0.0799,  ..., -0.0822,  0.0782,  0.0008],
        [-0.0539, -0.0503, -0.0601,  ..., -0.0452,  0.0069, -0.0424]],
       requires_grad=True)
torch.Size([100, 100])


1. net instance에서 direct하게 접근

In [43]:
# List the name of every parameter registered on the network.
for param_name, _param in net.named_parameters():
    print(param_name)

fc1.weight
fc1.bias
fc2.weight
fc2.bias


In [44]:
# Loss function: mean squared error.
criterion = nn.MSELoss()

# Batch size: how many samples are processed in a single pass.
batch_size = 10
# NOTE: `input` shadows the Python builtin; the name is kept because the
# training loop further down still refers to it.
input = torch.randn((batch_size, 100))
output = net(input)

# Random regression target with one row per sample.
target = torch.randn((batch_size, 10))

loss = criterion(output, target)
print(loss)

tensor(1.1844, grad_fn=<MseLossBackward0>)


In [46]:
# Reset the network's gradients so the "before" print shows a clean state.
net.zero_grad()

print("fc1.bias.grad before backward")
print(net.fc1.bias.grad)

# Backpropagate the loss; this fills in .grad on the network's parameters.
loss.backward()

# Label fixed: this print runs after backward(), not before it.
print("fc1.bias.grad after backward")
print(net.fc1.bias.grad)

fc1.bias.grad before backward
None
fc1.bias.grad before backward
tensor([ 4.2509e-03, -8.8796e-04,  8.2000e-03,  7.7617e-03, -8.8292e-03,
         2.9105e-02,  7.3921e-03,  7.9663e-03,  7.1408e-03,  8.1933e-03,
         2.6814e-03, -1.4268e-03, -1.4156e-02,  2.6951e-03, -2.4230e-03,
        -3.2404e-04,  6.3983e-03,  1.3524e-02, -1.1312e-02,  8.8882e-03,
         3.4021e-03, -5.2625e-03,  9.2296e-03, -1.7122e-02,  2.1895e-02,
         1.3825e-02,  6.9349e-03, -1.7986e-02, -8.4713e-03, -9.9984e-03,
         9.2334e-04, -5.1077e-03,  1.8640e-02, -7.0314e-03, -1.6431e-02,
        -5.2081e-03, -7.1744e-03,  1.8677e-03,  1.4005e-02, -2.0012e-02,
        -1.1166e-02,  8.7468e-05,  9.2844e-03, -7.6586e-03, -7.1273e-03,
        -1.5413e-03, -4.4460e-03, -1.0996e-02, -1.3622e-02, -2.9072e-03,
         1.6537e-03, -1.5929e-02, -6.1317e-03, -5.1228e-03, -5.7224e-04,
         6.0707e-03,  1.8488e-02,  1.5422e-02,  6.9745e-03,  1.2333e-02,
         5.1620e-03, -6.0328e-03, -1.7460e-03,  7.6466e-03,

In [47]:
# Manual gradient-descent step: param <- param - lr * grad for every parameter.
lr = 0.01
# The update is done under no_grad() instead of going through `.data`, so the
# in-place change is not recorded by autograd but is still visible to it.
with torch.no_grad():
    for param in net.parameters():
        if param.grad is None:
            # No gradient has been computed for this parameter; leave it as is.
            continue
        param.sub_(param.grad * lr)

In [58]:
# Updating parameters by hand (as above) is tedious, so use an optimizer instead.

# Plain SGD over the network's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# Training loop.
num_epoch = 100
for epoch in range(num_epoch):
    # Reset the gradients first so they do not accumulate across iterations.
    optimizer.zero_grad()
    output = net(input)
    # criterion is the nn.MSELoss() (mean squared error) instance created earlier.
    loss = criterion(output, target)
    print(loss)
    loss.backward()
    # step() applies the update to the parameters using their gradients.
    optimizer.step()
    

tensor(5.1261e-06, grad_fn=<MseLossBackward0>)
tensor(5.0804e-06, grad_fn=<MseLossBackward0>)
tensor(5.0351e-06, grad_fn=<MseLossBackward0>)
tensor(4.9900e-06, grad_fn=<MseLossBackward0>)
tensor(4.9456e-06, grad_fn=<MseLossBackward0>)
tensor(4.9015e-06, grad_fn=<MseLossBackward0>)
tensor(4.8578e-06, grad_fn=<MseLossBackward0>)
tensor(4.8144e-06, grad_fn=<MseLossBackward0>)
tensor(4.7715e-06, grad_fn=<MseLossBackward0>)
tensor(4.7291e-06, grad_fn=<MseLossBackward0>)
tensor(4.6869e-06, grad_fn=<MseLossBackward0>)
tensor(4.6451e-06, grad_fn=<MseLossBackward0>)
tensor(4.6039e-06, grad_fn=<MseLossBackward0>)
tensor(4.5628e-06, grad_fn=<MseLossBackward0>)
tensor(4.5223e-06, grad_fn=<MseLossBackward0>)
tensor(4.4820e-06, grad_fn=<MseLossBackward0>)
tensor(4.4421e-06, grad_fn=<MseLossBackward0>)
tensor(4.4026e-06, grad_fn=<MseLossBackward0>)
tensor(4.3635e-06, grad_fn=<MseLossBackward0>)
tensor(4.3247e-06, grad_fn=<MseLossBackward0>)
tensor(4.2863e-06, grad_fn=<MseLossBackward0>)
tensor(4.2482