# 1. Linear regression with Pytorch

### 1) Library

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### 2) Training data

In [2]:
# Toy dataset for y = 2x, stored as float32 column vectors of shape (3, 1).
x_train = torch.tensor([[1.0], [2.0], [3.0]])
y_train = torch.tensor([[2.0], [4.0], [6.0]])

In [3]:
# Show the inputs together with their (rows, columns) size.
print(x_train, x_train.size())

tensor([[1.],
        [2.],
        [3.]]) torch.Size([3, 1])


In [4]:
# Show the targets together with their (rows, columns) size.
print(y_train, y_train.size())

tensor([[2.],
        [4.],
        [6.]]) torch.Size([3, 1])


### 3) parameter 

```python
torch.zeros(size, requires_grad=True)
```
`requires_grad = True`는 이 텐서가 학습 대상이며, 향후 학습을 통해 내부 값이 변경됨을 명시하는 것

In [5]:
# Weight: a one-element tensor initialised to zero and tracked by autograd.
W = torch.zeros((1,), requires_grad=True)
print(W)

tensor([0.], requires_grad=True)


In [6]:
# Bias: a one-element tensor initialised to zero and tracked by autograd.
b = torch.zeros((1,), requires_grad=True)
print(b)

tensor([0.], requires_grad=True)


### 4) hypothesis

In [7]:
# Linear model prediction: the one-element weight and bias broadcast over every row.
hypothesis = x_train * W + b
print(hypothesis)

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)


### 5) cost-function

In [8]:
# Mean squared error between the predictions and the targets.
cost = ((hypothesis - y_train) ** 2).mean()
print(cost)

tensor(18.6667, grad_fn=<MeanBackward0>)


### 6) Gradient Descent

```
0. cost 계산

1. optimizer 선언
2. optimizer.zero_grad() ==> pytorch가 기울기를 누적하는 것 방지
3. cost.backward()
4. optimizer.step()
```

In [9]:
# Plain stochastic gradient descent over the two trainable tensors.
optimizer = optim.SGD(params=[W, b], lr=0.01)

**optimizer.zero_grad()**

- gradient를 0으로 초기화.
- `backward()` 호출 전에 초기화해야 이전 단계의 기울기가 누적되지 않고, 현재 가중치에 대한 기울기만으로 학습 진행 가능

In [10]:
# Reset the gradients of the optimizer's parameters (W and b) before backpropagating.
optimizer.zero_grad()

In [11]:
# Backpropagate from the scalar cost to populate the gradients of the tracked tensors.
cost.backward()

In [12]:
# Update W and b: apply one SGD step using the gradients computed by backward().
optimizer.step()

### 7) 종합

```
error :  new() received an invalid combination of arguments - got (numpy.ndarray, requires_grad=bool), but expected one of:
```


In [13]:
def make_data(data_sample, feature_dim=1):
    """Generate a noiseless synthetic regression set whose true slope is 5.

    Args:
        data_sample: number of rows (samples) to draw.
        feature_dim: number of columns (features) per sample.

    Returns:
        Tuple ``(x, y)`` of float tensors shaped ``(data_sample, feature_dim)``.
        ``x`` is drawn from a normal distribution with mean 0 and standard
        deviation 1, and ``y`` equals ``5 * x`` element-wise.
    """
    shape = (data_sample, feature_dim)
    samples = np.random.normal(0, 1, shape)
    features = torch.FloatTensor(samples)
    targets = 5 * features
    return features, targets


def gd(x,y,lr,epoch,th,b):
    """First attempt at a gradient-descent loop, kept as a record of a failing cell.

    As written, the first statement passes ``requires_grad`` to
    ``torch.FloatTensor``, which raises the ``TypeError`` shown in the output
    of the cell that calls this function, so nothing after it is executed.

    Args:
        x: inputs (the notebook passes a tensor returned by ``make_data``).
        y: targets (the notebook passes a tensor returned by ``make_data``).
        lr: learning rate handed to ``optim.SGD``.
        epoch: number of loop iterations.
        th: initial slope value.
        b: initial intercept value.
    """
    
    # NOTE(review): torch.FloatTensor(...) does not accept the requires_grad
    # keyword; this is the line that raises the TypeError.
    th = torch.FloatTensor(th,requires_grad = True)
    b = torch.FloatTensor(np.array([b]),requires_grad = True)
    
    optimizer = optim.SGD([th,b],lr)
    
    for epochs in range(epoch):
        
        # forward (compute the loss)
        # NOTE(review): b does not appear in this expression, so the cost
        # does not depend on the intercept.
        cost = torch.mean((y - x*th)**2)
        
        # backward (gradient)
        cost.backward()

        # update
        # NOTE(review): zero_grad() runs after backward() and before step(),
        # so the gradients just computed are cleared before the update.
        optimizer.zero_grad()
        optimizer.step()
        
        if epochs % 100 == 0:
            # NOTE(review): b.item is referenced without parentheses, so format()
            # receives the bound method instead of a number.
            print("epoch : {}, th : {:.3f}, b : {:.3f}, cost : {:.4f}".
                 format(epochs,th.item(),b.item,cost.item()))

In [14]:
# Draw 100 single-feature samples for the gradient-descent experiments.
x, y = make_data(data_sample=100)

In [15]:
# First attempt: this call fails with the TypeError captured in this cell's output.
gd(x, y, lr=0.01, epoch=2000, th=0.01, b=1)

TypeError: new() received an invalid combination of arguments - got (float, requires_grad=bool), but expected one of:
 * (torch.device device)
 * (torch.Storage storage)
 * (Tensor other)
 * (tuple of ints size, torch.device device)
      didn't match because some of the keywords were incorrect: requires_grad
 * (object data, torch.device device)
      didn't match because some of the keywords were incorrect: requires_grad


**trouble_shooting**

```python
th = torch.FloatTensor(th,requires_grad = True) ==> torch.FloatTensor(...) 생성자는 requires_grad 키워드를 받지 않아서, th가 float이든 ndarray이든 TypeError 발생.
값만 담은 텐서를 만들 때는 torch.FloatTensor(ndarray)로 충분하지만,
'requires_grad'로 학습 대상 parameter로 명시할 때는 torch.tensor(data, requires_grad=True)로 만들어야 함
```

- **requires_grad 명시 parameter 선언 시,**
    - float 타입 or torch 객체

In [16]:
# Building a FloatTensor from an ndarray works when no extra keyword is passed.
torch.FloatTensor(np.array([1,2]))

tensor([1., 2.])

In [17]:
# Same constructor with requires_grad added: raises the TypeError shown in this cell's output.
torch.FloatTensor(np.array([1,2]),requires_grad = True)

TypeError: new() received an invalid combination of arguments - got (numpy.ndarray, requires_grad=bool), but expected one of:
 * (torch.device device)
 * (torch.Storage storage)
 * (Tensor other)
 * (tuple of ints size, torch.device device)
      didn't match because some of the keywords were incorrect: requires_grad
 * (object data, torch.device device)
      didn't match because some of the keywords were incorrect: requires_grad


In [18]:
# torch.tensor accepts requires_grad directly when the elements are floats.
torch.tensor([1.0,2.0],requires_grad=True)

tensor([1., 2.], requires_grad=True)

### 8) 종합 (시도 2)

```
error :  Only Tensors of floating point dtype can require gradients
```

```python
torch.tensor(1, requires_grad = True) ===> 에러남. requires_grad 학습시킬 땐, element가 float type이어야 함
따라서,
torch.tensor(1.0, requires_grad = True) 가 적절함
```

In [19]:
def make_data(data_sample, feature_dim=1):
    """Generate a noiseless synthetic regression set whose true slope is 5.

    Args:
        data_sample: number of rows (samples) to draw.
        feature_dim: number of columns (features) per sample.

    Returns:
        Tuple ``(x, y)`` of float tensors shaped ``(data_sample, feature_dim)``.
        ``x`` is drawn from a normal distribution with mean 0 and standard
        deviation 1, and ``y`` equals ``5 * x`` element-wise.
    """
    shape = (data_sample, feature_dim)
    samples = np.random.normal(0, 1, shape)
    features = torch.FloatTensor(samples)
    targets = 5 * features
    return features, targets


def gd(x,y,lr,epoch,th,b):
    """Second attempt at the gradient-descent loop, kept as a record of a failing cell.

    The ``th`` and ``b`` arguments are overwritten immediately with hard-coded
    initial values. ``b`` is built from the integer literal ``1`` with
    ``requires_grad=True``; calling this function produces the ``RuntimeError``
    shown in the output of the next cell ("Only Tensors of floating point
    dtype can require gradients").

    Args:
        x: inputs (the notebook passes a tensor returned by ``make_data``).
        y: targets (the notebook passes a tensor returned by ``make_data``).
        lr: learning rate handed to ``optim.SGD``.
        epoch: last loop index; the loop runs ``epoch + 1`` times.
        th: unused as written (replaced by the constant 0.01).
        b: unused as written (replaced by the constant 1).
    """
    
    # NOTE(review): both parameters are shadowed by constants here.
    th = torch.tensor(0.01,requires_grad = True)
    # NOTE(review): integer literal; an integer tensor cannot require gradients.
    b = torch.tensor(1,requires_grad = True)
    
    optimizer = optim.SGD([th,b],lr)
    
    for epochs in range(epoch+1):
        
        # forward (compute the loss)
        cost = torch.mean((y - (x*th + b))**2)
    
        optimizer.zero_grad() # resets the gradients to zero
        
        # backward
        cost.backward()
        
        # update
        optimizer.step()
        
        if epochs % 100 == 0:
            print("epoch : {}, th : {:.3f}, b : {:.3f}, cost : {:.4f}".format(
                epochs,th.item(),b.item(),cost.item()))

In [20]:
# Second attempt: this call fails with the RuntimeError captured in this cell's output.
gd(x, y, lr=0.01, epoch=2000, th=0.01, b=1)

RuntimeError: Only Tensors of floating point dtype can require gradients

**trouble shooting**

- requires_grad 로 명시할 땐, float type으로 넣어줘야 함

In [21]:
# Failing form, kept for comparison (integer literal):
# b = torch.tensor(1,requires_grad = True)
# Working form: a float literal yields a floating-point tensor, which can require gradients.
b = torch.tensor(1.0,requires_grad=True)

### 8) 종합 (시도 3)

In [22]:
def make_data(data_sample, feature_dim=1):
    """Generate a noiseless synthetic regression set whose true slope is 5.

    Args:
        data_sample: number of rows (samples) to draw.
        feature_dim: number of columns (features) per sample.

    Returns:
        Tuple ``(x, y)`` of float tensors shaped ``(data_sample, feature_dim)``.
        ``x`` is drawn from a normal distribution with mean 0 and standard
        deviation 1, and ``y`` equals ``5 * x`` element-wise.
    """
    shape = (data_sample, feature_dim)
    samples = np.random.normal(0, 1, shape)
    features = torch.FloatTensor(samples)
    targets = 5 * features
    return features, targets


def gd(x,y,lr,epoch,th,b):
    """Fit ``y = x * th + b`` by SGD on the mean squared error, printing progress.

    Args:
        x: input tensor.
        y: target tensor; must broadcast against ``x * th + b``.
        lr: learning rate handed to ``optim.SGD``.
        epoch: last loop index; the loop runs ``epoch + 1`` times.
        th: initial slope (anything ``float()`` accepts).
        b: initial intercept (anything ``float()`` accepts).

    Returns:
        None. Every 100th iteration prints the iteration index, the slope and
        intercept after that iteration's update, and the cost computed before
        the update.
    """
    # Start from the caller-supplied values instead of hard-coded constants.
    # float() guarantees a floating-point tensor even for integer arguments
    # such as b=1; integer tensors cannot require gradients.
    th = torch.tensor(float(th), requires_grad=True)
    b = torch.tensor(float(b), requires_grad=True)

    optimizer = optim.SGD([th, b], lr)

    for epochs in range(epoch + 1):

        # forward: mean squared error of the current linear model
        cost = torch.mean((y - (x * th + b)) ** 2)

        # clear the gradients of the previous iteration so they do not accumulate
        optimizer.zero_grad()

        # backward: populate th.grad and b.grad
        cost.backward()

        # update: one SGD step on th and b
        optimizer.step()

        if epochs % 100 == 0:
            print("epoch : {}, th : {:.3f}, b : {:.3f}, cost : {:.4f}".format(
                epochs, th.item(), b.item(), cost.item()))

In [23]:
# Third attempt: train for 500 epochs starting from th=0.01, b=1.
gd(x, y, lr=0.01, epoch=500, th=0.01, b=1)

epoch : 0, th : 0.102, b : 0.963, cost : 24.9434
epoch : 100, th : 4.194, b : -0.128, cost : 0.5797
epoch : 200, th : 4.852, b : -0.062, cost : 0.0209
epoch : 300, th : 4.971, b : -0.017, cost : 0.0009
epoch : 400, th : 4.994, b : -0.004, cost : 0.0000
epoch : 500, th : 4.999, b : -0.001, cost : 0.0000


### 9) optimizer.zero_grad() 가 필요한 이유

- 파이토치는 미분으로 얻은 기울기를 누적시키는 특징이 있음

In [26]:
import torch

# Leaf tensor whose .grad is inspected after every backward pass.
w = torch.tensor(2.0, requires_grad=True)
nb_epochs = 20

# The gradient is never reset inside the loop, so each backward() call
# adds to the value already stored in w.grad.
for epoch in range(1 + nb_epochs):
    z = w * 2
    z.backward()
    print('수식을 w로 미분한 값 : {}'.format(w.grad))

수식을 w로 미분한 값 : 2.0
수식을 w로 미분한 값 : 4.0
수식을 w로 미분한 값 : 6.0
수식을 w로 미분한 값 : 8.0
수식을 w로 미분한 값 : 10.0
수식을 w로 미분한 값 : 12.0
수식을 w로 미분한 값 : 14.0
수식을 w로 미분한 값 : 16.0
수식을 w로 미분한 값 : 18.0
수식을 w로 미분한 값 : 20.0
수식을 w로 미분한 값 : 22.0
수식을 w로 미분한 값 : 24.0
수식을 w로 미분한 값 : 26.0
수식을 w로 미분한 값 : 28.0
수식을 w로 미분한 값 : 30.0
수식을 w로 미분한 값 : 32.0
수식을 w로 미분한 값 : 34.0
수식을 w로 미분한 값 : 36.0
수식을 w로 미분한 값 : 38.0
수식을 w로 미분한 값 : 40.0
수식을 w로 미분한 값 : 42.0
