In [1]:
#!matplotlib inline
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/Users/apple/Library/Python/3.9/lib/python/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/apple

In [2]:
t_c = [0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
t_c = torch.tensor(t_c)
t_u = torch.tensor(t_u)

* 모델 생성

In [3]:
def model(t_u, w, b):
    return w * t_u + b

* 손실 함수 설정 (mean_squared_error)

In [4]:
def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()

In [5]:
w = torch.ones(())
b = torch.zeros(())

t_p = model(t_u, w , b)
t_p

tensor([35.7000, 55.9000, 58.2000, 81.9000, 56.3000, 48.9000, 33.9000,
        21.8000, 48.4000, 60.4000, 68.4000])

In [6]:
loss = loss_fn(t_p, t_c)
loss

tensor(1763.8848)

# 손실 줄이기

* 편미분 공식

In [7]:
# bias 값
delta = 0.1

loss_rate_of_change_w = (loss_fn(model(t_u, w + delta,b), t_c) - loss_fn(model(t_u, w + delta,b), t_c)) / (2.0 * delta) 

In [8]:
import math
learning_rate = 1e-2

w = w - learning_rate * loss_rate_of_change_w
w

tensor(1.)

In [9]:
loss_rate_of_change_b = (loss_fn(model(t_u, w, b + delta), t_c) - loss_fn(model(t_u, w,b - delta), t_c)) / (2.0 * delta) 
b = b - learning_rate * loss_rate_of_change_b
b


tensor(-0.8260)

* MSE 공식

In [10]:
def loss_fn(t_p , t_c):
    squared_diffs = (t_p - t_c) ** 2
    return squared_diffs.mean()

### MSE를 미분하여 모델 파라미터의 기울기를 구할 수 있다.

In [11]:
def dloss_fn(t_p , t_c):
    dsq_diff = 2 * (t_p - t_c)  / t_p.size(0)
    return dsq_diff

# 모델에 미분 적용하기

In [12]:
# 모델 생성
def model(t_u , w , b):
    return t_u * w + b

* 가중치 w 에 대한 편미분
    - t_u * w  + b 공식에서 w 에 대한 편미분을 진행했으므로 t_u 만 남는다

In [13]:
def dmodel_dw(t_u, w , b):
    return t_u


* 편향 b에 대한 편미분
    - t_u * w 공식에서 b에 대한 편미분을 진행햤는데 b는 상수이므로 1.0 만 남는다 

In [14]:
def dmodel_db(t_u, w , b):
    return 1.0

### 경사 함수 정의
- 모델이 학습할 때 필요한 기울기 계산
- 모델의 파라미터에 대한 손실 함수의 기울기를 계산
- 미분 연쇄 법칙이 적용
    - 복합함수의 미분을 구할 때 각 함수의 미분을 곱하는 방법
    - 여기서는 weight 와 bias 가 기울기(mse의 미분값)와 곱해진다

In [15]:
def grad_fn(t_u ,t_c, t_p, w , b):
    dloss_dtp = dloss_fn(t_p, t_c)
    dloss_dw = dloss_dtp * dmodel_dw(t_u, w , b)
    dloss_db = dloss_dtp *  dmodel_db(t_u, w , b)
    return torch.stack([dloss_dw.sum(), dloss_db.sum()])


## 모델 적합을 위하여 반복

In [16]:
def training_loop(n_epochs , learning_rate , params, t_u, t_c):
    for epoch in range(1, n_epochs + 1):
        w , b = params
        
        t_p = model(t_u , w , b) #순방향 전달
        loss = loss_fn(t_p , t_c) #Mean squeared Error
        grad = grad_fn(t_u ,t_c, t_p, w , b) #역방향 전달

        # 파라미터 조정
        params = params - learning_rate * grad

        print(f'Epoch : {epoch}  Loss : {loss:.2f}  Grad : {grad}')
    return params
        

In [17]:
'''
def training_loop(n_epochs, learning_rate, params, t_u, t_c,
                  print_params=True):
    for epoch in range(1, n_epochs + 1):
        w, b = params

        t_p = model(t_u, w, b)  # <1>
        loss = loss_fn(t_p, t_c)
        grad = grad_fn(t_u, t_c, t_p, w, b)  # <2>

        params = params - learning_rate * grad

        if epoch in {1, 2, 3, 10, 11, 99, 100, 4000, 5000}:  # <3>
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
            if print_params:
                print('    Params:', params)
                print('    Grad:  ', grad)
        if epoch in {4, 12, 101}:
            print('...')

        if not torch.isfinite(loss).all():
            break  # <3>
            
    return params
'''

"\ndef training_loop(n_epochs, learning_rate, params, t_u, t_c,\n                  print_params=True):\n    for epoch in range(1, n_epochs + 1):\n        w, b = params\n\n        t_p = model(t_u, w, b)  # <1>\n        loss = loss_fn(t_p, t_c)\n        grad = grad_fn(t_u, t_c, t_p, w, b)  # <2>\n\n        params = params - learning_rate * grad\n\n        if epoch in {1, 2, 3, 10, 11, 99, 100, 4000, 5000}:  # <3>\n            print('Epoch %d, Loss %f' % (epoch, float(loss)))\n            if print_params:\n                print('    Params:', params)\n                print('    Grad:  ', grad)\n        if epoch in {4, 12, 101}:\n            print('...')\n\n        if not torch.isfinite(loss).all():\n            break  # <3>\n            \n    return params\n"

# 휸련 진행
- loss가 무한대가 되어버린다
- params 조정이 너무 크다는 신호
    - 값이 앞뒤로 진동하면서 조정 값이 점점 커진다
- 최적화는 불안정해지고 , 수렴이 아닌 발산 해버린다

In [18]:
training_loop(
    n_epochs = 100 ,
    learning_rate = 1e-2 , 
    params = torch.tensor([1.0 ,0.0]), 
    t_u = t_u, 
    t_c = t_c)

Epoch : 1  Loss : 1763.88  Grad : tensor([4517.2964,   82.6000])
Epoch : 2  Loss : 5802484.50  Grad : tensor([-261257.4062,   -4598.9702])
Epoch : 3  Loss : 19408029696.00  Grad : tensor([15109614.0000,   266155.6875])
Epoch : 4  Loss : 64915905708032.00  Grad : tensor([-8.7385e+08, -1.5393e+07])
Epoch : 5  Loss : 217130525461053440.00  Grad : tensor([5.0539e+10, 8.9023e+08])
Epoch : 6  Loss : 726257583152928129024.00  Grad : tensor([-2.9229e+12, -5.1486e+10])
Epoch : 7  Loss : 2429183416467662896627712.00  Grad : tensor([1.6904e+14, 2.9776e+12])
Epoch : 8  Loss : 8125122549611731432050262016.00  Grad : tensor([-9.7764e+15, -1.7221e+14])
Epoch : 9  Loss : 27176882120842590626938030653440.00  Grad : tensor([5.6541e+17, 9.9596e+15])
Epoch : 10  Loss : 90901105189019073810297959556841472.00  Grad : tensor([-3.2700e+19, -5.7600e+17])
Epoch : 11  Loss : inf  Grad : tensor([1.8912e+21, 3.3313e+19])
Epoch : 12  Loss : inf  Grad : tensor([-1.0937e+23, -1.9266e+21])
Epoch : 13  Loss : inf  Grad

tensor([nan, nan])

## 해결방법
- learning_rate 를 줄인다

In [19]:
import numpy as no
training_loop(
    n_epochs = 100 ,
    learning_rate = 1e-4 , 
    params = torch.tensor([1.0 ,0.0]), 
    t_u = t_u, 
    t_c = t_c)

Epoch : 1  Loss : 1763.88  Grad : tensor([4517.2964,   82.6000])
Epoch : 2  Loss : 323.09  Grad : tensor([1859.5493,   35.7843])
Epoch : 3  Loss : 78.93  Grad : tensor([765.4666,  16.5122])
Epoch : 4  Loss : 37.55  Grad : tensor([315.0790,   8.5787])
Epoch : 5  Loss : 30.54  Grad : tensor([129.6733,   5.3127])
Epoch : 6  Loss : 29.35  Grad : tensor([53.3495,  3.9682])
Epoch : 7  Loss : 29.15  Grad : tensor([21.9304,  3.4148])
Epoch : 8  Loss : 29.11  Grad : tensor([8.9964, 3.1869])
Epoch : 9  Loss : 29.11  Grad : tensor([3.6721, 3.0930])
Epoch : 10  Loss : 29.11  Grad : tensor([1.4803, 3.0544])
Epoch : 11  Loss : 29.10  Grad : tensor([0.5781, 3.0384])
Epoch : 12  Loss : 29.10  Grad : tensor([0.2066, 3.0318])
Epoch : 13  Loss : 29.10  Grad : tensor([0.0537, 3.0291])
Epoch : 14  Loss : 29.10  Grad : tensor([-0.0093,  3.0279])
Epoch : 15  Loss : 29.10  Grad : tensor([-0.0353,  3.0274])
Epoch : 16  Loss : 29.10  Grad : tensor([-0.0459,  3.0272])
Epoch : 17  Loss : 29.10  Grad : tensor([-0.

tensor([ 0.2327, -0.0438])

* 위의 코드에서 가중치에 대한 기울기는 편향에 대한 기울기보다 약 50배정도 큼
    - 가중치와 편향값의 범위가 다르다는 뜻
    - 각각 범위를 조정하지 않고 t_u 값을 정규화 해준다

In [20]:
t_un = t_u * 0.1

In [21]:
import numpy as no
training_loop(
    n_epochs = 100 ,
    learning_rate = 1e-2 , 
    params = torch.tensor([1.0 ,0.0]), 
    t_u = t_un, 
    t_c = t_c)

Epoch : 1  Loss : 80.36  Grad : tensor([-77.6140, -10.6400])
Epoch : 2  Loss : 37.57  Grad : tensor([-30.8623,  -2.3864])
Epoch : 3  Loss : 30.87  Grad : tensor([-12.4631,   0.8587])
Epoch : 4  Loss : 29.76  Grad : tensor([-5.2218,  2.1327])
Epoch : 5  Loss : 29.51  Grad : tensor([-2.3715,  2.6310])
Epoch : 6  Loss : 29.39  Grad : tensor([-1.2492,  2.8241])
Epoch : 7  Loss : 29.30  Grad : tensor([-0.8071,  2.8970])
Epoch : 8  Loss : 29.21  Grad : tensor([-0.6325,  2.9227])
Epoch : 9  Loss : 29.12  Grad : tensor([-0.5633,  2.9298])
Epoch : 10  Loss : 29.03  Grad : tensor([-0.5355,  2.9295])
Epoch : 11  Loss : 28.94  Grad : tensor([-0.5240,  2.9264])
Epoch : 12  Loss : 28.85  Grad : tensor([-0.5190,  2.9222])
Epoch : 13  Loss : 28.77  Grad : tensor([-0.5165,  2.9175])
Epoch : 14  Loss : 28.68  Grad : tensor([-0.5150,  2.9126])
Epoch : 15  Loss : 28.59  Grad : tensor([-0.5138,  2.9077])
Epoch : 16  Loss : 28.50  Grad : tensor([-0.5129,  2.9028])
Epoch : 17  Loss : 28.42  Grad : tensor([-0

tensor([ 2.7553, -2.5162])

* parmams 값의 변화량이 줄어들 때까지 학습

In [22]:
import numpy as no
training_loop(
    n_epochs = 5000 ,
    learning_rate = 1e-2 , 
    params = torch.tensor([1.0 ,0.0]), 
    t_u = t_un, 
    t_c = t_c)

Epoch : 1  Loss : 80.36  Grad : tensor([-77.6140, -10.6400])
Epoch : 2  Loss : 37.57  Grad : tensor([-30.8623,  -2.3864])
Epoch : 3  Loss : 30.87  Grad : tensor([-12.4631,   0.8587])
Epoch : 4  Loss : 29.76  Grad : tensor([-5.2218,  2.1327])
Epoch : 5  Loss : 29.51  Grad : tensor([-2.3715,  2.6310])
Epoch : 6  Loss : 29.39  Grad : tensor([-1.2492,  2.8241])
Epoch : 7  Loss : 29.30  Grad : tensor([-0.8071,  2.8970])
Epoch : 8  Loss : 29.21  Grad : tensor([-0.6325,  2.9227])
Epoch : 9  Loss : 29.12  Grad : tensor([-0.5633,  2.9298])
Epoch : 10  Loss : 29.03  Grad : tensor([-0.5355,  2.9295])
Epoch : 11  Loss : 28.94  Grad : tensor([-0.5240,  2.9264])
Epoch : 12  Loss : 28.85  Grad : tensor([-0.5190,  2.9222])
Epoch : 13  Loss : 28.77  Grad : tensor([-0.5165,  2.9175])
Epoch : 14  Loss : 28.68  Grad : tensor([-0.5150,  2.9126])
Epoch : 15  Loss : 28.59  Grad : tensor([-0.5138,  2.9077])
Epoch : 16  Loss : 28.50  Grad : tensor([-0.5129,  2.9028])
Epoch : 17  Loss : 28.42  Grad : tensor([-0

Epoch : 78  Loss : 23.64  Grad : tensor([-0.4615,  2.6124])
Epoch : 79  Loss : 23.57  Grad : tensor([-0.4607,  2.6080])
Epoch : 80  Loss : 23.50  Grad : tensor([-0.4599,  2.6035])
Epoch : 81  Loss : 23.43  Grad : tensor([-0.4591,  2.5991])
Epoch : 82  Loss : 23.36  Grad : tensor([-0.4584,  2.5947])
Epoch : 83  Loss : 23.29  Grad : tensor([-0.4576,  2.5903])
Epoch : 84  Loss : 23.22  Grad : tensor([-0.4568,  2.5859])
Epoch : 85  Loss : 23.15  Grad : tensor([-0.4560,  2.5815])
Epoch : 86  Loss : 23.09  Grad : tensor([-0.4553,  2.5771])
Epoch : 87  Loss : 23.02  Grad : tensor([-0.4545,  2.5727])
Epoch : 88  Loss : 22.95  Grad : tensor([-0.4537,  2.5684])
Epoch : 89  Loss : 22.88  Grad : tensor([-0.4529,  2.5640])
Epoch : 90  Loss : 22.81  Grad : tensor([-0.4522,  2.5597])
Epoch : 91  Loss : 22.75  Grad : tensor([-0.4514,  2.5553])
Epoch : 92  Loss : 22.68  Grad : tensor([-0.4506,  2.5510])
Epoch : 93  Loss : 22.61  Grad : tensor([-0.4499,  2.5466])
Epoch : 94  Loss : 22.54  Grad : tensor(

tensor([  5.3671, -17.3012])

* 시각화

In [28]:
!which python
!python --version


/opt/anaconda3/bin/python
Python 3.12.2


In [None]:
%matplotlib inline
from matplotlib import pyplot as plt

t_p = model(t_un, *params)  # <1>

fig = plt.figure(dpi=600)
plt.xlabel("Temperature (°Fahrenheit)")
plt.ylabel("Temperature (°Celsius)")
plt.plot(t_u.numpy(), t_p.detach().numpy()) # <2>
plt.plot(t_u.numpy(), t_c.numpy(), 'o')
plt.savefig("temp_unknown_plot.png", format="png")  # bookskip