# SGD (Stochastic Gradient Descent)

In [1]:
import pandas as pd
# Load the housing data from a CSV file located next to the notebook.
boston_df = pd.read_csv(filepath_or_buffer="./boston_price.csv")

In [2]:
import torch

def get_update_weights_value_sgd(bias, w1, w2, rm_sgd, lstat_sgd, target_sgd, learning_rate=0.01):
    """Compute the amounts by which bias, w1 and w2 should be updated.

    The model is ``w1 * rm + w2 * lstat + bias`` with a mean-squared-error
    loss. Each returned value is ``learning_rate`` times the gradient of
    that loss, so the caller subtracts it from the current parameter.

    Args:
        bias, w1, w2: current model parameters.
        rm_sgd, lstat_sgd: feature values of the sampled rows
            (the caller in this notebook passes 1-D tensors).
        target_sgd: target values of the sampled rows.
        learning_rate: step size applied to the gradient.

    Returns:
        Tuple ``(bias_update, w1_update, w2_update)``.
    """
    # Number of sampled rows.
    sample_count = target_sgd.shape[0]

    # Residual between the actual targets and the model's predictions.
    residual = target_sgd - (w1 * rm_sgd + w2 * lstat_sgd + bias)

    # Factor shared by all three updates: -(2/N) * learning_rate.
    scale = -(2/sample_count) * learning_rate

    w1_step = scale * (rm_sgd @ residual)
    w2_step = scale * (lstat_sgd @ residual)
    bias_step = scale * residual.sum()

    return bias_step, w1_step, w2_step

In [11]:
def st_gradient_descent(features, target, iter_epochs=1000, learning_rate=0.01, verbose=True):
    """Fit ``w1 * rm + w2 * lstat + bias`` with stochastic gradient descent.

    On every iteration a single row is drawn at random and the parameters
    are moved by the update values computed from that one row.

    Args:
        features: 2-D tensor; column 0 is used as RM and column 1 as LSTAT.
        target: 1-D tensor of target values, one per row of ``features``.
        iter_epochs: number of single-sample update steps to perform.
        learning_rate: step size forwarded to the update computation.
        verbose: when True, print the parameters and the loss over the
            full ``features``/``target`` every 100 iterations.

    Returns:
        Tuple ``(w1, w2, bias)`` of the fitted parameters.
    """
    # Fixed seed so the sequence of sampled rows is reproducible.
    # Note that this resets torch's global RNG on every call.
    torch.manual_seed(2025)

    w1 = torch.zeros(1, dtype=torch.float32)
    w2 = torch.zeros(1, dtype=torch.float32)
    bias = torch.ones(1, dtype=torch.float32)
    print('최초 w1, w2, bias:', w1.item(), w2.item(), bias.item())

    rm = features[:, 0]
    lstat = features[:, 1]
    n_samples = target.shape[0]

    for i in range(1, iter_epochs+1):
        # Stochastic part: draw one random row index and use only that row.
        stochastic_index = torch.randint(0, n_samples, size=(1,))
        rm_sgd = rm[stochastic_index]
        lstat_sgd = lstat[stochastic_index]
        target_sgd = target[stochastic_index]

        # Forward the caller's learning_rate. It used to be hard-coded to
        # 0.01 here, which silently ignored the function argument.
        bias_update, w1_update, w2_update = get_update_weights_value_sgd(
            bias, w1, w2, rm_sgd, lstat_sgd, target_sgd, learning_rate=learning_rate)

        w1 = w1 - w1_update
        w2 = w2 - w2_update
        bias = bias - bias_update

        # Report progress every 100 iterations.
        if verbose and i % 100 == 0:
            print(f'Epoch: {i}/{iter_epochs}')
            # The loss must be measured on the whole training set, so the
            # prediction below uses all rows rather than the sampled one.
            predicted = w1 * rm + w2*lstat + bias
            diff = target - predicted
            loss = torch.mean(diff ** 2)
            print(f'w1: {w1.item()}, w2: {w2.item()}, bias: {bias.item()}, loss: {loss.item()}')

    return w1, w2, bias

In [12]:
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Split features and target into training and test sets.
def get_scaled_train_test_feature_target_ts(data_df):
    """Scale RM/LSTAT, split into train and test parts, and return tensors.

    Args:
        data_df: DataFrame with 'RM', 'LSTAT' and 'PRICE' columns.

    Returns:
        Tuple ``(tr_ftr_ts, tr_tgt_ts, test_ftr_ts, test_tgt_ts)`` of torch
        tensors created from the NumPy arrays of the split.
    """
    # Apply min-max scaling to the RM and LSTAT features.
    # NOTE: the scaler is fit on the whole frame, i.e. before the split.
    scaled_features_np = MinMaxScaler().fit_transform(data_df[['RM', 'LSTAT']])
    price_np = data_df['PRICE'].values

    # train_test_split yields: train features, test features,
    # train target, test target (30% of the rows go to the test part).
    split_arrays = train_test_split(scaled_features_np, price_np,
                                    test_size=0.3, random_state=2025)

    # Convert each NumPy array into a tensor, keeping the split order.
    tr_ftr_ts, test_ftr_ts, tr_tgt_ts, test_tgt_ts = (
        torch.from_numpy(arr) for arr in split_arrays
    )

    return tr_ftr_ts, tr_tgt_ts, test_ftr_ts, test_tgt_ts

# Build the scaled train/test tensors from the housing DataFrame.
(tr_ftr_ts, tr_tgt_ts,
 test_ftr_ts, test_tgt_ts) = get_scaled_train_test_feature_target_ts(data_df=boston_df)

# Show the tensor shapes of the training part, then of the test part.
print(f"tr_ftr_ts shape:{tr_ftr_ts.shape} tr_tgt_ts shape:{tr_tgt_ts.shape}")
print(f"test_ftr_ts shape:{test_ftr_ts.shape} test_tgt_ts shape: {test_tgt_ts.shape}")

tr_ftr_ts shape:torch.Size([354, 2]) tr_tgt_ts shape:torch.Size([354])
test_ftr_ts shape:torch.Size([152, 2]) test_tgt_ts shape: torch.Size([152])


In [13]:
# Run stochastic gradient descent on the training features and target.
w1, w2, bias = st_gradient_descent(
    features=tr_ftr_ts,
    target=tr_tgt_ts,
    iter_epochs=5000,
    verbose=True,
)
print('##### 최종 w1, w2, bias #######')
print(w1, w2, bias)

최초 w1, w2, bias: 0.0 0.0 1.0
Epoch: 100/5000
w1: 9.10193920135498, w2: 2.380099058151245, bias: 16.32122802734375, loss: 79.20577943326704
Epoch: 200/5000
w1: 11.38884162902832, w2: 0.6377108693122864, bias: 17.6075439453125, loss: 72.33473168920669
Epoch: 300/5000
w1: 11.43453311920166, w2: -1.9443591833114624, bias: 15.634873390197754, loss: 67.57221355061512
Epoch: 400/5000
w1: 13.544651985168457, w2: -3.477250814437866, bias: 16.884212493896484, loss: 59.431842244690046
Epoch: 500/5000
w1: 14.300559997558594, w2: -5.690975189208984, bias: 16.270288467407227, loss: 54.669107955151254
Epoch: 600/5000
w1: 15.722941398620605, w2: -7.305695056915283, bias: 16.813228607177734, loss: 50.19557187179603
Epoch: 700/5000
w1: 16.91908073425293, w2: -8.649571418762207, bias: 16.845500946044922, loss: 47.19992843074546
Epoch: 800/5000
w1: 17.07891273498535, w2: -10.086063385009766, bias: 16.271669387817383, loss: 45.02960586768405
Epoch: 900/5000
w1: 17.974334716796875, w2: -10.912721633911133, 

In [14]:
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Min-max scale the RM and LSTAT columns of the full DataFrame.
scaler = MinMaxScaler()
scaled_features_np = scaler.fit_transform(boston_df[['RM', 'LSTAT']])

print(scaled_features_np)

# Split the scaled features and the PRICE target (30% of rows go to test).
split_parts = train_test_split(
    scaled_features_np,
    boston_df['PRICE'].values,
    test_size=0.3,
    random_state=2025,
)
tr_features, test_features, tr_target, test_target = split_parts

[[0.57750527 0.08967991]
 [0.5479977  0.2044702 ]
 [0.6943859  0.06346578]
 ...
 [0.65433991 0.10789183]
 [0.61946733 0.13107064]
 [0.47307913 0.16970199]]


In [15]:
# Predict on the test data and collect the results in a DataFrame.
test_predicted_ts = test_ftr_ts[:, 0]*w1 + test_ftr_ts[:, 1]*w2 + bias

# Take every column from the same test tensors that produced the
# prediction, converted to NumPy explicitly. RM used to be read from the
# separate `test_features` array, while LSTAT and PRICE were passed to
# pandas as raw tensors.
test_ftr_np = test_ftr_ts.cpu().numpy()

boston_test_df = pd.DataFrame({
    'RM': test_ftr_np[:, 0],
    'LSTAT': test_ftr_np[:, 1],
    'PRICE': test_tgt_ts.cpu().numpy(),
    'PREDICTED_PRICE_SGD': test_predicted_ts.cpu().numpy()
})

boston_test_df.head(20)

Unnamed: 0,RM,LSTAT,PRICE,PREDICTED_PRICE_SGD
0,0.504311,0.546082,11.0,16.638467
1,0.727534,0.082781,31.5,33.931732
2,0.442422,0.348786,22.0,19.718124
3,0.44338,0.197296,50.0,23.406238
4,0.51964,0.139349,24.1,26.888865
5,0.511401,0.309051,20.1,22.561742
6,0.425752,0.450607,22.5,16.801722
7,0.612569,0.049669,32.4,31.593653
8,0.623683,0.061258,31.6,31.616892
9,0.571757,0.53394,10.9,18.773215


In [16]:
from sklearn.metrics import mean_squared_error

# Mean squared error between actual and predicted prices on the test set.
test_total_mse = mean_squared_error(
    y_true=boston_test_df['PRICE'],
    y_pred=boston_test_df['PREDICTED_PRICE_SGD'],
)
print(test_total_mse)

28.984664879873662
