<a href="https://colab.research.google.com/github/bjungweapon/mjc.ai.ml/blob/BDU/BDU_HanBit_FullBatch_SGD_miniBatch_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# 1. Generate the data: a synthetic regression problem, then an 80/20 train/test split.
#    Both calls use random_state=42 so the dataset and the split are repeatable.
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    noise=10,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ----------------------
# 1. Full-data baseline (LinearRegression)
# ----------------------
# Fitted once on the whole training set; its test MSE is the reference score
# for the incremental SGD runs that follow.
# NOTE: per the scikit-learn docs, LinearRegression is an ordinary-least-squares
# solver, so this is a full-data reference fit rather than an iterative
# gradient-descent run.
model_full = LinearRegression().fit(X_train, y_train)
y_pred_full = model_full.predict(X_test)
mse_full = mean_squared_error(y_test, y_pred_full)

# ----------------------
# 2. Stochastic Gradient Descent (SGDRegressor, one row per partial_fit call)
# ----------------------
model_sgd = SGDRegressor(
    learning_rate='constant',
    eta0=0.001,
    max_iter=1,
    tol=None,
    random_state=42,
)

# 100 passes over the training set, always in the same row order.
# Slicing (rather than indexing) keeps X two-dimensional and y one-dimensional,
# which is the shape partial_fit is given here.
n_train_rows = len(X_train)
for epoch in range(100):
    for i in range(n_train_rows):
        one_row = slice(i, i + 1)
        model_sgd.partial_fit(X_train[one_row], y_train[one_row])

# Score the incrementally trained model on the held-out set.
y_pred_sgd = model_sgd.predict(X_test)
mse_sgd = mean_squared_error(y_test, y_pred_sgd)

# ----------------------
# 3. Mini-batch training (SGDRegressor fed 32 rows per partial_fit call)
# ----------------------
# NOTE: per the scikit-learn docs, partial_fit runs one epoch of per-sample SGD
# over the rows it receives, so batch_size here controls how the data is chunked
# between calls rather than averaging the gradient over the batch.
model_minibatch = SGDRegressor(learning_rate='constant', eta0=0.001, max_iter=1, tol=None, random_state=42)

batch_size = 32
n_batches = int(np.ceil(len(X_train) / batch_size))

# Seeded generator: the per-epoch shuffles, and therefore the reported MSE, are
# reproducible from run to run, consistent with random_state=42 used above.
rng = np.random.default_rng(42)

for epoch in range(100):
    # Fresh row order every epoch.
    indices = rng.permutation(len(X_train))
    X_train_shuffled = X_train[indices]
    y_train_shuffled = y_train[indices]
    for i in range(n_batches):
        start = i * batch_size
        end = start + batch_size  # slicing past the end is fine: the last batch is just shorter
        X_batch = X_train_shuffled[start:end]
        y_batch = y_train_shuffled[start:end]
        model_minibatch.partial_fit(X_batch, y_batch)

# Score the trained model on the held-out set.
y_pred_minibatch = model_minibatch.predict(X_test)
mse_minibatch = mean_squared_error(y_test, y_pred_minibatch)

# ----------------------
# Compare the results
# ----------------------
# One line per approach: test-set MSE with four decimals.
for summary_line in (
    f"Full-batch MSE     : {mse_full:.4f}",
    f"SGD MSE (batch=1)  : {mse_sgd:.4f}",
    f"Mini-batch MSE (32): {mse_minibatch:.4f}",
):
    print(summary_line)


In [None]:
import matplotlib.pyplot as plt

# Predicted-vs-true scatter plots, one panel per model
def plot_predictions(y_test, preds, labels):
    """Draw one predicted-vs-true scatter panel per prediction series.

    The panels are laid out in a single row. Each panel also shows the
    diagonal y = x as a dashed red line, so points on the line are perfect
    predictions.

    Args:
        y_test: True target values; must provide ``.min()`` and ``.max()``
            (e.g. a NumPy array).
        preds: Iterable of prediction arrays, one per model.
        labels: Iterable of panel titles, paired with ``preds`` by position.
            If the two differ in length, the extra items are ignored.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Materialise the pairs so the number of panels is known up front; the
    # grid is sized from it instead of being fixed at three columns.
    panels = list(zip(preds, labels))
    n_cols = max(1, len(panels))

    # 6 inches per panel keeps the original 18x5 size for three panels.
    plt.figure(figsize=(6 * n_cols, 5))

    # The diagonal's end points do not depend on the panel; compute them once.
    lo, hi = y_test.min(), y_test.max()

    for position, (y_pred, label) in enumerate(panels, start=1):
        plt.subplot(1, n_cols, position)
        plt.scatter(y_test, y_pred, alpha=0.5)
        plt.plot([lo, hi], [lo, hi], 'r--')  # y = x reference line
        plt.xlabel("True Values")
        plt.ylabel("Predictions")
        plt.title(f"{label} Prediction")

    plt.tight_layout()
    plt.show()

# Run the visualisation: one panel per training approach, in the order they were fitted
plot_predictions(
    y_test,
    preds=[y_pred_full, y_pred_sgd, y_pred_minibatch],
    labels=["Full-batch", "SGD (batch=1)", "Mini-batch (32)"],
)


예측 결과만 보면 세 방법 모두 학습을 잘 했기 때문에 그래프가 비슷하게 나옵니다.
하지만 학습하는 과정에서의 차이는 분명히 존재하고, 특히 데이터가 커지거나 모델이 복잡해질수록
'속도', '안정성', '자원 사용량'에서 큰 차이를 보이게 됩니다.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import time

# 1. Generate the data: synthetic regression problem, then an 80/20 train/test split
X, y = make_regression(
    n_samples=3000,
    n_features=20,
    noise=15,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Training hyper-parameters, read as globals by run_training
epochs = 50    # passes over the training set
eta0 = 0.001   # constant learning rate handed to SGDRegressor

def run_training(name, batch_size):
    """Train an SGDRegressor in chunks of ``batch_size`` rows and time the run.

    Reads the module-level ``X_train``, ``y_train``, ``epochs`` and ``eta0``.
    Every epoch the training rows are reshuffled and handed to ``partial_fit``
    chunk by chunk. After each chunk, the MSE of the just-updated model on
    that same chunk is measured; the mean over the epoch's chunks is recorded.

    NOTE: per the scikit-learn docs, ``partial_fit`` runs one epoch of
    per-sample SGD over the rows it receives, so ``batch_size`` controls how
    the data is chunked between calls rather than gradient averaging.

    Args:
        name: Label of the run. Not used by the training itself; kept so
            existing call sites keep working.
        batch_size: Number of rows per ``partial_fit`` call; must be >= 1.

    Returns:
        A tuple ``(losses, total_time)`` where ``losses`` is a list with one
        mean chunk-MSE per epoch and ``total_time`` is the elapsed wall time
        in seconds for the whole loop (loss bookkeeping included).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    model = SGDRegressor(learning_rate='constant', eta0=eta0, max_iter=1, tol=None, random_state=42)
    # Seeded generator: every run sees the same sequence of shuffles, so the
    # curves are reproducible and the runs are compared on equal footing.
    rng = np.random.default_rng(42)
    n = len(X_train)
    losses = []
    # perf_counter is monotonic and high-resolution, which is what an elapsed
    # time measurement needs; time.time() can jump with wall-clock changes.
    start = time.perf_counter()

    for _ in range(epochs):
        order = rng.permutation(n)
        X_shuffled = X_train[order]
        y_shuffled = y_train[order]
        chunk_losses = []

        for i in range(0, n, batch_size):
            X_batch = X_shuffled[i:i + batch_size]
            y_batch = y_shuffled[i:i + batch_size]
            model.partial_fit(X_batch, y_batch)

            # Loss of the updated model on the chunk it was just trained on.
            y_pred = model.predict(X_batch)
            chunk_losses.append(mean_squared_error(y_batch, y_pred))

        losses.append(sum(chunk_losses) / len(chunk_losses))

    total_time = time.perf_counter() - start
    return losses, total_time

# Three runs that differ only in how many rows each partial_fit call receives.

# Whole training set per call
loss_full, time_full = run_training(name="Full-batch", batch_size=len(X_train))

# One row per call
loss_sgd, time_sgd = run_training(name="SGD", batch_size=1)

# 32 rows per call
loss_mini, time_mini = run_training(name="Mini-batch", batch_size=32)

# -------------------------------
# Plot 1: loss vs. epoch
# -------------------------------
# One curve per run; each legend entry also carries that run's elapsed time.
plt.figure(figsize=(10, 6))
for loss_curve, legend_text in (
    (loss_full, f"Full-batch ({time_full:.2f}s)"),
    (loss_sgd, f"SGD (batch=1) ({time_sgd:.2f}s)"),
    (loss_mini, f"Mini-batch (batch=32) ({time_mini:.2f}s)"),
):
    plt.plot(loss_curve, label=legend_text)
plt.title("Loss vs Epoch (Training Stability & Speed)")
plt.xlabel("Epoch")
plt.ylabel("Mean Squared Error (Loss)")
plt.grid(True)
plt.legend()
plt.show()

# -------------------------------
# Print the results
# -------------------------------
# Header followed by the elapsed time of each run, two decimals.
for report_line in (
    "총 학습 시간:",
    f"Full-batch : {time_full:.2f}초",
    f"SGD        : {time_sgd:.2f}초",
    f"Mini-batch : {time_mini:.2f}초",
):
    print(report_line)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import time

# 1. Prepare the data. Only the training part of the split is kept; the
#    held-out part is discarded because this experiment tracks training loss only.
X, y = make_regression(
    n_samples=2000,
    n_features=10,
    noise=20,
    random_state=0,
)
X_train, _, y_train, _ = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# 2. Training function
def train_model(batch_size, eta0, epochs=50):
    """Train an SGDRegressor in chunks of ``batch_size`` rows and return the loss curve.

    Reads the module-level ``X_train`` and ``y_train``. Every epoch the rows
    are reshuffled and handed to ``partial_fit`` chunk by chunk. After each
    chunk, the MSE of the just-updated model on that same chunk is measured;
    the mean over the epoch's chunks is recorded.

    NOTE: per the scikit-learn docs, ``partial_fit`` runs one epoch of
    per-sample SGD over the rows it receives, so ``batch_size`` controls how
    the data is chunked between calls rather than gradient averaging.

    Args:
        batch_size: Number of rows per ``partial_fit`` call; must be >= 1.
        eta0: Constant learning rate passed to ``SGDRegressor``.
        epochs: Number of passes over the training set.

    Returns:
        A list with one mean chunk-MSE per epoch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    model = SGDRegressor(learning_rate='constant', eta0=eta0, max_iter=1, tol=None, random_state=0)
    # Seeded generator: every configuration sees the same sequence of shuffles,
    # so differences between curves come from the hyper-parameters only.
    rng = np.random.default_rng(0)
    losses = []
    n = len(X_train)

    for _ in range(epochs):
        order = rng.permutation(n)
        X_shuffled = X_train[order]
        y_shuffled = y_train[order]
        chunk_losses = []

        for i in range(0, n, batch_size):
            X_batch = X_shuffled[i:i + batch_size]
            y_batch = y_shuffled[i:i + batch_size]
            model.partial_fit(X_batch, y_batch)
            # Loss of the updated model on the chunk it was just trained on.
            y_pred = model.predict(X_batch)
            chunk_losses.append(mean_squared_error(y_batch, y_pred))
        losses.append(sum(chunk_losses) / len(chunk_losses))
    return losses

# 3. Experiment grid: (batch_size, eta0) pairs, in plotting order
configs = [
    # one row per partial_fit call: small, then large learning rate
    (1, 0.0001),
    (1, 0.01),
    # 32 rows per call with a medium learning rate
    (32, 0.001),
    # whole training set per call: medium, then large learning rate
    *((len(X_train), rate) for rate in (0.001, 0.05)),
]

# 4. Train every configuration and plot its loss curve
plt.figure(figsize=(12, 7))

# The loop variables are deliberately NOT called batch_size / eta0: rebinding
# those module-level names here would silently change the learning rate that
# globals-reading helpers such as run_training pick up on a later call.
for size, rate in configs:
    losses = train_model(size, rate)
    label = f"Batch={size}, LR={rate}"
    plt.plot(losses, label=label)

plt.title("Loss vs Epoch (Learning Rate vs Batch Size)")
plt.xlabel("Epoch")
plt.ylabel("Loss (MSE)")
plt.grid(True)
plt.legend()
plt.show()


시각화 해석 가이드
진동이 큰 그래프: 작은 배치, 큰 학습률

느리게 수렴하는 그래프: 작은 학습률

빠르고 안정적으로 수렴: 중간 배치 + 적당한 학습률

Full-batch는 매우 부드러운 수렴, 그러나 느릴 수 있음