In [None]:
from ISLP import load_data
from ISLP.models import (ModelSpec as MS,
                         summarize,
                         poly)

In [None]:
import ISLP
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots

import statsmodels.api as sm



#1 Simple Linear Regression using Boston data

In [None]:
Boston = load_data("Boston")
Boston.columns

Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'lstat', 'medv'],
      dtype='object')

In [None]:
design = MS(['lstat'])
X = design.fit_transform(Boston)
y = Boston['medv']
X

Unnamed: 0,intercept,lstat
0,1.0,4.98
1,1.0,9.14
2,1.0,4.03
3,1.0,2.94
4,1.0,5.33
...,...,...
501,1.0,9.67
502,1.0,9.08
503,1.0,5.64
504,1.0,6.48


Now we do standardization on lstat in X matrix.

In [None]:
lstat_mean = np.mean(X["lstat"])
lstat_std = np.std(X["lstat"])
X["lstat"] = ( X["lstat"]- lstat_mean) / lstat_std
X

Unnamed: 0,intercept,lstat
0,1.0,-1.075562
1,1.0,-0.492439
2,1.0,-1.208727
3,1.0,-1.361517
4,1.0,-1.026501
...,...,...
501,1.0,-0.418147
502,1.0,-0.500850
503,1.0,-0.983048
504,1.0,-0.865302


Equivalently, you can use the following code to create the design matrix `X`.

```
lstat = np.array(Boston["lstat"])
lstat = (lstat-np.mean(lstat))/np.std(lstat)
print(Boston.shape)
n = Boston.shape[0]
ones = np.ones([n])
X = np.stack([ones, lstat], axis=1 )
y = Boston['medv']
```

In [None]:
model = sm.OLS(y, X)
results = model.fit()

In [None]:
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,22.5328,0.276,81.545,0.0
lstat,-6.7777,0.276,-24.528,0.0


In [None]:
new_predictions = results.get_prediction(X)

In [None]:
yhat = new_predictions.predicted_mean
yhat.shape

(506,)

In [None]:
np.mean((y-yhat)**2)

38.48296722989415

In [None]:
np.mean((y-np.mean(y))**2)

84.41955615616556

# Exercise 1.1:

Do the estimatsion procedure using `torch.nn.Linear(1,1, bias=True)`. Answer the following questions.

i. Calculate $\widehat{Y}$ when `lstat`s are given by 5, 10, 15, respectively.

ii. Calculate $R^2$.

iii. Calculate AIC.

# Exercise 1.2:

Do the estimatsion procedure using `torch.nn.Linear(2,1, bias=False)`.Answer the following questions.

i. Calculate $\widehat{Y}$ when `lstat`s are given by 5, 10, 15, respectively.

ii. Calculate $R^2$.

iii. Calculate AIC.

# Exercise 1.3:

Do the estimatsion procedure using `torch.nn.Linear(1,1, bias=False)` and an additional bias parameter defined by `torch.tensor`. Model itself should be the same as in Exercise 1.1 and 1.2. (Caution: However, the estimated parameter(s) should not be the exactly the same due to the randomness in the parameter initialization and batch learning.)

Answer the following questions.

i. Calculate $\widehat{Y}$ when `lstat`s are given by 5, 10, 15, respectively.

ii. Calculate $R^2$.

iii. Calculate AIC.

In [None]:
#Ex1.1
import torch
import torch.nn as nn
import numpy as np
from ISLP import load_data
from sklearn.metrics import r2_score
import math


Boston = load_data("Boston")
X = Boston[['lstat']].values  # x= lstats
y = Boston['medv'].values # y= medv
X_tensor = torch.tensor(X, dtype=torch.float32) # numpy -> tensor 변환
y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)

model = torch.nn.Linear(1, 1, bias=True) #모델 정의 (1차원 입력, 1차원 출력, bias 사용)
criterion = nn.MSELoss()  # MSE 손실 함수
optimizer = torch.optim.SGD(model.parameters(), lr=0.001) # SGD 옵티마이저

#학습
epochs = 10000
for epoch in range(epochs):
    optimizer.zero_grad()
    y_pred = model(X_tensor) # 예측값 계산
    loss = criterion(y_pred, y_tensor) # 손실 계산
    loss.backward()
    optimizer.step()

lstats_new = torch.tensor([[5.0], [10.0], [15.0]]) # 예측할 lstats 값 (5, 10, 15)

y_hat_new = model(lstats_new)# 예측값 계산
print(f"Predicted Y values : {y_hat_new}")

y_hat_all = model(X_tensor).detach().numpy() # 전체 데이터에 대한 예측값 계산

# R^2 계산
Y_train_pred = model(X_tensor)
SS_res = ((y_tensor - Y_train_pred) ** 2).sum()
SS_tot = ((y_tensor - y_tensor.mean()) ** 2).sum()
R2 = 1 - SS_res / SS_tot
print("R^2:", R2.item())

# AIC 계산
n = len(y_tensor)
k = 2  # 파라미터는 w, b
AIC = 2 * k + n * torch.log(SS_res / n)
print("AIC:", AIC.item())


Predicted Y values : tensor([[29.6031],
        [24.9389],
        [20.2746]], grad_fn=<AddmmBackward0>)
R^2: 0.5439119338989258
AIC: 1851.269287109375


In [None]:
#EX1-2
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# lstat^2 추가
X = Boston[['lstat']].values
X_squared = X ** 2  # lstat^2
X_combined = np.hstack((X, X_squared))  # lstat와 lstat^2 결합
y = Boston['medv'].values

# 데이터 정규화 #lstat 값과 medv 값을 정규화
scaler_x = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler_x.fit_transform(X_combined)  # 입력 데이터 정규화
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))  # 출력 데이터 정규화

# 텐서로 변환
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32).view(-1, 1)

# 모델 정의 (입력 2차원, 출력 1차원, bias=False)
model = torch.nn.Linear(2, 1, bias=False)

# 손실 함수, 옵티마이저 설정
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

#학습
epochs = 10000
for epoch in range(epochs):
    optimizer.zero_grad()
    y_pred = model(X_tensor)
    loss = criterion(y_pred, y_tensor)
    loss.backward()
    optimizer.step()

# lstats가 각각 5, 10, 15일 때의 예측값 계산 (lstat^2 추가해서)
lstats_new = torch.tensor([[5.0, 5.0**2], [10.0, 10.0**2], [15.0, 15.0**2]], dtype=torch.float32)
y_hat_new = model(lstats_new)
print(f"Predicted Y values: {y_hat_new}")

# 전체 데이터에 대한 예측값 계산
y_hat_all = model(X_tensor).detach().numpy()

# R^2 계산
Y_train_pred = model(X_tensor)
SS_res = ((y_tensor - Y_train_pred) ** 2).sum()
SS_tot = ((y_tensor - y_tensor.mean()) ** 2).sum()
R2 = 1 - SS_res / SS_tot
print("R^2:", R2.item())

# AIC 계산
n = len(y_tensor)
k = 2  # 파라미터 2개
AIC = 2 * k + n * torch.log(SS_res / n)
print("AIC:", AIC.item())





Predicted Y values: tensor([[ 0.3190],
        [10.1054],
        [29.3592]], grad_fn=<MmBackward0>)
R^2: 0.5727543234825134
AIC: -426.30047607421875


In [None]:
#EX1-3
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

X = Boston[['lstat']].values
y = Boston['medv'].values

# 데이터 정규화
scaler_x = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler_x.fit_transform(X)  # 입력 데이터 정규화
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))  # 출력 데이터 정규화

# 텐서로 변환
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32).view(-1, 1)

# 모델 정의 (입력 1차원, 출력 1차원, bias=False)
model = torch.nn.Linear(1, 1, bias=False)

# bias 파라미터 직접 정의
bias = torch.tensor([0.0], dtype=torch.float32, requires_grad=True)

# 손실 함수, 옵티마이저 설정
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(list(model.parameters()) + [bias], lr=0.0001)

# 학습
epochs = 10000
for epoch in range(epochs):
    optimizer.zero_grad()
    y_pred = model(X_tensor) + bias  # bias 추가
    loss = criterion(y_pred, y_tensor)
    loss.backward()
    optimizer.step()

# lstats가 각각 5, 10, 15일 때의 예측값 계산
lstats_new = np.array([[5.0], [10.0], [15.0]])
lstats_new_scaled = scaler_x.transform(lstats_new)  # 새로운 값도 정규화
lstats_new_tensor = torch.tensor(lstats_new_scaled, dtype=torch.float32)
y_hat_new_scaled = model(lstats_new_tensor) + bias

# 예측값을 다시 원래 스케일로 변환
y_hat_new = scaler_y.inverse_transform(y_hat_new_scaled.detach().numpy())
print(f"Predicted Y values: {y_hat_new}")

# 전체 데이터에 대한 예측값 계산
y_hat_all_scaled = model(X_tensor).detach().numpy() + bias.detach().numpy()
y_hat_all = scaler_y.inverse_transform(y_hat_all_scaled)

# R² 계산
R2 = r2_score(y, y_hat_all)
print(f"R²: {R2}")

# AIC 계산
n = len(y)
k = 1  #파라미터 수 (lstat)
residuals = y - y_hat_all.flatten()
RSS = np.sum(residuals ** 2)  # 잔차 제곱합
AIC = n * np.log(RSS / n) + 2 * k
print(f"AIC: {AIC}")


Predicted Y values: [[29.508688]
 [24.951113]
 [20.393538]]
R²: 0.5432510902454406
AIC: 1850.001871714063


#2. Multiple regression

In [None]:
Boston = load_data("Boston")
Boston.columns

Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'lstat', 'medv'],
      dtype='object')

In [None]:
design = MS(['lstat', "age"])
X = design.fit_transform(Boston)
y = Boston['medv']
X

Unnamed: 0,intercept,lstat,age
0,1.0,4.98,65.2
1,1.0,9.14,78.9
2,1.0,4.03,61.1
3,1.0,2.94,45.8
4,1.0,5.33,54.2
...,...,...,...
501,1.0,9.67,69.1
502,1.0,9.08,76.7
503,1.0,5.64,91.0
504,1.0,6.48,89.3


Now we do standardization on lstat in X matrix.

In [None]:
lstat_mean = np.mean(X["lstat"])
lstat_std = np.std(X["lstat"])
age_mean = np.mean(X["age"])
age_std = np.std(X["age"])

X["lstat"] = ( X["lstat"]- lstat_mean) / lstat_std
X["age"] = ( X["age"]- age_mean) / age_std

In [None]:
X

Unnamed: 0,intercept,lstat,age
0,1.0,-1.075562,-0.120013
1,1.0,-0.492439,0.367166
2,1.0,-1.208727,-0.265812
3,1.0,-1.361517,-0.809889
4,1.0,-1.026501,-0.511180
...,...,...,...
501,1.0,-0.418147,0.018673
502,1.0,-0.500850,0.288933
503,1.0,-0.983048,0.797449
504,1.0,-0.865302,0.736996


The same design matrix `X` can be obtained from the following code.

```
lstat = np.array(Boston["lstat"])
lstat = (lstat-np.mean(lstat))/np.std(lstat)

age = np.array(Boston["age"])
age = (lstat-np.mean(age))/np.std(age)

X = np.stack([ones, lstat, age], axis=1)
X.shape
X
```

In [None]:
model = sm.OLS(y, X)
results = model.fit()
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,22.5328,0.274,82.108,0.0
lstat,-7.3628,0.344,-21.416,0.0
age,0.9714,0.344,2.826,0.005


## 2.1 Exercise:

Using the standardizied `Boston` data answer the following questions.

(a) Using pytorch and customized layer, calculate the train MSE of the following model:

`medv ~ age + lstat`

(b) Using pytorch and customized layer, calculate the train MSE of the following model:

`medv ~ age + lstat + lstat**2`

(c) Using pytorch and customized layer, calculate $R^2$ of each model in (a) and (b).

(d) Repeat (a) and (b) by making use of 70% of data as train set and the remaining 30% as test set.


In [None]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


X = Boston[['age', 'lstat']].values
X_with_lstat2 = np.hstack((X, (X[:, 1]**2).reshape(-1, 1)))  # lstat^2 추가한 데이터
y = Boston['medv'].values

#정규화
scaler_x = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_x.fit_transform(X)  # age + lstat 정규화
X_with_lstat2_scaled = scaler_x.fit_transform(X_with_lstat2)  # age + lstat + lstat^2 정규화
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# 텐서로 변환
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
X_with_lstat2_tensor = torch.tensor(X_with_lstat2_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32).view(-1, 1)

#모델정의
class CustomLinearModel(nn.Module):
    def __init__(self, input_size):
        super(CustomLinearModel, self).__init__()
        self.linear = nn.Linear(input_size, 1, bias=True)

    def forward(self, x):
        return self.linear(x)

# 손실 함수 정의
criterion = nn.MSELoss()

# 학습 함수 정의
def train_model(model, X_train, y_train, epochs=10000, lr=0.001):
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    for epoch in range(epochs):
        optimizer.zero_grad() #기울기 초기화
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)
        loss.backward() #손실의 기울기 계산
        optimizer.step() #파라미터 업데이트
    return model

# 평가 함수 정의
def evaluate_model(model, X_train, y_train, X_test=None, y_test=None):
    model.eval()

    # 훈련 세트 예측값 계산
    y_pred_train = model(X_train).detach().numpy()
    y_train_inv = scaler_y.inverse_transform(y_train.detach().numpy())
    y_pred_train_inv = scaler_y.inverse_transform(y_pred_train)

    # MSE 및 R² 계산
    mse_train = mean_squared_error(y_train_inv, y_pred_train_inv)
    r2_train = r2_score(y_train_inv, y_pred_train_inv)

    print(f"Train MSE: {mse_train}, Train R²: {r2_train}")

    if X_test is not None and y_test is not None:
        # 테스트 세트 예측값 계산
        y_pred_test = model(X_test).detach().numpy()
        y_test_inv = scaler_y.inverse_transform(y_test.detach().numpy())
        y_pred_test_inv = scaler_y.inverse_transform(y_pred_test)

        # MSE 및 R² 계산
        mse_test = mean_squared_error(y_test_inv, y_pred_test_inv)
        r2_test = r2_score(y_test_inv, y_pred_test_inv)

        print(f"Test MSE: {mse_test}, Test R²: {r2_test}")
    return mse_train, r2_train

# (a) medv ~ age + lstat 모델 학습 및 MSE, R² 계산
model_a = CustomLinearModel(2)  # age + lstat
model_a = train_model(model_a, X_tensor, y_tensor)
print("\nModel (a): medv ~ age + lstat")
evaluate_model(model_a, X_tensor, y_tensor)

# (b) medv ~ age + lstat + lstat^2 모델 학습 및 MSE, R² 계산
model_b = CustomLinearModel(3)  # age + lstat + lstat^2
model_b = train_model(model_b, X_with_lstat2_tensor, y_tensor)
print("\nModel (b): medv ~ age + lstat + lstat^2")
evaluate_model(model_b, X_with_lstat2_tensor, y_tensor)

# 70% 훈련, 30% 테스트 데이터로 분할
X_train_a, X_test_a, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.3, random_state=42)
X_train_b, X_test_b, _, _ = train_test_split(X_with_lstat2_scaled, y_scaled, test_size=0.3, random_state=42)

# 텐서로 변환
X_train_a_tensor = torch.tensor(X_train_a, dtype=torch.float32)
X_test_a_tensor = torch.tensor(X_test_a, dtype=torch.float32)
X_train_b_tensor = torch.tensor(X_train_b, dtype=torch.float32)
X_test_b_tensor = torch.tensor(X_test_b, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# (d) medv ~ age + lstat 모델 평가 (70% 훈련, 30% 테스트)
model_a = CustomLinearModel(2)
model_a = train_model(model_a, X_train_a_tensor, y_train_tensor)
print("\nModel (a) with 70% train / 30% test: medv ~ age + lstat")
evaluate_model(model_a, X_train_a_tensor, y_train_tensor, X_test_a_tensor, y_test_tensor)

# (d) medv ~ age + lstat + lstat^2 모델 평가 (70% 훈련, 30% 테스트)
model_b = CustomLinearModel(3)
model_b = train_model(model_b, X_train_b_tensor, y_train_tensor)
print("\nModel (b) with 70% train / 30% test: medv ~ age + lstat + lstat^2")
evaluate_model(model_b, X_train_b_tensor, y_train_tensor, X_test_b_tensor, y_test_tensor)



Model (a): medv ~ age + lstat
Train MSE: 37.88167953491211, Train R²: 0.5512689352035522

Model (b): medv ~ age + lstat + lstat^2
Train MSE: 31.246862411499023, Train R²: 0.6298623085021973

Model (a) with 70% train / 30% test: medv ~ age + lstat
Train MSE: 38.365665435791016, Train R²: 0.5635132789611816
Test MSE: 37.1190299987793, Test R²: 0.5018454790115356

Model (b) with 70% train / 30% test: medv ~ age + lstat + lstat^2
Train MSE: 31.60544204711914, Train R²: 0.6404244899749756
Test MSE: 30.86101722717285, Test R²: 0.5858309268951416


(31.605442, 0.6404244899749756)

#3. Qualitative predictors

In [None]:
Carseats = load_data('Carseats')
Carseats.columns

Index(['Sales', 'CompPrice', 'Income', 'Advertising', 'Population', 'Price',
       'ShelveLoc', 'Age', 'Education', 'Urban', 'US'],
      dtype='object')

In [None]:
Carseats.head(5)

Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
0,9.5,138,73,11,276,120,Bad,42,17,Yes,Yes
1,11.22,111,48,16,260,83,Good,65,10,Yes,Yes
2,10.06,113,35,10,269,80,Medium,59,12,Yes,Yes
3,7.4,117,100,4,466,97,Medium,55,14,Yes,Yes
4,4.15,141,64,3,340,128,Bad,38,13,Yes,No


In [None]:
my_vars = list(Carseats.columns.drop('Sales'))
y = Carseats['Sales']
X = MS(["Advertising", "ShelveLoc"]).fit_transform(Carseats)
X

Unnamed: 0,intercept,Advertising,ShelveLoc[Good],ShelveLoc[Medium]
0,1.0,11,0.0,0.0
1,1.0,16,1.0,0.0
2,1.0,10,0.0,1.0
3,1.0,4,0.0,1.0
4,1.0,3,0.0,0.0
...,...,...,...,...
395,1.0,17,1.0,0.0
396,1.0,3,0.0,1.0
397,1.0,12,0.0,1.0
398,1.0,7,0.0,0.0


In [None]:
adv_mean = np.mean(X["Advertising"])
adv_std = np.std(X["Advertising"])

X["Advertising"] = ( X["Advertising"]- adv_mean) / adv_std


The similar design matrix can be obtained from the following code:

```
ones = np.ones(Carseats.shape[0])
X = np.stack([ones, Carseats["Advertising"], Carseats["ShelveLoc"]], axis=1)
X.shape
X

my_dummy = pd.get_dummies(Carseats.ShelveLoc)
my_dummy

X = np.stack([ones, Carseats["Advertising"], my_dummy["Bad"], my_dummy["Good"]], axis=1)
X.shape
X

```

In [None]:

model = sm.OLS(y, X)
results = model.fit()
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,5.5648,0.229,24.284,0.0
Advertising,0.6689,0.112,5.951,0.0
ShelveLoc[Good],4.5769,0.335,13.671,0.0
ShelveLoc[Medium],1.7514,0.275,6.375,0.0


## 3.1 Exercise:

Using `Carseats` data answer the following questions.

(a) Using pytorch, calculate the train MSE of the following model:

`Sales ~ Advertising + ShelveLoc`

(b) Using pytorch, calculate the train MSE of the following model:

`Sales ~ Advertising`

(c) Calculate $R^2$ of each model in (a) and (b).

(d) Repeat (a) and (b) by making use of 70% of data as train set and the remaining 30% as test set.

In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score


Carseats = load_data('Carseats')

# ShelveLoc는 범주형변수임. -> 원-핫 인코딩 -> 수치형 변수로 변환해줌
encoder = OneHotEncoder(drop='first')  # 첫 번째 카테고리는 drop
ShelveLoc_encoded = encoder.fit_transform(Carseats[['ShelveLoc']]).toarray()

# Advertising 변수 및 인코딩된 ShelveLoc 병합
X_a = np.hstack((Carseats[['Advertising']].values, ShelveLoc_encoded))
X_b = Carseats[['Advertising']].values

y = Carseats['Sales'].values

# 데이터 정규화
scaler_x = StandardScaler()
scaler_y = StandardScaler()

X_a_scaled = scaler_x.fit_transform(X_a)
X_b_scaled = scaler_x.fit_transform(X_b)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# 텐서로 변환
X_a_tensor = torch.tensor(X_a_scaled, dtype=torch.float32)
X_b_tensor = torch.tensor(X_b_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32).view(-1, 1)

# 모델 정의
class CustomLinearModel(nn.Module):
    def __init__(self, input_size):
        super(CustomLinearModel, self).__init__()
        self.linear = nn.Linear(input_size, 1, bias=True)

    def forward(self, x):
        return self.linear(x)

# 손실 함수  정의
criterion = nn.MSELoss()

# 학습 함수 정의
def train_model(model, X_train, y_train, epochs=10000, lr=0.001):
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    for epoch in range(epochs):
        optimizer.zero_grad()
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)
        loss.backward()
        optimizer.step()
    return model

#(C) 평가 함수 정의
def evaluate_model(model, X_train, y_train, X_test=None, y_test=None):
    model.eval()

    # 훈련 세트 예측값 계산
    y_pred_train = model(X_train).detach().numpy()
    y_train_inv = scaler_y.inverse_transform(y_train.detach().numpy())
    y_pred_train_inv = scaler_y.inverse_transform(y_pred_train)

    # MSE 및 R² 계산
    mse_train = mean_squared_error(y_train_inv, y_pred_train_inv)
    r2_train = r2_score(y_train_inv, y_pred_train_inv)

    print(f"Train MSE: {mse_train}, Train R²: {r2_train}")

    if X_test is not None and y_test is not None:
        # 테스트 세트 예측값 계산
        y_pred_test = model(X_test).detach().numpy()
        y_test_inv = scaler_y.inverse_transform(y_test.detach().numpy())
        y_pred_test_inv = scaler_y.inverse_transform(y_pred_test)

        # MSE 및 R² 계산
        mse_test = mean_squared_error(y_test_inv, y_pred_test_inv)
        r2_test = r2_score(y_test_inv, y_pred_test_inv)

        print(f"Test MSE: {mse_test}, Test R²: {r2_test}")
    return mse_train, r2_train

# (a) Sales ~ Advertising + ShelveLoc 모델 학습 및 평가
model_a = CustomLinearModel(X_a_tensor.shape[1])
model_a = train_model(model_a, X_a_tensor, y_tensor)
print("\nModel (a): Sales ~ Advertising + ShelveLoc")
evaluate_model(model_a, X_a_tensor, y_tensor)

# (b) Sales ~ Advertising 모델 학습 및 평가
model_b = CustomLinearModel(X_b_tensor.shape[1])
model_b = train_model(model_b, X_b_tensor, y_tensor)
print("\nModel (b): Sales ~ Advertising")
evaluate_model(model_b, X_b_tensor, y_tensor)

# 70% 훈련, 30% 테스트 데이터로 분할
X_train_a, X_test_a, y_train, y_test = train_test_split(X_a_scaled, y_scaled, test_size=0.3, random_state=42)
X_train_b, X_test_b, _, _ = train_test_split(X_b_scaled, y_scaled, test_size=0.3, random_state=42)

# 텐서로 변환
X_train_a_tensor = torch.tensor(X_train_a, dtype=torch.float32)
X_test_a_tensor = torch.tensor(X_test_a, dtype=torch.float32)
X_train_b_tensor = torch.tensor(X_train_b, dtype=torch.float32)
X_test_b_tensor = torch.tensor(X_test_b, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# (d) Sales ~ Advertising + ShelveLoc 모델 평가 (70% 훈련, 30% 테스트)
model_a = CustomLinearModel(X_train_a_tensor.shape[1])
model_a = train_model(model_a, X_train_a_tensor, y_train_tensor)
print("\nModel (a) with 70% train / 30% test: Sales ~ Advertising + ShelveLoc")
evaluate_model(model_a, X_train_a_tensor, y_train_tensor, X_test_a_tensor, y_test_tensor)

# (d) Sales ~ Advertising 모델 평가 (70% 훈련, 30% 테스트)
model_b = CustomLinearModel(X_train_b_tensor.shape[1])
model_b = train_model(model_b, X_train_b_tensor, y_train_tensor)
print("\nModel (b) with 70% train / 30% test: Sales ~ Advertising")
evaluate_model(model_b, X_train_b_tensor, y_train_tensor, X_test_b_tensor, y_test_tensor)



Model (a): Sales ~ Advertising + ShelveLoc
Train MSE: 4.985980987548828, Train R²: 0.3732808828353882

Model (b): Sales ~ Advertising
Train MSE: 7.377833843231201, Train R²: 0.07263398170471191

Model (a) with 70% train / 30% test: Sales ~ Advertising + ShelveLoc
Train MSE: 5.140975475311279, Train R²: 0.32930201292037964
Test MSE: 4.741320610046387, Test R²: 0.44086605310440063

Model (b) with 70% train / 30% test: Sales ~ Advertising
Train MSE: 7.020278453826904, Train R²: 0.084125816822052
Test MSE: 8.2614164352417, Test R²: 0.02574855089187622


(7.0202785, 0.084125816822052)