### [Pytorch 기반 회귀 모델 구현]
- Layer => Full-Connected Layer, Linear
- 손실함수 => MSELoss, MAELoss,...

- [1] 데이터 준비 <hr>

In [220]:
# 모듈로딩
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [221]:
# csv 파일 불러오기
file = '../data/BostonHousing.csv'
bostonDF = pd.read_csv(file)
bostonDF.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [222]:
#  csv파일 정보 확인
bostonDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   crim     506 non-null    float64
 1   zn       506 non-null    float64
 2   indus    506 non-null    float64
 3   chas     506 non-null    int64  
 4   nox      506 non-null    float64
 5   rm       506 non-null    float64
 6   age      506 non-null    float64
 7   dis      506 non-null    float64
 8   rad      506 non-null    int64  
 9   tax      506 non-null    int64  
 10  ptratio  506 non-null    float64
 11  b        506 non-null    float64
 12  lstat    506 non-null    float64
 13  medv     506 non-null    float64
dtypes: float64(11), int64(3)
memory usage: 55.5 KB


In [223]:
# data & label 분리
#label : MEDV / data : 그 외 나머지 전부
labelSR= bostonDF['medv']
dataDF = bostonDF.drop(columns=['medv']) # 혹은 columns=[-1]
labelSR.head()

0    24.0
1    21.6
2    34.7
3    33.4
4    36.2
Name: medv, dtype: float64

In [224]:
# 훈련 데이터와 검증 데이터로 나누기
X_train, X_valid, y_train, y_valid = train_test_split(dataDF,labelSR, test_size=0.1, random_state=42)
print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)

# 텐서로 변환
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_valid_tensor = torch.tensor(X_valid.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid.values, dtype=torch.float32)

(455, 13) (455,) (51, 13) (51,)


In [219]:
# 딥러닝 모델 정의
class BostonHousingModel(nn.Module):
    def __init__(self):
        super(BostonHousingModel, self).__init__()
        self.fc1 = nn.Linear(13, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [210]:
# 모델 인스턴스
model = BostonHousingModel()

# 옵티마이저 및 손실 함수 정의
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# 데이터로더 생성
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_valid_tensor,y_valid_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=32, shuffle=False)

In [211]:
for dataDF, labelSR in train_loader:
    print(dataDF.shape, labelSR.shape)

torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([32, 13]) torch.Size([32])
torch.Size([7, 13]) torch.Size([7])


In [214]:
# 학습 함수 정의
def train_model(model, train_loader, optimizer, criterion, epochs=100):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        for inputs, targets in train_loader:
               optimizer.zero_grad()
               outputs = model(inputs)
               loss = criterion(outputs, targets.unsqueeze(1))
               loss.backward()
               optimizer.step()
               epoch_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss / len(train_loader):.2f}")


In [215]:
# 모델 학습 
train_model(model, train_loader, optimizer, criterion, epochs=1000)

Epoch 1/1000, Loss: 6.35
Epoch 2/1000, Loss: 5.83
Epoch 3/1000, Loss: 7.07
Epoch 4/1000, Loss: 5.96
Epoch 5/1000, Loss: 7.25
Epoch 6/1000, Loss: 6.38
Epoch 7/1000, Loss: 5.54
Epoch 8/1000, Loss: 5.81
Epoch 9/1000, Loss: 5.76
Epoch 10/1000, Loss: 5.56
Epoch 11/1000, Loss: 6.06
Epoch 12/1000, Loss: 6.50
Epoch 13/1000, Loss: 6.03
Epoch 14/1000, Loss: 6.07
Epoch 15/1000, Loss: 5.52
Epoch 16/1000, Loss: 6.22
Epoch 17/1000, Loss: 5.73
Epoch 18/1000, Loss: 9.03
Epoch 19/1000, Loss: 9.15
Epoch 20/1000, Loss: 6.54
Epoch 21/1000, Loss: 7.31
Epoch 22/1000, Loss: 6.97
Epoch 23/1000, Loss: 7.38
Epoch 24/1000, Loss: 7.28
Epoch 25/1000, Loss: 6.77
Epoch 26/1000, Loss: 6.68
Epoch 27/1000, Loss: 5.77
Epoch 28/1000, Loss: 5.83
Epoch 29/1000, Loss: 6.58
Epoch 30/1000, Loss: 5.75
Epoch 31/1000, Loss: 6.45
Epoch 32/1000, Loss: 5.72
Epoch 33/1000, Loss: 6.47
Epoch 34/1000, Loss: 5.65
Epoch 35/1000, Loss: 5.58
Epoch 36/1000, Loss: 5.91
Epoch 37/1000, Loss: 6.85
Epoch 38/1000, Loss: 5.74
Epoch 39/1000, Loss: 

결론은 Adam이 최고다,,

In [216]:
# 테스트 세트 평가
model.eval()
with torch.no_grad():
    test_loss = 0.0
    for inputs, targets in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets.unsqueeze(1))
        test_loss += loss.item()
    print(f'Test Loss: {test_loss/len(test_loader):.4f}')

Test Loss: 14.3900


In [226]:
# 모델 저장
model =BostonHousingModel()
torch.save(model.state_dict(), 'Mymodel.pth')

In [227]:
# 모델 불러오기
model.load_state_dict(torch.load('Mymodel.pth'))

<All keys matched successfully>