In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [3]:
merged_data = pd.read_csv("data_file.csv")

In [9]:
# 데이터를 학습, 검증, 테스트 데이터셋으로 분할합니다.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(merged_data.drop(columns=['연도', '일시', '측정소']))

# 데이터를 학습, 검증, 테스트 데이터셋으로 분할합니다.
train_data, test_data = train_test_split(data_scaled, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)

In [10]:
class PM25Dataset(Dataset):
    def __init__(self, data, target_col_index):
        self.data = data
        self.target_col_index = target_col_index

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = np.delete(self.data[idx], self.target_col_index, axis=0)
        y = self.data[idx][self.target_col_index]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# PM2.5 값을 포함하는 열의 인덱스를 설정합니다. 이 값은 데이터셋에 따라 변경됩니다.
target_col_index = 2

train_dataset = PM25Dataset(train_data, target_col_index)
val_dataset = PM25Dataset(val_data, target_col_index)
test_dataset = PM25Dataset(test_data, target_col_index)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [11]:
class PM25Predictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(PM25Predictor, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

input_size = train_data.shape[1] - 1
hidden_size = 64
output_size = 1

model = PM25Predictor(input_size, hidden_size, output_size)

In [12]:
def train_model(model, criterion, optimizer, train_loader, val_loader, n_epochs):
    for epoch in range(n_epochs):
        model.train()
        train_loss = 0.0
        for x, y in train_loader:
            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_loss /= len(train_loader)
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x, y in val_loader:
                y_pred = model(x)
                loss = criterion(y_pred, y.unsqueeze(1))
                val_loss += loss.item()
            val_loss /= len(val_loader)
        
        print(f'Epoch {epoch+1}/{n_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

In [13]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
n_epochs = 50

In [14]:
train_model(model, criterion, optimizer, train_loader, val_loader, n_epochs)


Epoch 1/50, Train Loss: 0.0027, Val Loss: 0.7997
Epoch 2/50, Train Loss: 0.0014, Val Loss: 0.7793
Epoch 3/50, Train Loss: 0.0018, Val Loss: 0.7688
Epoch 4/50, Train Loss: 0.0008, Val Loss: 0.7599
Epoch 5/50, Train Loss: 0.0016, Val Loss: 0.7554
Epoch 6/50, Train Loss: 0.0010, Val Loss: 0.7577
Epoch 7/50, Train Loss: 0.0017, Val Loss: 0.7442
Epoch 8/50, Train Loss: 0.0014, Val Loss: 0.7394
Epoch 9/50, Train Loss: 0.0012, Val Loss: 0.7320
Epoch 10/50, Train Loss: 0.0024, Val Loss: 0.7293
Epoch 11/50, Train Loss: 0.0012, Val Loss: 0.7282
Epoch 12/50, Train Loss: 0.0011, Val Loss: 0.7209
Epoch 13/50, Train Loss: 0.0025, Val Loss: 0.7255
Epoch 14/50, Train Loss: 0.0006, Val Loss: 0.7155
Epoch 15/50, Train Loss: 0.0019, Val Loss: 0.7172
Epoch 16/50, Train Loss: 0.0017, Val Loss: 0.7138
Epoch 17/50, Train Loss: 0.0013, Val Loss: 0.7180
Epoch 18/50, Train Loss: 0.0011, Val Loss: 0.7105
Epoch 19/50, Train Loss: 0.0005, Val Loss: 0.7086
Epoch 20/50, Train Loss: 0.0010, Val Loss: 0.7113
Epoch 21/

In [15]:
model.eval()
test_loss = 0.0
with torch.no_grad():
    for x, y in test_loader:
        y_pred = model(x)
        loss = criterion(y_pred, y.unsqueeze(1))
        test_loss += loss.item()
    test_loss /= len(test_loader)

print(f'Test Loss: {test_loss:.4f}')

Test Loss: 0.7185
