# 1. 데이터 준비

In [55]:
import pandas as pd
import numpy as np

# 데이터 로드
data = pd.read_csv('../data/train/train.csv')

# 필요한 품목 필터링
target_products = ['배추', '무', '양파', '사과', '배', '건고추', '깐마늘', '감자', '대파', '상추']
filtered_data = data[data['품목명'].isin(target_products)]
filtered_data

Unnamed: 0,시점,품목명,품종명,거래단위,등급,평년 평균가격(원),평균가격(원)
0,201801상순,건고추,화건,30 kg,상품,381666.666667,590000.0
1,201801중순,건고추,화건,30 kg,상품,380809.666667,590000.0
2,201801하순,건고추,화건,30 kg,상품,380000.000000,590000.0
3,201802상순,건고추,화건,30 kg,상품,380000.000000,590000.0
4,201802중순,건고추,화건,30 kg,상품,376666.666667,590000.0
...,...,...,...,...,...,...,...
29371,202111중순,대파,대파(일반),10키로묶음,상,0.000000,0.0
29372,202111하순,대파,대파(일반),10키로묶음,상,0.000000,0.0
29373,202112상순,대파,대파(일반),10키로묶음,상,0.000000,0.0
29374,202112중순,대파,대파(일반),10키로묶음,상,0.000000,0.0


In [31]:
def todatetime(x) :
    c = ''
    if '상순' in x :
        c = x.replace('상순','01')
    elif '중순' in x:
        c = x.replace('중순', '10')
    elif '하순' in x:
        c = x.replace('하순', '20')
    return c

In [57]:
filtered_data['시점'] = filtered_data['시점'].apply(lambda x : todatetime(x))
filtered_data = filtered_data.loc[filtered_data['품목명'] == '건고추']
filtered_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['시점'] = filtered_data['시점'].apply(lambda x : todatetime(x))


Unnamed: 0,시점,품목명,품종명,거래단위,등급,평년 평균가격(원),평균가격(원)
0,20180101,건고추,화건,30 kg,상품,381666.666667,590000.0
1,20180110,건고추,화건,30 kg,상품,380809.666667,590000.0
2,20180120,건고추,화건,30 kg,상품,380000.000000,590000.0
3,20180201,건고추,화건,30 kg,상품,380000.000000,590000.0
4,20180210,건고추,화건,30 kg,상품,376666.666667,590000.0
...,...,...,...,...,...,...,...
1147,20211110,건고추,햇산양건,30 kg,중품,0.000000,0.0
1148,20211120,건고추,햇산양건,30 kg,중품,0.000000,0.0
1149,20211201,건고추,햇산양건,30 kg,중품,0.000000,0.0
1150,20211210,건고추,햇산양건,30 kg,중품,0.000000,0.0


In [61]:
# 필요 열 선택 및 피처 엔지니어링
features = filtered_data[['시점', '품종명', '등급']]
targets = filtered_data[['평년 평균가격(원)', '평균가격(원)']]

# 예시: 품목명과 등급을 원-핫 인코딩
features = pd.get_dummies(features, columns=['품종명', '등급'], dtype='float')
features

Unnamed: 0,시점,품종명_양건,품종명_햇산양건,품종명_햇산화건,품종명_화건,등급_상품,등급_중품
0,20180101,0.0,0.0,0.0,1.0,1.0,0.0
1,20180110,0.0,0.0,0.0,1.0,1.0,0.0
2,20180120,0.0,0.0,0.0,1.0,1.0,0.0
3,20180201,0.0,0.0,0.0,1.0,1.0,0.0
4,20180210,0.0,0.0,0.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...
1147,20211110,0.0,1.0,0.0,0.0,0.0,1.0
1148,20211120,0.0,1.0,0.0,0.0,0.0,1.0
1149,20211201,0.0,1.0,0.0,0.0,0.0,1.0
1150,20211210,0.0,1.0,0.0,0.0,0.0,1.0


# 2. 데이터셋 분할

In [64]:
from sklearn.model_selection import train_test_split

# 특성과 타겟 분리
X = features.values
y = targets['평균가격(원)'].values

# 데이터 분할
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. 베이즈 신경망 모델 구현

In [67]:
import torch
import torch.nn as nn
import torch.optim as optim

class BayesianNN(nn.Module):
    def __init__(self, input_dim):
        super(BayesianNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        
        # 초기화
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# 4. 모델 학습

In [81]:
# 데이터 형식 변환 (필요한 경우)
X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.float32)

# 데이터 텐서로 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_val_tensor = torch.tensor(X_val.astype(np.float32), dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.astype(np.float32), dtype=torch.float32).view(-1, 1)

# 모델 초기화
model = BayesianNN(input_dim=X_train.shape[1])
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 학습
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 224855457792.0000
Epoch [20/100], Loss: 161838940160.0000
Epoch [30/100], Loss: 145825398784.0000
Epoch [40/100], Loss: 140704923648.0000
Epoch [50/100], Loss: 136504066048.0000
Epoch [60/100], Loss: 134770081792.0000
Epoch [70/100], Loss: 129108926464.0000
Epoch [80/100], Loss: 123717804032.0000
Epoch [90/100], Loss: 126287781888.0000
Epoch [100/100], Loss: 127403278336.0000


# 5. 예측 및 평가

In [77]:
model.eval()
with torch.no_grad():
    val_outputs = model(X_val_tensor)
    val_loss = criterion(val_outputs, y_val_tensor)
    print(f'Validation Loss: {val_loss.item():.4f}')

# 예측값 출력
predicted_prices = val_outputs.numpy()

Validation Loss: 164572577792.0000
