## Import Library

In [1]:
import os
import re
import torch
import glob
import multiprocessing
import random 
import numpy as np
import pandas as pd
import torch.nn as nn
from torch import Tensor 

from tqdm.notebook import tqdm
from types import SimpleNamespace
from typing import Callable, Optional

import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
device = torch.device("cpu")
import warnings 
warnings.filterwarnings('ignore')
device

device(type='cpu')

## 버전 확인

In [37]:
import platform
import psutil
from sklearn import __version__ as sklearn_version

# 라이브러리 버전 출력
print(f"os version: {os.name}")
print(f"PyTorch version: {torch.__version__}")
print(f"random module: built-in module, no version")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")
print(f"Scikit-learn version: {sklearn_version}")

print("------------------------------------------------------------------------------")
# OS 정보
print(f"Operating System: {platform.system()} {platform.release()}")
# Python 버전
print(f"Python Version: {platform.python_version()}")
# CPU 정보
print(f"CPU: {platform.processor()}")
# 메모리 정보
mem_info = psutil.virtual_memory()
print(f"Total Memory: {mem_info.total / (1024 ** 3):.2f} GB")
print(f"Available Memory: {mem_info.available / (1024 ** 3):.2f} GB")
# GPU 정보 (CUDA 확인)
try:
    import torch
    print(f"CUDA Available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU Device Name: {torch.cuda.get_device_name(0)}")
except ImportError:
    print("PyTorch not installed, skipping GPU information.")

os version: posix
PyTorch version: 2.5.0+cu124
random module: built-in module, no version
NumPy version: 1.23.5
Pandas version: 1.5.3
Scikit-learn version: 1.1.3
------------------------------------------------------------------------------
Operating System: Linux 6.8.0-47-generic
Python Version: 3.10.12
CPU: x86_64
Total Memory: 31.18 GB
Available Memory: 19.30 GB
CUDA Available: True
GPU Device Name: NVIDIA GeForce RTX 4070


## 기본 설정 및 함수

In [38]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
seed_everything(42) 

def normalize_data(data):
    scaler = StandardScaler()
    normalized_data = scaler.fit_transform(data)
    return normalized_data, scaler

def inverse_normalize(data, scaler):
    return scaler.inverse_transform(data)

def interpolate_zeros(df, column):
    col = df[column]
    # 값이 0인 위치를 찾음
    zeros = col == 0
    # 0인 값들을 NaN으로 대체
    col[zeros] = np.nan
    # 보간 수행
    col.interpolate(method='linear', inplace=True, limit_direction='both')
    # 결과를 데이터프레임에 반영
    df[column] = col
    
품목_리스트 = ["감자 수미", "무", "양파", "배추", "대파(일반)", "건고추", "깐마늘(국산)", "사과", "상추", "배"]
group1 = ['배추', '무', '양파', '감자 수미', '대파(일반)']
group2 = ['건고추', '깐마늘(국산)']
group3 = ['상추', '사과', '배']

item_columns = {
    "감자 수미": ["YYYYMMSOON", "평균가격(원)"],
    "무": [ "YYYYMMSOON","평균가격(원)"],
    "양파": ["YYYYMMSOON","평균가격(원)"],

    "배추": ["YYYYMMSOON", "평균가격(원)"],
    "대파(일반)": ["YYYYMMSOON", "평균가격(원)"],

    "건고추": ["YYYYMMSOON", "평균가격(원)"],
    "깐마늘(국산)": [ "YYYYMMSOON","평균가격(원)"],
    "사과": ["YYYYMMSOON" , "평균가격(원)"],
    
    "상추": ["YYYYMMSOON","평균가격(원)"],

    "배": [ "YYYYMMSOON","평균가격(원)"]
}


# 사용 예시
selected_dome = ['감자_수미_100000', '대파_대파(일반)_100000', '마늘_깐마늘_100000', 
                 '무_기타무_100000', '배_신고_100000', '배추_기타배추_100000', 
                 '상추_포기찹_100000']
dome_items = ['감자 수미', '대파(일반)', '깐마늘(국산)', '무', '배', '배추', '상추']
dome_cols = [
    '감자_수미_100000_경매 건수', '마늘_깐마늘_100000_총반입량(kg)', '대파_대파(일반)_100000_총반입량(kg)', 
    '배추_기타배추_100000_총반입량(kg)', '상추_포기찹_100000_경매 건수', '상추_포기찹_100000_고가(20%) 평균가', 
    '배_신고_100000_고가(20%) 평균가', '무_기타무_100000_평균가(원/kg)'
]

## 모델 

In [39]:
class Data(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]
    
def reshape_data(df):
    time_series_data = []
    for idx, row in df.iterrows():
        sales_data = row.values.astype(float)
        time_series_data.append(sales_data)
    return np.array(time_series_data)
def time_slide_df(data, window_size, forecast_size):
    data_list = []
    dap_list = []
    for idx in range(0, len(data) - window_size - forecast_size + 1):
        x = data[idx:idx + window_size].reshape(window_size, -1)  
        y = data[idx + window_size:idx + window_size + forecast_size]
        data_list.append(x)
        dap_list.append(y)
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32')
def create_dataloader(data, window_size, forecast_size, batch_size):
    X, Y = time_slide_df(data, window_size, forecast_size)
    ds = Data(X, Y)
    return DataLoader(ds, batch_size=batch_size, shuffle=True)
class NMAELoss(nn.Module):
    def __init__(self):
        super(NMAELoss, self).__init__()
    
    def forward(self, y_pred, y_true):
        mae = torch.mean(torch.abs(y_pred - y_true))
        mean_true = torch.mean(torch.abs(y_true))
        nmae = mae / mean_true
        
        return nmae

def get_optimizer(model, config):
    if config.optimizer.lower() == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer.lower() == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer.lower() == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer.lower() == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    else:
        raise ValueError(f"Unsupported optimizer type: {config.optimizer}")
    return optimizer

def get_scheduler(optimizer, config):
    if config.scheduler.lower() == 'step_lr':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config.step_size, gamma=config.gamma)
    elif config.scheduler.lower() == 'reduce_on_plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=config.patience, factor=config.gamma)
    elif config.scheduler.lower() == 'none':
        scheduler = None
    else:
        raise ValueError(f"Unsupported scheduler type: {config.scheduler}")
    return scheduler

In [40]:
class RevIN(nn.Module):
    def __init__(self, num_features: int, eps=1e-5, affine=True, subtract_last=False):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        """
        super(RevIN, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.subtract_last = subtract_last
        if self.affine:
            self._init_params()

    def forward(self, x, mode:str):
        if mode == 'norm':
            self._get_statistics(x)
            x = self._normalize(x)
        elif mode == 'denorm':
            x = self._denormalize(x)
        else: raise NotImplementedError
        return x

    def _init_params(self):
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        dim2reduce = tuple(range(1, x.ndim-1))
        if self.subtract_last:
            self.last = x[:,-1,:].unsqueeze(1)
        else:
            self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
        self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()

    def _normalize(self, x):
        if self.subtract_last:
            x = x - self.last
        else:
            x = x - self.mean
        x = x / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        if self.affine:
            x = x - self.affine_bias
            x = x / (self.affine_weight + self.eps*self.eps)
        x = x * self.stdev
        if self.subtract_last:
            x = x + self.last
        else:
            x = x + self.mean
        return x
    
 
 
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # padding on the both ends of time series
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x
class momentum(nn.Module):
    def __init__(self, window_size):
        super(momentum, self).__init__()
        self.window_size = window_size

    def forward(self, x):
        # x: [Batch, Seq_len, Channels]
        momentum = x[:, self.window_size:, :] - x[:, :-self.window_size, :]
        padding = torch.zeros(x.size(0), self.window_size, x.size(2)).to(x.device)
        momentum = torch.cat([padding, momentum], dim=1)
        return momentum

class series_decomp2(nn.Module):
    def __init__(self, kernel_size, momentum_window):
        super(series_decomp2, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)
        self.momentum = momentum(momentum_window)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        res = x - moving_mean
        momentums =self.momentum(x) 
        return res, moving_mean , momentums
class series_decomp(nn.Module):
    """
    Series decomposition block
    """
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        res = x - moving_mean
        return res, moving_mean

#DlinwithAttn 
class ModelWithMultiheadAttention(nn.Module):
    def __init__(self, configs):
        super(ModelWithMultiheadAttention, self).__init__()
        self.seq_len = configs.window_size
        self.pred_len = configs.forecast_size
        self.n_heads = configs.n_heads
        self.channels = configs.feature_size
        self.kernel_size =configs.kernel_size
        self.momentum_window =configs.momentum_window
        self.individual = configs.individual
        
        
        self.decomposition = series_decomp2(self.kernel_size , self.momentum_window)
        # Multihead Attention 레이어
        self.multihead_attn = nn.MultiheadAttention(embed_dim=self.seq_len, num_heads=self.n_heads, batch_first=True)

        if configs.individual:
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()
            for i in range(self.channels):
                self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len))
                self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len))
        else:
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        seasonal_init, trend_init, momentum_init = self.decomposition(x)
        # 계절성, 트렌드성 추출하기 
        seasonal_init, trend_init,momentum_init = seasonal_init.permute(0, 2, 1), trend_init.permute(0, 2, 1) ,momentum_init.permute(0,2,1) 
        combined_features = trend_init + seasonal_init + momentum_init
        attn_output, _ = self.multihead_attn(query=trend_init, key=momentum_init, value=seasonal_init)
        

        if self.individual:
            seasonal_output = torch.zeros([attn_output.size(0), self.channels, self.pred_len], dtype=attn_output.dtype).to(attn_output.device)
            trend_output = torch.zeros([attn_output.size(0), self.channels, self.pred_len], dtype=attn_output.dtype).to(attn_output.device)
            
            for i in range(self.channels):
                seasonal_output[:, i, :] = self.Linear_Seasonal[i](attn_output[:, i, :])
                trend_output[:, i, :] = self.Linear_Trend[i](trend_init[:, i, :])
        else:
            seasonal_output = self.Linear_Seasonal(attn_output)
            trend_output = self.Linear_Trend(trend_init)

        x = seasonal_output + trend_output
        return x.permute(0, 2, 1)  # [Batch, Output length, Channel]로 변환

# Linear
class LinModel(nn.Module):
    """
    Just one Linear layer
    """
    def __init__(self, configs):
        super(LinModel, self).__init__()
        self.seq_len = configs.window_size
        self.pred_len = configs.forecast_size
        self.channels = configs.feature_size
        self.individual = configs.individual
        if self.individual:
            self.Linear = nn.ModuleList()
            for i in range(self.channels):
                self.Linear.append(nn.Linear(self.seq_len,self.pred_len))
        else:
            self.Linear = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        if self.individual:
            output = torch.zeros([x.size(0),self.pred_len,x.size(2)],dtype=x.dtype).to(x.device)
            for i in range(self.channels):
                output[:,:,i] = self.Linear[i](x[:,:,i])
            x = output
        else:
            x = self.Linear(x.permute(0,2,1)).permute(0,2,1)
        return x # [Batch, Output length, Channel]

# Dlinear
class LTSF_DLinear(torch.nn.Module):
    def __init__(self,config):
        super(LTSF_DLinear, self).__init__()
        self.window_size = config.window_size
        self.forecast_size = config.forecast_size
        self.decomposition = series_decomp(config.kernel_size)
        self.individual = config.individual
        self.channels = config.feature_size
        if self.individual:
            self.Linear_Seasonal = torch.nn.ModuleList()
            self.Linear_Trend = torch.nn.ModuleList()
            for i in range(self.channels):
                self.Linear_Trend.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Trend[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
                self.Linear_Seasonal.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Seasonal[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
        else:
            self.Linear_Trend = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Trend.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
            self.Linear_Seasonal = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Seasonal.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))

    def forward(self, x):
        trend_init, seasonal_init = self.decomposition(x)
        trend_init, seasonal_init = trend_init.permute(0, 2, 1), seasonal_init.permute(0, 2, 1)
        if self.individual:
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.forecast_size], dtype=trend_init.dtype).to(trend_init.device)
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.forecast_size], dtype=seasonal_init.dtype).to(seasonal_init.device)
            for idx in range(self.channels):
                trend_output[:, idx, :] = self.Linear_Trend[idx](trend_init[:, idx, :])
                seasonal_output[:, idx, :] = self.Linear_Seasonal[idx](seasonal_init[:, idx, :])
        else:
            trend_output = self.Linear_Trend(trend_init)
            seasonal_output = self.Linear_Seasonal(seasonal_init)
        x = seasonal_output + trend_output
        return x.permute(0, 2, 1)

       

## 확정거래물량 (사과,배 )

In [41]:
def get_deal_info():
    # 사과 데이터 로드 및 전처리
    사과_deal_info = pd.read_csv('../data/extradata/사과_확정거래물량.csv', encoding='cp949')
    사과_deal_info = 사과_deal_info[사과_deal_info['품목명'] == '후지']
    사과_deal_info = 사과_deal_info.rename(columns={col: f'사과_{col}' for col in 사과_deal_info.columns if col not in ['거래일자', '품목명']})
    사과_deal_info = 사과_deal_info[['거래일자', '사과_금액', '사과_평년 반입량 증감률(%)']]

    # 배 데이터 로드 및 전처리
    배_deal_info = pd.read_csv('../data/extradata/배_확정거래물량.csv', encoding='cp949')
    배_deal_info = 배_deal_info.rename(columns={col: f'배_{col}' for col in 배_deal_info.columns if col not in ['거래일자', '품목명']})
    배_deal_info = 배_deal_info[['거래일자', '배_반입량']]

    # 날짜 형식 변환 함수 정의
    def format_date(row):
        year = row[:4]  # 연도 (예: '2023')
        month = row[5:7]  # 월 (예: '01')
        period = row[8]  # 주기 ('상', '중', '하')
        return f"{year}{month}{period}순"

    # 날짜 형식 변환 적용
    사과_deal_info['거래일자'] = 사과_deal_info['거래일자'].apply(format_date)
    배_deal_info['거래일자'] = 배_deal_info['거래일자'].apply(format_date)

    # 거래일자 기준으로 병합하여 하나의 데이터프레임으로 결합
    combined_deal_info = pd.merge(사과_deal_info, 배_deal_info, on='거래일자', how='outer')

    return combined_deal_info

# 결과 호출
deal_info = get_deal_info()
deal_info


Unnamed: 0,거래일자,사과_금액,사과_평년 반입량 증감률(%),배_반입량
0,202409하순,1.930600e+07,-42.2,323835
1,202409중순,5.381000e+06,-77.5,1085421
2,202409상순,3.036950e+07,-41.0,3539181
3,202408하순,3.937600e+07,-48.7,382044
4,202408중순,2.617950e+07,-68.9,4320
...,...,...,...,...
238,201802중순,7.550720e+08,-55.0,900271
239,201802상순,6.665168e+09,132.4,4506319
240,201801하순,1.818520e+09,-44.4,1428041
241,201801중순,1.388682e+09,-55.6,937063


## 공판장(test_jointmarket)

In [42]:
def jointmarket_filter(df):
    # 필요한 컬럼만 읽어서 메모리 사용 최적화
    df = df[['공판장코드', '품목명', '품종명', '등급코드', '공판장명', 'YYYYMMSOON', '경매 건수', '총반입량(kg)']]

    mask = (        ((df['품목명'] == '대파') & (df['품종명'] == '대파(일반)') & (df['등급코드'] == 11) & (df['공판장명'] == '*전국농협공판장')) |
        ((df['품목명'] == '무') & (df['품종명'] == '기타무') & (df['등급코드'] == 11) & (df['공판장명'] == '*전국농협공판장'))
    )
    df = df[mask]
    df['item'] = df['공판장코드'].astype(str) + '_' + df['품목명'] + '_' + df['품종명'] + '_' + df['등급코드'].astype(str)
    df = df[['item', 'YYYYMMSOON', '경매 건수', '총반입량(kg)']]
    # 피벗 테이블 생성
    df_pivot = df.pivot_table(index='YYYYMMSOON', columns='item', values=['경매 건수', '총반입량(kg)'], aggfunc='sum')
    df_pivot.columns = [f"{col[1]}_{col[0]}" for col in df_pivot.columns]
    df_pivot = df_pivot.reset_index()
    df_filtered = df_pivot.loc[:, df_pivot.notna().all() & (df_pivot != 0).all()]
    
    return df_filtered

def add_jointmarket_info(df, item):
    # item 단어가 포함된 열을 필터링하여 새로운 DataFrame 생성
    df = df[[col for col in df.columns if item in col]]
    return df

def load_test_jointmarket():
    all_data = []
    for i in range(52):
        file_path = f'../data/test/meta/TEST_경락정보_산지공판장_{i:02d}.csv'
        one_test_jointmarket = pd.read_csv(file_path)
        filtered_data = jointmarket_filter(one_test_jointmarket)
        all_data.append(filtered_data)
    test_jointmarket = pd.concat(all_data, axis=0, ignore_index=True)

    test_jointmarket = test_jointmarket.drop_duplicates()
    test_jointmarket = test_jointmarket.reset_index(drop=True)
    return test_jointmarket
test_jointmarket =load_test_jointmarket() 


## 공판장(train_jointmarket)

In [43]:
train_jointmarket = pd.read_csv('../data/train/meta/TRAIN_경락정보_산지공판장_2018-2022.csv')
train_jointmarket =jointmarket_filter(train_jointmarket)

train_jointmarket


Unnamed: 0,YYYYMMSOON,1000000000_대파_대파(일반)_11_경매 건수,1000000000_무_기타무_11_경매 건수,1000000000_대파_대파(일반)_11_총반입량(kg),1000000000_무_기타무_11_총반입량(kg)
0,201801상순,8,8,57187.00,194800.00
1,201801중순,9,9,49875.00,142304.00
2,201801하순,9,9,69948.00,202378.00
3,201802상순,9,9,67499.00,149490.00
4,201802중순,6,5,56272.00,122969.00
...,...,...,...,...,...
175,202211중순,10,9,56011.00,69003.00
176,202211하순,10,10,54466.05,108812.60
177,202212상순,10,10,43784.50,77193.00
178,202212중순,8,7,41034.00,181776.02


## 전국 도매 데이터 (train_dome,test_dome)

In [44]:
# 전국 도매 데이터 로드 하기 
selected_dome = ['감자_수미_100000', '대파_대파(일반)_100000', '마늘_깐마늘_100000', 
                 '무_기타무_100000', '배_신고_100000', '배추_기타배추_100000', 
                 '상추_포기찹_100000']
dome_items = ['감자 수미', '대파(일반)', '깐마늘(국산)', '무', '배', '배추', '상추']
dome_cols = [
    '감자_수미_100000_경매 건수', '마늘_깐마늘_100000_총반입량(kg)', '대파_대파(일반)_100000_총반입량(kg)', 
    '배추_기타배추_100000_총반입량(kg)', '상추_포기찹_100000_경매 건수', '상추_포기찹_100000_고가(20%) 평균가', 
    '배_신고_100000_고가(20%) 평균가', '무_기타무_100000_평균가(원/kg)'
]
# 전국 도매 정보 불러오기 및 처리 함수
def get_dome_data(df, selected_dome, final_cols):
    # '품목_품종_시장코드' 컬럼 생성 및 필터링
    df['품목_품종_시장코드'] = df['품목명'].replace({'깐마늘(국산)': '마늘', '대파(일반)': '대파', '감자 수미': '감자'}) + '_' + df['품종명'] + '_' + df['시장코드'].astype(str)
    df_filtered = df[df['품목_품종_시장코드'].isin(selected_dome)][['YYYYMMSOON', '품목_품종_시장코드', '총반입량(kg)', '총거래금액(원)', '평균가(원/kg)', '고가(20%) 평균가', '경매 건수']]
    
    # 피벗 테이블 생성
    df_pivot = df_filtered.pivot_table(index='YYYYMMSOON', columns='품목_품종_시장코드', values=['총반입량(kg)', '총거래금액(원)', '평균가(원/kg)', '고가(20%) 평균가', '경매 건수'], aggfunc='sum')
    df_pivot.columns = [f'{col[1]}_{col[0]}' for col in df_pivot.columns]
    df_pivot = df_pivot.reset_index()

    # 필요한 컬럼만 유지하여 반환
    return df_pivot[['YYYYMMSOON'] + [col for col in final_cols if col in df_pivot.columns]]

# 전국 도매 데이터 로드
nation_dome_info = pd.read_csv('../data/train/meta/TRAIN_경락정보_전국도매_2018-2022.csv')[['YYYYMMSOON', '시장코드', '품목명', '품종명', '총반입량(kg)', '총거래금액(원)', '평균가(원/kg)', '고가(20%) 평균가', '경매 건수']]
train_dome = get_dome_data(nation_dome_info, selected_dome, dome_cols)

# 모든 테스트 데이터 로드 함수
def load_all_test_dome(selected_dome, final_cols):
    test_files = glob.glob('../data/test/meta/TEST_경락정보_전국도매_*.csv')
    all_test_data = pd.concat([get_dome_data(pd.read_csv(file), selected_dome, final_cols) for file in test_files], ignore_index=True)
    return all_test_data.drop_duplicates()

# 모든 테스트 데이터 결합
test_dome = load_all_test_dome(selected_dome, dome_cols)


## Functions 

In [45]:
def process_data(raw_file, meta_file, 품목명, scaler=None):
    raw_data = pd.read_csv(raw_file)
    meta_data = pd.read_csv(meta_file)

    if '품목명' not in raw_data.columns:
            if '품목(품종)명' in raw_data.columns:
                raw_data['품목명'] = raw_data['품목(품종)명']
    raw_품목 = raw_data[raw_data['품목명'] == 품목명]

    meta_conditions = {
         '감자 수미': lambda x: (x['품목(품종)명'] == '감자 수미') & (x['등급(특 5% 상 35% 중 40% 하 20%)'] == '특'), 
        '무': lambda x: (x['품목(품종)명'] == '무') &(x['거래단위'] == '20키로상자아아'),
        '양파': lambda x: (x['품목(품종)명'] == '양파')& (x['등급(특 5% 상 35% 중 40% 하 20%)'] == '상') & (x['거래단위'] == '12키로'),
        '배추': lambda x: (x['품목(품종)명'] == '알배기배추')& (x['등급(특 5% 상 35% 중 40% 하 20%)'] == '상'),
        '대파(일반)': lambda x: (x['품목(품종)명'] == '대파(일반이이)')|((x['품목(품종)명'] == '쪽파')&(x['등급(특 5% 상 35% 중 40% 하 20%)'] == '상')) ,
        '건고추': lambda x: (x['품목명'] == '건고추')&(x['품종명']=='화건')&(x['등급명'] =='중품') ,
        '깐마늘(국산)': lambda x: (x['품목명'] == '깐마늘(국산산)') ,
        '상추': lambda x: (x['품목명'] == '상추') & (x['품종명'] == '청') & (x['등급명'] == '중품') ,
        '사과': lambda x: (x['품목명'] == '사과아') ,
        '배': lambda x: (x['품목명'] == '배')& (x['품종명'] == '신고오') ,    }

    filtered_meta = meta_data[meta_conditions[품목명](meta_data)].copy()
    # 필요한 열 조합 생성
    if 품목명 in['감자 수미', '무', '양파','배추','대파(일반)']: 
        filtered_meta['품목명_거래단위_등급'] = filtered_meta['품목(품종)명'] + '_' + filtered_meta['거래단위'] + '_' + filtered_meta['등급(특 5% 상 35% 중 40% 하 20%)']
    else :
        filtered_meta['품목명_거래단위_등급'] = filtered_meta['품목명'] + '_' + filtered_meta['품종명'] + '_' + filtered_meta['등급명'] + '_' + filtered_meta['유통단계별 단위 '].astype(str)

    # 필요한 열만 선택
    columns_to_keep = ['YYYYMMSOON', '품목명_거래단위_등급', '평균가격(원)',  '평년 평균가격(원) Common Year SOON']
    filtered_meta = filtered_meta[columns_to_keep]
    filtered_meta_pivot = filtered_meta.pivot_table(
        index='YYYYMMSOON',
        columns='품목명_거래단위_등급',
        values=['평균가격(원)', '평년 평균가격(원) Common Year SOON']    )
    filtered_meta_pivot.columns = ['_'.join(col).strip() for col in filtered_meta_pivot.columns.values]
    filtered_meta_pivot.reset_index(inplace=True)
    train_data = pd.merge(raw_품목, filtered_meta_pivot, on='YYYYMMSOON', how='left')
    return train_data 

## 다변량 변수들 

In [46]:
fin_cols1 = {
    '감자 수미': ['평균가격(원)_감자 수미_20키로상자_특', '감자_수미_100000_경매 건수'],
    '건고추': ['평균가격(원)_건고추_화건_중품_30'],
    '깐마늘(국산)': ['마늘_깐마늘_100000_총반입량(kg)'],
    '대파(일반)': ['평균가격(원)_쪽파_10키로상자_상', '1000000000_대파_대파(일반)_11_총반입량(kg)'],
    '무': ['무_기타무_100000_평균가(원/kg)', '1000000000_무_기타무_11_총반입량(kg)'],
    '배추': ['평년 평균가격(원) Common Year SOON', '평균가격(원)_알배기배추_8키로상자_상'],
    '사과': ['평년 평균가격(원) Common Year SOON', '사과_금액', '사과_평년 반입량 증감률(%)'],
    '상추': ['상추_포기찹_100000_경매 건수', '상추_포기찹_100000_고가(20%) 평균가'],
    '양파': ['평균가격(원)_양파_12키로_상'],
    '배': ['배_신고_100000_고가(20%) 평균가', '배_반입량']
}


fin_cols2 = {
    '감자 수미': ['평균가격-평년가격', '평균가격(원)_감자 수미_20키로상자_특', '감자_수미_100000_경매 건수'],
    '건고추': ['평균가격-평년가격', '평균가격(원)_건고추_화건_중품_30'],
    '깐마늘(국산)': ['마늘_깐마늘_100000_총반입량(kg)'],
    '대파(일반)': ['평균가격-평년가격', '평균가격(원)_쪽파_10키로상자_상', '대파_대파(일반)_100000_총반입량(kg)'],
    '배': ['평균가격-평년가격', '배_신고_100000_고가(20%) 평균가'],
    '배추': ['평균가격-평년가격', '평년 평균가격(원) Common Year SOON', '평균가격(원)_알배기배추_8키로상자_상', '배추_기타배추_100000_총반입량(kg)'],
    '사과': ['평균가격-평년가격', '평년 평균가격(원) Common Year SOON', '사과_금액', '사과_평년 반입량 증감률(%)'],
    '상추': ['평균가격-평년가격', '상추_포기찹_100000_경매 건수'],
    '양파': ['평균가격-평년가격', '평균가격(원)_양파_12키로_상'],
    '무': ['무_기타무_100000_평균가(원/kg)', '1000000000_무_기타무_11_경매 건수', '1000000000_무_기타무_11_총반입량(kg)']
}


## Config 

* 단기 시계열 : {item}_config1 
* 장기 시계열 : {item}_config2 

### 단기 모델 config

In [47]:
class potato_config1:
    def __init__(self):
        self.seed = 258
        self.learning_rate = 0.001
        self.epoch = 71
        self.batch_size = 16
        self.optimizer = 'adam'
        self.weight_decay = 1e-8
        self.scheduler = 'reduce_on_plateau'
        self.patience = 6
        self.step_size = 20      
        self.gamma = 0.5   
        
        self.model = 'D'
        self.window_size = 3
        self.fin_cols = ['평균가격(원)_감자 수미_20키로상자_특', '감자_수미_100000_경매 건수']
        self.forecast_size = 2
        self.kernel_size = 17
        self.individual = False
        self.feature_size = 3
        self.year = 2018
        
class garlic_config1:
    def __init__(self):
        self.seed = 97
        self.learning_rate = 0.0015
        self.epoch = 109
        self.batch_size = 8
        self.optimizer = 'rmsprop'
        self.weight_decay = 0
        self.scheduler = 'reduce_on_plateau'
        self.patience = 4
        self.step_size = 20      
        self.gamma = 0.5   
        
        self.model = 'D'
        self.window_size = 3
        self.fin_cols = ['마늘_깐마늘_100000_총반입량(kg)']
        self.forecast_size = 2
        self.kernel_size = 19
        self.individual = True
        self.feature_size = 2 
        self.year = 2019
        
class apple_config1:
    def __init__(self):
        self.seed = 319
        self.learning_rate = 0.0025
        self.epoch = 125
        self.batch_size = 16
        self.optimizer = 'adam'
        self.weight_decay = 0
        self.scheduler = 'none'
        self.patience = 3
        self.step_size = 20      
        self.gamma = 0.5   
        self.model = 'D'
        self.window_size = 3
        self.fin_cols = ['평년 평균가격(원) Common Year SOON', '사과_금액', '사과_평년 반입량 증감률(%)']
        self.forecast_size = 2
        self.kernel_size = 13
        self.individual = True
        self.feature_size = 4
        self.year = 2018

class lettuce_config1:
    def __init__(self):
        self.seed = 435
        self.learning_rate = 0.002
        self.epoch = 99
        self.batch_size = 16
        self.optimizer = 'rmsprop'
        self.weight_decay = 0
        self.scheduler = 'none'
        self.patience = 6
        self.step_size = 20      
        self.gamma = 0.5   
        self.model = 'D'
        self.window_size = 3
        self.fin_cols = ['상추_포기찹_100000_경매 건수', '상추_포기찹_100000_고가(20%) 평균가']
        self.forecast_size = 2
        self.kernel_size = 19
        self.individual = True
        self.feature_size = 3
        self.year = 2021

class pepper_config1:
    def __init__(self):
        self.seed = 81
        self.learning_rate = 0.002
        self.epoch = 115
        self.batch_size = 32
        self.optimizer = 'rmsprop'
        self.weight_decay = 1e-09
        self.scheduler = 'reduce_on_plateau'
        self.patience = 5
        self.step_size = 20      
        self.gamma = 0.5   
        
        self.fin_cols = ['평균가격(원)_건고추_화건_중품_30']
        self.window_size = 3
        self.forecast_size = 2
        self.kernel_size = 17
        self.individual = True
        self.feature_size = 2
        self.momentum_window = 2
        self.n_heads = 3
        self.year = 2020

class daepa_config1:
    def __init__(self):
        self.seed = 800
        self.learning_rate = 0.003
        self.epoch = 140
        self.batch_size = 8
        self.optimizer = 'adam'
        self.weight_decay = 1e-10
        self.scheduler = 'reduce_on_plateau'
        self.patience = 3
        self.step_size = 20      
        self.gamma = 0.5   
        self.fin_cols = ['평균가격(원)_쪽파_10키로상자_상', '1000000000_대파_대파(일반)_11_총반입량(kg)']
        self.window_size = 3
        self.forecast_size = 2
        self.kernel_size = 15
        self.individual = True
        self.feature_size = 3
        self.momentum_window = 1
        self.n_heads = 3
        self.year = 2020
        
class moo_config1:
    def __init__(self):
        self.seed = 2551
        self.learning_rate = 0.003
        self.epoch = 81
        self.batch_size = 8
        self.optimizer = 'adamw'
        self.weight_decay = 0
        self.scheduler = 'none'
        self.patience = 5
        self.step_size = 20      
        self.gamma = 0.5   
        self.fin_cols = ['무_기타무_100000_평균가(원/kg)', '1000000000_무_기타무_11_총반입량(kg)']
        self.window_size = 3
        self.forecast_size = 2
        self.kernel_size = 15
        self.individual = True
        self.feature_size = 3
        self.momentum_window = 2
        self.n_heads = 1
        self.year = 2018
        

class cabbage_config1:
    def __init__(self):
        self.seed = 318
        self.learning_rate = 0.0009
        self.epoch = 72
        self.batch_size = 16
        self.optimizer = 'rmsprop'
        self.weight_decay = 1e-8
        self.scheduler = 'none'
        self.patience = 3
        self.step_size = 20      
        self.gamma = 0.5   
        self.fin_cols = ['평년 평균가격(원) Common Year SOON', '평균가격(원)_알배기배추_8키로상자_상']
        self.window_size = 3
        self.forecast_size = 2
        self.kernel_size = 21
        self.individual = True
        self.feature_size = 3
        self.momentum_window = 3
        self.n_heads = 1
        self.year = 2019
        
class onion_config1:
    def __init__(self):
        self.seed = 321
        self.learning_rate = 0.0025
        self.epoch = 145
        self.batch_size = 32
        self.optimizer = 'adam'
        self.weight_decay = 0
        self.scheduler = 'none'
        self.patience = 3
        self.step_size = 20      
        self.gamma = 0.5   
        self.fin_cols = ['평균가격(원)_양파_12키로_상']
        self.window_size = 3
        self.forecast_size = 2
        self.kernel_size = 21
        self.individual = False
        self.feature_size = 2
        self.momentum_window = 1
        self.n_heads = 3
        self.year = 2020
        
class pear_config1:
    def __init__(self):
        self.seed = 2713
        self.learning_rate = 0.003
        self.epoch = 156
        self.batch_size = 16
        self.optimizer = 'adamw'
        self.weight_decay = 0
        self.scheduler = 'none'
        self.patience = 4
        self.step_size = 20      
        self.gamma = 0.5   
        self.fin_cols = ['배_신고_100000_고가(20%) 평균가', '배_반입량']
        self.window_size = 3
        self.forecast_size = 2
        self.kernel_size = 19
        self.individual = False
        self.feature_size = 3
        self.momentum_window = 1
        self.n_heads = 3
        self.year = 2019

### 장기 모델 config

In [48]:
class potato_config2:
    def __init__(self):
        self.seed = 199
        self.learning_rate = 0.003
        self.epoch = 90
        self.patience = 4
        self.batch_size = 16
        self.optimizer = 'rmsprop'
        self.weight_decay = 0
        self.scheduler = 'none'
        
        self.model = 'L'
        self.window_size = 9
        self.fin_cols = ['평균가격-평년가격', '평균가격(원)_감자 수미_20키로상자_특', '감자_수미_100000_경매 건수']
        self.forecast_size = 3
        self.kernel_size = 15
        self.individual = False
        self.feature_size = len(self.fin_cols) + 1  # 주요 특징의 개수에 따라 조정
        self.year = 2018
        
class garlic_config2:
    def __init__(self):
        self.seed = 25
        self.learning_rate = 0.002
        self.epoch = 113
        self.patience = 5
        self.batch_size = 8
        self.optimizer = 'adam'
        self.weight_decay = 1e-09
        self.scheduler = 'none'
        
        self.model = 'D'
        self.window_size = 9
        self.fin_cols = ['마늘_깐마늘_100000_총반입량(kg)']
        self.forecast_size = 3
        self.kernel_size = 21
        self.individual = True
        self.feature_size = len(self.fin_cols) + 1
        self.year = 2019

class apple_config2:
    def __init__(self):
        self.seed = 332
        self.learning_rate = 0.002
        self.epoch = 120
        self.patience = 6
        self.batch_size = 8
        self.optimizer = 'adam'
        self.weight_decay = 1e-08
        self.scheduler = 'none'
        
        self.model = 'D'
        self.window_size = 9
        self.fin_cols = ['평균가격-평년가격', '평년 평균가격(원) Common Year SOON', '사과_금액', '사과_평년 반입량 증감률(%)']
        self.forecast_size =3
        self.kernel_size = 13
        self.individual = False
        self.feature_size = len(self.fin_cols) + 1
        self.year = 2018

class lettuce_config2:
    def __init__(self):
        self.seed = 888
        self.learning_rate = 0.002
        self.epoch = 100
        self.patience = 6
        self.batch_size = 8
        self.optimizer = 'rmsprop'
        self.weight_decay = 1e-08
        self.scheduler = 'none'
        
        self.model = 'D'
        self.window_size = 9
        self.fin_cols = ['평균가격-평년가격', '상추_포기찹_100000_경매 건수']
        self.forecast_size = 3
        self.kernel_size = 21
        self.individual = True
        self.feature_size = len(self.fin_cols) + 1
        self.year = 2021

class pepper_config2:
    def __init__(self):
        self.seed = 221
        self.learning_rate = 0.0015
        self.epoch = 79
        self.patience = 5
        self.batch_size = 32
        self.optimizer = 'adam'
        self.weight_decay = 1e-08
        self.scheduler = 'none'
        
        self.fin_cols = ['평균가격-평년가격', '평균가격(원)_건고추_화건_중품_30']
        self.window_size = 9
        self.forecast_size = 3
        self.kernel_size = 15
        self.individual = True
        self.feature_size = len(self.fin_cols) + 1
        self.momentum_window = 3
        self.n_heads = 1
        self.year = 2020
        
class daepa_config2:
    def __init__(self):
        self.seed = 6
        self.learning_rate = 0.001
        self.epoch = 112
        self.patience = 5
        self.batch_size = 8
        self.optimizer = 'rmsprop'
        self.weight_decay = 0
        self.scheduler = 'none'
        
        self.fin_cols = ['평균가격-평년가격', '평균가격(원)_쪽파_10키로상자_상', '대파_대파(일반)_100000_총반입량(kg)']
        self.window_size = 9
        self.forecast_size = 3
        self.kernel_size = 15
        self.individual = True
        self.feature_size = len(self.fin_cols) + 1
        self.momentum_window = 1
        self.n_heads = 1
        self.year = 2020

class moo_config2:
    def __init__(self):
        self.seed = 101
        self.learning_rate = 0.0025
        self.epoch = 97
        self.batch_size = 16
        self.optimizer = 'adamw'
        self.weight_decay = 1e-10
        self.scheduler = 'none'
        self.patience = 4 
        
        self.fin_cols = ['무_기타무_100000_평균가(원/kg)', '1000000000_무_기타무_11_경매 건수', '1000000000_무_기타무_11_총반입량(kg)']
        self.window_size = 9
        self.forecast_size = 3
        self.kernel_size = 19
        self.individual = False
        self.feature_size = len(self.fin_cols) + 1
        self.momentum_window = 3
        self.n_heads = 3
        self.year = 2018


class cabbage_config2:
    def __init__(self):
        self.seed = 268
        self.learning_rate = 0.0015
        self.epoch = 73
        self.batch_size = 16
        self.optimizer = 'adam'
        self.weight_decay = 0
        self.scheduler = 'none'
        
        self.fin_cols = ['평균가격-평년가격', '평년 평균가격(원) Common Year SOON', '평균가격(원)_알배기배추_8키로상자_상', '배추_기타배추_100000_총반입량(kg)']
        self.window_size = 9
        self.forecast_size = 3
        self.kernel_size = 21
        self.individual = True
        self.feature_size = len(self.fin_cols) + 1
        self.momentum_window = 3
        self.n_heads = 3
        self.year = 2019
        
        
class onion_config2:
    def __init__(self):
        self.seed = 3
        self.learning_rate = 0.0025
        self.epoch = 157
        self.batch_size = 32
        self.optimizer = 'adamw'
        self.weight_decay = 1e-09
        self.scheduler = 'none'
        
        self.fin_cols = ['평균가격-평년가격', '평균가격(원)_양파_12키로_상']
        self.window_size = 9
        self.forecast_size = 3
        self.kernel_size = 17
        self.individual = False
        self.feature_size = len(self.fin_cols) + 1
        self.momentum_window = 2
        self.n_heads = 3
        self.year = 2020


class pear_config2:
    def __init__(self):
        self.seed = 337
        self.learning_rate = 0.0015
        self.epoch = 134
        self.batch_size = 8
        self.patience = 7 
        self.optimizer = 'adam'
        self.weight_decay = 0
        self.step_size = 20      
        self.gamma = 0.5 
        self.scheduler = 'reduce_on_plateau'
        
        self.fin_cols = ['평균가격-평년가격', '배_신고_100000_고가(20%) 평균가']
        self.window_size = 9
        self.forecast_size = 3
        self.kernel_size = 21
        self.individual = True
        self.feature_size = len(self.fin_cols) + 1
        self.momentum_window = 3
        self.n_heads = 3
        self.year = 2019
        
        

## 모델학습 함수

### 단기 시계열 학습

In [49]:
def train_dlin1(품목리스트 , config ):
    # 감자, 깐마늘(국산), 사과, 상추 => jointmarket 불필요
    seed_everything(config.seed) 
    
    for item in 품목리스트:
        config.feature_size = len(config.fin_cols) + 1
        config.window_size = 3 
        # 데이터 파일 및 메타 파일 설정
        if item in group1:
            train_file = "../data/train/train_1.csv"
            meta_file = "../data/train/meta/TRAIN_경락정보_가락도매_2018-2022.csv"
        elif item in group2:
            train_file = "../data/train/train_2.csv"
            meta_file = "../data/train/meta/TRAIN_중도매_2018-2022.csv"
        elif item in group3:
            train_file = "../data/train/train_2.csv"
            meta_file = "../data/train/meta/TRAIN_소매_2018-2022.csv"
        
        # 필요한 열만 추려서 데이터 로드 및 병합
        train_data = process_data(train_file, meta_file, item)
        fincols = ['YYYYMMSOON', '평균가격(원)'] + [col for col in config.fin_cols if col in train_data.columns]
        train_data = train_data[fincols]
        dome_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in train_dome.columns]
        train_dome_filtered = train_dome[dome_cols]
        train_data = pd.merge(train_data, train_dome_filtered, how='left', on='YYYYMMSOON')

        if item == '사과':
            deal_cols = ['거래일자'] + [col for col in config.fin_cols if col in deal_info.columns]
            deal_info_filtered = deal_info[deal_cols]
            train_data = pd.merge(train_data, deal_info_filtered, how='left', left_on='YYYYMMSOON', right_on='거래일자')
            
        final_columns = ['평균가격(원)'] + config.fin_cols
        train_data = train_data[final_columns]
        
        for col in train_data.columns:             interpolate_zeros(train_data, col) 
        
        
        offset = (config.year - 2018) * 36
        train_data = train_data.iloc[offset: (144 + (3 * 12)), :]
        train_data = train_data.fillna(0) 
        
        price_df = train_data.reset_index(drop=True)
        normalized_timedata, scaler = normalize_data(price_df) 
        train_dl = create_dataloader(normalized_timedata, config.window_size, config.forecast_size, config.batch_size)
        
        # dlinear만 사용
        model = LTSF_DLinear(config)
        model.to(device)
        optimizer = get_optimizer(model, config)
        scheduler = get_scheduler(optimizer, config)
        criterion = NMAELoss()
        for ep in range(1, config.epoch + 1):
            model.train()
            batch_losses = []
            for idx, (data_batch, target) in enumerate(train_dl):
                data_batch, target = data_batch.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data_batch)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                batch_losses.append(loss.item())
            avg_loss = np.mean(batch_losses)
            
        os.makedirs('dl_weights2', exist_ok=True)
        model_save_path = f'dl_weights2/{item}_model1_win3.pth'
        torch.save(model.state_dict(), model_save_path)
        print(f"Model for {item} saved at {model_save_path}")

    return scaler


In [50]:
def train_dlinAttn1(품목리스트,config):
    # 건고추, 대파, 무, 배추, 양파, 배
    seed_everything(config.seed) 
    for item in 품목리스트:
        config.feature_size = len(config.fin_cols) + 1
        
        if item in group1:
            train_file = "../data/train/train_1.csv"
            meta_file = "../data/train/meta/TRAIN_경락정보_가락도매_2018-2022.csv"
        elif item in group2:
            train_file = "../data/train/train_2.csv"
            meta_file = "../data/train/meta/TRAIN_중도매_2018-2022.csv"
        elif item in group3:
            train_file = "../data/train/train_2.csv"
            meta_file = "../data/train/meta/TRAIN_소매_2018-2022.csv"
        
        # 필요한 열만 추려서 데이터 로드 및 병합
        train_data = process_data(train_file, meta_file, item)
        # train_data에서 필요한 열 중 실제 존재하는 열만 선택
        fincols = ['YYYYMMSOON', '평균가격(원)'] + [col for col in config.fin_cols if col in train_data.columns]
        train_data = train_data[fincols]
        
        # train_dome에서도 필요한 열만 선택
        dome_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in train_dome.columns]
        train_dome_filtered = train_dome[dome_cols]
        train_data = pd.merge(train_data, train_dome_filtered, how='left', on='YYYYMMSOON')
        
        # 조건에 따라 추가적인 병합 처리
        if item in ['대파(일반)', '무']:
            joint_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in train_jointmarket.columns]
            train_jointmarket_filtered = train_jointmarket[joint_cols]
            train_data = pd.merge(train_data, train_jointmarket_filtered, how='left', on='YYYYMMSOON')
        elif item == '배':
            deal_cols = ['거래일자'] + [col for col in config.fin_cols if col in deal_info.columns]
            deal_info_filtered = deal_info[deal_cols]
            train_data = pd.merge(train_data, deal_info_filtered, how='left', left_on='YYYYMMSOON', right_on='거래일자')

        final_columns = ['평균가격(원)'] + config.fin_cols 
        train_data = train_data[final_columns]
        
        
        offset = (config.year - 2018) * 36
        train_data = train_data.iloc[offset: (144 + (3 * 12)), :]
        train_data = train_data.fillna(0) 
        for col in train_data.columns:
            interpolate_zeros(train_data, col)
        price_df = train_data.reset_index(drop=True)
        normalized_timedata, scaler = normalize_data(price_df) 
        train_dl = create_dataloader(normalized_timedata, config.window_size, config.forecast_size, config.batch_size)
        
        model = ModelWithMultiheadAttention(config)
        model.to(device)
        optimizer = get_optimizer(model, config)
        scheduler = get_scheduler(optimizer, config)
        criterion = NMAELoss()
        
        for ep in range(1, config.epoch + 1):
            model.train()
            batch_losses = []
            for idx, (data_batch, target) in enumerate(train_dl):
                data_batch, target = data_batch.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data_batch)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                batch_losses.append(loss.item())

            if ep % 5 == 0:                avg_loss = np.mean(batch_losses)
            
        os.makedirs('dl_weights2', exist_ok=True)
        model_save_path = f'dl_weights2/{item}_model1_win3.pth'
        torch.save(model.state_dict(), model_save_path)
        print(f"Model for {item} saved at {model_save_path}")
    
    return scaler


### 장기 시계열 학습

In [51]:
def train_dlin2(품목리스트,config):
    # 감자, 깐마늘(국산), 사과, 상추 => jointmarket 불필요
    # 데이터 선형보간 해야해 
    seed_everything(config.seed) 
    for item in 품목리스트:
        config.feature_size = len(config.fin_cols) + 1
        
        
        # 데이터 파일 및 메타 파일 설정
        if item in group1:
            train_file = "../data/train/train_1.csv"
            meta_file = "../data/train/meta/TRAIN_경락정보_가락도매_2018-2022.csv"
        elif item in group2:
            train_file = "../data/train/train_2.csv"
            meta_file = "../data/train/meta/TRAIN_중도매_2018-2022.csv"
        elif item in group3:
            train_file = "../data/train/train_2.csv"
            meta_file = "../data/train/meta/TRAIN_소매_2018-2022.csv"
        
        # 필요한 열만 추려서 데이터 로드 및 병합
        train_data = process_data(train_file, meta_file, item)
        if item!='깐마늘(국산)':
            interpolate_zeros(train_data,'평년 평균가격(원) Common Year SOON')
            train_data['평균가격-평년가격'] = train_data['평균가격(원)']-train_data['평년 평균가격(원) Common Year SOON']
    
    
        fincols = ['YYYYMMSOON', '평균가격(원)'] + [col for col in config.fin_cols if col in train_data.columns]
        train_data = train_data[fincols]
        dome_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in train_dome.columns]
        train_dome_filtered = train_dome[dome_cols]
        train_data = pd.merge(train_data, train_dome_filtered, how='left', on='YYYYMMSOON')

        if item == '사과':
            deal_cols = ['거래일자'] + [col for col in config.fin_cols if col in deal_info.columns]
            deal_info_filtered = deal_info[deal_cols]
            train_data = pd.merge(train_data, deal_info_filtered, how='left', left_on='YYYYMMSOON', right_on='거래일자')
            # interpolate_zeros(train_data, '사과_평년 반입량 증감률(%)')
        
        final_columns = ['평균가격(원)'] + config.fin_cols
        train_data = train_data[final_columns]
        # print(train_data.columns) 
        for col in train_data.columns : interpolate_zeros(train_data,col  )
        
        offset = (config.year - 2018) * 36
        train_data = train_data.iloc[offset: (144 + (3 * 12)), :]
        train_data = train_data.fillna(0) 
        price_df = train_data.reset_index(drop=True)
        normalized_timedata, scaler = normalize_data(price_df) 
        train_dl = create_dataloader(normalized_timedata, config.window_size, config.forecast_size, config.batch_size)
        
        
        if item== '감자 수미' :model = LinModel(config) 
        else : model = LTSF_DLinear(config) 
        
        model.to(device)
        optimizer = get_optimizer(model, config)
        scheduler = get_scheduler(optimizer, config)
        criterion = NMAELoss()
        
        for ep in range(1, config.epoch + 1):
            model.train()
            batch_losses = []
            for idx, (data_batch, target) in enumerate(train_dl):
                data_batch, target = data_batch.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data_batch)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                batch_losses.append(loss.item())
            if ep % 5 == 0:                avg_loss = np.mean(batch_losses)
            
        os.makedirs('dl_weights2', exist_ok=True)
        model_save_path = f'dl_weights2/{item}_model2_win9.pth'
        torch.save(model.state_dict(), model_save_path)
        print(f"Model for {item} saved at {model_save_path}")
    return scaler

In [52]:
def train_dlinAttn2(품목리스트,config):
    # 건고추, 대파, 무, 배추, 양파, 배
    seed_everything(config.seed) 
    
    for item in 품목리스트:
        # 데이터 파일 로드
        if item in group1:
            train_file = "../data/train/train_1.csv"
            meta_file = "../data/train/meta/TRAIN_경락정보_가락도매_2018-2022.csv"
        elif item in group2:
            train_file = "../data/train/train_2.csv"
            meta_file = "../data/train/meta/TRAIN_중도매_2018-2022.csv"
        elif item in group3:
            train_file = "../data/train/train_2.csv"
            meta_file = "../data/train/meta/TRAIN_소매_2018-2022.csv"
        
        # 필요한 열만 추려서 데이터 로드 및 병합
        train_data = process_data(train_file, meta_file, item)
        # 평년 가격 보간 이후 평균가격-평년가격 생성 
        # if item !='무':
        #     interpolate_zeros(train_data,'평년 평균가격(원) Common Year SOON')
        #     train_data['평균가격-평년가격'] = train_data['평균가격(원)']-train_data['평년 평균가격(원) Common Year SOON']
        if item!='배':
            for col in train_data.columns : interpolate_zeros(train_data , col) 
        train_data['평균가격-평년가격'] = train_data['평균가격(원)']-train_data['평년 평균가격(원) Common Year SOON']
            
        
        # train_data에서 필요한 열 중 실제 존재하는 열만 선택
        fincols = ['YYYYMMSOON', '평균가격(원)'] + [col for col in config.fin_cols if col in train_data.columns]
        train_data = train_data[fincols]
        
        # train_dome에서도 필요한 열만 선택
        dome_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in train_dome.columns]
        train_dome_filtered = train_dome[dome_cols]
        train_data = pd.merge(train_data, train_dome_filtered, how='left', on='YYYYMMSOON')
        
        # 조건에 따라 추가적인 병합 처리
        if item in ['대파(일반)', '무']:
            joint_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in train_jointmarket.columns]
            train_jointmarket_filtered = train_jointmarket[joint_cols]
            train_data = pd.merge(train_data, train_jointmarket_filtered, how='left', on='YYYYMMSOON')
        elif item == '배':
            deal_cols = ['거래일자'] + [col for col in config.fin_cols if col in deal_info.columns]
            deal_info_filtered = deal_info[deal_cols]
            train_data = pd.merge(train_data, deal_info_filtered, how='left', left_on='YYYYMMSOON', right_on='거래일자')

        # if item=='배' :
        #     # interpolate_zeros(train_data, '배_신고_100000_고가(20%) 평균가')
        #     # for col in train_data.columns : interpolate_zeros(train_data, train_data.columns)
        final_columns = ['평균가격(원)'] + config.fin_cols 
        train_data = train_data[final_columns]
        print(train_data.columns) 
        
        offset = (config.year - 2018) * 36
        train_data = train_data.iloc[offset: (144 + (3 * 12)), :]
        train_data = train_data.fillna(0) 

        price_df = train_data.reset_index(drop=True)
        normalized_timedata, scaler = normalize_data(price_df) 
        train_dl = create_dataloader(normalized_timedata, config.window_size, config.forecast_size, config.batch_size)
        
        model = ModelWithMultiheadAttention(config)
        model.to(device)
        optimizer = get_optimizer(model, config)
        scheduler = get_scheduler(optimizer, config)
        criterion = NMAELoss()
        for ep in range(1, config.epoch + 1):
            model.train()
            batch_losses = []
            for idx, (data_batch, target) in enumerate(train_dl):
                data_batch, target = data_batch.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data_batch)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                batch_losses.append(loss.item())

            avg_loss = np.mean(batch_losses)
            if config.scheduler.lower()=='reduce_on_plateau':scheduler.step(avg_loss)
        os.makedirs('dl_weights2', exist_ok=True)
        model_save_path = f'dl_weights2/{item}_model2_win9.pth'
        torch.save(model.state_dict(), model_save_path)
        print(f"Model for {item} saved at {model_save_path}")
        
    return scaler


## 모델 학습 및 저장

### 단기 모델 학습 및 저장


In [53]:

potato_scaler1 = train_dlin1(['감자 수미'] , potato_config1())
pepper_scaler1 = train_dlinAttn1(['건고추'] , pepper_config1())
garlic_scaler1 = train_dlin1(['깐마늘(국산)'] , garlic_config1())
daepa_scaler1  = train_dlinAttn1(['대파(일반)'] , daepa_config1())
moo_scaler1    = train_dlinAttn1(['무'] , moo_config1())
cabbage_scaler1= train_dlinAttn1(['배추'] , cabbage_config1())
apple_scaler1  = train_dlin1(['사과'] , apple_config1())
lettuce_scaler1= train_dlin1(['상추'] , lettuce_config1())
onion_scaler1  = train_dlinAttn1(['양파'] , onion_config1())
pear_scaler1   = train_dlinAttn1(['배'] , pear_config1())

Model for 감자 수미 saved at dl_weights2/감자 수미_model1_win3.pth
Model for 건고추 saved at dl_weights2/건고추_model1_win3.pth
Model for 깐마늘(국산) saved at dl_weights2/깐마늘(국산)_model1_win3.pth
Model for 대파(일반) saved at dl_weights2/대파(일반)_model1_win3.pth
Model for 무 saved at dl_weights2/무_model1_win3.pth
Model for 배추 saved at dl_weights2/배추_model1_win3.pth
Model for 사과 saved at dl_weights2/사과_model1_win3.pth
Model for 상추 saved at dl_weights2/상추_model1_win3.pth
Model for 양파 saved at dl_weights2/양파_model1_win3.pth
Model for 배 saved at dl_weights2/배_model1_win3.pth


### 장기 모델 학습 및 저장

In [54]:

potato_scaler2 = train_dlin2(['감자 수미'] , potato_config2())
pepper_scaler2 = train_dlinAttn2(['건고추'] , pepper_config2())
garlic_scaler2 = train_dlin2(['깐마늘(국산)'] , garlic_config2())
daepa_scaler2  = train_dlinAttn2(['대파(일반)'] , daepa_config2())
moo_scaler2    = train_dlinAttn2(['무'] , moo_config2())
cabbage_scaler2= train_dlinAttn2(['배추'] , cabbage_config2())
apple_scaler2  = train_dlin2(['사과'] , apple_config2())
lettuce_scaler2= train_dlin2(['상추'] , lettuce_config2())
onion_scaler2  = train_dlinAttn2(['양파'] , onion_config2())
pear_scaler2   = train_dlinAttn2(['배'] , pear_config2())

Model for 감자 수미 saved at dl_weights2/감자 수미_model2_win9.pth
Index(['평균가격(원)', '평균가격-평년가격', '평균가격(원)_건고추_화건_중품_30'], dtype='object')
Model for 건고추 saved at dl_weights2/건고추_model2_win9.pth
Model for 깐마늘(국산) saved at dl_weights2/깐마늘(국산)_model2_win9.pth
Index(['평균가격(원)', '평균가격-평년가격', '평균가격(원)_쪽파_10키로상자_상',
       '대파_대파(일반)_100000_총반입량(kg)'],
      dtype='object')
Model for 대파(일반) saved at dl_weights2/대파(일반)_model2_win9.pth
Index(['평균가격(원)', '무_기타무_100000_평균가(원/kg)', '1000000000_무_기타무_11_경매 건수',
       '1000000000_무_기타무_11_총반입량(kg)'],
      dtype='object')
Model for 무 saved at dl_weights2/무_model2_win9.pth
Index(['평균가격(원)', '평균가격-평년가격', '평년 평균가격(원) Common Year SOON',
       '평균가격(원)_알배기배추_8키로상자_상', '배추_기타배추_100000_총반입량(kg)'],
      dtype='object')
Model for 배추 saved at dl_weights2/배추_model2_win9.pth
Model for 사과 saved at dl_weights2/사과_model2_win9.pth
Model for 상추 saved at dl_weights2/상추_model2_win9.pth
Index(['평균가격(원)', '평균가격-평년가격', '평균가격(원)_양파_12키로_상'], dtype='object')
Model for 양파 saved 

## 추론 시작 

In [55]:
def infer_dlin2(품목리스트, config, scaler) :
    # 감자는 Linear로 하기 
    seed_everything(config.seed)
    predicts={}
    
    for item in 품목리스트:
        # 모델 불러오기
        if item == '감자 수미' :model = LinModel(config) 
        else :model = LTSF_DLinear(config)
        
        model_path = f"dl_weights2/{item}_model2_win9.pth"
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model.eval()
        item_test_tensors = []
        
        for i in range(52):
            if item in group1:
                test_file = f"../data/test/TEST_{i:02d}_1.csv"
                meta_file = f"../data/test/meta/TEST_경락정보_가락도매_{i:02d}.csv"
            elif item in group2:
                test_file = f"../data/test/TEST_{i:02d}_2.csv"
                meta_file = f"../data/test/meta/TEST_중도매_{i:02d}.csv"
            elif item in group3:
                test_file = f"../data/test/TEST_{i:02d}_2.csv"
                meta_file = f"../data/test/meta/TEST_소매_{i:02d}.csv"
            test_data = process_data(test_file, meta_file, item)
            if item!='깐마늘(국산)':
                interpolate_zeros(test_data,'평년 평균가격(원) Common Year SOON')
                test_data['평균가격-평년가격'] = test_data['평균가격(원)']-test_data['평년 평균가격(원) Common Year SOON']
            fincols = ['YYYYMMSOON', '평균가격(원)'] + [col for col in config.fin_cols if col in test_data.columns]
            test_data = test_data[fincols]
            dome_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in test_dome.columns]
            test_dome_filtered = test_dome[dome_cols]
            test_data = pd.merge(test_data, test_dome_filtered, how='left', on='YYYYMMSOON')

            if item == '사과':
                deal_cols = ['거래일자'] + [col for col in config.fin_cols if col in deal_info.columns]
                deal_info_filtered = deal_info[deal_cols]
                test_data = pd.merge(test_data, deal_info_filtered, how='left', left_on='YYYYMMSOON', right_on='거래일자')
                interpolate_zeros(test_data, '사과_평년 반입량 증감률(%)')
            final_columns = ['평균가격(원)'] + config.fin_cols
            
            test_data = test_data[final_columns]
            # print(test_data.columns) 
            # for col in
            test_price_df = test_data.reset_index(drop=True)
            test_price_df = test_price_df.iloc[-1 * config.window_size:, :]
            normalized_testdata = scaler.transform(test_price_df)
            test_tensor = torch.tensor(normalized_testdata, dtype=torch.float32)
            item_test_tensors.append(test_tensor)
            
        item_test_batch = torch.stack(item_test_tensors).to(device)  # (52, window_size, feature_size)
        # 모델 예측
        with torch.no_grad():
            prediction = model(item_test_batch)
        prediction = prediction.cpu().numpy()  # Shape: (52, forecast_size, feature_size)
        product_predict = []
        for pred in prediction:
            inverse_pred = inverse_normalize(pred, scaler)
            product_predict.append(inverse_pred[:, 0])
        flatlist = np.concatenate(product_predict).tolist()
        predicts[item] = flatlist
    return predicts 



def infer_dlinAttn2(품목리스트,config, scaler) :
    seed_everything(config.seed)
    predicts={}
    
    for item in 품목리스트:
        # 모델 불러오기
        model = ModelWithMultiheadAttention(config)
        model_path = f"dl_weights2/{item}_model2_win9.pth"
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model.eval()
        item_test_tensors = []
        for i in range(52):
            if item in group1:
                test_file = f"../data/test/TEST_{i:02d}_1.csv"
                meta_file = f"../data/test/meta/TEST_경락정보_가락도매_{i:02d}.csv"
            elif item in group2:
                test_file = f"../data/test/TEST_{i:02d}_2.csv"
                meta_file = f"../data/test/meta/TEST_중도매_{i:02d}.csv"
            elif item in group3:
                test_file = f"../data/test/TEST_{i:02d}_2.csv"
                meta_file = f"../data/test/meta/TEST_소매_{i:02d}.csv"
            test_data = process_data(test_file, meta_file, item)
            if item !='무':
                
                interpolate_zeros(test_data,'평년 평균가격(원) Common Year SOON')
                test_data['평균가격-평년가격'] = test_data['평균가격(원)']-test_data['평년 평균가격(원) Common Year SOON']
                # if item =='배' :print('배  평년가격 평균 차이 계산 ')

            
            fincols = ['YYYYMMSOON', '평균가격(원)'] + [col for col in config.fin_cols if col in test_data.columns]
            test_data = test_data[fincols]
            
            dome_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in test_dome.columns]
            test_dome_filtered = test_dome[dome_cols]
            test_data = pd.merge(test_data, test_dome_filtered, how='left', on='YYYYMMSOON')
            
            if item in ['대파(일반)', '무']:
                joint_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in test_jointmarket.columns]
                test_jointmarket_filtered = test_jointmarket[joint_cols]
                test_data = pd.merge(test_data, test_jointmarket_filtered, how='left', on='YYYYMMSOON')
            elif item == '배':
                deal_cols = ['거래일자'] + [col for col in config.fin_cols if col in deal_info.columns]
                deal_info_filtered = deal_info[deal_cols]
                test_data = pd.merge(test_data, deal_info_filtered, how='left', left_on='YYYYMMSOON', right_on='거래일자')
                
            final_columns = ['평균가격(원)'] + config.fin_cols 
            test_data = test_data[final_columns]
            
            test_price_df = test_data.reset_index(drop=True)
            test_price_df = test_price_df.iloc[-1 * config.window_size:, :]
            normalized_testdata = scaler.transform(test_price_df)
            test_tensor = torch.tensor(normalized_testdata, dtype=torch.float32)
            item_test_tensors.append(test_tensor)
        item_test_batch = torch.stack(item_test_tensors).to(device)  # (52, window_size, feature_size)
        # 모델 예측
        with torch.no_grad():
            prediction = model(item_test_batch)
        prediction = prediction.cpu().numpy()  # Shape: (52, forecast_size, feature_size)
        product_predict = []
        for pred in prediction:
            inverse_pred = inverse_normalize(pred, scaler)
            product_predict.append(inverse_pred[:, 0])
        flatlist = np.concatenate(product_predict).tolist()
        predicts[item] = flatlist
    return predicts 



In [56]:
def infer_dlinAttn1(품목리스트,config, scaler):
    # windowsize = 3, forecasting size = 2 
    seed_everything(config.seed)
    predicts={}
    
    for item in 품목리스트:
        print(f"Processing {item}")
        # 모델 불러오기
        model = ModelWithMultiheadAttention(config)
        model_path = f"dl_weights2/{item}_model1_win{config.window_size}.pth"
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model.eval()
        item_test_tensors = []
        
        for i in range(52):
            if item in group1:
                test_file = f"../data/test/TEST_{i:02d}_1.csv"
                meta_file = f"../data/test/meta/TEST_경락정보_가락도매_{i:02d}.csv"
            elif item in group2:
                test_file = f"../data/test/TEST_{i:02d}_2.csv"
                meta_file = f"../data/test/meta/TEST_중도매_{i:02d}.csv"
            elif item in group3:
                test_file = f"../data/test/TEST_{i:02d}_2.csv"
                meta_file = f"../data/test/meta/TEST_소매_{i:02d}.csv"
            test_data = process_data(test_file, meta_file, item)
            fincols = ['YYYYMMSOON', '평균가격(원)'] + [col for col in config.fin_cols if col in test_data.columns]
            test_data = test_data[fincols]
            dome_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in test_dome.columns]
            test_dome_filtered = test_dome[dome_cols]
            test_data = pd.merge(test_data, test_dome_filtered, how='left', on='YYYYMMSOON')
            
            if item in ['대파(일반)', '무']:
                joint_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in test_jointmarket.columns]
                test_jointmarket_filtered = test_jointmarket[joint_cols]
                test_data = pd.merge(test_data, test_jointmarket_filtered, how='left', on='YYYYMMSOON')
            elif item == '배':
                deal_cols = ['거래일자'] + [col for col in config.fin_cols if col in deal_info.columns]
                deal_info_filtered = deal_info[deal_cols]
                test_data = pd.merge(test_data, deal_info_filtered, how='left', left_on='YYYYMMSOON', right_on='거래일자')
            
            final_columns = ['평균가격(원)'] + config.fin_cols
            test_data = test_data[final_columns]
            
            
            test_price_df = test_data.reset_index(drop=True)
            test_price_df = test_price_df.iloc[-1 * config.window_size:, :]
            normalized_testdata = scaler.transform(test_price_df)
            test_tensor = torch.tensor(normalized_testdata, dtype=torch.float32)
            item_test_tensors.append(test_tensor)
        
        # 품목별 테스트 텐서를 하나의 텐서로 결합
        item_test_batch = torch.stack(item_test_tensors).to(device)  # (52, window_size, feature_size)
        # 모델 예측
        with torch.no_grad():
            prediction = model(item_test_batch)
            
        prediction = prediction.cpu().numpy()  # Shape: (52, forecast_size, feature_size)
        product_predict = []
        for pred in prediction:
            inverse_pred = inverse_normalize(pred, scaler)
            extended_pred = np.append(inverse_pred[:, 0], 0)
            product_predict.append(extended_pred)
        flatlist = np.concatenate(product_predict).tolist()
        predicts[item] = flatlist
    return predicts 


def infer_dlin1(품목리스트, config, scaler):
    seed_everything(config.seed)
    predicts = {}
    
    for item in 품목리스트:
        print(f"Processing {item}")
        # 모델 불러오기
        model = LTSF_DLinear(config)
        model_path = f"dl_weights2/{item}_model1_win3.pth"
        
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        
        model.eval()
        
        # 품목별 테스트 데이터를 저장할 리스트
        item_test_tensors = []
        
        for i in range(52):
            if item in group1:
                test_file = f"../data/test/TEST_{i:02d}_1.csv"
                meta_file = f"../data/test/meta/TEST_경락정보_가락도매_{i:02d}.csv"
            elif item in group2:
                test_file = f"../data/test/TEST_{i:02d}_2.csv"
                meta_file = f"../data/test/meta/TEST_중도매_{i:02d}.csv"
            elif item in group3:
                test_file = f"../data/test/TEST_{i:02d}_2.csv"
                meta_file = f"../data/test/meta/TEST_소매_{i:02d}.csv"
            else:
                continue  # 해당 품목이 어떤 그룹에도 속하지 않으면 넘어감

            test_data = process_data(test_file, meta_file, item)
            fincols = ['YYYYMMSOON', '평균가격(원)'] + [col for col in config.fin_cols if col in test_data.columns]
            test_data = test_data[fincols]
            dome_cols = ['YYYYMMSOON'] + [col for col in config.fin_cols if col in test_dome.columns]
            test_dome_filtered = test_dome[dome_cols]
            test_data = pd.merge(test_data, test_dome_filtered, how='left', on='YYYYMMSOON')
            if item == '사과':
                deal_cols = ['거래일자'] + [col for col in config.fin_cols if col in deal_info.columns]
                deal_info_filtered = deal_info[deal_cols]
                test_data = pd.merge(test_data, deal_info_filtered, how='left', left_on='YYYYMMSOON', right_on='거래일자')
                interpolate_zeros(test_data, '사과_평년 반입량 증감률(%)')
                
                
            final_columns = ['평균가격(원)'] + config.fin_cols
            test_data = test_data[final_columns]
            
            
            
            test_price_df = test_data.reset_index(drop=True)
            test_price_df = test_price_df.iloc[-1 * config.window_size:, :]
            normalized_testdata = scaler.transform(test_price_df)
            test_tensor = torch.tensor(normalized_testdata, dtype=torch.float32)
            item_test_tensors.append(test_tensor)
        
        # 품목별 테스트 텐서를 하나의 텐서로 결합
        item_test_batch = torch.stack(item_test_tensors).to(device)  # (52, window_size, feature_size)
        
        # 모델 예측
        with torch.no_grad():
            prediction = model(item_test_batch)
        
        prediction = prediction.cpu().numpy()  # Shape: (52, forecast_size, feature_size)
        
        # 예측 결과 처리 및 길이 맞춤
        product_predict = []
        for pred in prediction:
            inverse_pred = inverse_normalize(pred, scaler)
            # forecast_size는 항상 2이므로, 예측 결과에 0을 추가하여 길이 맞춤
            extended_pred = np.append(inverse_pred[:, 0], 0)
            product_predict.append(extended_pred)
        
        flatlist = np.concatenate(product_predict).tolist()
        predicts[item] = flatlist
    
    return predicts


In [57]:
potato_preds1 = infer_dlin1(['감자 수미'] ,potato_config1(),potato_scaler1 )
garlic_preds1 = infer_dlin1(['깐마늘(국산)'] ,garlic_config1(),garlic_scaler1 ) 
apple_preds1 = infer_dlin1(['사과'] ,apple_config1(),apple_scaler1 ) 
lettuce_preds1 = infer_dlin1(['상추'] ,lettuce_config1(),lettuce_scaler1 )

pepper_preds1 = infer_dlinAttn1(['건고추'] ,pepper_config1(),pepper_scaler1 )
daepa_preds1 = infer_dlinAttn1(['대파(일반)'] ,daepa_config1(),daepa_scaler1 )
moo_preds1 = infer_dlinAttn1(['무'] ,moo_config1(),moo_scaler1 )
cabbage_preds1 = infer_dlinAttn1(['배추'] ,cabbage_config1(),cabbage_scaler1 )
onion_preds1 = infer_dlinAttn1(['양파'] ,onion_config1(),onion_scaler1 ) 
pear_preds1 = infer_dlinAttn1(['배'] ,pear_config1(),pear_scaler1 )


Processing 감자 수미
Processing 깐마늘(국산)
Processing 사과
Processing 상추
Processing 건고추
Processing 대파(일반)
Processing 무
Processing 배추
Processing 양파
Processing 배


In [58]:
# potato_preds2 = infer_dlin2(['감자 수미'] ,potato_config2(),potato_scaler2 )
garlic_preds2 = infer_dlin2(['깐마늘(국산)'] ,garlic_config2(),garlic_scaler2 ) 
apple_preds2 = infer_dlin2(['사과'] ,apple_config2(),apple_scaler2 ) 
lettuce_preds2 = infer_dlin2(['상추'] ,lettuce_config2(),lettuce_scaler2 )

pepper_preds2 = infer_dlinAttn2(['건고추'] ,pepper_config2(),pepper_scaler2 )
daepa_preds2 = infer_dlinAttn2(['대파(일반)'] ,daepa_config2(),daepa_scaler2 )
moo_preds2 = infer_dlinAttn2(['무'] ,moo_config2(),moo_scaler2 )
cabbage_preds2 = infer_dlinAttn2(['배추'] ,cabbage_config2(),cabbage_scaler2 )
onion_preds2 = infer_dlinAttn2(['양파'] ,onion_config2(),onion_scaler2 ) 
# pear_preds2 = infer_dlinAttn2(['배'] ,pear_config2(),pear_scaler2 )


## 제출파일 생성하기 

In [59]:
dl_model1 =pd.read_csv('../data/sample_submission.csv')
dl_model2 =pd.read_csv('../data/sample_submission.csv') 

품목_리스트 = ['potato', 'garlic', 'apple', 'lettuce', 'pepper', 'daepa', 'moo', 'cabbage', 'onion', 'pear']

# 각 품목별로 예측 결과를 dl_model1과 dl_model2에 업데이트
for item in 품목_리스트:
    # 동적으로 '_preds1'와 '_preds2' 변수를 참조
    preds1 = eval(f"{item}_preds1")
    preds2 = eval(f"{item}_preds2")
    
    # preds1의 예측값을 dl_model1에 추가
    for sub_item, prices in preds1.items():
        if sub_item in dl_model1.columns:
            dl_model1[sub_item] = prices

    # preds2의 예측값을 dl_model2에 추가
    for sub_item, prices in preds2.items():
        if sub_item in dl_model2.columns:
            dl_model2[sub_item] = prices