In [39]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [40]:
!unzip -qq "/content/drive/MyDrive/DS/open.zip"

replace sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace test/meta/TEST_산지공판장_00.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [41]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from types import SimpleNamespace
from sklearn.preprocessing import MinMaxScaler
import os

In [63]:
config = {
    "learning_rate": 0.0001,
    "epoch": 100,
    "batch_size": 64,
    "hidden_size": 128,
    "num_layers": 4,
    "output_size": 3,
    "num_heads": 4,
    "dropout": 0.2,
    "step_size": 10,
    "gamma": 0.8
}

CFG = SimpleNamespace(**config)

item_list = ['건고추', '사과', '감자', '배', '깐마늘(국산)', '무', '상추', '배추', '양파', '대파']

In [54]:
import re
from datetime import datetime

def parse_custom_date(date_str):
    # 접두사 'T-' 제거
    if date_str.startswith('T-'):
        date_str = date_str.lstrip('T-')

    # 정규식으로 날짜를 파싱
    match = re.match(r"(\d{4})(\d{2})(상순|중순|하순)", date_str)
    if match:
        year, month, part = match.groups()
        # 상순, 중순, 하순을 각각 1일, 11일, 21일로 매핑
        day = {"상순": "01", "중순": "11", "하순": "21"}[part]
        # 날짜 생성
        return datetime.strptime(f"{year}-{month}-{day}", "%Y-%m-%d")
    else:
        return None


In [55]:
def process_data(raw_file, 산지공판장_file, 전국도매_file, 품목명, scaler=None):
    raw_data = pd.read_csv(raw_file)
    산지공판장 = pd.read_csv(산지공판장_file)
    전국도매 = pd.read_csv(전국도매_file)

    # 타겟 및 메타데이터 필터 조건 정의
    conditions = {
    '감자': {
        'target': lambda df: (df['품종명'] == '감자 수미') & (df['거래단위'] == '20키로상자') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['감자'], '품종명': ['수미'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['감자'], '품종명': ['수미']}
    },
    '건고추': {
        'target': lambda df: (df['품종명'] == '화건') & (df['거래단위'] == '30 kg') & (df['등급'] == '상품'),
        '공판장': None,
        '도매': None
    },
    '깐마늘(국산)': {
        'target': lambda df: (df['거래단위'] == '20 kg') & (df['등급'] == '상품'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['마늘'], '품종명': ['깐마늘'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['마늘'], '품종명': ['깐마늘']}
    },
    '대파': {
        'target': lambda df: (df['품종명'] == '대파(일반)') & (df['거래단위'] == '1키로단') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['대파'], '품종명': ['대파(일반)'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['대파'], '품종명': ['대파(일반)']}
    },
    '무': {
        'target': lambda df: (df['거래단위'] == '20키로상자') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['무'], '품종명': ['기타무'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['무'], '품종명': ['무']}
    },
    '배추': {
        'target': lambda df: (df['거래단위'] == '10키로망대') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['배추'], '품종명': ['쌈배추'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['배추'], '품종명': ['배추']}
    },
    '사과': {
        'target': lambda df: (df['품종명'].isin(['홍로', '후지'])) & (df['거래단위'] == '10 개') & (df['등급'] == '상품'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['사과'], '품종명': ['후지'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['사과'], '품종명': ['후지']}
    },
    '상추': {
        'target': lambda df: (df['품종명'] == '청') & (df['거래단위'] == '100 g') & (df['등급'] == '상품'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['상추'], '품종명': ['청상추'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['상추'], '품종명': ['청상추']}
    },
    '양파': {
        'target': lambda df: (df['품종명'] == '양파') & (df['거래단위'] == '1키로') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['양파'], '품종명': ['기타양파'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['양파'], '품종명': ['양파(일반)']}
    },
    '배': {
        'target': lambda df: (df['품종명'] == '신고') & (df['거래단위'] == '10 개') & (df['등급'] == '상품'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['배'], '품종명': ['신고'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['배'], '품종명': ['신고']}
    }
    }

    # 타겟 데이터 필터링
    raw_품목 = raw_data[raw_data['품목명'] == 품목명]
    target_mask = conditions[품목명]['target'](raw_품목)
    filtered_data = raw_품목[target_mask]

    # 다른 품종에 대한 파생변수 생성
    other_data = raw_품목[~target_mask]
    unique_combinations = other_data[['품종명', '거래단위', '등급']].drop_duplicates()
    for _, row in unique_combinations.iterrows():
        품종명, 거래단위, 등급 = row['품종명'], row['거래단위'], row['등급']
        mask = (other_data['품종명'] == 품종명) & (other_data['거래단위'] == 거래단위) & (other_data['등급'] == 등급)
        temp_df = other_data[mask]
        for col in ['평년 평균가격(원)', '평균가격(원)']:
            new_col_name = f'{품종명}_{거래단위}_{등급}_{col}'
            filtered_data = filtered_data.merge(temp_df[['시점', col]], on='시점', how='left', suffixes=('', f'_{new_col_name}'))
            filtered_data.rename(columns={f'{col}_{new_col_name}': new_col_name}, inplace=True)


    # 공판장 데이터 처리
    if conditions[품목명]['공판장']:
        filtered_공판장 = 산지공판장
        for key, value in conditions[품목명]['공판장'].items():
            filtered_공판장 = filtered_공판장[filtered_공판장[key].isin(value)]

        filtered_공판장 = filtered_공판장.add_prefix('공판장_').rename(columns={'공판장_시점': '시점'})
        filtered_data = filtered_data.merge(filtered_공판장, on='시점', how='left')

    # 도매 데이터 처리
    if conditions[품목명]['도매']:
        filtered_도매 = 전국도매
        for key, value in conditions[품목명]['도매'].items():
            filtered_도매 = filtered_도매[filtered_도매[key].isin(value)]

        filtered_도매 = filtered_도매.add_prefix('도매_').rename(columns={'도매_시점': '시점'})
        filtered_data = filtered_data.merge(filtered_도매, on='시점', how='left')

    ### 날짜변환
    filtered_data['시점'] = filtered_data['시점'].apply(parse_custom_date)

    # 수치형 컬럼 처리
    numeric_columns = filtered_data.select_dtypes(include=[np.number]).columns
    filtered_data = filtered_data[['시점'] + list(numeric_columns)]
    filtered_data[numeric_columns] = filtered_data[numeric_columns].fillna(0)

    ### 결측치 처리: 선형 보간법
    filtered_data[numeric_columns] = filtered_data[numeric_columns].interpolate(method='linear', limit_direction='both')

    ### 이상치 처리: IQR 방법 ###
#    for col in numeric_columns:
#        Q1 = filtered_data[col].quantile(0.25)
#        Q3 = filtered_data[col].quantile(0.75)
#        IQR = Q3 - Q1
#        lower_bound = Q1 - 1.5 * IQR
#        upper_bound = Q3 + 1.5 * IQR
#        filtered_data = filtered_data[(filtered_data[col] >= lower_bound) & (filtered_data[col] <= upper_bound)]
    #######

#####mentori님의 이상치 처리 부분 참######
    for col in numeric_columns:
        for i in range(1, len(filtered_data)):
            if filtered_data.loc[i, col] == 0:  # 0인 값을 이상치로 간주
                filtered_data.loc[i, col] = filtered_data.loc[i - 1, col]  # 이전 값으로 대체

    # 평균가격 값이 0인 것이 일정 수준 이하라면 그 칼럼을 제거한다.
    drop_columns = []
    for col in filtered_data.columns:
        zero_cols = len(filtered_data[filtered_data[col]==0])
        if zero_cols/len(filtered_data) > 0.5:
            drop_columns.append(col)
###################################################

    # 정규화 적용
    if scaler is None:
        scaler = MinMaxScaler()
        filtered_data[numeric_columns] = scaler.fit_transform(filtered_data[numeric_columns])
    else:
        filtered_data[numeric_columns] = scaler.transform(filtered_data[numeric_columns])

    return filtered_data, scaler


In [56]:
# Define custom dataset class
class AgriculturePriceDataset(Dataset):
    def __init__(self, dataframe, window_size=9, prediction_length=3, is_test=False):
        self.data = dataframe
        self.window_size = window_size
        self.prediction_length = prediction_length
        self.is_test = is_test

        self.price_column = [col for col in self.data.columns if '평균가격(원)' in col and len(col.split('_')) == 1][0]
        self.numeric_columns = self.data.select_dtypes(include=[np.number]).columns.tolist()

        self.sequences = []
        if not self.is_test:
            for i in range(len(self.data) - self.window_size - self.prediction_length + 1):
                x = self.data[self.numeric_columns].iloc[i:i+self.window_size].values
                y = self.data[self.price_column].iloc[i+self.window_size:i+self.window_size+self.prediction_length].values
                self.sequences.append((x, y))
        else:
            self.sequences = [self.data[self.numeric_columns].values]

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        if not self.is_test:
            x, y = self.sequences[idx]
            return torch.FloatTensor(x), torch.FloatTensor(y)
        else:
            return torch.FloatTensor(self.sequences[idx])

In [57]:
# Define Time2Vec layer
class Time2Vec(nn.Module):
    def __init__(self, input_dim):
        super(Time2Vec, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
        self.periodic = nn.Linear(input_dim, input_dim-1)

    def forward(self, x):
        linear_out = self.linear(x)
        periodic_out = torch.sin(self.periodic(x))
        return torch.cat([linear_out, periodic_out], dim=-1)

# Define Transformer Encoder Block
class TransformerBlock(nn.Module):
    def __init__(self, input_dim, num_heads, dropout):
        super(TransformerBlock, self).__init__()
        self.attention = nn.MultiheadAttention(input_dim, num_heads, dropout=dropout)
        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)
        self.ff = nn.Sequential(
            nn.Linear(input_dim, 4 * input_dim),
            nn.GELU(),
            nn.Linear(4 * input_dim, input_dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        attended, _ = self.attention(x, x, x)
        x = self.norm1(attended + x)
        feedforward = self.ff(x)
        return self.norm2(feedforward + x)

# Define main model architecture
class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_heads, output_size, dropout):
        super(TimeSeriesTransformer, self).__init__()
        self.time2vec = Time2Vec(input_size)
        self.embedding = nn.Linear(input_size, hidden_size)
        self.position_encoding = self.generate_position_encoding(hidden_size, 10)
        self.dropout = nn.Dropout(dropout)

        self.transformer_blocks = nn.ModuleList([
            TransformerBlock(hidden_size, num_heads, dropout)
            for _ in range(num_layers)
        ])

        self.output_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 2, output_size)
        )
    def generate_position_encoding(self, hidden_size, max_len):
        pe = torch.zeros(max_len, hidden_size)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, hidden_size, 2).float() * (-np.log(10000.0) / hidden_size))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        return pe.unsqueeze(0)

    def forward(self, x):
        b, s, f = x.shape
        x = self.time2vec(x)
        x = self.embedding(x)
        x = x + self.position_encoding[:, :s, :].to(x.device)
        x = self.dropout(x)

        for transformer in self.transformer_blocks:
            x = transformer(x)

        x = x.mean(dim=1)
        return self.output_layer(x)


In [58]:
# Training function with mixed precision training
def train_model(model, train_loader, criterion, optimizer, scheduler, scaler, device):
    model.train()
    total_loss = 0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)  # 데이터를 GPU로 이동
        optimizer.zero_grad()

        with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        scheduler.step()
        total_loss += loss.item()

    return total_loss / len(train_loader)


In [59]:
# Evaluation function
def evaluate_model(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)  # 데이터를 GPU로 이동
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            total_loss += loss.item()
    return total_loss / len(test_loader)

In [60]:
class NMAELoss(nn.Module):
    def __init__(self):
        super(NMAELoss, self).__init__()

    def forward(self, y_pred, y_true):
        mae = torch.abs(y_pred - y_true).mean()
        # y_true의 평균을 기준으로 정규화
        nmae = mae / torch.mean(torch.abs(y_true))
        return nmae

In [61]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
from torch.cuda.amp import GradScaler
from torch.cuda.amp import autocast


품목별_predictions = {}
품목별_scalers = {}

pbar_outer = tqdm(item_list, desc="품목 처리 중", position=0)
for 품목명 in pbar_outer:
    pbar_outer.set_description(f"품목별 전처리 및 모델 학습 -> {품목명}")
    train_data, scaler = process_data("./train/train.csv",
                              "./train/meta/TRAIN_산지공판장_2018-2021.csv",
                              "./train/meta/TRAIN_전국도매_2018-2021.csv",
                              품목명)
    품목별_scalers[품목명] = scaler
    dataset = AgriculturePriceDataset(train_data)

    # 데이터를 train과 validation으로 분할
    train_data, val_data = train_test_split(dataset, test_size=0.2, random_state=42)

    train_loader = DataLoader(train_data, CFG.batch_size, shuffle=True)
    val_loader = DataLoader(val_data, CFG.batch_size, shuffle=False)

    input_size = len(dataset.numeric_columns)

    model = TimeSeriesTransformer(
            input_size=input_size,
            hidden_size=CFG.hidden_size,
            num_layers=CFG.num_layers,
            num_heads=CFG.num_heads,
            output_size=CFG.output_size,
            dropout=CFG.dropout
        ).to(device)
    criterion = NMAELoss()
    optimizer = torch.optim.Adam(model.parameters(), CFG.learning_rate)

    best_val_loss = float('inf')
    os.makedirs('models', exist_ok=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=CFG.step_size, gamma=CFG.gamma)
    scaler = GradScaler()

    for epoch in range(CFG.epoch):
        train_loss = train_model(
            model, train_loader, criterion, optimizer, scheduler, scaler, device
        )
        val_loss = evaluate_model(model, val_loader, criterion, device)
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f'models/best_model_{품목명}.pth')

        print(f'Epoch {epoch+1}/{CFG.epoch}, Train Loss: {train_loss:.4f}')
    print(f'Best Validation Loss for {품목명}: {best_val_loss:.4f}')

    품목_predictions = []

    ### 추론
    pbar_inner = tqdm(range(25), desc="테스트 파일 추론 중", position=1, leave=False)
    for i in pbar_inner:
        test_file = f"./test/TEST_{i:02d}.csv"
        산지공판장_file = f"./test/meta/TEST_산지공판장_{i:02d}.csv"
        전국도매_file = f"./test/meta/TEST_전국도매_{i:02d}.csv"

        test_data, _ = process_data(test_file, 산지공판장_file, 전국도매_file, 품목명, scaler=품목별_scalers[품목명])
        test_dataset = AgriculturePriceDataset(test_data, is_test=True)
        test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

        model.eval()
        predictions = []
        with torch.no_grad():
            for batch in test_loader:
                batch = batch.to(device)  # 입력 데이터를 GPU로 이동
                output = model(batch)    # 모델과 데이터가 GPU에서 처리
                predictions.append(output.cpu().numpy())

        predictions_array = np.concatenate(predictions)

        # 예측값을 원래 스케일로 복원
        price_column_index = test_data.columns.get_loc(test_dataset.price_column)
        predictions_reshaped = predictions_array.reshape(-1, 1)

        # 가격 열에 대해서만 inverse_transform 적용
        price_scaler = MinMaxScaler()
        price_scaler.min_ = 품목별_scalers[품목명].min_[price_column_index]
        price_scaler.scale_ = 품목별_scalers[품목명].scale_[price_column_index]
        predictions_original_scale = price_scaler.inverse_transform(predictions_reshaped)
        #print(predictions_original_scale)

        if np.isnan(predictions_original_scale).any():
            pbar_inner.set_postfix({"상태": "NaN"})
        else:
            pbar_inner.set_postfix({"상태": "정상"})
            품목_predictions.extend(predictions_original_scale.flatten())


    품목별_predictions[품목명] = 품목_predictions
    pbar_outer.update(1)

품목 처리 중:   0%|          | 0/10 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: 1.1739
Epoch 2/100, Train Loss: 1.0024
Epoch 3/100, Train Loss: 0.5657
Epoch 4/100, Train Loss: 0.6564
Epoch 5/100, Train Loss: 0.6307
Epoch 6/100, Train Loss: 0.5614
Epoch 7/100, Train Loss: 0.5271
Epoch 8/100, Train Loss: 0.4690
Epoch 9/100, Train Loss: 0.4916
Epoch 10/100, Train Loss: 0.4690
Epoch 11/100, Train Loss: 0.5063
Epoch 12/100, Train Loss: 0.5271
Epoch 13/100, Train Loss: 0.4813
Epoch 14/100, Train Loss: 0.5100
Epoch 15/100, Train Loss: 0.4746
Epoch 16/100, Train Loss: 0.4864
Epoch 17/100, Train Loss: 0.4604
Epoch 18/100, Train Loss: 0.4771
Epoch 19/100, Train Loss: 0.4475
Epoch 20/100, Train Loss: 0.4592
Epoch 21/100, Train Loss: 0.4765
Epoch 22/100, Train Loss: 0.4289
Epoch 23/100, Train Loss: 0.4451
Epoch 24/100, Train Loss: 0.4200
Epoch 25/100, Train Loss: 0.4241
Epoch 26/100, Train Loss: 0.4820
Epoch 27/100, Train Loss: 0.4522
Epoch 28/100, Train Loss: 0.4468
Epoch 29/100, Train Loss: 0.4640
Epoch 30/100, Train Loss: 0.4725
Epoch 31/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: 1.0210
Epoch 2/100, Train Loss: 0.8367
Epoch 3/100, Train Loss: 0.8580
Epoch 4/100, Train Loss: 0.7421
Epoch 5/100, Train Loss: 0.6737
Epoch 6/100, Train Loss: 0.6668
Epoch 7/100, Train Loss: 0.6510
Epoch 8/100, Train Loss: 0.6746
Epoch 9/100, Train Loss: 0.5977
Epoch 10/100, Train Loss: 0.5982
Epoch 11/100, Train Loss: 0.6301
Epoch 12/100, Train Loss: 0.6721
Epoch 13/100, Train Loss: 0.6261
Epoch 14/100, Train Loss: 0.5799
Epoch 15/100, Train Loss: 0.5858
Epoch 16/100, Train Loss: 0.6015
Epoch 17/100, Train Loss: 0.6075
Epoch 18/100, Train Loss: 0.5730
Epoch 19/100, Train Loss: 0.5577
Epoch 20/100, Train Loss: 0.5736
Epoch 21/100, Train Loss: 0.5873
Epoch 22/100, Train Loss: 0.5831
Epoch 23/100, Train Loss: 0.6036
Epoch 24/100, Train Loss: 0.5728
Epoch 25/100, Train Loss: 0.5822
Epoch 26/100, Train Loss: 0.5397
Epoch 27/100, Train Loss: 0.5578
Epoch 28/100, Train Loss: 0.5571
Epoch 29/100, Train Loss: 0.6150
Epoch 30/100, Train Loss: 0.5705
Epoch 31/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: 1.4004
Epoch 2/100, Train Loss: 1.1647
Epoch 3/100, Train Loss: 0.6019
Epoch 4/100, Train Loss: 0.5465
Epoch 5/100, Train Loss: 0.5408
Epoch 6/100, Train Loss: 0.5580
Epoch 7/100, Train Loss: 0.5789
Epoch 8/100, Train Loss: 0.5467
Epoch 9/100, Train Loss: 0.5231
Epoch 10/100, Train Loss: 0.5702
Epoch 11/100, Train Loss: 0.5178
Epoch 12/100, Train Loss: 0.5326
Epoch 13/100, Train Loss: 0.5472
Epoch 14/100, Train Loss: 0.4913
Epoch 15/100, Train Loss: 0.4751
Epoch 16/100, Train Loss: 0.5091
Epoch 17/100, Train Loss: 0.5055
Epoch 18/100, Train Loss: 0.5416
Epoch 19/100, Train Loss: 0.4959
Epoch 20/100, Train Loss: 0.4895
Epoch 21/100, Train Loss: 0.4993
Epoch 22/100, Train Loss: 0.4592
Epoch 23/100, Train Loss: 0.5408
Epoch 24/100, Train Loss: 0.4905
Epoch 25/100, Train Loss: 0.4941
Epoch 26/100, Train Loss: 0.4752
Epoch 27/100, Train Loss: 0.4910
Epoch 28/100, Train Loss: 0.4844
Epoch 29/100, Train Loss: 0.4890
Epoch 30/100, Train Loss: 0.4933
Epoch 31/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: 0.8396
Epoch 2/100, Train Loss: 0.4648
Epoch 3/100, Train Loss: 0.5513
Epoch 4/100, Train Loss: 0.5077
Epoch 5/100, Train Loss: 0.4270
Epoch 6/100, Train Loss: 0.5018
Epoch 7/100, Train Loss: 0.4742
Epoch 8/100, Train Loss: 0.4510
Epoch 9/100, Train Loss: 0.4345
Epoch 10/100, Train Loss: 0.4585
Epoch 11/100, Train Loss: 0.4147
Epoch 12/100, Train Loss: 0.4211
Epoch 13/100, Train Loss: 0.4263
Epoch 14/100, Train Loss: 0.4283
Epoch 15/100, Train Loss: 0.4389
Epoch 16/100, Train Loss: 0.4315
Epoch 17/100, Train Loss: 0.4034
Epoch 18/100, Train Loss: 0.4004
Epoch 19/100, Train Loss: 0.4075
Epoch 20/100, Train Loss: 0.4104
Epoch 21/100, Train Loss: 0.4279
Epoch 22/100, Train Loss: 0.4085
Epoch 23/100, Train Loss: 0.3849
Epoch 24/100, Train Loss: 0.3951
Epoch 25/100, Train Loss: 0.4006
Epoch 26/100, Train Loss: 0.3888
Epoch 27/100, Train Loss: 0.4238
Epoch 28/100, Train Loss: 0.4036
Epoch 29/100, Train Loss: 0.3909
Epoch 30/100, Train Loss: 0.3875
Epoch 31/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: inf
Epoch 2/100, Train Loss: inf
Epoch 3/100, Train Loss: inf
Epoch 4/100, Train Loss: inf
Epoch 5/100, Train Loss: inf
Epoch 6/100, Train Loss: inf
Epoch 7/100, Train Loss: inf
Epoch 8/100, Train Loss: inf
Epoch 9/100, Train Loss: inf
Epoch 10/100, Train Loss: inf
Epoch 11/100, Train Loss: inf
Epoch 12/100, Train Loss: inf
Epoch 13/100, Train Loss: inf
Epoch 14/100, Train Loss: inf
Epoch 15/100, Train Loss: inf
Epoch 16/100, Train Loss: inf
Epoch 17/100, Train Loss: inf
Epoch 18/100, Train Loss: inf
Epoch 19/100, Train Loss: inf
Epoch 20/100, Train Loss: inf
Epoch 21/100, Train Loss: inf
Epoch 22/100, Train Loss: inf
Epoch 23/100, Train Loss: inf
Epoch 24/100, Train Loss: inf
Epoch 25/100, Train Loss: inf
Epoch 26/100, Train Loss: inf
Epoch 27/100, Train Loss: inf
Epoch 28/100, Train Loss: inf
Epoch 29/100, Train Loss: inf
Epoch 30/100, Train Loss: inf
Epoch 31/100, Train Loss: inf
Epoch 32/100, Train Loss: inf
Epoch 33/100, Train Loss: inf
Epoch 34/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: inf
Epoch 2/100, Train Loss: inf
Epoch 3/100, Train Loss: inf
Epoch 4/100, Train Loss: inf
Epoch 5/100, Train Loss: inf
Epoch 6/100, Train Loss: inf
Epoch 7/100, Train Loss: inf
Epoch 8/100, Train Loss: inf
Epoch 9/100, Train Loss: inf
Epoch 10/100, Train Loss: inf
Epoch 11/100, Train Loss: inf
Epoch 12/100, Train Loss: inf
Epoch 13/100, Train Loss: inf
Epoch 14/100, Train Loss: inf
Epoch 15/100, Train Loss: inf
Epoch 16/100, Train Loss: inf
Epoch 17/100, Train Loss: inf
Epoch 18/100, Train Loss: inf
Epoch 19/100, Train Loss: inf
Epoch 20/100, Train Loss: inf
Epoch 21/100, Train Loss: inf
Epoch 22/100, Train Loss: inf
Epoch 23/100, Train Loss: inf
Epoch 24/100, Train Loss: inf
Epoch 25/100, Train Loss: inf
Epoch 26/100, Train Loss: inf
Epoch 27/100, Train Loss: inf
Epoch 28/100, Train Loss: inf
Epoch 29/100, Train Loss: inf
Epoch 30/100, Train Loss: inf
Epoch 31/100, Train Loss: inf
Epoch 32/100, Train Loss: inf
Epoch 33/100, Train Loss: inf
Epoch 34/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: 1.1001
Epoch 2/100, Train Loss: 0.9389
Epoch 3/100, Train Loss: 0.9111
Epoch 4/100, Train Loss: 0.7768
Epoch 5/100, Train Loss: 0.7258
Epoch 6/100, Train Loss: 0.6590
Epoch 7/100, Train Loss: 0.6802
Epoch 8/100, Train Loss: 0.6395
Epoch 9/100, Train Loss: 0.6569
Epoch 10/100, Train Loss: 0.6775
Epoch 11/100, Train Loss: 0.6629
Epoch 12/100, Train Loss: 0.6821
Epoch 13/100, Train Loss: 0.6882
Epoch 14/100, Train Loss: 0.6926
Epoch 15/100, Train Loss: 0.6557
Epoch 16/100, Train Loss: 0.6685
Epoch 17/100, Train Loss: 0.6511
Epoch 18/100, Train Loss: 0.6528
Epoch 19/100, Train Loss: 0.6754
Epoch 20/100, Train Loss: 0.6530
Epoch 21/100, Train Loss: 0.6897
Epoch 22/100, Train Loss: 0.6503
Epoch 23/100, Train Loss: 0.6333
Epoch 24/100, Train Loss: 0.6406
Epoch 25/100, Train Loss: 0.6253
Epoch 26/100, Train Loss: 0.6585
Epoch 27/100, Train Loss: 0.6384
Epoch 28/100, Train Loss: 0.6859
Epoch 29/100, Train Loss: 0.6576
Epoch 30/100, Train Loss: 0.6770
Epoch 31/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: 0.6997
Epoch 2/100, Train Loss: 0.7969
Epoch 3/100, Train Loss: 0.6848
Epoch 4/100, Train Loss: 0.6458
Epoch 5/100, Train Loss: 0.5735
Epoch 6/100, Train Loss: 0.6371
Epoch 7/100, Train Loss: 0.6100
Epoch 8/100, Train Loss: 0.6031
Epoch 9/100, Train Loss: 0.5529
Epoch 10/100, Train Loss: 0.5602
Epoch 11/100, Train Loss: 0.5663
Epoch 12/100, Train Loss: 0.5248
Epoch 13/100, Train Loss: 0.5595
Epoch 14/100, Train Loss: 0.5878
Epoch 15/100, Train Loss: 0.5522
Epoch 16/100, Train Loss: 0.5655
Epoch 17/100, Train Loss: 0.5615
Epoch 18/100, Train Loss: 0.5461
Epoch 19/100, Train Loss: 0.5509
Epoch 20/100, Train Loss: 0.5484
Epoch 21/100, Train Loss: 0.5568
Epoch 22/100, Train Loss: 0.5394
Epoch 23/100, Train Loss: 0.5568
Epoch 24/100, Train Loss: 0.5331
Epoch 25/100, Train Loss: 0.5522
Epoch 26/100, Train Loss: 0.5434
Epoch 27/100, Train Loss: 0.5250
Epoch 28/100, Train Loss: 0.5446
Epoch 29/100, Train Loss: 0.5453
Epoch 30/100, Train Loss: 0.5239
Epoch 31/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: 1.0239
Epoch 2/100, Train Loss: 0.4877
Epoch 3/100, Train Loss: 0.5156
Epoch 4/100, Train Loss: 0.5013
Epoch 5/100, Train Loss: 0.3423
Epoch 6/100, Train Loss: 0.4015
Epoch 7/100, Train Loss: 0.3876
Epoch 8/100, Train Loss: 0.3391
Epoch 9/100, Train Loss: 0.3638
Epoch 10/100, Train Loss: 0.3024
Epoch 11/100, Train Loss: 0.3080
Epoch 12/100, Train Loss: 0.3229
Epoch 13/100, Train Loss: 0.3018
Epoch 14/100, Train Loss: 0.3154
Epoch 15/100, Train Loss: 0.3123
Epoch 16/100, Train Loss: 0.3108
Epoch 17/100, Train Loss: 0.3124
Epoch 18/100, Train Loss: 0.2999
Epoch 19/100, Train Loss: 0.3152
Epoch 20/100, Train Loss: 0.2932
Epoch 21/100, Train Loss: 0.3062
Epoch 22/100, Train Loss: 0.2751
Epoch 23/100, Train Loss: 0.2883
Epoch 24/100, Train Loss: 0.2919
Epoch 25/100, Train Loss: 0.3183
Epoch 26/100, Train Loss: 0.3109
Epoch 27/100, Train Loss: 0.2881
Epoch 28/100, Train Loss: 0.2956
Epoch 29/100, Train Loss: 0.2929
Epoch 30/100, Train Loss: 0.3038
Epoch 31/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data[numeric_columns] = filtered_data[numeric_columns].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data[numeric_columns] = filtered_data[numeric_columns].interpolate(method='linear', limit_direction='both')
  scaler = GradScaler()
  with torch.cuda.amp.autocast():  # GPU에서 Mixed Precision Training


Epoch 1/100, Train Loss: 0.9420
Epoch 2/100, Train Loss: 0.6839
Epoch 3/100, Train Loss: 0.4462
Epoch 4/100, Train Loss: 0.4965
Epoch 5/100, Train Loss: 0.4028
Epoch 6/100, Train Loss: 0.3905
Epoch 7/100, Train Loss: 0.4279
Epoch 8/100, Train Loss: 0.4146
Epoch 9/100, Train Loss: 0.4314
Epoch 10/100, Train Loss: 0.3550
Epoch 11/100, Train Loss: 0.3824
Epoch 12/100, Train Loss: 0.3534
Epoch 13/100, Train Loss: 0.3787
Epoch 14/100, Train Loss: 0.3895
Epoch 15/100, Train Loss: 0.3642
Epoch 16/100, Train Loss: 0.3333
Epoch 17/100, Train Loss: 0.3641
Epoch 18/100, Train Loss: 0.3257
Epoch 19/100, Train Loss: 0.3702
Epoch 20/100, Train Loss: 0.3750
Epoch 21/100, Train Loss: 0.3562
Epoch 22/100, Train Loss: 0.3386
Epoch 23/100, Train Loss: 0.3684
Epoch 24/100, Train Loss: 0.3518
Epoch 25/100, Train Loss: 0.3529
Epoch 26/100, Train Loss: 0.3467
Epoch 27/100, Train Loss: 0.3419
Epoch 28/100, Train Loss: 0.3487
Epoch 29/100, Train Loss: 0.3717
Epoch 30/100, Train Loss: 0.3427
Epoch 31/100, Train

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

In [65]:
sample_submission = pd.read_csv('./sample_submission.csv')

for 품목명, predictions in 품목별_predictions.items():
    sample_submission[품목명] = predictions

# 결과 저장
sample_submission.to_csv('./baseline_submission11.csv', index=False)

ValueError: Length of values (25) does not match length of index (75)