# Library Import & Settings

In [2]:
import pandas as pd
import numpy as np
import lightgbm
from tqdm import tqdm
import warnings

In [3]:
# 경고 끄기
pd.set_option('mode.chained_assignment', None)
warnings.filterwarnings(action='ignore')

# 전처리

## lag_feature 추가 및 기타 전처리

In [4]:
def preprocessing(temp_df, pum, len_lag) :
    # p_lag, q_lag 추가
    for lag in range(1,len_lag+1) :
      temp_df[f'p_lag_{lag}'] = -1
      temp_df[f'q_lag_{lag}'] = -1
      for index in range(lag, len(temp_df)) :
        temp_df.loc[index, f'p_lag_{lag}'] = temp_df[f'{pum}_가격(원/kg)'][index-lag] #1일전, 2일전, ... 가격을 feature로 추가
        temp_df.loc[index, f'q_lag_{lag}'] = temp_df[f'{pum}_거래량(kg)'][index-lag] #1일전, 2일전, ... 거래량을 feature로 추가

    # month 추가
    temp_df['date'] = pd.to_datetime(temp_df['date'])
    temp_df['month'] = temp_df['date'].dt.month

    # 예측 대상(1w,2w,4w) 추가
    for week in ['1_week','2_week','4_week'] :
      temp_df[week] = 0
      n_week = int(week[0])
      for index in range(len(temp_df)) :
        try : temp_df[week][index] = temp_df[f'{pum}_가격(원/kg)'][index+7*n_week]
        except : continue

    # 불필요한 column 제거        
    temp_df = temp_df.drop(['date',f'{pum}_거래량(kg)',f'{pum}_가격(원/kg)'], axis=1)
    
    return temp_df

In [5]:
train = pd.read_csv('C:/Users/User/Desktop/workspace/vsc/real_con/train.csv')
train.head(2)

Unnamed: 0.1,Unnamed: 0,date,배추_거래량(kg),배추_가격(원/kg),무_거래량(kg),무_가격(원/kg),양파_거래량(kg),양파_가격(원/kg),건고추_거래량(kg),건고추_가격(원/kg),...,청상추_거래량(kg),청상추_가격(원/kg),백다다기_거래량(kg),백다다기_가격(원/kg),애호박_거래량(kg),애호박_가격(원/kg),캠벨얼리_거래량(kg),캠벨얼리_가격(원/kg),샤인마스캇_거래량(kg),샤인마스캇_가격(원/kg)
0,0,2016-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0
1,1,2016-01-02,80860.0,329.430497,80272.0,359.977327,122787.5,1280.691438,3.0,11000.0,...,5125.0,9235,434.0,2109,19159.0,2414,880.0,2014,0.0,0


In [7]:
# preprocessing 함수 예시
pum = '배추'
temp_df = train[['date',f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']]
preprocessing(temp_df, pum, len_lag=28)

Unnamed: 0,p_lag_1,q_lag_1,p_lag_2,q_lag_2,p_lag_3,q_lag_3,p_lag_4,q_lag_4,p_lag_5,q_lag_5,...,p_lag_26,q_lag_26,p_lag_27,q_lag_27,p_lag_28,q_lag_28,month,1_week,2_week,4_week
0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.0,...,-1.000000,-1.00,-1.000000,-1.00,-1.000000,-1.00,1,388,474,596
1,0.000000,0.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.0,...,-1.000000,-1.00,-1.000000,-1.00,-1.000000,-1.00,1,398,510,674
2,329.430497,80860.0,0.000000,0.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.0,...,-1.000000,-1.00,-1.000000,-1.00,-1.000000,-1.00,1,431,511,607
3,477.632718,1422742.5,329.430497,80860.0,0.000000,0.0,-1.000000,-1.0,-1.000000,-1.0,...,-1.000000,-1.00,-1.000000,-1.00,-1.000000,-1.00,1,428,572,601
4,441.666329,1167241.0,477.632718,1422742.5,329.430497,80860.0,0.000000,0.0,-1.000000,-1.0,...,-1.000000,-1.00,-1.000000,-1.00,-1.000000,-1.00,1,440,626,662
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1808,0.000000,0.0,1458.900037,94185.0,1144.799196,128020.6,1377.193210,1014318.2,928.394759,2104179.0,...,908.316988,574010.90,686.071116,1370607.40,643.760395,130467.40,9,0,0,0
1809,1464.405125,20216.0,0.000000,0.0,1458.900037,94185.0,1144.799196,128020.6,1377.193210,1014318.2,...,636.492440,1190878.35,908.316988,574010.90,686.071116,1370607.40,9,0,0,0
1810,793.828249,1981369.6,1464.405125,20216.0,0.000000,0.0,1458.900037,94185.0,1144.799196,128020.6,...,684.944474,1292200.30,636.492440,1190878.35,908.316988,574010.90,9,0,0,0
1811,695.632033,1608104.0,793.828249,1981369.6,1464.405125,20216.0,0.000000,0.0,1458.900037,94185.0,...,652.135704,1132008.20,684.944474,1292200.30,636.492440,1190878.35,9,0,0,0


# 학습

## metric 정의

In [13]:
def nmae(week_answer, week_submission):
    answer = week_answer
    target_idx = np.where(answer!=0)
    true = answer[target_idx]
    pred = week_submission[target_idx]
    score = np.mean(np.abs(true-pred)/true)
    
    return score


def at_nmae(pred, dataset):
    y_true = dataset.get_label()
    week_1_answer = y_true[0::3]
    week_2_answer = y_true[1::3]
    week_4_answer = y_true[2::3]
    
    week_1_submission = pred[0::3]
    week_2_submission = pred[1::3]
    week_4_submission = pred[2::3]
    
    score1 = nmae(week_1_answer, week_1_submission)
    score2 = nmae(week_2_answer, week_2_submission)
    score4 = nmae(week_4_answer, week_4_submission)
    
    score = (score1+score2+score4)/3
    
    return 'score', score, False

## 학습 정의

In [14]:
def model_train(x_train, y_train, x_valid, y_valid) :
    params = {'learning_rate': 0.01, 
              'max_depth': 6, 
              'boosting': 'gbdt', 
              'objective': 'regression',  
              'is_training_metric': True, 
              'num_leaves': 100, 
              'feature_fraction': 0.8, 
              'bagging_fraction': 0.8, 
              'bagging_freq': 5, 
              'seed':42,
              'num_threads':8
             }

    model = lightgbm.train(params, 
                   train_set = lightgbm.Dataset(data = x_train, label = y_train),
                   num_boost_round = 10000, 
                   valid_sets = lightgbm.Dataset(data = x_valid, label = y_valid), 
                   init_model = None, 
                   early_stopping_rounds = 100,
                   feval = at_nmae,
                   verbose_eval = False
                    )
    
    return model

## 품목 및 품종별 모델 학습

In [15]:
unique_pum = [
    '배추', '무', '양파', '건고추','마늘',
    '대파', '얼갈이배추', '양배추', '깻잎',
    '시금치', '미나리', '당근',
    '파프리카', '새송이', '팽이버섯', '토마토',
]

unique_kind = [
    '청상추', '백다다기', '애호박', '캠벨얼리', '샤인마스캇'
]

In [16]:
model_dict = {}
split = 28 #validation

for pum in tqdm(unique_pum + unique_kind):
    # 품목 품종별 전처리
    temp_df = train[['date',f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']]
    temp_df = preprocessing(temp_df, pum, len_lag=28)
    
    # 주차별(1,2,4w) 학습
    for week_num in [1,2,4] :
        x = temp_df[temp_df[f'{week_num}_week']>0].iloc[:,:-3]
        y = temp_df[temp_df[f'{week_num}_week']>0][f'{week_num}_week']
        
        #train, test split
        x_train = x[:-split]
        y_train = y[:-split]
        x_valid = x[-split:]
        y_valid = y[-split:]
        
        model_dict[f'{pum}_model_{week_num}'] = model_train(x_train, y_train, x_valid, y_valid)

  0%|          | 0/21 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1747, number of used features: 57
[LightGBM] [Info] Start training from score 681.585575
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1740, number of used features: 57
[LightGBM] [Info] Start training from score 682.610345
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1726, number of used features: 57
[LightGBM] [Info] Start training from score 682.988413

  5%|▍         | 1/21 [00:22<07:39, 22.96s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1747, number of used features: 57
[LightGBM] [Info] Start training from score 550.981110
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1740, number of used features: 57
[LightGBM] [Info] Start training from score 551.927011
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1726, number of used features: 57
[LightGBM] [Info] Start training from score 551.738702


 10%|▉         | 2/21 [00:45<07:07, 22.48s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1743, number of used features: 57
[LightGBM] [Info] Start training from score 806.615032
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1736, number of used features: 57
[LightGBM] [Info] Start training from score 805.148618
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1722, number of used features: 57
[LightGBM] [Info] Start training from score 801.836237


 14%|█▍        | 3/21 [01:06<06:37, 22.09s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1725, number of used features: 57
[LightGBM] [Info] Start training from score 12298.448696
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1718, number of used features: 57
[LightGBM] [Info] Start training from score 12325.603027
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1704, number of used features: 57
[LightGBM] [Info] Start training from score 12370.090962

 19%|█▉        | 4/21 [01:28<06:16, 22.15s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1734, number of used features: 57
[LightGBM] [Info] Start training from score 4211.787197
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1727, number of used features: 57
[LightGBM] [Info] Start training from score 4209.976259
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1714, number of used features: 57
[LightGBM] [Info] Start training from score 4202.728705

 24%|██▍       | 5/21 [01:51<05:57, 22.33s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1747, number of used features: 57
[LightGBM] [Info] Start training from score 1522.427590
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1740, number of used features: 57
[LightGBM] [Info] Start training from score 1523.166092
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1726, number of used features: 57
[LightGBM] [Info] Start training from score 1518.078795


 29%|██▊       | 6/21 [02:13<05:32, 22.17s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1739, number of used features: 57
[LightGBM] [Info] Start training from score 1067.225417
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1732, number of used features: 57
[LightGBM] [Info] Start training from score 1067.621247
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1718, number of used features: 57
[LightGBM] [Info] Start training from score 1064.989523

 33%|███▎      | 7/21 [02:36<05:15, 22.56s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1741, number of used features: 57
[LightGBM] [Info] Start training from score 611.762780
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1734, number of used features: 57
[LightGBM] [Info] Start training from score 613.000577

 38%|███▊      | 8/21 [02:59<04:52, 22.52s/it]

Auto-choosing col-wise multi-threading, the overhead of testing was 0.001138 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1720, number of used features: 57
[LightGBM] [Info] Start training from score 613.593605
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1746, number of used features: 57
[LightGBM] [Info] Start training from score 5418.721077
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1739, number of used features: 57
[LightGBM] [Info] Start training from score 5419.351351
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1725, number of used features: 57
[LightG

 43%|████▎     | 9/21 [03:25<04:44, 23.73s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1743, number of used features: 57
[LightGBM] [Info] Start training from score 2833.306368
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1736, number of used features: 57
[LightGBM] [Info] Start training from score 2836.333525
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1722, number of used features: 57
[LightGBM] [Info] Start training from score 2835.843206

 48%|████▊     | 10/21 [03:49<04:20, 23.65s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1739, number of used features: 57
[LightGBM] [Info] Start training from score 2605.261070
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1732, number of used features: 57
[LightGBM] [Info] Start training from score 2604.017321
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1718, number of used features: 57
[LightGBM] [Info] Start training from score 2597.021537

 52%|█████▏    | 11/21 [04:12<03:54, 23.49s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1737, number of used features: 57
[LightGBM] [Info] Start training from score 1059.175014
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1730, number of used features: 57
[LightGBM] [Info] Start training from score 1060.520231
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1716, number of used features: 57
[LightGBM] [Info] Start training from score 1062.254662


 57%|█████▋    | 12/21 [04:34<03:28, 23.13s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1733, number of used features: 57
[LightGBM] [Info] Start training from score 3529.315638
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1726, number of used features: 57
[LightGBM] [Info] Start training from score 3513.271727
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1713, number of used features: 57
[LightGBM] [Info] Start training from score 3472.169294


 62%|██████▏   | 13/21 [04:56<03:01, 22.69s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1741, number of used features: 57
[LightGBM] [Info] Start training from score 2217.751867
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1734, number of used features: 57
[LightGBM] [Info] Start training from score 2217.119954
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1720, number of used features: 57
[LightGBM] [Info] Start training from score 2211.694767


 67%|██████▋   | 14/21 [05:21<02:44, 23.57s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1733, number of used features: 57
[LightGBM] [Info] Start training from score 1680.275245
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1726, number of used features: 57
[LightGBM] [Info] Start training from score 1681.683662
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1713, number of used features: 57
[LightGBM] [Info] Start training from score 1682.434910


 71%|███████▏  | 15/21 [05:44<02:20, 23.36s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1742, number of used features: 57
[LightGBM] [Info] Start training from score 2210.867394
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1735, number of used features: 57
[LightGBM] [Info] Start training from score 2210.393084
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1722, number of used features: 57
[LightGBM] [Info] Start training from score 2206.431475

 76%|███████▌  | 16/21 [06:06<01:54, 22.88s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1742, number of used features: 57
[LightGBM] [Info] Start training from score 3244.963261
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1735, number of used features: 57
[LightGBM] [Info] Start training from score 3245.839193
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1721, number of used features: 57
[LightGBM] [Info] Start training from score 3238.439861

 81%|████████  | 17/21 [06:31<01:33, 23.40s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1735, number of used features: 57
[LightGBM] [Info] Start training from score 1864.831124
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1728, number of used features: 57
[LightGBM] [Info] Start training from score 1863.685764
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1714, number of used features: 57
[LightGBM] [Info] Start training from score 1856.404317

 86%|████████▌ | 18/21 [06:53<01:09, 23.17s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1742, number of used features: 57
[LightGBM] [Info] Start training from score 1882.851894
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1735, number of used features: 57
[LightGBM] [Info] Start training from score 1879.773487
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1721, number of used features: 57
[LightGBM] [Info] Start training from score 1866.832655

 90%|█████████ | 19/21 [07:17<00:46, 23.38s/it]

No further splits with positive gain, best gain: -inf
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1651, number of used features: 57
[LightGBM] [Info] Start training from score 4097.811629
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1644, number of used features: 57
[LightGBM] [Info] Start training from score 4101.366180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1631, number of used features: 57
[LightGBM] [Info] 

 95%|█████████▌| 20/21 [07:39<00:22, 22.84s/it]

Start training from score 4111.586143
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14282
[LightGBM] [Info] Number of data points in the train set: 1152, number of used features: 57
[LightGBM] [Info] Start training from score 13395.397569
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14275
[LightGBM] [Info] Number of data points in the train set: 1152, number of used features: 57
[LightGBM] [Info] Start training from score 13395.397569
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14266
[LightGBM] [Info] Number of data points in the train set: 1152, number of used features: 57
[LightGBM] [Info] Start training from score 13395.397569


100%|██████████| 21/21 [08:01<00:00, 22.92s/it]






# 추론

In [18]:
submission = pd.read_csv('C:/Users/User/Desktop/workspace/vsc/real_con/sample_submission.csv')
public_date_list = submission[submission['예측대상일자'].str.contains('2021')]['예측대상일자'].str.split('+').str[0].unique()
# ['2020-09-29', ...]

for date in tqdm(public_date_list) :
    test = pd.read_csv(f'C:/Users/User/Desktop/workspace/vsc/real_con/test_files/test_{date}.csv')
    for pum in unique_pum + unique_kind:
        # 예측기준일에 대해 전처리
        temp_test = pd.DataFrame([{'date' : date}]) #예측기준일
        alldata = pd.concat([train, test, temp_test], sort=False).reset_index(drop=True)
        alldata = alldata[['date', f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']].fillna(0)
        alldata = alldata.iloc[-28:].reset_index(drop=True)
        alldata = preprocessing(alldata, pum, len_lag=28)
        temp_test = alldata.iloc[-1].astype(float)[:-3]
        
        # 개별 모델을 활용하여 1,2,4주 후 가격 예측
        for week_num in [1,2,4] :
            temp_model = model_dict[f'{pum}_model_{week_num}']
            result = temp_model.predict(temp_test)
            condition = (submission['예측대상일자']==f'{date}+{week_num}week')
            idx = submission[condition].index
            submission.loc[idx, f'{pum}_가격(원/kg)'] = result[0]

100%|██████████| 38/38 [02:39<00:00,  4.20s/it]


In [19]:
submission.to_csv('C:/Users/User/Desktop/workspace/vsc/real_con/LGBM_baseline.csv',index=False)