In [45]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc, rcParams
import seaborn as sns
import glob
from sklearn.model_selection import train_test_split, StratifiedKFold ,KFold
# import lightgbm
from lightgbm import LGBMRegressor
import datetime

import warnings
warnings.filterwarnings('ignore')

plt.style.use('ggplot')
pd.set_option('max_columns', 100)
pd.set_option("display.precision", 4)
rcParams['figure.figsize'] = (16, 8)
rc('font', family='AppleGothic')

# training data load

In [136]:
train_path = './trainingdata'
infer_path = './inferencedata'
pil_path = './magok'

In [185]:
# 20201225_105925
# 20201225_135053
# 20201226_003554
# 20201226_172333
# 20210101_192508
time_str = '20210101_192508'

train = pd.read_csv(
    os.path.join(train_path, f'training_{time_str}.csv')
)
X_test = pd.read_csv(
    os.path.join(infer_path, f'inference_{time_str}.csv')
)

# 필령형 path

In [189]:
# 필령이형 path
train = pd.read_csv(
    os.path.join(pil_path, 'train_pilv2.csv')
)
X_test = pd.read_csv(
    os.path.join(pil_path, 'test_pilv2.csv')
)

In [179]:
train['Hour'] = train['Hour'].astype('category')
X_test['Hour'] = X_test['Hour'].astype('category')

In [180]:
X_train = train.drop(columns = ['1day_after_target', '2day_after_target'])
target_1 = train['1day_after_target']
target_2 = train['2day_after_target']

# train test split

In [5]:
# 검증셋을 한번만 돌렸음
X_train_1, X_valid_1, Y_train_1, Y_valid_1 = train_test_split(train.iloc[:, :-2], train.iloc[:, -2], test_size=0.3, random_state=0)
X_train_2, X_valid_2, Y_train_2, Y_valid_2 = train_test_split(train.iloc[:, :-2], train.iloc[:, -1], test_size=0.3, random_state=0)

# training code

In [187]:
quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

In [188]:
X_train = train.drop(columns = ['Target1', 'Target2'])
target_1 = train['Target1']
target_2 = train['Target2']

In [126]:
def LGBM_reg(X_train,y_train):
    quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    
    # 랜덤모수 샘플 추론이 왜 안되지;;;
    # fold = StratifiedKFold(n_splits=5, shuffle = True)
    
    fold = KFold(n_splits=5, shuffle = True)

    oof_preds = np.zeros([X_train.shape[0], len(quantiles)])

    feature_importance_df = pd.DataFrame(np.zeros([len(X_train.columns), len(quantiles)]))
    feature_importance_df.columns = quantiles
    
    fold_metric = []
    loss_ls = []
    # training
    target = y_train
    for fold_, (train_idx, valid_idx) in enumerate(
        fold.split(np.array(X_train), target)
    ):

        train_x, train_y = X_train.iloc[train_idx], target.iloc[train_idx]
        valid_x, valid_y = X_train.iloc[valid_idx], target.iloc[valid_idx]

        oof_df = pd.DataFrame()
        for idx, q in enumerate(quantiles):
            print(f'\nquantile: {q}\n')
            model = LGBMRegressor(objective='quantile', 
                                  alpha=q,
                                  n_estimators=10000, 
                                  bagging_fraction=0.7, 
                                  learning_rate=0.027, 
                                  subsample=0.7
                                 )                   

            model.fit(train_x, 
                      train_y,
                      eval_metric = ['quantile'], 
                      eval_set=[(valid_x, valid_y)], 
                      early_stopping_rounds=300, 
                      verbose=1500
                     )

            oof_preds[[valid_idx],idx] = \
                model.predict(valid_x).round(2)

            oof_df[q] = model.predict(valid_x).round(2)
            # feature importance
            feature_importance_df[q] = model.feature_importances_ / 5
            
            
        metric = pinball(oof_df, valid_y)
        loss = np.mean(metric)
        print(f'\n FOLD {fold_}의 total pinball loss: {loss}\n')
        fold_metric.append(metric)
        loss_ls.append(loss)
        
        
        print('\n===================================================')
        print(f'FOLD {fold_} Success')
        print('===================================================\n')
        
    feature_importance_df.index = X_train.columns.tolist()
    
    return oof_preds, fold_metric, loss_ls, feature_importance_df

# Metric
- pinball loss

In [127]:
def pinball(pred, actual):
    quantile_col = pred.columns.tolist()
    pred2 = pred.copy()
    actual2 = actual.copy()
    
    pred2.index = range(len(pred2))
    actual2.index = range(len(actual2))

    pinball_loss = []
    for item in quantile_col:
        pre_pinball = []
        for idx in range(len(pred2)):
            y = actual2.iloc[idx]
            z = pred2[item].iloc[idx]
            q = item
            
            if y >= z:
                value = (y - z)*q
                pre_pinball.append(value)
            else:
                value = (z - y)*(1 - q)
                pre_pinball.append(value)
        loss_mean = np.mean(pre_pinball)
        pinball_loss.append(loss_mean)
    
    fin = np.mean(pinball_loss)
    print(f'Pinball Loss: {fin}')
    
    return pinball_loss         

# Training

### 제출용_20201225_134528.csv CV결과

In [None]:
oof_preds, fold_metric, loss_ls,feature_importance_df = LGBM_reg(X_train,target_1)

In [None]:
oof_preds2, fold_metric2, loss_ls2, feature_importance_df2 = LGBM_reg(X_train,target_2)

In [90]:
feature_importance_df2

Unnamed: 0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
Hour,209.8,616.8,329.8,2296.0,5155.2,2367.4,1625.0,489.0,595.0
TARGET_lag_48,242.6,739.4,662.2,3427.2,7053.8,3451.6,2347.0,768.8,1005.2
DHI_lag_48,204.8,562.4,568.0,2686.2,5567.0,2830.6,2006.6,662.6,905.4
DNI_lag_48,196.2,514.6,472.8,2348.8,4919.6,2427.0,1759.0,610.2,886.2
WS_lag_48,290.2,892.0,694.2,3889.2,8391.4,4039.8,2486.6,535.8,719.2
RH_lag_48,334.0,1190.6,831.2,5139.8,10662.2,4876.8,3004.6,581.0,907.2
T_lag_48,298.4,878.2,737.8,3822.8,7732.8,3640.8,2419.2,666.6,777.8


In [89]:
day7 = np.mean(loss_ls)
day8 = np.mean(loss_ls2)
final = np.mean([day7, day8])
print(f'day7 pinball loss: {day7}')
print(f'day8 pinball loss: {day8}')
print(f'Total pinball loss: {final}')

day7 pinball loss: 1.9162037225315487
day8 pinball loss: 2.0084542828882768
Total pinball loss: 1.9623290027099127


### 20201225_135053 Mimute CV결과 -> CV 성능이 약간 감소했는데 테스트로 한번 제출
- 흠... CV <-> LB 성능차이가 있네

In [99]:
oof_preds, fold_metric, loss_ls,feature_importance_df = LGBM_reg(X_train,target_1)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[402]	valid_0's quantile: 1.3581

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.11866
Early stopping, best iteration is:
[1399]	valid_0's quantile: 2.11855

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.47121
[3000]	valid_0's quantile: 2.45323
Early stopping, best iteration is:
[3829]	valid_0's quantile: 2.44655

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.5671
[3000]	valid_0's quantile: 2.55608
Early stopping, best iteration is:
[2806]	valid_0's quantile: 2.55576

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.49144
[3000]	valid_0's quantile: 2.48017
[4500]	valid_0's quantile: 2.47114
[6000]	valid_0's quantile: 2.46689
Early stopping, be

[3000]	valid_0's quantile: 1.39228
Early stopping, best iteration is:
[2787]	valid_0's quantile: 1.39161

quantile: 0.9

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 0.773831
Early stopping, best iteration is:
[1563]	valid_0's quantile: 0.773683
Pinball Loss: 1.9532725995034808

 FOLD 3의 total pinball loss: 1.9532725995034808


FOLD 3 Success


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[317]	valid_0's quantile: 1.35858

quantile: 0.2

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[618]	valid_0's quantile: 2.1405

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.52054
Early stopping, best iteration is:
[2312]	valid_0's quantile: 2.51875

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.65716
[3000]	valid_0's quan

In [100]:
oof_preds2, fold_metric2, loss_ls2, feature_importance_df2 = LGBM_reg(X_train,target_2)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[481]	valid_0's quantile: 1.44178

quantile: 0.2

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[462]	valid_0's quantile: 2.32703

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.72213
[3000]	valid_0's quantile: 2.6984
[4500]	valid_0's quantile: 2.69341
[6000]	valid_0's quantile: 2.68698
Early stopping, best iteration is:
[5793]	valid_0's quantile: 2.6868

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.82675
[3000]	valid_0's quantile: 2.80456
Early stopping, best iteration is:
[3277]	valid_0's quantile: 2.80189

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.73056
Early stopping, best iteration is:
[2592]	valid_0's quantile: 2.71635

quantile: 0.6

Tra

Early stopping, best iteration is:
[478]	valid_0's quantile: 1.38174

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.20188
Early stopping, best iteration is:
[1261]	valid_0's quantile: 2.20111

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.61012
Early stopping, best iteration is:
[1904]	valid_0's quantile: 2.60662

quantile: 0.4

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[486]	valid_0's quantile: 2.74421

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.63303
[3000]	valid_0's quantile: 2.61483
Early stopping, best iteration is:
[3183]	valid_0's quantile: 2.61384

quantile: 0.6

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.35263
Early stopping, best iteration is:
[2550]	valid_0's quantile: 2.33794

quantile: 0.7

T

In [101]:
day7 = np.mean(loss_ls)
day8 = np.mean(loss_ls2)
final = np.mean([day7, day8])
print(f'day7 pinball loss: {day7}')
print(f'day8 pinball loss: {day8}')
print(f'Total pinball loss: {final}')

day7 pinball loss: 1.9196264977231923
day8 pinball loss: 2.0115831732839857
Total pinball loss: 1.9656048355035889


### 20201226_003554 CV결과

In [117]:
oof_preds, fold_metric, loss_ls,feature_importance_df = LGBM_reg(X_train,target_1)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.33828
Early stopping, best iteration is:
[1616]	valid_0's quantile: 1.33748

quantile: 0.2

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[945]	valid_0's quantile: 2.07987

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.46726
[3000]	valid_0's quantile: 2.44014
[4500]	valid_0's quantile: 2.42973
[6000]	valid_0's quantile: 2.42348
Early stopping, best iteration is:
[6689]	valid_0's quantile: 2.42047

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.57508
[3000]	valid_0's quantile: 2.54438
[4500]	valid_0's quantile: 2.53351
[6000]	valid_0's quantile: 2.52242
Early stopping, best iteration is:
[6564]	valid_0's quantile: 2.52149

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's

Early stopping, best iteration is:
[2501]	valid_0's quantile: 1.39127

quantile: 0.9

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 0.765112
Early stopping, best iteration is:
[1809]	valid_0's quantile: 0.764857
Pinball Loss: 1.9465175726748452

 FOLD 3의 total pinball loss: 1.9465175726748452


FOLD 3 Success


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[386]	valid_0's quantile: 1.32331

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.06277
Early stopping, best iteration is:
[1708]	valid_0's quantile: 2.06021

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.46893
[3000]	valid_0's quantile: 2.44974
[4500]	valid_0's quantile: 2.44221
Early stopping, best iteration is:
[5625]	valid_0's quantile: 2.43923

quantile: 0.4

Training until validation scores don't improve

In [118]:
oof_preds2, fold_metric2, loss_ls2, feature_importance_df2 = LGBM_reg(X_train,target_2)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.35595
Early stopping, best iteration is:
[2201]	valid_0's quantile: 1.35595

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.16217
[3000]	valid_0's quantile: 2.15202
[4500]	valid_0's quantile: 2.14941
[6000]	valid_0's quantile: 2.14426
[7500]	valid_0's quantile: 2.13943
Early stopping, best iteration is:
[7660]	valid_0's quantile: 2.13667

quantile: 0.3

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[900]	valid_0's quantile: 2.57259

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.70709
[3000]	valid_0's quantile: 2.67943
[4500]	valid_0's quantile: 2.66563
[6000]	valid_0's quantile: 2.66113
[7500]	valid_0's quantile: 2.654
[9000]	valid_0's quantile: 2.64999
Did not meet early stopping. Best iteration is:
[9996]	valid_

Pinball Loss: 2.0107073624288283

 FOLD 3의 total pinball loss: 2.0107073624288283


FOLD 3 Success


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.39833
Early stopping, best iteration is:
[1439]	valid_0's quantile: 1.39798

quantile: 0.2

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[806]	valid_0's quantile: 2.24318

quantile: 0.3

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[1085]	valid_0's quantile: 2.61771

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.75353
[3000]	valid_0's quantile: 2.73816
[4500]	valid_0's quantile: 2.72227
[6000]	valid_0's quantile: 2.71285
Early stopping, best iteration is:
[5896]	valid_0's quantile: 2.71191

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.66642
Early stopping, best 

In [119]:
day7 = np.mean(loss_ls)
day8 = np.mean(loss_ls2)
final = np.mean([day7, day8])
print(f'day7 pinball loss: {day7}')
print(f'day8 pinball loss: {day8}')
print(f'Total pinball loss: {final}')

day7 pinball loss: 1.9147686529806862
day8 pinball loss: 2.0108222525916575
Total pinball loss: 1.962795452786172


### D_I statistical / dew point 변수 CV

In [128]:
oof_preds, fold_metric, loss_ls,feature_importance_df = LGBM_reg(X_train,target_1)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[569]	valid_0's quantile: 1.37627

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.13972
[3000]	valid_0's quantile: 2.12952
Early stopping, best iteration is:
[3906]	valid_0's quantile: 2.12495

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.48642
[3000]	valid_0's quantile: 2.46919
[4500]	valid_0's quantile: 2.45564
[6000]	valid_0's quantile: 2.44838
Early stopping, best iteration is:
[6313]	valid_0's quantile: 2.44785

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.59228
[3000]	valid_0's quantile: 2.56893
Early stopping, best iteration is:
[3181]	valid_0's quantile: 2.56535

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.51318
[3000]	valid_0's

[1500]	valid_0's quantile: 2.17971
[3000]	valid_0's quantile: 2.15196
[4500]	valid_0's quantile: 2.13923
[6000]	valid_0's quantile: 2.13399
Early stopping, best iteration is:
[6463]	valid_0's quantile: 2.12975

quantile: 0.7

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.80459
Early stopping, best iteration is:
[1897]	valid_0's quantile: 1.80066

quantile: 0.8

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.3201
[3000]	valid_0's quantile: 1.31354
Early stopping, best iteration is:
[3489]	valid_0's quantile: 1.31231

quantile: 0.9

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 0.731099
Early stopping, best iteration is:
[2631]	valid_0's quantile: 0.728926
Pinball Loss: 1.8319920722170855

 FOLD 3의 total pinball loss: 1.8319920722170855


FOLD 3 Success


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iterat

In [129]:
oof_preds2, fold_metric2, loss_ls2, feature_importance_df2 = LGBM_reg(X_train,target_2)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[193]	valid_0's quantile: 1.38686

quantile: 0.2

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[839]	valid_0's quantile: 2.18673

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.57928
[3000]	valid_0's quantile: 2.54979
[4500]	valid_0's quantile: 2.54199
[6000]	valid_0's quantile: 2.53628
Early stopping, best iteration is:
[6699]	valid_0's quantile: 2.53383

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.70025
[3000]	valid_0's quantile: 2.65937
[4500]	valid_0's quantile: 2.64378
[6000]	valid_0's quantile: 2.63294
[7500]	valid_0's quantile: 2.62919
[9000]	valid_0's quantile: 2.62583
Did not meet early stopping. Best iteration is:
[10000]	valid_0's quantile: 2.6227

quantile: 0.5

Training until validation score

Early stopping, best iteration is:
[431]	valid_0's quantile: 1.41525

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.22009
[3000]	valid_0's quantile: 2.21055
[4500]	valid_0's quantile: 2.1992
[6000]	valid_0's quantile: 2.19345
[7500]	valid_0's quantile: 2.18658
Early stopping, best iteration is:
[7862]	valid_0's quantile: 2.18477

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.60294
[3000]	valid_0's quantile: 2.57257
Early stopping, best iteration is:
[3468]	valid_0's quantile: 2.56724

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.72502
[3000]	valid_0's quantile: 2.70489
[4500]	valid_0's quantile: 2.69429
[6000]	valid_0's quantile: 2.68445
[7500]	valid_0's quantile: 2.6797
[9000]	valid_0's quantile: 2.67599
Did not meet early stopping. Best iteration is:
[10000]	valid_0's quantile: 2.67413

quantile: 0.5

Trai

In [130]:
day7 = np.mean(loss_ls)
day8 = np.mean(loss_ls2)
final = np.mean([day7, day8])
print(f'day7 pinball loss: {day7}')
print(f'day8 pinball loss: {day8}')
print(f'Total pinball loss: {final}')

day7 pinball loss: 1.8913721912874997
day8 pinball loss: 1.9855307619452247
Total pinball loss: 1.938451476616362


- 성능이 약간 개선되었는데 공선성 문제 때문에 변수좀 소거해보자

### 변수중요도에 관련없는 변수 소거


In [132]:
oof_preds, fold_metric, loss_ls,feature_importance_df = LGBM_reg(
    X_train.drop(columns = ['D_I_mean','D_I_skew','D_I_kurt','D_I_median']),
    target_1
    )


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[436]	valid_0's quantile: 1.35699

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.08405
[3000]	valid_0's quantile: 2.07108
[4500]	valid_0's quantile: 2.06464
Early stopping, best iteration is:
[5092]	valid_0's quantile: 2.06332

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.47607
[3000]	valid_0's quantile: 2.43675
[4500]	valid_0's quantile: 2.43063
Early stopping, best iteration is:
[4207]	valid_0's quantile: 2.43034

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.55298
[3000]	valid_0's quantile: 2.52857
[4500]	valid_0's quantile: 2.50445
[6000]	valid_0's quantile: 2.4981
[7500]	valid_0's quantile: 2.49137
Early stopping, best iteration is:
[7559]	valid_0's quantile: 2.49103

quantile: 0.5

Training 


quantile: 0.7

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.85437
[3000]	valid_0's quantile: 1.84264
Early stopping, best iteration is:
[3443]	valid_0's quantile: 1.84069

quantile: 0.8

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.35152
[3000]	valid_0's quantile: 1.34513
Early stopping, best iteration is:
[3287]	valid_0's quantile: 1.34428

quantile: 0.9

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 0.746045
Early stopping, best iteration is:
[2280]	valid_0's quantile: 0.744663
Pinball Loss: 1.9126598846955822

 FOLD 3의 total pinball loss: 1.9126598846955822


FOLD 3 Success


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[447]	valid_0's quantile: 1.35731

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.11369
Early stopping, bes

In [133]:
oof_preds2, fold_metric2, loss_ls2, feature_importance_df2 = LGBM_reg(
    X_train.drop(columns = ['D_I_mean','D_I_skew','D_I_kurt','D_I_median']),
    target_2
    )


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.37047
Early stopping, best iteration is:
[1465]	valid_0's quantile: 1.37047

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.14003
Early stopping, best iteration is:
[2353]	valid_0's quantile: 2.12652

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.47442
[3000]	valid_0's quantile: 2.45122
Early stopping, best iteration is:
[3024]	valid_0's quantile: 2.45082

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.61601
[3000]	valid_0's quantile: 2.58555
[4500]	valid_0's quantile: 2.57415
Early stopping, best iteration is:
[4553]	valid_0's quantile: 2.57371

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.52161
[3000]	valid_0's quantile: 2.4854
[4500]	valid_0's

[1500]	valid_0's quantile: 2.03954
[3000]	valid_0's quantile: 2.03404
Early stopping, best iteration is:
[4149]	valid_0's quantile: 2.03043

quantile: 0.8

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.47792
Early stopping, best iteration is:
[1793]	valid_0's quantile: 1.47616

quantile: 0.9

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 0.820268
Early stopping, best iteration is:
[1423]	valid_0's quantile: 0.819815
Pinball Loss: 2.064666155301017

 FOLD 3의 total pinball loss: 2.064666155301017


FOLD 3 Success


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[688]	valid_0's quantile: 1.38893

quantile: 0.2

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[763]	valid_0's quantile: 2.2311

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quanti

In [134]:
day7 = np.mean(loss_ls)
day8 = np.mean(loss_ls2)
final = np.mean([day7, day8])
print(f'day7 pinball loss: {day7}')
print(f'day8 pinball loss: {day8}')
print(f'Total pinball loss: {final}')

day7 pinball loss: 1.8929697770060074
day8 pinball loss: 1.980082161656093
Total pinball loss: 1.9365259693310501


### 필령이형 데이터 train

In [164]:
oof_preds, fold_metric, loss_ls,feature_importance_df = LGBM_reg(
    X_train,
    target_1
    )


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[512]	valid_0's quantile: 1.32656

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.07848
Early stopping, best iteration is:
[2015]	valid_0's quantile: 2.06851

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.44342
Early stopping, best iteration is:
[2346]	valid_0's quantile: 2.4343

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.57115
[3000]	valid_0's quantile: 2.55498
[4500]	valid_0's quantile: 2.54708
[6000]	valid_0's quantile: 2.54231
Early stopping, best iteration is:
[6239]	valid_0's quantile: 2.54137

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.50013
Early stopping, best iteration is:
[2599]	valid_0's quantile: 2.48387

quantile: 0.6

T

Early stopping, best iteration is:
[240]	valid_0's quantile: 1.41379

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.19613
Early stopping, best iteration is:
[2608]	valid_0's quantile: 2.18823

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.56854
[3000]	valid_0's quantile: 2.55082
Early stopping, best iteration is:
[3510]	valid_0's quantile: 2.54543

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.68466
[3000]	valid_0's quantile: 2.66832
[4500]	valid_0's quantile: 2.65609
[6000]	valid_0's quantile: 2.64785
[7500]	valid_0's quantile: 2.64215
[9000]	valid_0's quantile: 2.63993
Did not meet early stopping. Best iteration is:
[10000]	valid_0's quantile: 2.63748

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.57503
[3000]	valid_0's quantile: 2.55878
[4500]	val

In [165]:
oof_preds2, fold_metric2, loss_ls2, feature_importance_df2 = LGBM_reg(
    X_train,
    target_2
    )


quantile: 0.1

Training until validation scores don't improve for 300 rounds
Early stopping, best iteration is:
[912]	valid_0's quantile: 1.38343

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.20525
Early stopping, best iteration is:
[1219]	valid_0's quantile: 2.20494

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.60371
[3000]	valid_0's quantile: 2.58694
[4500]	valid_0's quantile: 2.57484
[6000]	valid_0's quantile: 2.56775
Early stopping, best iteration is:
[5786]	valid_0's quantile: 2.56737

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.73294
Early stopping, best iteration is:
[2013]	valid_0's quantile: 2.7111

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.63204
[3000]	valid_0's quantile: 2.59792
[4500]	valid_0's quantile: 2.58848
Early stopping, b

[1500]	valid_0's quantile: 2.26695
Early stopping, best iteration is:
[1658]	valid_0's quantile: 2.26572

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.70855
[3000]	valid_0's quantile: 2.69148
Early stopping, best iteration is:
[2709]	valid_0's quantile: 2.69046

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.82063
[3000]	valid_0's quantile: 2.78172
Early stopping, best iteration is:
[3678]	valid_0's quantile: 2.77403

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.73367
[3000]	valid_0's quantile: 2.68887
[4500]	valid_0's quantile: 2.67891
[6000]	valid_0's quantile: 2.67157
[7500]	valid_0's quantile: 2.66628
Early stopping, best iteration is:
[8217]	valid_0's quantile: 2.66362

quantile: 0.6

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.44684
[3000]	valid_0's quanti

In [171]:
day7 = np.mean(loss_ls)
day8 = np.mean(loss_ls2)
final = np.mean([day7, day8])
print(f'day7 pinball loss: {day7}')
print(f'day8 pinball loss: {day8}')
print(f'Total pinball loss: {final}')

day7 pinball loss: 1.9219965259494232
day8 pinball loss: 2.0151745116918103
Total pinball loss: 1.9685855188206167


# 필령쓰 ver2

In [172]:
oof_preds, fold_metric, loss_ls,feature_importance_df = LGBM_reg(
    X_train,
    target_1
    )


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.29096
[3000]	valid_0's quantile: 1.27885
Early stopping, best iteration is:
[3686]	valid_0's quantile: 1.27547

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.97847
[3000]	valid_0's quantile: 1.94365
Early stopping, best iteration is:
[4184]	valid_0's quantile: 1.92911

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.34478
[3000]	valid_0's quantile: 2.29744
[4500]	valid_0's quantile: 2.27002
[6000]	valid_0's quantile: 2.25273
[7500]	valid_0's quantile: 2.22996
[9000]	valid_0's quantile: 2.22425
Did not meet early stopping. Best iteration is:
[9961]	valid_0's quantile: 2.22225

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.53493
[3000]	valid_0's quantile: 2.43417
[4500]	valid_0's quantile: 2.39916
[6000]	val


quantile: 0.9

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 0.749201
[3000]	valid_0's quantile: 0.746923
Early stopping, best iteration is:
[3209]	valid_0's quantile: 0.746612
Pinball Loss: 1.780496978558774

 FOLD 2의 total pinball loss: 1.780496978558774


FOLD 2 Success


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.34006
Early stopping, best iteration is:
[1306]	valid_0's quantile: 1.33899

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.07516
[3000]	valid_0's quantile: 2.03136
Early stopping, best iteration is:
[3244]	valid_0's quantile: 2.02639

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.405
[3000]	valid_0's quantile: 2.36088
[4500]	valid_0's quantile: 2.32427
[6000]	valid_0's quantile: 2.30347
[7500]	valid_0's quantile: 2.29963
Early stopping, best iteratio

In [173]:
oof_preds2, fold_metric2, loss_ls2, feature_importance_df2 = LGBM_reg(
    X_train,
    target_2
    )


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.36991
[3000]	valid_0's quantile: 1.36246
Early stopping, best iteration is:
[3149]	valid_0's quantile: 1.36027

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.14757
[3000]	valid_0's quantile: 2.11448
Early stopping, best iteration is:
[3007]	valid_0's quantile: 2.11432

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.52525
[3000]	valid_0's quantile: 2.4552
[4500]	valid_0's quantile: 2.42877
[6000]	valid_0's quantile: 2.41938
[7500]	valid_0's quantile: 2.40702
[9000]	valid_0's quantile: 2.3998
Did not meet early stopping. Best iteration is:
[9943]	valid_0's quantile: 2.39724

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.63453
[3000]	valid_0's quantile: 2.56361
[4500]	valid_0's quantile: 2.52245
[6000]	valid

Early stopping, best iteration is:
[6444]	valid_0's quantile: 1.35024

quantile: 0.9

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 0.77109
[3000]	valid_0's quantile: 0.768761
Early stopping, best iteration is:
[3210]	valid_0's quantile: 0.768489
Pinball Loss: 1.841574607967453

 FOLD 2의 total pinball loss: 1.841574607967453


FOLD 2 Success


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.33971
Early stopping, best iteration is:
[2501]	valid_0's quantile: 1.33114

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.08797
[3000]	valid_0's quantile: 2.04496
[4500]	valid_0's quantile: 2.02759
Early stopping, best iteration is:
[4792]	valid_0's quantile: 2.02647

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.46631
[3000]	valid_0's quantile: 2.38274
[4500]	valid_0's quantile: 2

In [174]:
day7 = np.mean(loss_ls)
day8 = np.mean(loss_ls2)
final = np.mean([day7, day8])
print(f'day7 pinball loss: {day7}')
print(f'day8 pinball loss: {day8}')
print(f'Total pinball loss: {final}')

day7 pinball loss: 1.7984904085191904
day8 pinball loss: 1.8401536093706121
Total pinball loss: 1.8193220089449014
