In [34]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc, rcParams
import seaborn as sns
import glob
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
# import lightgbm
from lightgbm import LGBMRegressor
import datetime

import warnings
# NOTE(review): this silences every warning for the whole session, including
# deprecation notices from pandas / LightGBM; narrow the filter if those
# messages are needed while debugging.
warnings.filterwarnings('ignore')

plt.style.use('ggplot')
# Use the fully-qualified option name. The bare 'max_columns' pattern is
# ambiguous in recent pandas releases (it also matches
# 'styler.render.max_columns') and raises OptionError there.
pd.set_option('display.max_columns', 100)
pd.set_option("display.precision", 4)
rcParams['figure.figsize'] = (16, 8)
# AppleGothic is a macOS font; presumably chosen so Korean text renders in figures.
rc('font', family='AppleGothic')

In [35]:
# Directories the notebook reads from.
train_path = './trainingdata'    # training_<time_str>.csv files
infer_path = './inferencedata'   # inference_<time_str>.csv files
pil_path = './magok'             # externally produced submission used for blending

# Directories the notebook writes to (submission_path is also read back
# later when previously saved submissions are blended).
submission_path = './submission'
oof_path = './oof_preds'
importance_path = './feature_importance'

In [36]:
# Read the sample submission twice so that each model variant fills in its
# own, independent frame.
sample_submission_file = './data/sample_submission.csv'
submission = pd.read_csv(sample_submission_file)
submission2 = pd.read_csv(sample_submission_file)

In [37]:
# time_str for the test run (the Korean suffix means "variable-elimination test").
# NOTE(review): the following cell reassigns time_str before any file is
# loaded, so this value only matters if that cell is skipped.
time_str = '20210105_221155_변수소거테스트'

In [55]:
# Timestamp tag selecting which saved feature files are loaded; it is also
# embedded in the names of the files written later in the notebook.
time_str = '20210111_231706'

train_file = os.path.join(train_path, f'training_{time_str}.csv')
test_file = os.path.join(infer_path, f'inference_{time_str}.csv')

train = pd.read_csv(train_file)
X_test = pd.read_csv(test_file)

In [56]:
# Same timestamp tag as above, but the "_day23" variant of the feature files.
train_day23_file = os.path.join(train_path, f'training_{time_str}_day23.csv')
test_day23_file = os.path.join(infer_path, f'inference_{time_str}_day23.csv')

train_day23 = pd.read_csv(train_day23_file)
X_test_day23 = pd.read_csv(test_day23_file)

In [57]:
# Cast the 'Hour' column to pandas' category dtype in every training and
# inference frame, in the same order as before.
for _frame in (train, train_day23, X_test, X_test_day23):
    _frame['Hour'] = _frame['Hour'].astype('category')

# Metric

In [59]:
def pinball(pred, actual):
    """Compute the mean pinball (quantile) loss for each predicted quantile.

    For an observation ``y``, a prediction ``z`` and a quantile level ``q``
    the loss is ``(y - z) * q`` when ``y >= z`` and ``(z - y) * (1 - q)``
    otherwise.

    Parameters
    ----------
    pred : pandas.DataFrame
        One column per quantile. Each column label is used directly as the
        quantile level ``q``, so labels must be numeric (e.g. 0.1 ... 0.9).
    actual : pandas.Series or array-like
        Observed values, matched to the rows of ``pred`` by position
        (index labels are ignored).

    Returns
    -------
    list
        Mean pinball loss per quantile, in the column order of ``pred``.
        The average over all quantiles is printed, not returned.

    Raises
    ------
    ValueError
        If ``actual`` and ``pred`` do not have the same number of rows.
    """
    quantile_col = pred.columns.tolist()

    # Positional arrays replace the index-reset copies and the per-row
    # ``.iloc`` lookups of a Python-level loop.
    y = np.asarray(actual, dtype=float).ravel()
    if y.shape[0] != len(pred):
        raise ValueError(
            f'actual has {y.shape[0]} rows but pred has {len(pred)} rows'
        )

    pinball_loss = []
    for item in quantile_col:
        z = np.asarray(pred[item], dtype=float)
        q = item

        # Under-prediction is weighted by q, over-prediction by (1 - q).
        per_row = np.where(y >= z, (y - z) * q, (z - y) * (1 - q))
        pinball_loss.append(np.mean(per_row))

    fin = np.mean(pinball_loss)
    print(f'Pinball Loss: {fin}')

    return pinball_loss

# Inference

In [60]:
# X_train = X_train.drop(columns = drop_col)
# X_test = X_test.drop(columns = drop_col)

In [61]:
# Pull out the two target columns first, then drop them to obtain the
# feature matrix.
target_1 = train['1day_after_target']
target_2 = train['2day_after_target']
X_train = train.drop(columns=[target_1.name, target_2.name])

In [62]:
# Same split for the "day23" variant: targets first, then the feature matrix.
target_1_day23 = train_day23['1day_after_target']
target_2_day23 = train_day23['2day_after_target']
X_train_day23 = train_day23.drop(
    columns=[target_1_day23.name, target_2_day23.name]
)

In [63]:
def LGBM_reg(X_train, y_train, test, target_num, n_splits=7, random_state=None):
    """Train one LightGBM quantile regressor per quantile and fold.

    For each K-fold split and each quantile in 0.1 ... 0.9 a model is fitted
    with early stopping on the held-out fold. Out-of-fold predictions, the
    fold-averaged test predictions and the fold-averaged feature importances
    are collected.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training features.
    y_train : pandas.Series
        Training target, aligned with ``X_train`` by position.
    test : pandas.DataFrame
        Features to predict on; predictions are averaged over folds.
    target_num : int or str
        Tag embedded in the names of the CSV files written by this function.
    n_splits : int, default 7
        Number of K-fold splits.
    random_state : int or None, default None
        Seed for the shuffled K-fold split. ``None`` keeps the previous
        non-reproducible behaviour; pass an integer for repeatable folds.

    Returns
    -------
    oof_preds : numpy.ndarray
        Out-of-fold predictions, shape ``(len(X_train), 9)``.
    fold_metric : list of list
        Per-fold list of per-quantile pinball losses.
    test_df : pandas.DataFrame
        Fold-averaged test predictions, one column per quantile.
    feature_importance_df2 : pandas.DataFrame
        Fold-averaged feature importances with the feature names in an
        ``index`` column.

    Side effects
    ------------
    Writes ``{time_str}_{target_num}_fe.csv`` into ``importance_path`` and
    ``{time_str}_{target_num}_oof_pred.csv`` into ``oof_path``. All three
    names are read from module-level variables at call time.
    """
    quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    # Create the output directories up front so a missing folder cannot
    # discard a finished training run at the final to_csv calls.
    os.makedirs(importance_path, exist_ok=True)
    os.makedirs(oof_path, exist_ok=True)

    # Original author's note (translated): "why doesn't inference on a
    # random-parameter sample work;;;"
    # fold = StratifiedKFold(n_splits=5, shuffle = True)

    fold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    oof_preds = np.zeros([X_train.shape[0], len(quantiles)])

    feature_importance_df = pd.DataFrame(
        np.zeros([len(X_train.columns), len(quantiles)])
    )
    feature_importance_df.columns = quantiles

    test_df = pd.DataFrame(np.zeros([test.shape[0], len(quantiles)]))
    test_df.columns = quantiles

    fold_metric = []
    # training
    target = y_train
    for fold_, (train_idx, valid_idx) in enumerate(fold.split(X_train, target)):
        train_x, train_y = X_train.iloc[train_idx], target.iloc[train_idx]
        valid_x, valid_y = X_train.iloc[valid_idx], target.iloc[valid_idx]

        oof_df = pd.DataFrame()
        for idx, q in enumerate(quantiles):
            print(f'\nquantile: {q}\n')
            # NOTE(review): `subsample` (alias `bagging_fraction`, previously
            # passed twice with the same value) only takes effect when
            # `subsample_freq` > 0, so bagging is currently inactive. It is
            # left that way to keep the trained models unchanged.
            model = LGBMRegressor(objective='quantile',
                                  alpha=q,
                                  n_estimators=10000,
                                  learning_rate=0.027,
                                  subsample=0.7
                                 )

            # NOTE(review): `early_stopping_rounds` / `verbose` as fit()
            # arguments were removed in LightGBM 4.0; newer versions need the
            # early_stopping / log_evaluation callbacks instead.
            model.fit(train_x, train_y, eval_metric = ['quantile'],
              eval_set=[(valid_x, valid_y)], early_stopping_rounds=300, verbose=1500)

            # Predict the validation fold once and reuse the result.
            valid_pred = model.predict(valid_x).round(2)
            oof_preds[valid_idx, idx] = valid_pred
            oof_df[q] = valid_pred

            test_df[q] += model.predict(test).round(2) / fold.n_splits

            # Accumulate across folds. Assigning with `=` kept only the last
            # fold's importances, and the divisor was hard-coded to 7.
            feature_importance_df[q] += model.feature_importances_ / fold.n_splits

        metric = pinball(oof_df, valid_y)
        loss = np.mean(metric)
        print(f'\n FOLD {fold_}의 total pinball loss: {loss}\n')
        fold_metric.append(metric)

        print('\n===================================================')
        print(f'FOLD {fold_} Success')
        print('===================================================\n')

    # Label rows with feature names; reset_index moves them to an 'index' column.
    feature_importance_df.index = X_train.columns.tolist()
    feature_importance_df2 = feature_importance_df.reset_index()

    feature_importance_df2.to_csv(f'{importance_path}/{time_str}_{target_num}_fe.csv', index = False)
    pd.DataFrame(oof_preds).to_csv(f'{oof_path}/{time_str}_{target_num}_oof_pred.csv', index = False)

    return oof_preds, fold_metric, test_df, feature_importance_df2

In [64]:
# 1-day-ahead target on the main feature set; files are tagged with 1.
(oof_preds, fold_metric, test_df, feature_imp) = LGBM_reg(
    X_train, target_1, X_test, 1
)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.27884
Early stopping, best iteration is:
[2076]	valid_0's quantile: 1.27069

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.91067
[3000]	valid_0's quantile: 1.87221
[4500]	valid_0's quantile: 1.85972
Early stopping, best iteration is:
[4542]	valid_0's quantile: 1.85953

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.23932
[3000]	valid_0's quantile: 2.18371
[4500]	valid_0's quantile: 2.1506
[6000]	valid_0's quantile: 2.13535
Early stopping, best iteration is:
[7178]	valid_0's quantile: 2.12886

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.38566
[3000]	valid_0's quantile: 2.31118
[4500]	valid_0's quantile: 2.27864
[6000]	valid_0's quantile: 2.26712
[7500]	valid_0's quantile: 2.25187
[9000]	valid_0's quantil


quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.21494
[3000]	valid_0's quantile: 2.1574
Early stopping, best iteration is:
[3185]	valid_0's quantile: 2.15247

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.35399
[3000]	valid_0's quantile: 2.28796
[4500]	valid_0's quantile: 2.25787
[6000]	valid_0's quantile: 2.24393
[7500]	valid_0's quantile: 2.23195
[9000]	valid_0's quantile: 2.22576
Did not meet early stopping. Best iteration is:
[9846]	valid_0's quantile: 2.22149

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.33341
[3000]	valid_0's quantile: 2.29099
[4500]	valid_0's quantile: 2.24647
Early stopping, best iteration is:
[5336]	valid_0's quantile: 2.23201

quantile: 0.6

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.13209
[3000]	valid_0's quantile: 2.1045
[4500]	valid

[7500]	valid_0's quantile: 2.34769
[9000]	valid_0's quantile: 2.33904
Did not meet early stopping. Best iteration is:
[9947]	valid_0's quantile: 2.32601

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.42235
[3000]	valid_0's quantile: 2.36471
[4500]	valid_0's quantile: 2.3265
[6000]	valid_0's quantile: 2.302
[7500]	valid_0's quantile: 2.29017
Early stopping, best iteration is:
[8232]	valid_0's quantile: 2.28514

quantile: 0.6

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.20023
[3000]	valid_0's quantile: 2.1684
[4500]	valid_0's quantile: 2.14423
Early stopping, best iteration is:
[5126]	valid_0's quantile: 2.13446

quantile: 0.7

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.81489
Early stopping, best iteration is:
[2492]	valid_0's quantile: 1.79387

quantile: 0.8

Training until validation scores don't improve for 300 rounds
[1500]	valid_0

In [65]:
# 2-day-ahead target on the main feature set; files are tagged with 2.
(oof_preds2, fold_metric2, test_df2, feature_imp2) = LGBM_reg(
    X_train, target_2, X_test, 2
)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.28219
Early stopping, best iteration is:
[1987]	valid_0's quantile: 1.27994

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.9861
[3000]	valid_0's quantile: 1.94835
[4500]	valid_0's quantile: 1.9366
[6000]	valid_0's quantile: 1.92629
[7500]	valid_0's quantile: 1.91962
Early stopping, best iteration is:
[7591]	valid_0's quantile: 1.91899

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.32246
[3000]	valid_0's quantile: 2.24944
[4500]	valid_0's quantile: 2.21805
Early stopping, best iteration is:
[4905]	valid_0's quantile: 2.2131

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.46648
[3000]	valid_0's quantile: 2.38431
[4500]	valid_0's quantile: 2.33433
[6000]	valid_0's quantile: 2.30361
[7500]	valid_0's quantile:

[1500]	valid_0's quantile: 2.01787
[3000]	valid_0's quantile: 1.97619
[4500]	valid_0's quantile: 1.96049
Early stopping, best iteration is:
[5058]	valid_0's quantile: 1.95605

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.33048
[3000]	valid_0's quantile: 2.27918
[4500]	valid_0's quantile: 2.24728
[6000]	valid_0's quantile: 2.23504
[7500]	valid_0's quantile: 2.22427
[9000]	valid_0's quantile: 2.21905
Did not meet early stopping. Best iteration is:
[9969]	valid_0's quantile: 2.21397

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.4531
[3000]	valid_0's quantile: 2.36652
[4500]	valid_0's quantile: 2.32713
[6000]	valid_0's quantile: 2.3028
[7500]	valid_0's quantile: 2.28591
[9000]	valid_0's quantile: 2.27295
Did not meet early stopping. Best iteration is:
[10000]	valid_0's quantile: 2.26456

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]

[4500]	valid_0's quantile: 2.33679
[6000]	valid_0's quantile: 2.32168
Early stopping, best iteration is:
[6431]	valid_0's quantile: 2.31957

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.55023
[3000]	valid_0's quantile: 2.47515
[4500]	valid_0's quantile: 2.45019
[6000]	valid_0's quantile: 2.428
[7500]	valid_0's quantile: 2.41103
[9000]	valid_0's quantile: 2.39894
Did not meet early stopping. Best iteration is:
[9985]	valid_0's quantile: 2.39325

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.48786
[3000]	valid_0's quantile: 2.43747
[4500]	valid_0's quantile: 2.40329
[6000]	valid_0's quantile: 2.39498
Early stopping, best iteration is:
[5894]	valid_0's quantile: 2.39417

quantile: 0.6

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.25792
[3000]	valid_0's quantile: 2.22095
[4500]	valid_0's quantile: 2.20649
[6000]	valid_0's qua

In [66]:
# 1-day-ahead target on the "day23" feature set; files are tagged with 11.
(
    oof_preds_day23,
    fold_metric_day23,
    test_df_day23,
    feature_imp_day23,
) = LGBM_reg(X_train_day23, target_1_day23, X_test_day23, 11)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.29616
Early stopping, best iteration is:
[2126]	valid_0's quantile: 1.29331

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.9561
[3000]	valid_0's quantile: 1.92549
Early stopping, best iteration is:
[2777]	valid_0's quantile: 1.9241

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.2708
[3000]	valid_0's quantile: 2.23251
[4500]	valid_0's quantile: 2.20149
[6000]	valid_0's quantile: 2.18497
Early stopping, best iteration is:
[7134]	valid_0's quantile: 2.17534

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.44632
[3000]	valid_0's quantile: 2.36457
[4500]	valid_0's quantile: 2.32924
[6000]	valid_0's quantile: 2.30771
[7500]	valid_0's quantile: 2.28881
[9000]	valid_0's quantile: 2.27667
Did not meet early stoppin

[1500]	valid_0's quantile: 2.35985
[3000]	valid_0's quantile: 2.29471
[4500]	valid_0's quantile: 2.27315
[6000]	valid_0's quantile: 2.25902
[7500]	valid_0's quantile: 2.24791
Early stopping, best iteration is:
[7711]	valid_0's quantile: 2.24578

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.47797
[3000]	valid_0's quantile: 2.4115
[4500]	valid_0's quantile: 2.38434
[6000]	valid_0's quantile: 2.36719
[7500]	valid_0's quantile: 2.3518
[9000]	valid_0's quantile: 2.33968
Did not meet early stopping. Best iteration is:
[10000]	valid_0's quantile: 2.3355

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.41481
[3000]	valid_0's quantile: 2.3439
[4500]	valid_0's quantile: 2.32347
[6000]	valid_0's quantile: 2.3039
[7500]	valid_0's quantile: 2.2982
[9000]	valid_0's quantile: 2.29315
Early stopping, best iteration is:
[9646]	valid_0's quantile: 2.28862

quantile: 0.6

Training until 


quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.45671
[3000]	valid_0's quantile: 2.39186
[4500]	valid_0's quantile: 2.35855
[6000]	valid_0's quantile: 2.33957
[7500]	valid_0's quantile: 2.3146
[9000]	valid_0's quantile: 2.30737
Did not meet early stopping. Best iteration is:
[10000]	valid_0's quantile: 2.30203

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.41205
[3000]	valid_0's quantile: 2.3464
[4500]	valid_0's quantile: 2.30462
[6000]	valid_0's quantile: 2.28031
[7500]	valid_0's quantile: 2.26095
Early stopping, best iteration is:
[8033]	valid_0's quantile: 2.25538

quantile: 0.6

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.20181
[3000]	valid_0's quantile: 2.15179
Early stopping, best iteration is:
[3690]	valid_0's quantile: 2.14197

quantile: 0.7

Training until validation scores don't improve for 300 rounds
[1500]	vali

In [67]:
# 2-day-ahead target on the "day23" feature set; files are tagged with 22.
(
    oof_preds2_day23,
    fold_metric2_day23,
    test_df2_day23,
    feature_imp2_day23,
) = LGBM_reg(X_train_day23, target_2_day23, X_test_day23, 22)


quantile: 0.1

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 1.3294
[3000]	valid_0's quantile: 1.31958
Early stopping, best iteration is:
[2803]	valid_0's quantile: 1.31893

quantile: 0.2

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.02275
[3000]	valid_0's quantile: 1.97012
Early stopping, best iteration is:
[3931]	valid_0's quantile: 1.95113

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.32008
[3000]	valid_0's quantile: 2.25993
[4500]	valid_0's quantile: 2.23022
[6000]	valid_0's quantile: 2.2121
[7500]	valid_0's quantile: 2.19859
Early stopping, best iteration is:
[7765]	valid_0's quantile: 2.19701

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.41175
[3000]	valid_0's quantile: 2.35018
[4500]	valid_0's quantile: 2.31563
[6000]	valid_0's quantile: 2.29375
[7500]	valid_0's quantile

[3000]	valid_0's quantile: 1.98832
[4500]	valid_0's quantile: 1.96984
[6000]	valid_0's quantile: 1.96043
[7500]	valid_0's quantile: 1.95428
[9000]	valid_0's quantile: 1.94696
Early stopping, best iteration is:
[8886]	valid_0's quantile: 1.94689

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.31161
[3000]	valid_0's quantile: 2.24932
Early stopping, best iteration is:
[4160]	valid_0's quantile: 2.22597

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.45356
[3000]	valid_0's quantile: 2.36333
[4500]	valid_0's quantile: 2.32727
[6000]	valid_0's quantile: 2.31109
[7500]	valid_0's quantile: 2.29571
[9000]	valid_0's quantile: 2.28325
Did not meet early stopping. Best iteration is:
[10000]	valid_0's quantile: 2.28008

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.42326
[3000]	valid_0's quantile: 2.35066
[4500]	valid_0's 

Early stopping, best iteration is:
[4672]	valid_0's quantile: 1.99473

quantile: 0.3

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.41522
[3000]	valid_0's quantile: 2.3467
[4500]	valid_0's quantile: 2.31651
[6000]	valid_0's quantile: 2.28924
Early stopping, best iteration is:
[6048]	valid_0's quantile: 2.28862

quantile: 0.4

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.49744
[3000]	valid_0's quantile: 2.40807
[4500]	valid_0's quantile: 2.37289
[6000]	valid_0's quantile: 2.35237
[7500]	valid_0's quantile: 2.34005
[9000]	valid_0's quantile: 2.33309
Did not meet early stopping. Best iteration is:
[10000]	valid_0's quantile: 2.32718

quantile: 0.5

Training until validation scores don't improve for 300 rounds
[1500]	valid_0's quantile: 2.44995
[3000]	valid_0's quantile: 2.38756
[4500]	valid_0's quantile: 2.35325
[6000]	valid_0's quantile: 2.34336
[7500]	valid_0's quantile: 2.33427
[9000]	valid_0's q

# Submission

In [68]:
# Day7 rows receive the 1-day-ahead predictions, Day8 rows the 2-day-ahead
# predictions; every column from "q_0.1" onward is overwritten.
day7_rows = submission.id.str.contains("Day7")
day8_rows = submission.id.str.contains("Day8")

submission.loc[day7_rows, "q_0.1":] = test_df.sort_index().values
submission.loc[day8_rows, "q_0.1":] = test_df2.sort_index().values
submission

Unnamed: 0,id,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
0,0.csv_Day7_0h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.csv_Day7_0h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.csv_Day7_1h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.csv_Day7_1h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.csv_Day7_2h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
7771,80.csv_Day8_21h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7772,80.csv_Day8_22h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7773,80.csv_Day8_22h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7774,80.csv_Day8_23h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [69]:
# Same fill for the "day23" models: Day7 rows from the 1-day-ahead model,
# Day8 rows from the 2-day-ahead model.
day7_rows = submission2.id.str.contains("Day7")
day8_rows = submission2.id.str.contains("Day8")

submission2.loc[day7_rows, "q_0.1":] = test_df_day23.sort_index().values
submission2.loc[day8_rows, "q_0.1":] = test_df2_day23.sort_index().values
submission2

Unnamed: 0,id,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
0,0.csv_Day7_0h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.csv_Day7_0h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.csv_Day7_1h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.csv_Day7_1h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.csv_Day7_2h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
7771,80.csv_Day8_21h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7772,80.csv_Day8_22h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7773,80.csv_Day8_22h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7774,80.csv_Day8_23h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Ensemble

In [70]:
# Weighted blend of the two submissions over every column after the first:
# 0.7 * submission + 0.3 * submission2.
main_part = submission.iloc[:, 1:] * 0.7
day23_part = submission2.iloc[:, 1:] * 0.3
sub_value = main_part + day23_part

# Re-attach the id column in front of the blended columns.
submission3 = pd.concat([submission[['id']], sub_value], axis=1)

# 제출

In [63]:
# time_str = datetime.datetime.strftime(
#     datetime.datetime.today(),
#     '%Y%m%d_%H%M%S'
# )

# Reuse the timestamp under which the training data was saved, so the
# submission file can be traced back to its feature set.
submission_file = os.path.join(submission_path, f'제출용_{time_str}.csv')
submission.to_csv(submission_file, index=False)

In [71]:
# Save the ensemble result for submission.
ensemble_file = os.path.join(
    submission_path, f'제출용_{time_str}_ensemble_가중평균.csv'
)
submission3.to_csv(ensemble_file, index=False)

Ensemble weight variants by output-file suffix (weight of `submission` / weight of `submission2`):

- 가중평균: 0.7/0.3
- 가중평균2: 0.8/0.2
- 가중평균3: 0.6/0.4

# 외부 데이터와 앙상블(magok)

In [20]:
# Blend with Pilryoung's externally produced submission file.
# NOTE(review): this rebinds the global time_str, which LGBM_reg and the
# save cells also read; re-running those afterwards would use this value.
time_str = '20210110_225651'
temp = pd.read_csv(os.path.join(pil_path, 'pilryoung_0105_3days_allcolumns_v2.csv'))
target = pd.read_csv(os.path.join(submission_path, f'제출용_{time_str}_ensemble_가중평균.csv'))

# 0.3 * external file + 0.7 * own ensemble, over every column after the first.
# NOTE(review): assumes both files hold the same ids in the same row order
# and the same column layout — confirm before trusting the blend.
target.iloc[:,1:] = temp.iloc[:,1:] * 0.3 + target.iloc[:,1:] * 0.7

In [33]:
# Persist the blend of the own ensemble and the external submission.
external_ensemble_file = os.path.join(
    submission_path,
    f'제출용_{time_str}_ensemble_external_submission_ensemble.csv',
)
target.to_csv(external_ensemble_file, index=False)

# 변수중요도 데이터 추출

In [46]:
# Load the feature-importance tables written for targets 1 and 2 of the run
# tagged with this timestamp.
time_str = '20210105_221155'

imp_path1 = f'{importance_path}/{time_str}_1_fe.csv'
imp1 = pd.read_csv(imp_path1)

imp_path2 = f'{importance_path}/{time_str}_2_fe.csv'
imp2 = pd.read_csv(imp_path2)

In [48]:
# Candidate features to remove; only referenced by the commented-out
# drop(columns=drop_col) lines earlier in the notebook.
drop_col = [
    'DNI_label',
    'TARGET_interval',
    '2days_mean_TARGET_rolling',
]

# inference data load

In [216]:
# Reload a previously saved ensemble submission, identified by its timestamp.
infer_time = '20210107_231340'
saved_ensemble_file = os.path.join(
    submission_path, f'제출용_{infer_time}_ensemble_가중평균.csv'
)
temp = pd.read_csv(saved_ensemble_file)

In [219]:
# Show the ensemble rows whose q_0.2 prediction is positive.
# NOTE(review): the rendered output contains negative predictions and rows
# whose quantiles are not increasing from q_0.1 to q_0.9 (e.g. q_0.6 below
# q_0.1); consider clipping and per-row sorting before submitting.
submission3[submission3['q_0.2'] > 0]

Unnamed: 0,id,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
1464,15.csv_Day7_12h00m,0.0583,0.0317,0.4843,0.2177,0.1127,-0.3424,-0.5417,-0.1243,0.3677
1465,15.csv_Day7_12h30m,0.0000,0.1504,0.3266,0.1161,0.1414,-0.3514,-0.3390,0.0201,0.3806
1483,15.csv_Day7_21h30m,0.0000,0.0947,0.3749,-0.1961,-0.6714,-0.7457,-0.7140,0.0957,0.6906
1487,15.csv_Day7_23h30m,0.5910,0.1130,-0.8010,-0.2110,-0.9950,-1.4290,-1.4220,1.2080,1.4630
1512,15.csv_Day8_12h00m,0.6081,0.9960,0.5841,0.4749,0.3951,0.3527,0.1316,0.3450,0.3909
...,...,...,...,...,...,...,...,...,...,...
6455,67.csv_Day7_11h30m,0.0000,0.0440,0.3720,-0.2900,-0.3560,-0.4870,-0.9970,0.2280,0.5550
6480,67.csv_Day8_0h00m,0.1240,0.1250,0.1100,0.1360,0.2830,0.3890,0.3810,0.2710,0.4630
6481,67.csv_Day8_0h30m,-0.0220,0.0430,-0.2070,-0.3850,-1.1430,-1.4040,-0.8530,0.1530,0.1000
6502,67.csv_Day8_11h00m,0.6560,0.2460,0.8870,1.0260,0.6070,-1.0190,0.5090,0.9180,1.2640


In [230]:
# Display the frame currently bound to `temp` (presumably the saved ensemble
# reloaded via infer_time above — the name is also assigned in the external
# blend cell, so this depends on execution order).
temp

Unnamed: 0,id,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
0,0.csv_Day7_0h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.csv_Day7_0h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.csv_Day7_1h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.csv_Day7_1h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.csv_Day7_2h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
7771,80.csv_Day8_21h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7772,80.csv_Day8_22h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7773,80.csv_Day8_22h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7774,80.csv_Day8_23h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
