<a href="https://colab.research.google.com/github/noweahc/energy/blob/main/0815%EC%A0%9C%EC%B6%9C(63%EC%A0%90_%E3%85%8B).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import numpy as np
from tqdm import tqdm
import xgboost


In [20]:
pip install sktime

Collecting sktime
  Downloading sktime-0.21.0-py3-none-any.whl (17.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting deprecated>=1.2.13 (from sktime)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting scikit-base<0.6.0 (from sktime)
  Downloading scikit_base-0.5.1-py3-none-any.whl (118 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.7/118.7 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-base, deprecated, sktime
Successfully installed deprecated-1.2.14 scikit-base-0.5.1 sktime-0.21.0


In [21]:
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sktime.forecasting.model_selection import temporal_train_test_split

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
train = pd.read_csv('/content/drive/MyDrive/23energy/23data/train.csv',encoding = 'utf-8')
test = pd.read_csv('/content/drive/MyDrive/23energy/23data/test.csv',encoding = 'utf-8')
building = pd.read_csv('/content/drive/MyDrive/23energy/23data/building_info.csv',encoding = 'utf-8')

In [6]:
train = train.rename(columns={
    '건물번호': 'num',
    '일시': 'date_time',
    '기온(C)': 'gion',
    '강수량(mm)': 'rain',
    '풍속(m/s)': 'wind',
    '습도(%)': 'hum',
    '일조(hr)': 'iljo',
    '일사(MJ/m2)': 'ilsa',
    '전력소비량(kWh)': 'power'
})
train.drop('num_date_time', axis = 1, inplace=True)

In [7]:
test = test.rename(columns={
    '건물번호': 'num',
    '일시': 'date_time',
    '기온(C)': 'gion',
    '강수량(mm)': 'rain',
    '풍속(m/s)': 'wind',
    '습도(%)': 'hum',
    '일조(hr)': 'iljo',
    '일사(MJ/m2)': 'ilsa',
    '전력소비량(kWh)': 'power'
})
test.drop('num_date_time', axis = 1, inplace=True)

In [8]:
building = building.rename(columns={
    '건물번호': 'num',
    '건물유형': 'type',
    '연면적(m2)': 'total_area',
    '냉방면적(m2)': 'cooling_area',
    '태양광용량(kW)': 'solar',
    'ESS저장용량(kWh)': 'ess',
    'PCS용량(kW)': 'pcs'
})

In [9]:
train = pd.merge(train, building, on='num', how='left')
test = pd.merge(test, building, on='num', how='left')

In [10]:
def make_time(train):
    """
    시간 관련 변수를 추가하기 위한 함수
    """
    train['date_time'] = pd.to_datetime(train.date_time)

    train['month'] = train.date_time.dt.month                    # 월(숫자)
    train['day'] = train.date_time.dt.day                        # 일(숫자)
    train['hour'] = train.date_time.dt.hour                      # 시(숫자)
    train['weekday'] = train.date_time.dt.weekday                # 요일(숫자)
    train['dayofyear'] = train.date_time.dt.dayofyear            # 연 기준 몇일째(숫자)
    train['monthday'] = train.date_time.dt.strftime('%m%d')      # 월일

    return train

In [11]:
train = make_time(train)
test = make_time(test)

In [12]:
# 문자열을 실수형으로 변환하고 "-"을 0으로 대체합니다.
train["solar"] = train["solar"].apply(lambda x: float(x) if x != "-" else 0)
train["ess"] = train["ess"].apply(lambda x: float(x) if x != "-" else 0)
train["pcs"] = train["pcs"].apply(lambda x: float(x) if x != "-" else 0)

# 문자열을 실수형으로 변환하고 "-"을 0으로 대체합니다.
test["solar"] = test["solar"].apply(lambda x: float(x) if x != "-" else 0)
test["ess"] = test["ess"].apply(lambda x: float(x) if x != "-" else 0)
test["pcs"] = test["pcs"].apply(lambda x: float(x) if x != "-" else 0)

In [13]:
#토요일, 일요일은 주말로 처리
#전국동시지방선거일, 현충일, 광복절도 주말로 처리

train.loc[train.weekday >= 5, 'holiday'] = 1
train.loc[(train.weekday < 5), 'holiday'] = 0

#train.loc[train.monthday == '0601', 'holiday'] = 1
#train.loc[train.monthday == '0606', 'holiday'] = 1
#train.loc[train.monthday == '0815', 'holiday'] = 1
train.loc[train['monthday'].isin(['0601', '0606', '0815']), 'holiday'] = 1

test.loc[test.weekday >= 5, 'holiday'] = 1
test.loc[test.weekday < 5, 'holiday'] = 0

In [14]:
#건물유형 숫자로 처리하는 라벨인코딩

label_mapping = {
    '건물기타': 0,
    '공공': 1,
    '대학교': 2,
    '데이터센터': 3,
    '백화점및아울렛': 4,
    '병원': 5,
    '상용': 6,
    '아파트': 7,
    '연구소': 8,
    '지식산업센터': 9,
    '할인마트': 10,
    '호텔및리조트': 11
}

train['type'] = train['type'].replace(label_mapping)
test['type'] = test['type'].replace(label_mapping)

In [15]:
#파생변수 : 체감온도
train['sensory_temp'] = 13.12 + 0.6215 * train['gion'] - 11.37 * (train['wind'] ** 0.16) + 0.3965 * (train['wind'] ** 0.16) * train['gion']
test['sensory_temp'] = 13.12 + 0.6215 * test['gion'] - 11.37 * (test['wind'] ** 0.16) + 0.3965 * (test['wind'] ** 0.16) * test['gion']

In [16]:
#https://dacon.io/competitions/official/235680/codeshare/2366?page=1&dtype=recent
#아이디어 참고
#시간의 순환적 성격 반영하기

train['sin_time'] = np.sin(2*np.pi*train.hour/24)
train['cos_time'] = np.cos(2*np.pi*train.hour/24)

test['sin_time'] = np.sin(2*np.pi*test.hour/24)
test['cos_time'] = np.cos(2*np.pi*test.hour/24)

In [17]:
#https://dacon.io/competitions/official/235736/codeshare/2743?page=1&dtype=recent
#아이디어 참고
#기온과 습도를 고려한 불쾌지수(THI)
#CDH(Cooling Degree Hour) : 기온이 실제 냉방 가동에 이르기까지의 시간적 오차를 고려

train['THI'] = 9/5*train['gion'] - 0.55*(1-train['hum']/100)*(9/5*train['hum']-26)+32
test['THI'] = 9/5*test['gion'] - 0.55*(1-test['hum']/100)*(9/5*test['hum']-26)+32

def CDH(xs):
    ys = []
    for i in range(len(xs)):
        if i < 11:
            ys.append(np.sum(xs[:(i+1)]-26))
        else:
            ys.append(np.sum(xs[(i-11):(i+1)]-26))
    return np.array(ys)

cdhs = np.array([])
for num in range(1,101,1):
    temp = train[train['num'] == num]
    cdh = CDH(temp['gion'].values)
    cdhs = np.concatenate([cdhs, cdh])
train['CDH'] = cdhs

cdhs = np.array([])
for num in range(1,101,1):
    temp = test[test['num'] == num]
    cdh = CDH(temp['gion'].values)
    cdhs = np.concatenate([cdhs, cdh])
test['CDH'] = cdhs

In [23]:
#성능 평가 지표 : weighted_mse
#### alpha를 argument로 받는 함수로 실제 objective function을 wrapping하여 alpha값을 쉽게 조정할 수 있도록 작성했습니다.
# custom objective function for forcing model not to underestimate
def weighted_mse(alpha = 1):
    def weighted_mse_fixed(label, pred):
        residual = (label - pred).astype("float")
        grad = np.where(residual>0, -2*alpha*residual, -2*residual)
        hess = np.where(residual>0, 2*alpha, 2.0)
        return grad, hess
    return weighted_mse_fixed

In [24]:
def SMAPE(true, pred):
    return np.mean((np.abs(true-pred))/(np.abs(true) + np.abs(pred))) * 100

In [26]:
train.drop(['date_time','rain','iljo','ilsa','solar','ess','pcs','hour','monthday'], axis = 1, inplace = True)
test.drop(['date_time','rain','solar','ess','pcs','hour','monthday'], axis = 1, inplace = True)

In [36]:
#best iteration 찾기 : 하이퍼파라미터 후보 df인 xgb_params에 저장하면 됨
scores = []   # smape 값을 저장할 list
best_it = []  # best interation을 저장할 list
for i in tqdm(range(100)):
    y = train.loc[train.num== i+1, 'power']
    x = train.loc[train.num == i+1, ].iloc[:, 3:]
    y_train, y_valid, x_train, x_valid = temporal_train_test_split(y = y, X = x, test_size = 168)

    xgb_model = xgboost.XGBRegressor(n_estimators=100, learning_rate=0.08, gamma=0, subsample=0.75,
                           colsample_bytree=1, max_depth=7)

    xgb_model.set_params(**{'objective':weighted_mse(100)}) # alpha = 100으로 고정

    xgb_model.fit(x_train, y_train, eval_set=[(x_train, y_train),
                                            (x_valid, y_valid)], early_stopping_rounds=300, verbose=False)
    y_pred = xgb_model.predict(x_valid)
    pred = pd.Series(y_pred)

    sm = SMAPE(y_valid, y_pred)
    scores.append(sm)
    best_it.append(xgb_model.best_iteration) ## 실제 best iteration은 이 값에 +1 해주어야 함.

100%|██████████| 100/100 [01:16<00:00,  1.30it/s]


In [37]:
#weighted MSE의 alpha값 튜닝단계 : 하이퍼파라미터 후보 df인 xgb_params에 저장하면됨
alpha_list = []
smape_list = []
for i in tqdm(range(100)):
    y = train.loc[train.num == i+1, 'power']
    x = train.loc[train.num == i+1, ].iloc[:, 3:]
    y_train, y_test, x_train, x_test = temporal_train_test_split(y = y, X = x, test_size = 168)
    xgb_model = xgboost.XGBRegressor(n_estimators=100, learning_rate=0.08, gamma=0, subsample=0.75,
                           colsample_bytree=1, max_depth=7)

    xgb_model.fit(x_train, y_train)
    pred0 = xgb_model.predict(x_test)
    best_alpha = 0
    score0 = SMAPE(y_test,pred0)

    for j in [1, 3, 5, 7, 10, 25, 50, 75, 100]:
        xgb_model = xgboost.XGBRegressor(n_estimators=100, learning_rate=0.08, gamma=0, subsample=0.75,
                           colsample_bytree=1, max_depth=7)
        xgb_model.set_params(**{'objective' : weighted_mse(j)})

        xgb_model.fit(x_train, y_train)
        pred1 = xgb_model.predict(x_test)
        score1 = SMAPE(y_test, pred1)
        if score1 < score0:
            best_alpha = j
            score0 = score1

    alpha_list.append(best_alpha)
    smape_list.append(score0)
    print("building {} || best score : {} || alpha : {}".format(i+1, score0, best_alpha))

  1%|          | 1/100 [00:08<14:13,  8.62s/it]

building 1 || best score : 0.024593073735724086 || alpha : 100


  2%|▏         | 2/100 [00:13<10:22,  6.35s/it]

building 2 || best score : 0.025594065433553255 || alpha : 100


  3%|▎         | 3/100 [00:18<09:07,  5.65s/it]

building 3 || best score : 0.027891955856816024 || alpha : 100


  4%|▍         | 4/100 [00:26<10:38,  6.65s/it]

building 4 || best score : 0.022408321184390623 || alpha : 100


  5%|▌         | 5/100 [00:31<09:36,  6.07s/it]

building 5 || best score : 0.028917455395777246 || alpha : 100


  6%|▌         | 6/100 [00:43<12:30,  7.98s/it]

building 6 || best score : 0.02426139600688384 || alpha : 100


  7%|▋         | 7/100 [00:58<16:09, 10.43s/it]

building 7 || best score : 0.02474045778821683 || alpha : 50


  8%|▊         | 8/100 [01:10<16:28, 10.75s/it]

building 8 || best score : 0.014591518925317138 || alpha : 100


  9%|▉         | 9/100 [01:23<17:29, 11.53s/it]

building 9 || best score : 0.026257541185933805 || alpha : 100


 10%|█         | 10/100 [01:27<14:00,  9.34s/it]

building 10 || best score : 0.017172380903007798 || alpha : 75


 11%|█         | 11/100 [01:35<13:12,  8.90s/it]

building 11 || best score : 0.018857968065158326 || alpha : 75


 12%|█▏        | 12/100 [01:40<11:07,  7.59s/it]

building 12 || best score : 0.017646144121592065 || alpha : 100


 13%|█▎        | 13/100 [01:44<09:41,  6.69s/it]

building 13 || best score : 0.01566032940238986 || alpha : 100


 14%|█▍        | 14/100 [01:52<10:11,  7.11s/it]

building 14 || best score : 0.02274831489975422 || alpha : 100


 15%|█▌        | 15/100 [01:58<09:15,  6.53s/it]

building 15 || best score : 0.018539330468638027 || alpha : 100


 16%|█▌        | 16/100 [02:06<09:58,  7.12s/it]

building 16 || best score : 0.028015954690083618 || alpha : 100


 17%|█▋        | 17/100 [02:11<08:52,  6.42s/it]

building 17 || best score : 0.029742014749274914 || alpha : 100


 18%|█▊        | 18/100 [02:19<09:28,  6.93s/it]

building 18 || best score : 0.025810079165088123 || alpha : 100


 19%|█▉        | 19/100 [02:24<08:31,  6.32s/it]

building 19 || best score : 0.023568579492431976 || alpha : 100


 20%|██        | 20/100 [02:32<09:09,  6.87s/it]

building 20 || best score : 0.02815418984216789 || alpha : 100


 21%|██        | 21/100 [02:37<08:11,  6.23s/it]

building 21 || best score : 0.023145955574485094 || alpha : 100


 22%|██▏       | 22/100 [02:45<08:57,  6.89s/it]

building 22 || best score : 0.01776668520652361 || alpha : 50


 23%|██▎       | 23/100 [02:50<08:04,  6.30s/it]

building 23 || best score : 0.016520721466160668 || alpha : 100


 24%|██▍       | 24/100 [02:59<08:57,  7.08s/it]

building 24 || best score : 0.023471482711169406 || alpha : 100


 25%|██▌       | 25/100 [03:07<09:14,  7.39s/it]

building 25 || best score : 0.01887395532787551 || alpha : 100


 26%|██▌       | 26/100 [03:16<09:29,  7.70s/it]

building 26 || best score : 0.02223219986980767 || alpha : 100


 27%|██▋       | 27/100 [03:20<08:12,  6.75s/it]

building 27 || best score : 0.020597455147339808 || alpha : 50


 28%|██▊       | 28/100 [03:28<08:37,  7.19s/it]

building 28 || best score : 0.02648825444052737 || alpha : 100


 29%|██▉       | 29/100 [03:33<07:36,  6.43s/it]

building 29 || best score : 0.020280620706045405 || alpha : 100


 30%|███       | 30/100 [03:41<08:08,  6.98s/it]

building 30 || best score : 0.04428824554012966 || alpha : 75


 31%|███       | 31/100 [03:46<07:15,  6.31s/it]

building 31 || best score : 0.023820043948070555 || alpha : 100


 32%|███▏      | 32/100 [03:50<06:20,  5.60s/it]

building 32 || best score : 0.01219985851392029 || alpha : 50


 33%|███▎      | 33/100 [03:57<06:46,  6.07s/it]

building 33 || best score : 0.012565249662921394 || alpha : 75


 34%|███▍      | 34/100 [04:01<06:00,  5.47s/it]

building 34 || best score : 0.014426825529521885 || alpha : 25


 35%|███▌      | 35/100 [04:08<06:29,  6.00s/it]

building 35 || best score : 0.012315827549102442 || alpha : 75


 36%|███▌      | 36/100 [04:12<05:41,  5.34s/it]

building 36 || best score : 0.011227091527374498 || alpha : 75


 37%|███▋      | 37/100 [04:17<05:27,  5.20s/it]

building 37 || best score : 0.025296220484290883 || alpha : 100


 38%|███▊      | 38/100 [04:25<06:19,  6.13s/it]

building 38 || best score : 0.027836172398989004 || alpha : 75


 39%|███▉      | 39/100 [04:30<05:50,  5.74s/it]

building 39 || best score : 0.027534208992948332 || alpha : 100


 40%|████      | 40/100 [04:39<06:32,  6.55s/it]

building 40 || best score : 0.033769314942675384 || alpha : 100


 41%|████      | 41/100 [04:43<05:56,  6.04s/it]

building 41 || best score : 0.031278112709916886 || alpha : 100


 42%|████▏     | 42/100 [04:52<06:35,  6.82s/it]

building 42 || best score : 0.03604199681579108 || alpha : 100


 43%|████▎     | 43/100 [04:57<05:58,  6.28s/it]

building 43 || best score : 0.03598187527077645 || alpha : 100


 44%|████▍     | 44/100 [05:05<06:24,  6.87s/it]

building 44 || best score : 0.030354044728381914 || alpha : 75


 45%|████▌     | 45/100 [05:10<05:39,  6.18s/it]

building 45 || best score : 0.017137184012669893 || alpha : 100


 46%|████▌     | 46/100 [05:18<06:06,  6.78s/it]

building 46 || best score : 0.0202092620134761 || alpha : 100


 47%|████▋     | 47/100 [05:23<05:22,  6.08s/it]

building 47 || best score : 0.018199533082909735 || alpha : 100


 48%|████▊     | 48/100 [05:27<04:52,  5.63s/it]

building 48 || best score : 0.020880565295447406 || alpha : 100


 49%|████▉     | 49/100 [05:35<05:23,  6.35s/it]

building 49 || best score : 0.020463350834259316 || alpha : 75


 50%|█████     | 50/100 [05:40<04:55,  5.91s/it]

building 50 || best score : 0.01939045147621631 || alpha : 100


 51%|█████     | 51/100 [05:48<05:21,  6.55s/it]

building 51 || best score : 0.02195106746612979 || alpha : 100


 52%|█████▏    | 52/100 [05:53<04:54,  6.13s/it]

building 52 || best score : 0.020360039837476758 || alpha : 75


 53%|█████▎    | 53/100 [06:02<05:18,  6.78s/it]

building 53 || best score : 0.029942794143241466 || alpha : 100


 54%|█████▍    | 54/100 [06:06<04:46,  6.23s/it]

building 54 || best score : 0.03466728419298132 || alpha : 100


 55%|█████▌    | 55/100 [06:14<05:00,  6.67s/it]

building 55 || best score : 0.013095883063575904 || alpha : 100


 56%|█████▌    | 56/100 [06:18<04:19,  5.90s/it]

building 56 || best score : 0.013863500704368526 || alpha : 100


 57%|█████▋    | 57/100 [06:23<04:00,  5.60s/it]

building 57 || best score : 0.027422605121461715 || alpha : 100


 58%|█████▊    | 58/100 [06:38<05:49,  8.33s/it]

building 58 || best score : 0.011813739700914093 || alpha : 100


 59%|█████▉    | 59/100 [06:48<06:08,  8.98s/it]

building 59 || best score : 0.0260174434381306 || alpha : 100


 60%|██████    | 60/100 [06:55<05:34,  8.37s/it]

building 60 || best score : 0.02479075872292346 || alpha : 50


 61%|██████    | 61/100 [07:07<05:59,  9.22s/it]

building 61 || best score : 0.026582919436373764 || alpha : 100


 62%|██████▏   | 62/100 [07:12<05:05,  8.03s/it]

building 62 || best score : 0.028779039366441998 || alpha : 100


 63%|██████▎   | 63/100 [07:24<05:39,  9.18s/it]

building 63 || best score : 0.026961850437049927 || alpha : 100


 64%|██████▍   | 64/100 [07:36<05:59,  9.99s/it]

building 64 || best score : 0.02287257516915706 || alpha : 75


 65%|██████▌   | 65/100 [07:43<05:20,  9.16s/it]

building 65 || best score : 0.031099815493183223 || alpha : 100


 66%|██████▌   | 66/100 [07:51<04:59,  8.80s/it]

building 66 || best score : 0.01658649493281905 || alpha : 100


 67%|██████▋   | 67/100 [07:55<04:09,  7.57s/it]

building 67 || best score : 0.02540709986025459 || alpha : 100


 68%|██████▊   | 68/100 [08:04<04:08,  7.78s/it]

building 68 || best score : 0.02421263143116482 || alpha : 100


 69%|██████▉   | 69/100 [08:08<03:31,  6.82s/it]

building 69 || best score : 0.018951802015803813 || alpha : 100


 70%|███████   | 70/100 [08:17<03:37,  7.25s/it]

building 70 || best score : 0.01869771014482134 || alpha : 100


 71%|███████   | 71/100 [08:21<03:07,  6.46s/it]

building 71 || best score : 0.019696710741999277 || alpha : 100


 72%|███████▏  | 72/100 [08:29<03:12,  6.88s/it]

building 72 || best score : 0.018632436898357148 || alpha : 100


 73%|███████▎  | 73/100 [08:33<02:46,  6.15s/it]

building 73 || best score : 0.018673758803065016 || alpha : 75


 74%|███████▍  | 74/100 [08:38<02:29,  5.75s/it]

building 74 || best score : 0.023522533205848052 || alpha : 100


 75%|███████▌  | 75/100 [08:46<02:42,  6.48s/it]

building 75 || best score : 0.019653284398481217 || alpha : 50


 76%|███████▌  | 76/100 [08:51<02:21,  5.90s/it]

building 76 || best score : 0.019096742369890108 || alpha : 100


 77%|███████▋  | 77/100 [08:59<02:30,  6.56s/it]

building 77 || best score : 0.020480999704880338 || alpha : 100


 78%|███████▊  | 78/100 [09:04<02:12,  6.03s/it]

building 78 || best score : 0.025310302138874936 || alpha : 100


 79%|███████▉  | 79/100 [09:12<02:21,  6.72s/it]

building 79 || best score : 0.02046263781825583 || alpha : 100


 80%|████████  | 80/100 [09:17<02:04,  6.20s/it]

building 80 || best score : 0.028653317347168146 || alpha : 100


 81%|████████  | 81/100 [09:25<02:09,  6.81s/it]

building 81 || best score : 0.03110261733023183 || alpha : 75


 82%|████████▏ | 82/100 [09:31<01:55,  6.40s/it]

building 82 || best score : 0.03314364096903938 || alpha : 100


 83%|████████▎ | 83/100 [09:39<01:58,  7.00s/it]

building 83 || best score : 0.032478452369399564 || alpha : 75


 84%|████████▍ | 84/100 [09:44<01:41,  6.34s/it]

building 84 || best score : 0.032426401661927326 || alpha : 100


 85%|████████▌ | 85/100 [09:55<01:57,  7.85s/it]

building 85 || best score : 0.01651348712015852 || alpha : 75


 86%|████████▌ | 86/100 [10:00<01:37,  6.93s/it]

building 86 || best score : 0.027775817803769785 || alpha : 100


 87%|████████▋ | 87/100 [10:09<01:38,  7.55s/it]

building 87 || best score : 0.03640516085713437 || alpha : 100


 88%|████████▊ | 88/100 [10:14<01:20,  6.72s/it]

building 88 || best score : 0.02555342364594687 || alpha : 100


 89%|████████▉ | 89/100 [10:22<01:18,  7.18s/it]

building 89 || best score : 0.025558920501924005 || alpha : 75


 90%|█████████ | 90/100 [10:28<01:06,  6.70s/it]

building 90 || best score : 0.03092276125897425 || alpha : 100


 91%|█████████ | 91/100 [10:36<01:04,  7.18s/it]

building 91 || best score : 0.032509978754613304 || alpha : 100


 92%|█████████▏| 92/100 [10:41<00:51,  6.45s/it]

building 92 || best score : 0.03017289991011336 || alpha : 75


 93%|█████████▎| 93/100 [10:49<00:48,  6.93s/it]

building 93 || best score : 0.01900120318375487 || alpha : 75


 94%|█████████▍| 94/100 [10:54<00:38,  6.40s/it]

building 94 || best score : 0.03584938619790484 || alpha : 100


 95%|█████████▌| 95/100 [11:07<00:41,  8.28s/it]

building 95 || best score : 0.023301545764485856 || alpha : 100


 96%|█████████▌| 96/100 [11:12<00:28,  7.23s/it]

building 96 || best score : 0.018465172447915457 || alpha : 100


 97%|█████████▋| 97/100 [11:22<00:24,  8.12s/it]

building 97 || best score : 0.024020304177918585 || alpha : 75


 98%|█████████▊| 98/100 [11:33<00:18,  9.20s/it]

building 98 || best score : 0.03578518664358602 || alpha : 100


 99%|█████████▉| 99/100 [11:41<00:08,  8.79s/it]

building 99 || best score : 0.024052575725348888 || alpha : 75


100%|██████████| 100/100 [11:52<00:00,  7.13s/it]

building 100 || best score : 0.027790464102279727 || alpha : 100





In [48]:
#seed별로 예측값이 바뀌니까 seed 0부터 6까지 평균내서
preds = np.array([])
for i in tqdm(range(100)):

    pred_df = pd.DataFrame()   # 시드별 예측값을 담을 data frame

    for seed in [0,1,2,3,4,5]: # 각 시드별 예측
        y_train = train.loc[train.num == i+1, 'power']
        x_train, x_test = train.loc[train.num == i+1, ].iloc[:, 3:], test.loc[test.num == i+1, ].iloc[:,1:]
        #x_test['power'] = 0 ####여기 이상 ..
        x_test = x_test[x_train.columns]
        '''
        xgb = XGBRegressor(seed = seed, n_estimators = best_it[i], eta = 0.01,
                           min_child_weight = xgb_params.iloc[i, 2], max_depth = xgb_params.iloc[i, 3],
                           colsample_bytree=xgb_params.iloc[i, 4], subsample=xgb_params.iloc[i, 5])
                           '''
        xgb = xgboost.XGBRegressor(n_estimators=100, learning_rate=0.08, gamma=0, subsample=0.75,
                           colsample_bytree=1, max_depth=7)
        '''
        if xgb_params.iloc[i,6] != 0:  # 만약 alpha가 0이 아니면 weighted_mse 사용
            xgb.set_params(**{'objective':weighted_mse(xgb_params.iloc[i,6])})
        '''
        xgb.fit(x_train, y_train)
        y_pred = xgb.predict(x_test)
        pred_df.loc[:,seed] = y_pred   # 각 시드별 예측 담기

    pred = pred_df.mean(axis=1)        # (i+1)번째 건물의 예측 =  (i+1)번째 건물의 각 시드별 예측 평균값
    preds = np.append(preds, pred)

100%|██████████| 100/100 [04:35<00:00,  2.76s/it]


In [49]:
submission = pd.read_csv('/content/drive/MyDrive/23energy/23data/sample_submission.csv')
submission['answer'] = preds
submission.to_csv('submission_xgb_0815.csv', index = False)

In [50]:
y_train = train.loc[train.num == 1, 'power']
x_train, x_test = train.loc[train.num == i+1, ].iloc[:, 3:], test.loc[test.num == i+1, ].iloc[:,1:]

In [51]:
train

Unnamed: 0,num,gion,wind,hum,power,type,total_area,cooling_area,month,day,weekday,dayofyear,holiday,sensory_temp,sin_time,cos_time,THI,CDH
0,1,18.6,0.9,42.0,1085.28,0,110634.00,39570.00,6,1,2,152,1.0,20.751584,0.000000,1.000000,49.6576,-7.4
1,1,18.0,1.1,45.0,1047.36,0,110634.00,39570.00,6,1,2,152,1.0,20.008954,0.258819,0.965926,47.7625,-15.4
2,1,17.7,1.5,45.0,974.88,0,110634.00,39570.00,6,1,2,152,1.0,19.476910,0.500000,0.866025,47.2225,-23.7
3,1,16.7,1.4,48.0,953.76,0,110634.00,39570.00,6,1,2,152,1.0,18.487958,0.707107,0.707107,44.7856,-33.0
4,1,18.4,2.8,43.0,986.40,0,110634.00,39570.00,6,1,2,152,1.0,19.751534,0.866025,0.500000,49.0061,-40.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203995,100,23.1,0.9,86.0,881.04,11,57497.84,40035.23,8,24,2,236,0.0,25.302757,-0.965926,0.258819,63.6624,-19.6
203996,100,22.4,1.3,86.0,798.96,11,57497.84,40035.23,8,24,2,236,0.0,24.446518,-0.866025,0.500000,62.4024,-20.2
203997,100,21.3,1.0,92.0,825.12,11,57497.84,40035.23,8,24,2,236,0.0,23.433400,-0.707107,0.707107,64.1976,-22.3
203998,100,21.0,0.3,94.0,640.08,11,57497.84,40035.23,8,24,2,236,0.0,23.661275,-0.500000,0.866025,65.0744,-25.1


In [52]:
test

Unnamed: 0,num,gion,wind,hum,type,total_area,cooling_area,month,day,weekday,dayofyear,holiday,sensory_temp,sin_time,cos_time,THI,CDH
0,1,23.5,2.2,72,0,110634.00,39570.00,8,25,3,237,0.0,25.397063,0.000000,1.000000,58.3456,-2.5
1,1,23.0,0.9,72,0,110634.00,39570.00,8,25,3,237,0.0,25.201620,0.258819,0.965926,57.4456,-5.5
2,1,22.7,1.5,75,0,110634.00,39570.00,8,25,3,237,0.0,24.699788,0.500000,0.866025,57.8725,-8.8
3,1,22.1,1.3,78,0,110634.00,39570.00,8,25,3,237,0.0,24.136018,0.707107,0.707107,57.9376,-12.7
4,1,21.8,1.0,77,0,110634.00,39570.00,8,25,3,237,0.0,23.942400,0.866025,0.500000,56.9961,-16.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16795,100,22.5,0.9,84,11,57497.84,40035.23,8,31,2,243,0.0,24.695934,-0.965926,0.258819,61.4824,-34.5
16796,100,20.7,0.4,95,11,57497.84,40035.23,8,31,2,243,0.0,23.253851,-0.866025,0.500000,65.2725,-34.4
16797,100,20.2,0.4,98,11,57497.84,40035.23,8,31,2,243,0.0,22.771886,-0.707107,0.707107,66.7056,-35.3
16798,100,20.1,1.1,97,11,57497.84,40035.23,8,31,2,243,0.0,22.159549,-0.500000,0.866025,65.7281,-36.8
