# pycaret을 이용한 가스공급량 예측
* 2019년 test 데이터 : '../data/test.csv'
* 2019년 제출 파일 : '../data/sample_submission.csv'
* 2013-2018년 가스공급량과 기온 자료 : '../2013-2018년_가스공급량_기온_습도_기압01.csv'
* 사용 특성 : 'month', 'day', 'weekday', '시간', '구분', '기온', '습도', '기압'
* 3개 모델을 블렌딩(blend)하여 예측

In [None]:
# 제출 점수 : 

## 데이터 가져오기
* 2019년 test 데이터 : '../data/test.csv'
* 2019년 제출 파일 : '../data/sample_submission.csv'
* 2013-2018년 가스공급량과 기온 자료 : '../2013-2018년_가스공급량_기온_습도_기압01.csv'

In [1]:
import pandas as pd

### 2013-2018년 가스공급량과 기온 자료

In [2]:
# Load the 2013-2018 table (gas supply plus weather columns) that every model
# below is trained on, and preview its first rows.
total = pd.read_csv('../2013-2018년_가스공급량_기온_습도_기압01.csv')
total.head()

Unnamed: 0,year,month,day,weekday,시간,구분,공급량,기온,습도,기압
0,2013,1,1,1,1,0,2497.129,-8.5,57.0,1010.0
1,2013,1,1,1,1,1,2169.093,-8.5,57.0,1010.0
2,2013,1,1,1,1,2,226.178,-8.5,57.0,1010.0
3,2013,1,1,1,1,3,1434.516,-8.5,57.0,1010.0
4,2013,1,1,1,1,4,3272.837,-8.5,57.0,1010.0


### 2019년 데이터

In [3]:
# Load the 2019 test rows; each row is a single '일자|시간|구분' key string.
test2019 = pd.read_csv('../data/test.csv')
test2019.head()

Unnamed: 0,일자|시간|구분
0,2019-01-01 01 A
1,2019-01-01 02 A
2,2019-01-01 03 A
3,2019-01-01 04 A
4,2019-01-01 05 A


In [4]:
# Split the combined '일자|시간|구분' key into date, hour and category columns.
test2019[['연월일', '시간', '구분']] = test2019['일자|시간|구분'].str.split(' ').tolist()
# str.split yields strings ('01', '02', ...); cast the hour to int so it has the
# same type as the integer '시간' column of the training table.
test2019['시간'] = test2019['시간'].astype(int)
test2019.head()

Unnamed: 0,일자|시간|구분,연월일,시간,구분
0,2019-01-01 01 A,2019-01-01,1,A
1,2019-01-01 02 A,2019-01-01,2,A
2,2019-01-01 03 A,2019-01-01,3,A
3,2019-01-01 04 A,2019-01-01,4,A
4,2019-01-01 05 A,2019-01-01,5,A


In [5]:
# Encode the '구분' labels as integers, numbered in order of first appearance.
d_map = {label: code for code, label in enumerate(test2019['구분'].unique())}
test2019['구분'] = test2019['구분'].map(d_map)
test2019.head()

Unnamed: 0,일자|시간|구분,연월일,시간,구분
0,2019-01-01 01 A,2019-01-01,1,0
1,2019-01-01 02 A,2019-01-01,2,0
2,2019-01-01 03 A,2019-01-01,3,0
3,2019-01-01 04 A,2019-01-01,4,0
4,2019-01-01 05 A,2019-01-01,5,0


In [6]:
# '연월일' is still object (string) dtype: parse it to datetime, then derive
# year, month, day and weekday as separate columns.
test2019['연월일'] = pd.to_datetime(test2019['연월일'])
for part in ('year', 'month', 'day', 'weekday'):
    test2019[part] = getattr(test2019['연월일'].dt, part)
test2019.head()

Unnamed: 0,일자|시간|구분,연월일,시간,구분,year,month,day,weekday
0,2019-01-01 01 A,2019-01-01,1,0,2019,1,1,1
1,2019-01-01 02 A,2019-01-01,2,0,2019,1,1,1
2,2019-01-01 03 A,2019-01-01,3,0,2019,1,1,1
3,2019-01-01 04 A,2019-01-01,4,0,2019,1,1,1
4,2019-01-01 05 A,2019-01-01,5,0,2019,1,1,1


## 2019년 기상 특성 예측하기
* 습도 예측
* 기압 예측
* 기온 예측

In [7]:
from pycaret.regression import *

#### 습도(Humidity) 예측

In [8]:
# Training features: 'month', 'day', 'weekday', '시간', '구분'
# Target feature: '습도'

# Load the previously saved humidity model instead of retraining it here.
humidity_prediction_model = load_model('humidity_prediction_model_01')

Transformation Pipeline and Model Successfully Loaded


In [15]:
%%time
# Pass only the humidity model's feature columns and predict 2019 humidity.
df_for_temp = test2019[['month', 'day', 'weekday', '시간', '구분']]
humidity2019_pred = predict_model(humidity_prediction_model, data=df_for_temp)

Wall time: 2.69 s


In [16]:
# Preview the 2019 humidity predictions; the prediction frame created by
# predict_model is named humidity2019_pred.
humidity2019_pred.head()

Unnamed: 0,month,day,weekday,시간,구분,Label
0,1,1,1,1,0,64.463024
1,1,1,1,2,0,65.66405
2,1,1,1,3,0,65.659699
3,1,1,1,4,0,66.32069
4,1,1,1,5,0,67.903796


In [17]:
# Check the current test2019 columns before attaching the predicted humidity.
test2019.head()

Unnamed: 0,일자|시간|구분,연월일,시간,구분,year,month,day,weekday
0,2019-01-01 01 A,2019-01-01,1,0,2019,1,1,1
1,2019-01-01 02 A,2019-01-01,2,0,2019,1,1,1
2,2019-01-01 03 A,2019-01-01,3,0,2019,1,1,1
3,2019-01-01 04 A,2019-01-01,4,0,2019,1,1,1
4,2019-01-01 05 A,2019-01-01,5,0,2019,1,1,1


In [18]:
# Store the predictions ('Label' column of the prediction frame) as the
# '습도' feature of the 2019 rows.
test2019['습도'] = humidity2019_pred['Label']
test2019.head()

Unnamed: 0,일자|시간|구분,연월일,시간,구분,year,month,day,weekday,습도
0,2019-01-01 01 A,2019-01-01,1,0,2019,1,1,1,64.463024
1,2019-01-01 02 A,2019-01-01,2,0,2019,1,1,1,65.66405
2,2019-01-01 03 A,2019-01-01,3,0,2019,1,1,1,65.659699
3,2019-01-01 04 A,2019-01-01,4,0,2019,1,1,1,66.32069
4,2019-01-01 05 A,2019-01-01,5,0,2019,1,1,1,67.903796


#### 기압 예측

In [23]:
# Training features: 'month', 'day', 'weekday', '시간', '구분'
# Target feature: '기압'

# Ignoring '공급량', 'year', '기온' and '습도' leaves exactly the features above.
exp = setup(total, target='기압', ignore_features=['공급량', 'year', '기온', '습도'])

Unnamed: 0,Description,Value
0,session_id,4853
1,Target,기압
2,Original Data,"(368088, 10)"
3,Missing Values,False
4,Numeric Features,2
5,Categorical Features,3
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(257661, 28)"


In [25]:
%%time
# Compare candidate regressors ranked by MAPE and keep the three best.
pressure_model3 = compare_models(sort='MAPE', n_select=3)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
dt,Decision Tree Regressor,0.0059,0.0189,0.1298,0.9997,0.0001,0.0,0.83
rf,Random Forest Regressor,0.045,0.0096,0.098,0.9999,0.0001,0.0,51.215
et,Extra Trees Regressor,0.0787,0.025,0.158,0.9996,0.0002,0.0001,64.95
xgboost,Extreme Gradient Boosting,2.1009,7.3405,2.7093,0.8873,0.0027,0.0021,13.754
catboost,CatBoost Regressor,2.3363,8.7558,2.959,0.8656,0.0029,0.0023,21.093
lightgbm,Light Gradient Boosting Machine,2.9767,14.537,3.8127,0.7769,0.0038,0.003,1.032
knn,K Neighbors Regressor,3.2381,18.0498,4.2485,0.723,0.0042,0.0032,6.72
gbr,Gradient Boosting Regressor,3.5052,20.5728,4.5357,0.6843,0.0045,0.0035,19.546
lr,Linear Regression,3.7266,23.0261,4.7985,0.6466,0.0048,0.0037,0.761
ridge,Ridge Regression,3.7261,23.0018,4.796,0.647,0.0048,0.0037,0.097


Wall time: 35min 34s


In [26]:
%%time
# Blend the three selected pressure models into one ensemble.
blended_pressure_model3 = blend_models(estimator_list=pressure_model3)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,0.0379,0.0052,0.0721,0.9999,0.0001,0.0
1,0.04,0.0091,0.0953,0.9999,0.0001,0.0
2,0.0381,0.0054,0.0735,0.9999,0.0001,0.0
3,0.0384,0.0096,0.0978,0.9999,0.0001,0.0
4,0.0402,0.0079,0.0888,0.9999,0.0001,0.0
5,0.0391,0.0074,0.0862,0.9999,0.0001,0.0
6,0.0393,0.006,0.0773,0.9999,0.0001,0.0
7,0.0403,0.0091,0.0956,0.9999,0.0001,0.0
8,0.0397,0.0073,0.0852,0.9999,0.0001,0.0
9,0.0398,0.0069,0.0831,0.9999,0.0001,0.0


In [27]:
%%time
# Finalize the blended pressure model before predicting on the 2019 rows.
final_pressure_model3 = finalize_model(blended_pressure_model3)

In [28]:
# Predict 2019 pressure; the whole test2019 frame is passed as input.
pressure_pred = predict_model(final_pressure_model3, data=test2019)

In [29]:
# Preview the pressure predictions (in the 'Label' column).
pressure_pred.head()

Unnamed: 0,일자|시간|구분,연월일,시간,구분,year,month,day,weekday,습도,Label
0,2019-01-01 01 A,2019-01-01,1,0,2019,1,1,1,64.463024,1010.0
1,2019-01-01 02 A,2019-01-01,2,0,2019,1,1,1,65.66405,1009.401358
2,2019-01-01 03 A,2019-01-01,3,0,2019,1,1,1,65.659699,1009.200012
3,2019-01-01 04 A,2019-01-01,4,0,2019,1,1,1,66.32069,1008.198012
4,2019-01-01 05 A,2019-01-01,5,0,2019,1,1,1,67.903796,1007.294988


In [61]:
# Store the predicted pressure as the '기압' feature of the 2019 rows.
test2019['기압'] = pressure_pred['Label']
test2019.head()

Unnamed: 0,일자|시간|구분,연월일,시간,구분,year,month,day,weekday,습도,기압,기온
0,2019-01-01 01 A,2019-01-01,1,0,2019,1,1,1,64.463024,1010.0,2.807896
1,2019-01-01 02 A,2019-01-01,2,0,2019,1,1,1,65.66405,1009.401358,0.471236
2,2019-01-01 03 A,2019-01-01,3,0,2019,1,1,1,65.659699,1009.200012,0.052972
3,2019-01-01 04 A,2019-01-01,4,0,2019,1,1,1,66.32069,1008.198012,1.895545
4,2019-01-01 05 A,2019-01-01,5,0,2019,1,1,1,67.903796,1007.294988,3.859672


#### 기온 예측

In [62]:
# Training features: 'month', 'day', 'weekday', '시간', '구분', '습도', '기압'
# Target feature: '기온'

# Only '공급량' and 'year' are ignored, so humidity and pressure stay as inputs.
exp = setup(total, target='기온', ignore_features=['공급량', 'year'])

Unnamed: 0,Description,Value
0,session_id,4516
1,Target,기온
2,Original Data,"(368088, 10)"
3,Missing Values,False
4,Numeric Features,4
5,Categorical Features,3
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(257661, 30)"


In [None]:
%%time
# Models chosen with reference to 07-05_결과_출력(pycaret)_ver0.1(2021.11.15)
# (the decision-tree model was excluded because it was judged to be overfitting).
temp_models3 = compare_models(sort='MAPE',n_select=3, include=['knn','catboost','lightgbm'])

IntProgress(value=0, description='Processing: ', max=19)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
knn,K Neighbors Regressor,0.7257,2.9015,1.7032,0.9771,0.2201,0.2143,3.693


In [None]:
%%time
tuned_temp_models3 = [tune_model(model) for model in temp_models3]

In [None]:
%%time
# Blend the tuned temperature models into one ensemble.
blend_models3 = blend_models(estimator_list=tuned_temp_models3)

In [None]:
%%time
# Finalize the blended temperature model before predicting on the 2019 rows.
temp_prediction_model = finalize_model(blend_models3)

In [None]:
# Save the finalized temperature model under the name 'temp_prediction_model01'.
save_model(temp_prediction_model, 'temp_prediction_model01')

In [None]:
# Predict 2019 temperature; test2019 already holds the predicted '습도' and '기압'.
temp_pred = predict_model(temp_prediction_model, data=test2019)
temp_pred.head()

In [None]:
# Store the predicted temperature as the '기온' feature of the 2019 rows.
test2019['기온'] = temp_pred['Label']
test2019.head()

### 가스 공급량 예측

In [None]:
# Training features: 'month', 'day', 'weekday', '시간', '구분', '습도', '기압', '기온'
# Target is the gas supply ('공급량'); only 'year' is ignored.
exp = setup(total, target='공급량', ignore_features=['year'])

In [None]:
# Compare models ranked by MAPE, skipping the excluded linear and tree models.

compare_models(sort='MAPE', exclude=['lr','lasso','ridge','rf','dt','et'])

In [None]:
# Train the three boosting models that are tuned and blended in the next cells.
cat, lgbm, xgb = (
    create_model(model_id) for model_id in ('catboost', 'lightgbm', 'xgboost')
)

In [None]:
%%time
tuned_cat = tune_model(cat)
tuned_lgbm = tune_model(lgbm)
tuned_xgb = tune_model(xgb)

In [None]:
%%time
blend_model = blend_models(estimators=[tuned_cat, tuned_lgbm, tuned_xgb])

In [None]:
%%time
# Finalize the blended gas-supply model before predicting on the 2019 rows.
gas_prediction_model = finalize_model(blend_model)

In [None]:
# Save the finalized gas-supply model under the name 'gas_prediction_model01'.
save_model(gas_prediction_model, 'gas_prediction_model01')

In [None]:
# Predict 2019 gas supply from test2019, which now carries the predicted
# '습도', '기압' and '기온' columns.
gas_pred = predict_model(gas_prediction_model, data=test2019)
gas_pred.head()

### 제출 파일 만들기

In [None]:
# Load the sample submission file to use as the output template.
sub = pd.read_csv('../data/sample_submission.csv')
sub.head()

In [None]:
# Fill the submission's '공급량' column with the predicted values ('Label').
sub['공급량'] = gas_pred['Label']
sub.head()

In [None]:
# Check dtypes and non-null counts of the submission frame before saving.
sub.info()

In [None]:
# Write the submission file without the DataFrame index column.
sub.to_csv("sub13_pycaret01.csv", index=False)