# 제2회 통신망 안정성 확보를 위한 인공지능 해커톤
# 분야 1 : 무선 기지국 장비의 통계 데이터를 활용한 인구 밀집도 예측

- 출제 배경<br>
    : 많은 사람들이 특정 시간/장소에 모이면 무선 통신 품질의 저하와 함께 각종 사고 위험으로부터 안전할 수 없게 됩니다. 통신사업자의 무선 통신 기지국에서는 서비스를 제공하는 단말의 통계 정보를 통해 인구의 밀집 정도를 파악할 수 있습니다. 무선 네트워크의 성능을 안정적으로 유지하고 인구 밀집도를 완화하기 위해, AI를 활용하여 특정 축제 지역 내의 인구 수를 예측하고자 합니다.
- 학습 데이터<br>
    : 무선 기지국 장비인 RU(Radio Unit)에서는 서비스 지역인 셀 내에 존재하는 단말들의 통계 데이터를 관리합니다. 본 데이터에서는 업링크/다운링크 데이터 크기 및 블록 오류율(BLER), 셀 내 평균 수신 신호 강도(RSSI) 및 사용자 단말 수 등의 각종 통계 정보가 5분 단위로 제공됩니다.
- 문제 구성 및 풀이 요령<br>
    : 축제 지역 근방 10곳의 기지국에서 축제 기간 전후에 해당하는 모든 타임스탬프에 대해 인구 수를 예측하는 회귀 문제입니다. 다양한 데이터 컬럼 중 인구 수 변화에 유의미한 값을 선별하고 시계열 데이터의 주기성을 고려하는 것이 중요합니다. 인구 1인당 단말 1대를 가지고 있다고 가정합니다.
- 채점 기준<br>
    : 평균 절대 오차(MAE)

# Import

In [None]:
# # Install the AutoML package
# !pip install autogluon
# # Restart the runtime after installing

In [None]:
# Mount Google Drive so the notebook can reach the files stored there.
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import warnings
from autogluon.tabular import TabularDataset, TabularPredictor # autogluon

# Display every DataFrame column and silence all warnings.
pd.options.display.max_columns = None
warnings.filterwarnings(action='ignore')
%config InlineBackend.figure_format = 'retina'

# Change this path to match where the project is stored.
%cd '/content/drive/MyDrive/colab/제2회 통신망 안정성 확보를 위한 인공지능 해커톤/Q1'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/colab/제2회 통신망 안정성 확보를 위한 인공지능 해커톤/Q1


# Submit1 : 상관관계 높은 컬럼 사용 / 로그 스케일링 / 이상치 제거 / 정규화

## 01) data set load

In [None]:
# Load the preprocessed train/test tables for submit1; the first CSV column
# becomes the row index.
train_merge_f, x_test_merge_f = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'preprocessed_data/submit1_train.csv',
        'preprocessed_data/submit1_test.csv',
    )
)

In [None]:
# Preview the first rows of the training table.
train_merge_f.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb,uenomax
0,-0.650928,-0.832874,-0.832691,-0.096652,-0.096512,-1.790962,-1.552873,-3.350626,-3.353591,-0.268361,-0.264663,-0.418631,-0.324742,-1.738269,-1.73541,0,0,-3.713571,-3.83482,-1.315288,-1.154916,-0.524373,-2.750116,-2.937247,-1.066092,-1.066092,-1.201694,-0.975277,-1.160101,-0.817691,-1.330158,1
1,-0.657152,0.324175,0.324511,0.682312,0.682469,1.310287,1.618175,1.100066,1.077706,-0.986157,-0.983742,-0.511178,-0.235394,-0.98475,-0.984162,0,0,0.255654,0.472608,0.033991,0.222232,-0.949112,-0.033831,0.48998,1.216417,1.216417,0.368558,-0.976752,-0.626002,-0.573283,1.419193,5
2,1.030015,-0.426198,-0.425954,-0.424311,-0.424243,0.612832,0.631136,-0.628696,-0.615033,-2.072921,-2.070899,1.000958,-0.479227,-1.311945,-1.309987,0,0,0.631021,0.388912,0.515251,-0.045995,1.262533,0.700004,0.306421,-0.26889,-0.26889,-0.44687,-1.058652,-1.035583,-0.283147,-0.633761,6
3,-0.406465,-1.114238,-1.113994,-0.976695,-0.976614,-0.826929,-0.781595,-1.818113,-1.813472,1.005675,1.008167,-0.312775,-0.183432,-0.688403,-0.687824,0,0,-0.487264,-0.623035,-0.544307,-0.590302,-0.708825,-0.599738,-0.756642,2.114622,2.114622,-0.438913,-0.859361,-2.259611,0.915922,-0.575646,1
4,-0.561473,0.803532,0.803636,0.974515,0.974593,0.59381,0.596539,0.709139,0.72125,-0.815109,-0.813346,-0.246645,-0.207081,0.780193,0.780557,0,0,0.306237,0.848377,-0.019062,0.465122,-0.507213,0.144306,0.884744,0.554075,0.554075,0.72907,-1.020477,-1.54614,1.869399,0.963641,4


In [None]:
# Preview the first rows of the test table.
x_test_merge_f.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb
0,-0.667542,-0.481082,-0.480596,-0.257629,-0.257425,-0.718056,-0.67018,-1.382757,-1.37972,0.010077,0.011238,-0.226592,-0.171584,1.982971,1.987162,0,0,-1.56221,-1.751742,-0.410825,-0.523185,-1.070125,-0.602207,-0.271383,0.375598,0.375598,-0.332304,-0.859361,-0.318702,-0.011695,-0.577303
1,-0.42704,-1.454413,-1.453854,-1.349899,-1.349338,-1.000441,-0.916495,-1.413417,-1.410143,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,-1.509133,-1.624831,-0.469801,-0.63337,0.27868,-1.920447,-1.815504,-1.46793,-1.46793,-1.201107,-1.020477,-0.298595,-1.559757,-1.002757
2,-0.667542,-1.004315,-1.003808,-1.144738,-1.144495,-0.718056,-0.67018,-0.328479,-0.324267,0.010077,0.011238,-0.226592,-0.171584,-0.323645,-0.322739,0,0,0.52945,-0.234396,-0.410825,-0.523185,2.216891,0.32441,-0.267118,-0.652779,-0.652779,0.489501,-0.91968,-0.415243,0.862119,-0.577303
3,-0.42704,0.447827,0.448464,0.530739,0.531318,-1.000441,-0.916495,0.464955,0.470439,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,0.132145,-0.138808,-0.469801,-0.63337,-0.835845,-0.060003,-0.204914,0.089064,0.089064,0.278165,-1.084683,-0.298595,-0.75017,0.398415
4,-0.667542,-0.716359,-0.715864,-0.49097,-0.490755,-0.718056,-0.67018,-0.916231,-0.912675,0.57304,0.574515,-0.226592,-0.171584,-0.323645,-0.322739,0,0,0.251629,-0.391863,-0.410825,-0.523185,-0.193587,-0.107811,-0.508853,-0.065758,-0.065758,-0.506874,-0.984066,-0.415243,0.586177,-0.577303


In [None]:
# Wrap the train and test frames as AutoGluon input datasets.
train_data, test_data = map(TabularDataset, (train_merge_f, x_test_merge_f))

## 02) autogluon modeling

In [None]:
# Model search: fit AutoGluon with the 'best_quality' preset and 5-fold
# bagging, scored by mean absolute error.
save_path = 'aj_predictmodel1'  # directory where the model .pkl files are saved
predictor = TabularPredictor(
    label='uenomax',
    problem_type='regression',
    path=save_path,
    eval_metric='mean_absolute_error',
)
# Search for the predictive model.
predictor = predictor.fit(
    train_data,
    presets='best_quality',
    num_bag_folds=5,
    num_bag_sets=1,
)

Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=5, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "aj_predictmodel1/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Jun 9 10:57:30 UTC 2023
Disk Space Avail:   33.55 GB / 107.37 GB (31.2%)
Train Data Rows:    113432
Train Data Columns: 31
Label Column: uenomax
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    52941.52 MB
	Train Data (Original)  Memory Usage: 28.13 MB (0.1% of available memory)
	Inferring data type of each feature based on column values.

In [None]:
# Reload the saved predictor from its model directory.
predictor = TabularPredictor.load(path='aj_predictmodel1/')

In [None]:
# Score each individual model on the training data, then order the
# leaderboard by validation score, highest first.
ld_board = (
    predictor
    .leaderboard(train_data, silent=True)
    .sort_values(by='score_val', ascending=False)
)

In [None]:
# Display the sorted leaderboard.
ld_board

Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
4,WeightedEnsemble_L3,-0.4321677,-0.578554,200.571017,167.213305,2225.167214,0.018635,0.002272,1.60052,3,True,22
3,NeuralNetTorch_BAG_L2,-0.4317649,-0.578848,192.718829,152.767797,1899.581012,5.376245,1.106852,509.167055,2,True,20
16,WeightedEnsemble_L2,-0.5193677,-0.587493,63.343241,48.391464,1055.487601,0.01149,0.002311,1.92553,2,True,12
17,NeuralNetTorch_BAG_L1,-0.5313509,-0.589308,2.558141,0.815364,510.86158,2.558141,0.815364,510.86158,1,True,10
11,NeuralNetFastAI_BAG_L2,-0.4869839,-0.601192,203.957915,154.00983,1564.039213,16.615332,2.348884,173.625256,2,True,18
13,XGBoost_BAG_L2,-0.4907488,-0.601483,189.405494,152.437626,1398.198838,2.062911,0.776681,7.784882,2,True,19
12,LightGBM_BAG_L2,-0.4877279,-0.603856,188.89463,152.553315,1399.567073,1.552047,0.89237,9.153116,2,True,14
9,CatBoost_BAG_L2,-0.4808473,-0.604611,187.550742,151.736341,1418.772259,0.208159,0.075395,28.358302,2,True,16
7,LightGBMLarge_BAG_L2,-0.477485,-0.605125,191.616407,153.553452,1409.227131,4.273824,1.892506,18.813174,2,True,21
6,ExtraTreesMSE_BAG_L2,-0.4755182,-0.606215,191.190409,158.71661,1438.707139,3.847826,7.055665,48.293183,2,True,17


In [None]:
# Predict the target for every row of the test set.
y_test = predictor.predict(data=test_data)

In [None]:
# Preview the first predictions.
y_test.head()

0    3.039357
1    1.184619
2    3.762382
3    3.470598
4    3.671516
Name: uenomax, dtype: float32

## 03) make submit file

In [None]:
# x_test + y_test: pair each test row's station id with its prediction
# (the predictions are aligned on the row index).
test = pd.read_csv('raw_data/Q1_test.csv')
pred = test[['ru_id']].assign(pred=y_test)

# Build the submission: one prediction column per base station, renumbered
# from 0 so it lines up with the sample file's rows.
submit = pd.read_csv('raw_data/Q1_label_sample.csv')
for station in ('BaseStationB', 'BaseStationJ'):
    station_rows = pred['ru_id'] == station
    submit[station] = pred.loc[station_rows, 'pred'].reset_index(drop=True)

# NOTE (translated from the original): the order must be changed before
# running the function.
submit.to_csv('submission_data/submit1.csv')
submit.head()

Unnamed: 0,datetime,BaseStationB,BaseStationJ
0,2023-05-19 00:00:00,1.184619,3.039357
1,2023-05-19 00:05:00,3.470598,3.762382
2,2023-05-19 00:10:00,3.71602,3.671516
3,2023-05-19 00:15:00,2.945974,4.099239
4,2023-05-19 00:20:00,3.294462,4.687873


## 제출 결과 : 1.532884604점

# Submit2 : 상관관계 높은 컬럼 사용 / 로그 스케일링 / 정규화

## 01) data set load

In [None]:
# Load the preprocessed train/test tables for submit2; the first CSV column
# becomes the row index.
train_merge_f, x_test_merge_f = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'preprocessed_data/submit2_train.csv',
        'preprocessed_data/submit2_test.csv',
    )
)

In [None]:
# Preview the first rows of the training table.
train_merge_f.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb,uenomax
0,-0.642055,-0.832913,-0.83273,-0.096805,-0.096665,-1.788567,-1.531903,-3.349084,-3.352038,-0.26858,-0.264883,-0.418631,-0.324742,-1.738269,-1.73541,0,0,-3.682557,-3.832811,-0.944992,-0.924894,-0.5257,-2.722273,-2.93482,-1.065644,-1.065644,-1.200997,-0.975277,-0.622757,-0.817923,-1.330158,1
1,-0.633315,0.323117,0.323452,0.680445,0.680602,1.310287,1.606385,1.097706,1.074189,-0.986157,-0.983742,-0.511178,-0.235394,-0.984477,-0.983888,0,0,0.2478,0.469808,-0.11888,-0.00531,-0.940073,-0.043042,0.484373,1.216417,1.216417,0.368558,-0.976752,-0.29975,-0.574049,1.415736,5
2,0.992809,-0.426198,-0.425954,-0.424311,-0.424243,0.612405,0.629271,-0.628696,-0.615033,-2.072355,-2.070336,0.980891,-0.479227,-1.311945,-1.309987,0,0,0.58154,0.386094,0.245136,-0.09947,1.25079,0.642649,0.303265,-0.269324,-0.269324,-0.44687,-1.058652,-0.695473,-0.291506,-0.63381,6
3,-0.406465,-1.114238,-1.113994,-0.976695,-0.976614,-0.826728,-0.778351,-1.817508,-1.812862,0.996785,0.999245,-0.312775,-0.183432,-0.684746,-0.684214,0,0,-0.487264,-0.623035,-0.4546,-0.442444,-0.71896,-0.599815,-0.756642,2.061341,2.061341,-0.438986,-0.859361,-0.439955,0.915922,-0.575182,1
4,-0.556887,0.803532,0.803636,0.974515,0.974593,0.586366,0.370437,0.708844,0.72095,-0.815109,-0.813346,-0.246645,-0.207081,0.613963,0.615916,0,0,0.304983,0.848377,-0.111756,0.13598,-0.511826,0.139361,0.884744,0.542622,0.542622,0.723794,-1.020477,-0.463952,1.735165,0.963641,4


In [None]:
# Preview the first rows of the test table.
x_test_merge_f.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb
0,-0.667542,-0.481082,-0.480596,-0.257629,-0.257425,-0.718056,-0.67018,-1.382757,-1.37972,0.010077,0.011238,-0.226592,-0.171584,1.982971,1.987162,0,0,-1.56221,-1.751742,-0.410825,-0.523185,-1.070125,-0.602207,-0.271383,0.375598,0.375598,-0.332304,-0.859361,-0.318702,-0.011695,-0.577303
1,-0.42704,-1.454413,-1.453854,-1.349899,-1.349338,-1.000441,-0.916495,-1.413417,-1.410143,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,-1.509133,-1.624831,-0.469801,-0.63337,0.27868,-1.920447,-1.815504,-1.46793,-1.46793,-1.201107,-1.020477,-0.298595,-1.559757,-1.002757
2,-0.667542,-1.004315,-1.003808,-1.144738,-1.144495,-0.718056,-0.67018,-0.328479,-0.324267,0.010077,0.011238,-0.226592,-0.171584,-0.323645,-0.322739,0,0,0.52945,-0.234396,-0.410825,-0.523185,2.216891,0.32441,-0.267118,-0.652779,-0.652779,0.489501,-0.91968,-0.415243,0.862119,-0.577303
3,-0.42704,0.447827,0.448464,0.530739,0.531318,-1.000441,-0.916495,0.464955,0.470439,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,0.132145,-0.138808,-0.469801,-0.63337,-0.835845,-0.060003,-0.204914,0.089064,0.089064,0.278165,-1.084683,-0.298595,-0.75017,0.398415
4,-0.667542,-0.716359,-0.715864,-0.49097,-0.490755,-0.718056,-0.67018,-0.916231,-0.912675,0.57304,0.574515,-0.226592,-0.171584,-0.323645,-0.322739,0,0,0.251629,-0.391863,-0.410825,-0.523185,-0.193587,-0.107811,-0.508853,-0.065758,-0.065758,-0.506874,-0.984066,-0.415243,0.586177,-0.577303


In [None]:
# Wrap the train and test frames as AutoGluon input datasets.
train_data, test_data = map(TabularDataset, (train_merge_f, x_test_merge_f))

## 02) autogluon modeling

In [None]:
# Model search: fit AutoGluon with the 'best_quality' preset and 5-fold
# bagging, scored by mean absolute error.
save_path = 'aj_predictmodel2'  # directory where the model .pkl files are saved
predictor = TabularPredictor(
    label='uenomax',
    problem_type='regression',
    path=save_path,
    eval_metric='mean_absolute_error',
)
# Search for the predictive model.
predictor = predictor.fit(
    train_data,
    presets='best_quality',
    num_bag_folds=5,
    num_bag_sets=1,
)

Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=5, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "aj_predictmodel2/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Jun 9 10:57:30 UTC 2023
Disk Space Avail:   33.55 GB / 107.37 GB (31.2%)
Train Data Rows:    137445
Train Data Columns: 31
Label Column: uenomax
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    52111.87 MB
	Train Data (Original)  Memory Usage: 34.09 MB (0.1% of available memory)
	Inferring data type of each feature based on column values.

In [None]:
# Reload the saved predictor from its model directory.
predictor = TabularPredictor.load(path='aj_predictmodel2/')

In [None]:
# Score each individual model on the training data, then order the
# leaderboard by validation score, highest first.
ld_board = (
    predictor
    .leaderboard(train_data, silent=True)
    .sort_values(by='score_val', ascending=False)
)

In [None]:
# Display the sorted leaderboard.
ld_board

Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
9,WeightedEnsemble_L3,-0.4951916,-0.627559,363.20395,315.671498,2403.36062,0.012198,0.00265,1.849376,3,True,22
12,WeightedEnsemble_L2,-0.4979247,-0.628536,245.973893,227.457402,1324.266218,0.014046,0.00273,2.204195,2,True,12
14,XGBoost_BAG_L2,-0.5034558,-0.629452,321.440856,290.989713,1625.202433,3.521426,0.641585,7.874076,2,True,19
13,LightGBM_BAG_L2,-0.5010171,-0.630508,319.934978,291.511202,1627.541456,2.015548,1.163073,10.213099,2,True,14
6,LightGBMLarge_BAG_L2,-0.4911636,-0.631768,322.642261,292.594837,1637.775005,4.72283,2.246709,20.446647,2,True,21
5,ExtraTreesMSE_BAG_L2,-0.4910376,-0.631848,325.916983,299.696513,1677.275043,7.997553,9.348385,59.946685,2,True,17
10,CatBoost_BAG_L2,-0.4961332,-0.632454,318.286574,290.460847,1691.186514,0.367144,0.112719,73.858156,2,True,16
4,RandomForestMSE_BAG_L2,-0.4887627,-0.634455,322.928106,300.317856,1961.271584,5.008676,9.969727,343.943226,2,True,15
8,LightGBMXT_BAG_L2,-0.4949593,-0.634783,330.623857,299.105845,1645.783119,12.704427,8.757717,28.454762,2,True,13
17,CatBoost_BAG_L1,-0.5522067,-0.63612,1.260764,0.324276,442.4587,1.260764,0.324276,442.4587,1,True,6


In [None]:
# Predict the target for every row of the test set.
y_test = predictor.predict(data=test_data)

In [None]:
# Preview the first predictions.
y_test.head()

0    2.428789
1    1.397500
2    3.464953
3    2.869492
4    3.046493
Name: uenomax, dtype: float32

## 03) make submit file

In [None]:
# x_test + y_test: pair each test row's station id with its prediction
# (the predictions are aligned on the row index).
test = pd.read_csv('raw_data/Q1_test.csv')
pred = test[['ru_id']].assign(pred=y_test)

# Build the submission: one prediction column per base station, renumbered
# from 0 so it lines up with the sample file's rows.
submit = pd.read_csv('raw_data/Q1_label_sample.csv')
for station in ('BaseStationB', 'BaseStationJ'):
    station_rows = pred['ru_id'] == station
    submit[station] = pred.loc[station_rows, 'pred'].reset_index(drop=True)

# NOTE (translated from the original): the order must be changed before
# running the function.
submit.to_csv('submission_data/submit2.csv')
submit.head()

Unnamed: 0,datetime,BaseStationB,BaseStationJ
0,2023-05-19 00:00:00,1.3975,2.428789
1,2023-05-19 00:05:00,2.869492,3.464953
2,2023-05-19 00:10:00,2.683626,3.046493
3,2023-05-19 00:15:00,2.165024,3.529145
4,2023-05-19 00:20:00,2.393902,4.188874


## 제출 결과 : 1.2805663609점

# Submit3 : 상관관계 높은 컬럼 사용 / 로그 스케일링 / 로버스트 스케일링

## 01) data set load

In [None]:
# Load the preprocessed train/test tables for submit3; the first CSV column
# becomes the row index.
train_merge_f, x_test_merge_f = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'preprocessed_data/submit3_train.csv',
        'preprocessed_data/submit3_test.csv',
    )
)

In [None]:
# Preview the first rows of the training table.
train_merge_f.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb,uenomax
0,0.0,-0.884523,-0.884523,-0.206003,-0.206003,-1.482307,-1.0,-2.734545,-2.734545,-0.30627,-0.30627,0.0,0.0,-1.160964,-1.160964,0.0,0.0,-3.670964,-4.399198,-1.095376,-0.718677,-0.4,-2.629482,-3.209006,-0.865209,-0.865209,-1.090766,-0.54294,-0.7,-0.818182,-0.5,1
1,0.0,0.103093,0.103093,0.43854,0.43854,1.014162,1.26186,0.773047,0.765527,-0.834044,-0.834044,0.0,0.0,-0.63093,-0.63093,0.0,0.0,0.026671,0.284699,0.0,0.199207,-0.666667,-0.193652,0.305302,0.751982,0.751982,0.147612,-0.544025,-0.4,-0.533333,0.834044,5
2,1.0,-0.603499,-0.603499,-0.542636,-0.542636,0.417311,0.442507,-0.833803,-0.833803,-1.571069,-1.571069,1.0,0.0,-1.26186,-1.26186,0.0,0.0,0.459742,0.172017,0.383373,0.093114,1.0,0.47457,0.071059,-0.383695,-0.383695,-0.572062,-0.571689,-0.6,-0.3,-0.602888,6
3,0.0,-0.912489,-0.912489,-0.834044,-0.834044,0.0,0.0,-1.63093,-1.464974,0.63093,0.63093,0.0,0.0,0.0,0.0,0.0,0.0,-0.353912,-0.439983,0.0,0.0,-0.714286,-0.526745,-0.58551,1.576382,1.576382,-0.439174,-0.466526,-2.0,0.758621,0.0,1
4,0.0,0.528462,0.528462,0.695977,0.695977,1.0,1.0,0.460209,0.460209,-0.553295,-0.553295,0.0,0.0,1.0,1.0,0.0,0.0,0.041314,0.524792,0.0,0.498862,-0.4,-0.020114,0.53328,0.324553,0.324553,0.489374,-0.555705,-1.0,1.5,0.684582,4


In [None]:
# Preview the first rows of the test table.
x_test_merge_f.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb
0,0.0,-0.36907,-0.36907,-0.244077,-0.244077,0.0,0.0,-1.062875,-1.140314,0.0,0.0,0.0,0.0,0.693147,0.693147,0.0,0.0,-1.07159,-1.365561,0.0,0.0,-1.666667,-0.491706,-0.361872,0.187483,0.187483,-0.33718,-0.466526,-1.5,-0.035714,0.0
1,0.0,-1.298091,-1.298091,-1.362611,-1.362611,-0.710382,-0.63093,-1.247567,-1.247567,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.985529,-1.334135,0.0,-0.513164,0.2,-1.451921,-1.47573,-1.390743,-1.390743,-1.185885,-0.555705,-1.0,-1.428571,-0.876759
2,0.0,-0.73814,-0.73814,-0.83216,-0.83216,0.0,0.0,-0.285471,-0.30627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.19784,-0.360217,0.0,0.0,3.333333,0.130214,-0.359057,-0.507757,-0.507757,0.328393,-0.497886,-2.0,0.642857,0.0
3,0.0,0.201181,0.201181,0.310133,0.310133,-0.710382,-0.63093,0.231563,0.231563,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.04105,-0.338622,0.0,-0.513164,-0.8,-0.15285,-0.379105,-0.054164,-0.054164,0.06181,-0.588724,-1.0,-0.714286,0.223472
4,0.0,-0.535026,-0.535026,-0.398763,-0.398763,0.0,0.0,-0.718868,-0.771244,0.36907,0.36907,0.0,0.0,0.0,0.0,0.0,0.0,0.029231,-0.464549,0.0,0.0,-0.333333,-0.159881,-0.518627,-0.110899,-0.110899,-0.478564,-0.53136,-2.0,0.428571,0.0


In [None]:
# Wrap the train and test frames as AutoGluon input datasets.
train_data, test_data = map(TabularDataset, (train_merge_f, x_test_merge_f))

## 02) autogluon modeling

In [None]:
# Model search: fit AutoGluon with the 'best_quality' preset and 5-fold
# bagging, scored by mean absolute error.
save_path = 'aj_predictmodel3'  # directory where the model .pkl files are saved
predictor = TabularPredictor(
    label='uenomax',
    problem_type='regression',
    path=save_path,
    eval_metric='mean_absolute_error',
)
# Search for the predictive model.
predictor = predictor.fit(
    train_data,
    presets='best_quality',
    num_bag_folds=5,
    num_bag_sets=1,
)

Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=5, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "aj_predictmodel3/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Jun 9 10:57:30 UTC 2023
Disk Space Avail:   33.54 GB / 107.37 GB (31.2%)
Train Data Rows:    137445
Train Data Columns: 31
Label Column: uenomax
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    51478.9 MB
	Train Data (Original)  Memory Usage: 34.09 MB (0.1% of available memory)
	Inferring data type of each feature based on column values. 

In [None]:
# Reload the saved predictor from its model directory.
predictor = TabularPredictor.load(path='aj_predictmodel3/')

In [None]:
# Score each individual model on the training data, then order the
# leaderboard by validation score, highest first.
ld_board = (
    predictor
    .leaderboard(train_data, silent=True)
    .sort_values(by='score_val', ascending=False)
)

In [None]:
# Display the sorted leaderboard.
ld_board

Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
8,WeightedEnsemble_L3,-0.4655779,-0.64177,667.095204,764.853093,3307.180932,0.012337,0.00314,1.923252,3,True,22
14,XGBoost_BAG_L2,-0.4782557,-0.645147,635.90666,740.813404,2563.128344,2.506882,1.083516,17.575116,2,True,19
12,LightGBMLarge_BAG_L2,-0.4715883,-0.645202,637.452406,742.411726,2568.388685,4.052628,2.681839,22.835457,2,True,21
13,LightGBM_BAG_L2,-0.4780481,-0.645476,635.753711,741.314408,2559.924522,2.353933,1.584521,14.371295,2,True,14
4,ExtraTreesMSE_BAG_L2,-0.4592588,-0.645891,640.506251,748.909875,2603.460072,7.106473,9.179987,57.906844,2,True,17
10,CatBoost_BAG_L2,-0.4685131,-0.646259,633.996556,739.890941,2676.529166,0.596778,0.161053,130.975939,2,True,16
9,LightGBMXT_BAG_L2,-0.4677781,-0.647852,657.758095,756.833251,2593.534142,24.358317,17.103363,47.980915,2,True,13
6,RandomForestMSE_BAG_L2,-0.4651909,-0.647971,641.492032,749.454596,2868.363803,8.092254,9.724708,322.810576,2,True,15
7,WeightedEnsemble_L2,-0.4655169,-0.6495,307.613405,279.74987,2075.378418,0.016603,0.003151,2.188406,2,True,12
11,NeuralNetFastAI_BAG_L2,-0.4694311,-0.651496,644.727851,742.018849,2753.153746,11.328073,2.288961,207.600519,2,True,18


In [None]:
# Predict the target for every row of the test set.
y_test = predictor.predict(data=test_data)

In [None]:
# Preview the first predictions.
y_test.head()

0    1.847958
1    1.302597
2    1.580923
3    2.798252
4    1.586574
Name: uenomax, dtype: float32

## 03) make submit file

In [None]:
# x_test + y_test: pair each test row's station id with its prediction
# (the predictions are aligned on the row index).
test = pd.read_csv('raw_data/Q1_test.csv')
pred = test[['ru_id']].assign(pred=y_test)

# Build the submission: one prediction column per base station, renumbered
# from 0 so it lines up with the sample file's rows.
submit = pd.read_csv('raw_data/Q1_label_sample.csv')
for station in ('BaseStationB', 'BaseStationJ'):
    station_rows = pred['ru_id'] == station
    submit[station] = pred.loc[station_rows, 'pred'].reset_index(drop=True)

# NOTE (translated from the original): the order must be changed before
# running the function.
submit.to_csv('submission_data/submit3.csv')
submit.head()

Unnamed: 0,datetime,BaseStationB,BaseStationJ
0,2023-05-19 00:00:00,1.302597,1.847958
1,2023-05-19 00:05:00,2.798252,1.580923
2,2023-05-19 00:10:00,2.796225,1.586574
3,2023-05-19 00:15:00,1.90834,1.499048
4,2023-05-19 00:20:00,2.418369,1.868755


## 제출 결과 : 0.7865075409점

# Submit4 : 모든 컬럼 사용 / 로그 스케일링 / 로버스트 스케일링

## 01) data set load

In [None]:
# Load the preprocessed train/test tables for submit4; the first CSV column
# becomes the row index.
train_merge_f, x_test_merge_f = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'preprocessed_data/submit4_train.csv',
        'preprocessed_data/submit4_test.csv',
    )
)

In [None]:
# Preview the first rows of the training table.
train_merge_f.head()

Unnamed: 0,scgfail,scgfailratio,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,dltransmittedmcsavg,airmaculbyte,airmacdlbyte,bler_ul,bler_dl,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb,uenomax
0,0.0,0.0,-0.884523,-0.884523,-0.206003,-0.206003,-1.482307,-1.0,-2.734545,-2.734545,-0.30627,-0.30627,0.0,0.0,-1.160964,-1.160964,0.0,0.0,-3.670964,-4.399198,-1.095376,-0.718677,-0.4,-1.483871,-2.629482,-3.209006,-1.887552,1.659999,-0.865209,-0.865209,-1.090766,-0.54294,-0.7,-0.818182,-0.5,1
1,0.0,0.0,0.103093,0.103093,0.43854,0.43854,1.014162,1.26186,0.773047,0.765527,-0.834044,-0.834044,0.0,0.0,-0.63093,-0.63093,0.0,0.0,0.026671,0.284699,0.0,0.199207,-0.666667,0.548387,-0.193652,0.305302,0.394987,0.304229,0.751982,0.751982,0.147612,-0.544025,-0.4,-0.533333,0.834044,5
2,1.0,1.503043,-0.603499,-0.603499,-0.542636,-0.542636,0.417311,0.442507,-0.833803,-0.833803,-1.571069,-1.571069,1.0,0.0,-1.26186,-1.26186,0.0,0.0,0.459742,0.172017,0.383373,0.093114,1.0,-0.321429,0.47457,0.071059,-0.600397,-0.400374,-0.383695,-0.383695,-0.572062,-0.571689,-0.6,-0.3,-0.602888,6
3,0.0,0.0,-0.912489,-0.912489,-0.834044,-0.834044,0.0,0.0,-1.63093,-1.464974,0.63093,0.63093,0.0,0.0,0.0,0.0,0.0,0.0,-0.353912,-0.439983,0.0,0.0,-0.714286,-0.22,-0.526745,-0.58551,-1.176345,-0.133592,1.576382,1.576382,-0.439174,-0.466526,-2.0,0.758621,0.0,1
4,0.0,0.0,0.528462,0.528462,0.695977,0.695977,1.0,1.0,0.460209,0.460209,-0.553295,-0.553295,0.0,0.0,1.0,1.0,0.0,0.0,0.041314,0.524792,0.0,0.498862,-0.4,0.978261,-0.020114,0.53328,0.857415,0.107598,0.324553,0.324553,0.489374,-0.555705,-1.0,1.5,0.684582,4


In [None]:
# Preview the first rows of the test table.
x_test_merge_f.head()

Unnamed: 0,scgfail,scgfailratio,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,dltransmittedmcsavg,airmaculbyte,airmacdlbyte,bler_ul,bler_dl,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb
0,0.0,0.0,-0.36907,-0.36907,-0.244077,-0.244077,0.0,0.0,-1.062875,-1.140314,0.0,0.0,0.0,0.0,0.693147,0.693147,0.0,0.0,-1.07159,-1.365561,0.0,0.0,-1.666667,0.038462,-0.491706,-0.361872,-0.34502,0.743185,0.187483,0.187483,-0.33718,-0.466526,-1.5,-0.035714,0.0
1,0.0,0.0,-1.298091,-1.298091,-1.362611,-1.362611,-0.710382,-0.63093,-1.247567,-1.247567,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.985529,-1.334135,0.0,-0.513164,0.2,-0.815385,-1.451921,-1.47573,-6.045324,-1.689675,-1.390743,-1.390743,-1.185885,-0.555705,-1.0,-1.428571,-0.876759
2,0.0,0.0,-0.73814,-0.73814,-0.83216,-0.83216,0.0,0.0,-0.285471,-0.30627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.19784,-0.360217,0.0,0.0,3.333333,-0.615385,0.130214,-0.359057,1.732271,-0.404324,-0.507757,-0.507757,0.328393,-0.497886,-2.0,0.642857,0.0
3,0.0,0.0,0.201181,0.201181,0.310133,0.310133,-0.710382,-0.63093,0.231563,0.231563,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.04105,-0.338622,0.0,-0.513164,-0.8,-0.138462,-0.15285,-0.379105,-0.261025,-1.206677,-0.054164,-0.054164,0.06181,-0.588724,-1.0,-0.714286,0.223472
4,0.0,0.0,-0.535026,-0.535026,-0.398763,-0.398763,0.0,0.0,-0.718868,-0.771244,0.36907,0.36907,0.0,0.0,0.0,0.0,0.0,0.0,0.029231,-0.464549,0.0,0.0,-0.333333,-0.576923,-0.159881,-0.518627,-0.394908,-0.827277,-0.110899,-0.110899,-0.478564,-0.53136,-2.0,0.428571,0.0


In [None]:
# Wrap the train and test frames as AutoGluon input datasets.
train_data, test_data = map(TabularDataset, (train_merge_f, x_test_merge_f))

## 02) autogluon modeling

In [None]:
# Model search: fit AutoGluon with the 'best_quality' preset and 5-fold
# bagging, scored by mean absolute error.
save_path = 'aj_predictmodel4'  # directory where the model .pkl files are saved
predictor = TabularPredictor(
    label='uenomax',
    problem_type='regression',
    path=save_path,
    eval_metric='mean_absolute_error',
)
# Search for the predictive model.
predictor = predictor.fit(
    train_data,
    presets='best_quality',
    num_bag_folds=5,
    num_bag_sets=1,
)

Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=5, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "aj_predictmodel4/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Jun 9 10:57:30 UTC 2023
Disk Space Avail:   33.54 GB / 107.37 GB (31.2%)
Train Data Rows:    137445
Train Data Columns: 35
Label Column: uenomax
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    50283.68 MB
	Train Data (Original)  Memory Usage: 38.48 MB (0.1% of available memory)
	Inferring data type of each feature based on column values.

In [None]:
# Reload the saved predictor from its model directory.
predictor = TabularPredictor.load(path='aj_predictmodel4/')

In [None]:
# Score each individual model on the training data, then order the
# leaderboard by validation score, highest first.
ld_board = (
    predictor
    .leaderboard(train_data, silent=True)
    .sort_values(by='score_val', ascending=False)
)

In [None]:
# Display the sorted leaderboard.
ld_board

Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
8,WeightedEnsemble_L3,-0.4360084,-0.627885,725.047854,785.986719,3281.411934,0.017936,0.002804,1.932332,3,True,22
9,LightGBMLarge_BAG_L2,-0.4374177,-0.630687,683.714861,749.470597,2667.263183,4.921717,2.679038,24.229053,2,True,21
13,XGBoost_BAG_L2,-0.4495812,-0.631193,683.704768,747.562471,2657.042248,4.911625,0.770912,14.008119,2,True,19
6,ExtraTreesMSE_BAG_L2,-0.4312111,-0.631269,684.355785,756.112643,2707.603001,5.562641,9.321084,64.568872,2,True,17
11,LightGBM_BAG_L2,-0.4441086,-0.631367,682.611529,748.807235,2659.48595,3.818386,2.015676,16.451821,2,True,14
10,CatBoost_BAG_L2,-0.4388774,-0.631779,679.267575,746.943654,2764.088429,0.474431,0.152095,121.0543,2,True,16
7,RandomForestMSE_BAG_L2,-0.4345377,-0.633242,683.839644,756.819993,3003.546662,5.0465,10.028434,360.512532,2,True,15
4,LightGBMXT_BAG_L2,-0.4287661,-0.633357,704.113003,763.032351,2695.106725,25.31986,16.240793,52.072596,2,True,13
5,WeightedEnsemble_L2,-0.4299119,-0.635482,351.841859,321.556837,2349.879792,0.014251,0.00282,2.217328,2,True,12
17,CatBoost_BAG_L1,-0.5093162,-0.645169,2.438476,0.65779,694.387546,2.438476,0.65779,694.387546,1,True,6


In [None]:
# Predict the label for every row of the test dataset.
y_test = predictor.predict(test_data)

In [None]:
# Preview the first few predictions.
y_test.head()

0    1.893983
1    1.232111
2    1.713050
3    2.864348
4    1.637138
Name: uenomax, dtype: float32

## 03) make submit file

In [None]:
# Pair each raw test row's base-station id with its prediction.
# Column assignment aligns `y_test` to `test` on the row index.
test = pd.read_csv('raw_data/Q1_test.csv')
pred = test.loc[:, ['ru_id']]
pred['pred'] = y_test

# Fill the sample submission: one column per base station, holding that
# station's predictions renumbered from 0 so they line up with the sample rows.
submit = pd.read_csv('raw_data/Q1_label_sample.csv')
for station in ('BaseStationB', 'BaseStationJ'):
    station_rows = pred['ru_id'] == station
    submit[station] = pred.loc[station_rows, ['pred']].reset_index(drop=True)

# NOTE (original author's reminder): the order has to be changed before
# running the function. Which function is meant is not shown in this cell.
submit.to_csv('submission_data/submit4.csv')
submit.head()

Unnamed: 0,datetime,BaseStationB,BaseStationJ
0,2023-05-19 00:00:00,1.232111,1.893983
1,2023-05-19 00:05:00,2.864348,1.71305
2,2023-05-19 00:10:00,2.918217,1.637138
3,2023-05-19 00:15:00,1.941319,1.544662
4,2023-05-19 00:20:00,2.447587,2.01384


## 제출 결과 : 0.8005025288점

# Submit 5 : 모든 컬럼 사용 / 로그 스케일링 / 로버스트 스케일링 / 차분 후 이동평균 feature 추가

## Stage 1 : test set의 이동평균모형(moving_average) 예측

### 01) data set load

In [None]:
# Load the preprocessed Submit 5 train and test tables; the first CSV column
# is used as the row index.
train_merge_f = pd.read_csv('preprocessed_data/submit5_train.csv', index_col=0)
x_test_merge_f = pd.read_csv('preprocessed_data/submit5_test.csv', index_col=0)

In [None]:
# Preview the first rows of the training table.
train_merge_f.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb,uenomax,seasonal_diff,moving_average
19,-0.406465,0.354394,0.354647,0.492939,0.493019,-0.826929,-0.781595,-0.555948,-0.549283,1.005675,1.008167,-0.312775,-0.183432,-0.688403,-0.687824,0,0,-1.008692,-1.298108,-0.544307,-0.590302,-0.377691,-0.6881,-1.155042,-0.278979,-0.278979,0.410375,-0.984066,-2.259611,0.395832,-0.575646,1,0.0,0.166667
20,-0.561473,-0.026324,-0.026161,0.082987,0.0831,-0.802317,-0.743494,-0.38639,-0.377104,-0.279667,-0.277644,-0.246645,-0.207081,1.623207,1.623434,0,0,-0.148612,0.582296,-0.955251,-0.654832,-1.065858,-0.159705,0.568791,-0.370003,-0.370003,-0.080685,-1.111763,-1.54614,2.130529,0.334132,2,-2.0,-0.083333
21,-0.521983,0.086961,0.087599,-0.014509,-0.014115,-1.412955,-1.252297,0.609088,0.622151,0.875024,0.876645,-0.316598,-0.155505,-0.440922,-0.439731,0,0,-0.332495,0.613014,-0.387238,0.719185,-0.314893,-0.368698,0.623522,0.42215,0.42215,0.135509,-1.111744,-1.762692,-1.272567,0.285279,5,2.0,0.166667
22,-0.605127,-0.277835,-0.277737,-0.329298,-0.32928,0.921948,0.811282,0.043453,0.04708,0.917552,0.918158,-0.434171,-0.310128,1.122019,1.127556,0,0,0.174133,0.726444,-0.722408,1.433786,-0.338203,0.19802,0.660083,0.259042,0.259042,0.253013,-1.111719,-0.919822,0.354786,-0.680861,3,1.0,0.166667
23,1.232606,-0.189232,-0.188962,0.043767,0.04386,-0.148437,-0.363585,-0.720159,-0.71424,0.767531,0.76899,-0.263516,-0.286459,1.28637,1.288383,0,0,0.747192,0.855584,1.044322,2.673322,-0.510557,0.701621,0.797717,-0.860446,-0.860446,0.128012,-1.302677,-0.74949,-2.335046,-0.088507,2,1.0,0.25


In [None]:
# Preview the first rows of the test feature table.
x_test_merge_f.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb
0,-0.667542,-0.481082,-0.480596,-0.257629,-0.257425,-0.718056,-0.67018,-1.382757,-1.37972,0.010077,0.011238,-0.226592,-0.171584,1.982971,1.987162,0,0,-1.56221,-1.751742,-0.410825,-0.523185,-1.070125,-0.602207,-0.271383,0.375598,0.375598,-0.332304,-0.859361,-0.318702,-0.011695,-0.577303
1,-0.42704,-1.454413,-1.453854,-1.349899,-1.349338,-1.000441,-0.916495,-1.413417,-1.410143,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,-1.509133,-1.624831,-0.469801,-0.63337,0.27868,-1.920447,-1.815504,-1.46793,-1.46793,-1.201107,-1.020477,-0.298595,-1.559757,-1.002757
2,-0.667542,-1.004315,-1.003808,-1.144738,-1.144495,-0.718056,-0.67018,-0.328479,-0.324267,0.010077,0.011238,-0.226592,-0.171584,-0.323645,-0.322739,0,0,0.52945,-0.234396,-0.410825,-0.523185,2.216891,0.32441,-0.267118,-0.652779,-0.652779,0.489501,-0.91968,-0.415243,0.862119,-0.577303
3,-0.42704,0.447827,0.448464,0.530739,0.531318,-1.000441,-0.916495,0.464955,0.470439,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,0.132145,-0.138808,-0.469801,-0.63337,-0.835845,-0.060003,-0.204914,0.089064,0.089064,0.278165,-1.084683,-0.298595,-0.75017,0.398415
4,-0.667542,-0.716359,-0.715864,-0.49097,-0.490755,-0.718056,-0.67018,-0.916231,-0.912675,0.57304,0.574515,-0.226592,-0.171584,-0.323645,-0.322739,0,0,0.251629,-0.391863,-0.410825,-0.523185,-0.193587,-0.107811,-0.508853,-0.065758,-0.065758,-0.506874,-0.984066,-0.415243,0.586177,-0.577303


In [None]:
# Stage 1 training frame: the training table without the `uenomax` and
# `seasonal_diff` columns (the `moving_average` column is kept).
sea_xtrain = train_merge_f.drop(columns=['uenomax', 'seasonal_diff'])

In [None]:
# Preview the Stage 1 training frame.
sea_xtrain.head()

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb,moving_average
19,-0.406465,0.354394,0.354647,0.492939,0.493019,-0.826929,-0.781595,-0.555948,-0.549283,1.005675,1.008167,-0.312775,-0.183432,-0.688403,-0.687824,0,0,-1.008692,-1.298108,-0.544307,-0.590302,-0.377691,-0.6881,-1.155042,-0.278979,-0.278979,0.410375,-0.984066,-2.259611,0.395832,-0.575646,0.166667
20,-0.561473,-0.026324,-0.026161,0.082987,0.0831,-0.802317,-0.743494,-0.38639,-0.377104,-0.279667,-0.277644,-0.246645,-0.207081,1.623207,1.623434,0,0,-0.148612,0.582296,-0.955251,-0.654832,-1.065858,-0.159705,0.568791,-0.370003,-0.370003,-0.080685,-1.111763,-1.54614,2.130529,0.334132,-0.083333
21,-0.521983,0.086961,0.087599,-0.014509,-0.014115,-1.412955,-1.252297,0.609088,0.622151,0.875024,0.876645,-0.316598,-0.155505,-0.440922,-0.439731,0,0,-0.332495,0.613014,-0.387238,0.719185,-0.314893,-0.368698,0.623522,0.42215,0.42215,0.135509,-1.111744,-1.762692,-1.272567,0.285279,0.166667
22,-0.605127,-0.277835,-0.277737,-0.329298,-0.32928,0.921948,0.811282,0.043453,0.04708,0.917552,0.918158,-0.434171,-0.310128,1.122019,1.127556,0,0,0.174133,0.726444,-0.722408,1.433786,-0.338203,0.19802,0.660083,0.259042,0.259042,0.253013,-1.111719,-0.919822,0.354786,-0.680861,0.166667
23,1.232606,-0.189232,-0.188962,0.043767,0.04386,-0.148437,-0.363585,-0.720159,-0.71424,0.767531,0.76899,-0.263516,-0.286459,1.28637,1.288383,0,0,0.747192,0.855584,1.044322,2.673322,-0.510557,0.701621,0.797717,-0.860446,-0.860446,0.128012,-1.302677,-0.74949,-2.335046,-0.088507,0.25


In [None]:
# Convert the train and test frames into AutoGluon input datasets.
train_data = TabularDataset(sea_xtrain)
test_data = TabularDataset(x_test_merge_f)

### 02) autogluon modeling

In [None]:
# Model search: fit an AutoGluon regressor for `moving_average`, evaluated
# with mean absolute error.
save_path = 'aj_movingaverage'  # directory where the model .pkl files are saved
predictor = TabularPredictor(
    label='moving_average',
    problem_type='regression',
    path=save_path,
    eval_metric='mean_absolute_error',
)
# Search for the prediction model.
predictor = predictor.fit(
    train_data,
    presets='best_quality',
    num_bag_folds=5,
    num_bag_sets=1,
)

Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=5, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "aj_movingaverage/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Jun 9 10:57:30 UTC 2023
Disk Space Avail:   33.54 GB / 107.37 GB (31.2%)
Train Data Rows:    113413
Train Data Columns: 31
Label Column: moving_average
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    50275.21 MB
	Train Data (Original)  Memory Usage: 28.13 MB (0.1% of available memory)
	Inferring data type of each feature based on column 

In [None]:
# Load the saved AutoGluon predictor from the 'aj_movingaverage/' directory.
predictor = TabularPredictor.load('aj_movingaverage/')

In [None]:
# Score each individual model on the training data and rank the rows by
# validation score, best first. Because `train_data` is what is passed in,
# the `score_test` column is measured on training rows, not held-out data.
ld_board = (
    predictor
    .leaderboard(train_data, silent=True)
    .sort_values(by='score_val', ascending=False)
)

In [None]:
# Display the leaderboard table.
ld_board

Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
8,WeightedEnsemble_L3,-0.313338,-0.330602,90.084834,72.7641,1354.236461,0.011857,0.002666,1.634002,3,True,22
10,WeightedEnsemble_L2,-0.31733,-0.330728,26.786048,16.195137,539.29304,0.013263,0.002191,1.907129,2,True,12
6,NeuralNetFastAI_BAG_L2,-0.311992,-0.330802,73.624297,53.809651,743.640421,9.989098,1.959722,186.68606,2,True,18
12,CatBoost_BAG_L2,-0.317966,-0.330872,63.801575,51.922128,565.065344,0.166376,0.072199,8.110982,2,True,16
13,LightGBMXT_BAG_L2,-0.319587,-0.330887,64.694132,52.426831,563.42815,1.058933,0.576901,6.473789,2,True,13
14,LightGBM_BAG_L2,-0.319983,-0.330898,64.484013,52.341867,564.223774,0.848814,0.491937,7.269413,2,True,14
19,CatBoost_BAG_L1,-0.328362,-0.330967,0.220574,0.069215,18.753,0.220574,0.069215,18.753,1,True,6
11,NeuralNetTorch_BAG_L2,-0.317388,-0.331002,67.663399,53.020085,664.500797,4.0282,1.170156,107.546435,2,True,20
16,XGBoost_BAG_L1,-0.326065,-0.331037,1.353238,0.652805,7.238509,1.353238,0.652805,7.238509,1,True,9
7,XGBoost_BAG_L2,-0.312388,-0.331037,65.700561,52.454417,564.799276,2.065362,0.604488,7.844914,2,True,19


In [None]:
# Predict the label for every row of the test dataset.
y_test = predictor.predict(test_data)

In [None]:
# Preview the first few predictions.
y_test.head()

0    0.001258
1   -0.032343
2    0.026395
3    0.041967
4    0.028460
Name: moving_average, dtype: float32

### 03) test set에 moving_average feature 추가

In [None]:
# Copy the test dataset and append the predictions as a new
# `moving_average` column (values are aligned on the row index).
test_dta = test_data.assign(moving_average=y_test)

In [None]:
# Display the test frame.
test_dta

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb,moving_average
0,-0.667542,-0.481082,-0.480596,-0.257629,-0.257425,-0.718056,-0.670180,-1.382757,-1.379720,0.010077,0.011238,-0.226592,-0.171584,1.982971,1.987162,0,0,-1.562210,-1.751742,-0.410825,-0.523185,-1.070125,-0.602207,-0.271383,0.375598,0.375598,-0.332304,-0.859361,-0.318702,-0.011695,-0.577303,0.001258
1,-0.427040,-1.454413,-1.453854,-1.349899,-1.349338,-1.000441,-0.916495,-1.413417,-1.410143,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,-1.509133,-1.624831,-0.469801,-0.633370,0.278680,-1.920447,-1.815504,-1.467930,-1.467930,-1.201107,-1.020477,-0.298595,-1.559757,-1.002757,-0.032343
2,-0.667542,-1.004315,-1.003808,-1.144738,-1.144495,-0.718056,-0.670180,-0.328479,-0.324267,0.010077,0.011238,-0.226592,-0.171584,-0.323645,-0.322739,0,0,0.529450,-0.234396,-0.410825,-0.523185,2.216891,0.324410,-0.267118,-0.652779,-0.652779,0.489501,-0.919680,-0.415243,0.862119,-0.577303,0.026395
3,-0.427040,0.447827,0.448464,0.530739,0.531318,-1.000441,-0.916495,0.464955,0.470439,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,0.132145,-0.138808,-0.469801,-0.633370,-0.835845,-0.060003,-0.204914,0.089064,0.089064,0.278165,-1.084683,-0.298595,-0.750170,0.398415,0.041967
4,-0.667542,-0.716359,-0.715864,-0.490970,-0.490755,-0.718056,-0.670180,-0.916231,-0.912675,0.573040,0.574515,-0.226592,-0.171584,-0.323645,-0.322739,0,0,0.251629,-0.391863,-0.410825,-0.523185,-0.193587,-0.107811,-0.508853,-0.065758,-0.065758,-0.506874,-0.984066,-0.415243,0.586177,-0.577303,0.028460
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34357,1.532271,0.368125,0.368759,0.368058,0.368636,0.810988,0.658130,0.701703,0.707465,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,0.127206,-0.087128,-0.469801,-0.257831,0.278680,-0.171032,-0.153268,0.212320,0.212320,0.412612,1.863996,0.090991,-0.912087,0.310422,0.061120
34358,-0.667542,-0.109843,-0.109371,0.110554,0.110743,-0.718056,-0.670180,-1.382757,-1.379720,-0.952314,-0.951689,-0.226592,-0.171584,-0.323645,-0.322739,0,0,-0.215680,-0.174460,-0.410825,-0.523185,0.244682,-0.602207,-0.288326,0.760466,0.760466,-0.332304,1.878725,-0.029079,0.540187,-0.577303,0.026227
34359,-0.427040,0.522972,0.523613,0.451942,0.452521,1.124914,1.070460,1.265812,1.272238,-0.469494,-0.468596,-0.128414,-0.086296,-0.447929,-0.447758,0,0,0.106777,0.821717,-0.469801,0.437326,0.724490,-0.163275,0.803051,0.212320,0.212320,0.412612,1.872295,0.772766,-0.966060,0.398415,0.070592
34360,-0.667542,0.612314,0.612758,0.731920,0.732082,-0.718056,-0.670180,0.058664,0.063307,0.573040,0.574515,-0.226592,-0.171584,-0.323645,-0.322739,0,0,-0.103053,-0.787735,-0.410825,-0.523185,-0.193587,-0.389456,-0.872964,0.030453,0.030453,0.573875,2.088826,0.067462,0.678158,-0.577303,0.085644


## Stage 2 : test set의 target(uenomax) 예측

### 01) train set에 기존 train set의 index로 target('uenomax') 추가

In [None]:
# Target column to attach, taken from the original training table.
tar = train_merge_f['uenomax']

# Copy of the modified train set with the target appended as the last
# column; rows are matched on the index.
sea_xtrain_t = sea_xtrain.assign(uenomax=tar)

In [None]:
# Display the training frame with the target column attached.
sea_xtrain_t

Unnamed: 0,scgfail,erabaddatt,erabaddsucc,endcaddatt,endcaddsucc,endcmodbymenbatt,endcmodbymenbsucc,endcmodbysgnbatt,endcmodbysgnbsucc,connestabatt,connestabsucc,redirectiontolte_coverageout,redirectiontolte_epsfallback,handoveratt,handoversucc,reestabatt,reestabsucc,rlculbyte,rlcdlbyte,totprbulavg,totprbdlavg,dlreceivedriavg,airmaculbyte,airmacdlbyte,rachpreamblea,numrar,nummsg3,attpaging,rssipathavg,dlreceivedcqiavg,endcrelbymenb,moving_average,uenomax
19,-0.406465,0.354394,0.354647,0.492939,0.493019,-0.826929,-0.781595,-0.555948,-0.549283,1.005675,1.008167,-0.312775,-0.183432,-0.688403,-0.687824,0,0,-1.008692,-1.298108,-0.544307,-0.590302,-0.377691,-0.688100,-1.155042,-0.278979,-0.278979,0.410375,-0.984066,-2.259611,0.395832,-0.575646,0.166667,1
20,-0.561473,-0.026324,-0.026161,0.082987,0.083100,-0.802317,-0.743494,-0.386390,-0.377104,-0.279667,-0.277644,-0.246645,-0.207081,1.623207,1.623434,0,0,-0.148612,0.582296,-0.955251,-0.654832,-1.065858,-0.159705,0.568791,-0.370003,-0.370003,-0.080685,-1.111763,-1.546140,2.130529,0.334132,-0.083333,2
21,-0.521983,0.086961,0.087599,-0.014509,-0.014115,-1.412955,-1.252297,0.609088,0.622151,0.875024,0.876645,-0.316598,-0.155505,-0.440922,-0.439731,0,0,-0.332495,0.613014,-0.387238,0.719185,-0.314893,-0.368698,0.623522,0.422150,0.422150,0.135509,-1.111744,-1.762692,-1.272567,0.285279,0.166667,5
22,-0.605127,-0.277835,-0.277737,-0.329298,-0.329280,0.921948,0.811282,0.043453,0.047080,0.917552,0.918158,-0.434171,-0.310128,1.122019,1.127556,0,0,0.174133,0.726444,-0.722408,1.433786,-0.338203,0.198020,0.660083,0.259042,0.259042,0.253013,-1.111719,-0.919822,0.354786,-0.680861,0.166667,3
23,1.232606,-0.189232,-0.188962,0.043767,0.043860,-0.148437,-0.363585,-0.720159,-0.714240,0.767531,0.768990,-0.263516,-0.286459,1.286370,1.288383,0,0,0.747192,0.855584,1.044322,2.673322,-0.510557,0.701621,0.797717,-0.860446,-0.860446,0.128012,-1.302677,-0.749490,-2.335046,-0.088507,0.250000,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137431,-0.670763,0.238136,0.238423,-0.697900,-0.697832,0.612832,0.631136,0.070188,0.035298,1.419520,1.423413,1.000958,1.664328,0.917318,0.923125,0,0,0.424183,0.004399,0.787134,-0.188178,0.687495,0.844085,0.236163,0.602736,0.602736,0.807792,1.817827,0.627705,1.081933,0.140567,0.166667,7
137437,1.089254,0.514000,0.514366,0.816927,0.817073,1.394304,1.788079,0.635208,0.651939,0.921490,0.925966,-0.418631,2.765298,0.396290,0.400208,0,0,0.174152,-0.059989,-0.191296,-0.615876,1.171733,0.052529,-0.181625,0.563675,0.563675,1.235115,1.834157,-0.058104,0.669103,0.452049,0.333333,4
137440,2.031058,-0.301652,-0.301403,-0.163554,-0.163474,-0.826929,-0.781595,-1.021776,-1.015858,-0.982390,-0.981255,-0.312775,-0.183432,0.841217,0.842257,0,0,-1.008692,-0.907988,-0.544307,-0.590302,0.284577,-0.343893,-0.381625,0.887608,0.887608,-0.935714,2.088826,1.772095,0.187796,-0.575646,-0.083333,1
137442,-0.521983,0.227076,0.227708,0.143675,0.144069,0.663566,0.546714,-0.314773,-0.571641,1.814054,1.815895,2.830195,-0.155505,-0.440922,-0.439731,0,0,0.186566,0.144946,-0.387238,-0.431759,-0.314893,0.049908,0.104228,0.236891,0.236891,0.562804,2.018318,1.016026,0.015750,0.352772,-0.166667,3


### 02) make data set

In [None]:
# Take independent copies of the train and test frames for Stage 2.
xtrain_t, test_dta_t = sea_xtrain_t.copy(), test_dta.copy()

In [None]:
# Convert the train and test frames into AutoGluon input datasets.
train_data = TabularDataset(xtrain_t)
test_data = TabularDataset(test_dta_t)

### 03) autogluon modeling

In [None]:
# Model search: fit an AutoGluon regressor for `uenomax`, evaluated with
# mean absolute error.
save_path = 'predict_t_1'  # directory where the model .pkl files are saved
predictor = TabularPredictor(
    label='uenomax',
    problem_type='regression',
    path=save_path,
    eval_metric='mean_absolute_error',
)
# Search for the prediction model.
predictor = predictor.fit(
    train_data,
    presets='best_quality',
    num_bag_folds=5,
    num_bag_sets=1,
)

Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=5, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "predict_t_1/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Jun 9 10:57:30 UTC 2023
Disk Space Avail:   33.54 GB / 107.37 GB (31.2%)
Train Data Rows:    113413
Train Data Columns: 32
Label Column: uenomax
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    49998.78 MB
	Train Data (Original)  Memory Usage: 29.03 MB (0.1% of available memory)
	Inferring data type of each feature based on column values. Set 

In [None]:
# Load the saved AutoGluon predictor from the 'predict_t_1/' directory.
predictor = TabularPredictor.load('predict_t_1/')

In [None]:
# Score each individual model on the training data and rank the rows by
# validation score, best first. Because `train_data` is what is passed in,
# the `score_test` column is measured on training rows, not held-out data.
ld_board = (
    predictor
    .leaderboard(train_data, silent=True)
    .sort_values(by='score_val', ascending=False)
)

In [None]:
# Display the leaderboard table.
ld_board

Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
4,WeightedEnsemble_L3,-0.4415637,-0.578639,222.690067,174.223144,2195.876349,0.007505,0.002274,1.565339,3,True,22
3,NeuralNetTorch_BAG_L2,-0.4407314,-0.579098,209.3302,164.797806,1972.132483,4.391854,1.176534,428.938084,2,True,20
15,WeightedEnsemble_L2,-0.5046395,-0.580221,17.598181,12.580487,1151.983137,0.011943,0.002383,1.760685,2,True,12
17,NeuralNetTorch_BAG_L1,-0.5134909,-0.581789,3.508707,0.870879,695.476951,3.508707,0.870879,695.476951,1,True,10
11,NeuralNetFastAI_BAG_L2,-0.4779207,-0.593798,213.929851,165.866955,1716.915729,8.991505,2.245683,173.72133,2,True,18
13,XGBoost_BAG_L2,-0.4854395,-0.594998,207.035449,164.150532,1549.780105,2.097103,0.52926,6.585706,2,True,19
12,LightGBM_BAG_L2,-0.4836634,-0.597553,206.675699,164.498021,1552.27682,1.737354,0.87675,9.082422,2,True,14
8,CatBoost_BAG_L2,-0.4702084,-0.59812,205.189949,163.690456,1568.586954,0.251603,0.069185,25.392556,2,True,16
10,LightGBMLarge_BAG_L2,-0.4746578,-0.599036,207.891852,165.513994,1562.188786,2.953506,1.892722,18.994387,2,True,21
9,LightGBMXT_BAG_L2,-0.4743815,-0.599495,209.575874,166.565034,1558.357745,4.637528,2.943762,15.163347,2,True,13


In [None]:
# Predict the label for every row of the test dataset.
y_test = predictor.predict(test_data)

In [None]:
# Preview the first few predictions.
y_test.head()

0    3.073051
1    1.345345
2    3.900595
3    3.110903
4    3.768381
Name: uenomax, dtype: float32

## 03) make submit file

In [None]:
# Pair each raw test row's base-station id with its prediction.
# Column assignment aligns `y_test` to `test` on the row index.
test = pd.read_csv('raw_data/Q1_test.csv')
pred = test.loc[:, ['ru_id']]
pred['pred'] = y_test

# Fill the sample submission: one column per base station, holding that
# station's predictions renumbered from 0 so they line up with the sample rows.
submit = pd.read_csv('raw_data/Q1_label_sample.csv')
for station in ('BaseStationB', 'BaseStationJ'):
    station_rows = pred['ru_id'] == station
    submit[station] = pred.loc[station_rows, ['pred']].reset_index(drop=True)

# NOTE (original author's reminder): the order has to be changed before
# running the function. Which function is meant is not shown in this cell.
submit.to_csv('submission_data/submit5.csv')
submit.head()

Unnamed: 0,datetime,BaseStationB,BaseStationJ
0,2023-05-19 00:00:00,1.345345,3.073051
1,2023-05-19 00:05:00,3.110903,3.900595
2,2023-05-19 00:10:00,3.162115,3.768381
3,2023-05-19 00:15:00,2.964196,3.794328
4,2023-05-19 00:20:00,3.057208,4.416271


## 제출 결과 : 0.7658398103점

# 느낀 점

최종 0.7658398103점으로 전체 311팀 중 48위를 기록했습니다.