In [33]:
import random 
import pandas as pd 
import numpy as np 
import os 
from sklearn.ensemble import RandomForestRegressor 
from sklearn.preprocessing import LabelEncoder 
import warnings 
warnings.filterwarnings(action="ignore") 

from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor 

In [34]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

In [35]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [36]:
train_df["item_info"] = train_df["item"] + "_" + train_df["corporation"] + "_" + train_df["location"] 

In [37]:
train_df.drop(columns={"item", "corporation", "location", "supply(kg)", "ID"}, inplace=True)

In [39]:
train_df.head(3) 

Unnamed: 0,timestamp,price(원/kg),item_info
0,2019-01-01,0.0,TG_A_J
1,2019-01-02,0.0,TG_A_J
2,2019-01-03,1728.0,TG_A_J


In [41]:
test_df["item_info"] = test_df["item"] + "_" + test_df["corporation"] + "_" + test_df["location"] 

In [43]:
test_df.drop(columns={"ID", "item", "corporation", "location"}, inplace=True) 

In [44]:
test_df.head(3) 

Unnamed: 0,timestamp,item_info
0,2023-03-04,TG_A_J
1,2023-03-05,TG_A_J
2,2023-03-06,TG_A_J


In [45]:
train_data = TimeSeriesDataFrame.from_data_frame(
    train_df, 
    id_column = "item_info", 
    timestamp_column = "timestamp" 
)

In [51]:
predictor = TimeSeriesPredictor(
    prediction_length=28, 
    path = "autgluon_baseline", 
    target = "price(원/kg)", 
    eval_metric="RMSE" 
) 

In [53]:
predictor.fit(
    train_data, 
    presets="best_quality", 
    time_limit=2400
)

TimeSeriesPredictor.fit() called
Setting presets to: best_quality
Fitting with arguments:
{'enable_ensemble': True,
 'evaluation_metric': 'RMSE',
 'excluded_model_types': None,
 'hyperparameter_tune_kwargs': {'num_trials': 3,
                                'scheduler': 'local',
                                'searcher': 'auto'},
 'hyperparameters': 'best_quality',
 'num_val_windows': 1,
 'prediction_length': 28,
 'random_seed': None,
 'target': 'price(원/kg)',
 'time_limit': 2400,
 'verbosity': 2}
Provided training data set with 59397 rows, 39 items (item = single time series). Average time series length is 1523.0. Data frequency is 'D'.
AutoGluon will save models to autgluon_baseline/
AutoGluon will gauge predictive performance using evaluation metric: 'RMSE'
	This metric's sign has been flipped to adhere to being 'higher is better'. The reported score can be multiplied by -1 to get the metric value.

Provided dataset contains following columns:
	target:           'price(원/kg)'

Star

  0%|          | 0/3 [00:00<?, ?it/s]

	Trained 1 models while tuning DeepAR.
	-777.5963     = Validation score (-RMSE)
	123.39  s     = Total tuning time
Hyperparameter tuning model: TemporalFusionTransformer. Tuning model for up to 240.00s of the 2399.96s remaining.
	-732.1917     = Validation score (-RMSE)
	191.81  s     = Training runtime
	0.06    s     = Validation (prediction) runtime
Hyperparameter tuning model: PatchTST. Tuning model for up to 240.00s of the 2399.96s remaining.
	-1152.0670    = Validation score (-RMSE)
	62.03   s     = Training runtime
	0.12    s     = Validation (prediction) runtime
Hyperparameter tuning model: DirectTabular. Tuning model for up to 240.00s of the 2399.96s remaining.
	-784.4630     = Validation score (-RMSE)
	2.03    s     = Training runtime
	0.12    s     = Validation (prediction) runtime
Hyperparameter tuning model: AutoARIMA. Tuning model for up to 240.00s of the 2399.96s remaining.
	-845.6888     = Validation score (-RMSE)
	0.05    s     = Training runtime
	69.59   s     = Valid

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x7ff4d399ca60>

In [63]:
train_data

Unnamed: 0_level_0,Unnamed: 1_level_0,price(원/kg)
item_id,timestamp,Unnamed: 2_level_1
TG_A_J,2019-01-01,0.0
TG_A_J,2019-01-02,0.0
TG_A_J,2019-01-03,1728.0
TG_A_J,2019-01-04,1408.0
TG_A_J,2019-01-05,1250.0
...,...,...
RD_F_J,2023-02-27,468.0
RD_F_J,2023-02-28,531.0
RD_F_J,2023-03-01,574.0
RD_F_J,2023-03-02,523.0


In [64]:
predictions = predictor.predict(train_data)

Global seed set to 123
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [68]:
predictions.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TG_A_J,2023-03-04,3401.248829,-1665.040701,188.260663,1432.684071,2475.849004,3407.045033,4353.034502,5344.710361,6555.831583,8230.938173
TG_A_J,2023-03-05,480.910737,-4166.9664,-2566.782067,-1423.078622,-432.857925,483.094244,1400.508371,2386.235733,3537.077301,5148.16686
TG_A_J,2023-03-06,3231.283135,-2104.40812,-158.107623,1127.442743,2203.39432,3205.596246,4228.483946,5312.003089,6619.319201,8438.796933
TG_A_J,2023-03-07,2961.149189,-2531.061061,-544.699288,775.240511,1911.259862,2965.179401,4009.306094,5122.843312,6471.572035,8325.179724
TG_A_J,2023-03-08,3191.01674,-2382.471484,-334.153259,1045.168487,2197.70297,3251.234574,4339.220263,5473.50316,6832.066422,8696.824821


In [69]:
predictions.shape

(1092, 10)

In [70]:
submission = pd.read_csv("sample_submission.csv") 

In [73]:
mean_predictions = predictions["mean"].values 
mean_predictions.shape

(1092,)

In [75]:
submission["answer"] = mean_predictions

In [76]:
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,3401.248829
1,TG_A_J_20230305,480.910737
2,TG_A_J_20230306,3231.283135
3,TG_A_J_20230307,2961.149189
4,TG_A_J_20230308,3191.016740
...,...,...
1087,RD_F_J_20230327,540.214996
1088,RD_F_J_20230328,548.239436
1089,RD_F_J_20230329,555.573222
1090,RD_F_J_20230330,551.135088


In [77]:
submission.to_csv("autogluon_baseline.csv", index=False)