In [None]:
!pip install -q flaml[notebook,ts_forecast]

In [1]:
import pandas as pd

In [2]:
#use plotly for pandas backend
pd.options.plotting.backend = "plotly"
import plotly.io as pio
pio.renderers
pio.renderers.default = "notebook"

In [3]:
df = pd.read_csv('train_IxoE5JN.csv')
df = df.bfill().ffill()
df['datetime'] = pd.to_datetime(df['datetime'])
df = df.drop(['row_id'], axis=1)


In [12]:
data=df[['datetime','energy']]
num_samples = data.shape[0]
time_horizon = 100
split_idx = num_samples - time_horizon
# train_df is a dataframe with two columns: timestamp and label
train_df = data[:split_idx]
# X_test is a dataframe with dates for prediction
X_test = data[split_idx:]['datetime'].to_frame()
y_test = data[split_idx:]['energy']


In [13]:
from flaml import AutoML
automl = AutoML()

In [14]:
settings = {
    "time_budget": 240,  # total running time in seconds
    # primary metric for validation: 'mape' is generally used for forecast tasks
    "metric": 'mape',
    "task": 'ts_forecast',  # task type
    # "log_file_name": 'CO2_forecast.log',  # flaml log file
    # validation method can be chosen from ['auto', 'holdout', 'cv']
    "eval_method": "holdout",
    "seed": 7654321,  # random seed
}


In [15]:
'''The main flaml automl API'''
automl.fit(dataframe=train_df,  # training data
           label='energy',  # label column
           period=time_horizon,  # key word argument 'period' must be included for forecast task)
           **settings)


[flaml.automl: 11-18 07:46:20] {2599} INFO - task = ts_forecast
INFO:flaml.automl:task = ts_forecast
[flaml.automl: 11-18 07:46:20] {2601} INFO - Data split method: time
INFO:flaml.automl:Data split method: time
[flaml.automl: 11-18 07:46:20] {2604} INFO - Evaluation method: holdout
INFO:flaml.automl:Evaluation method: holdout
[flaml.automl: 11-18 07:46:20] {2726} INFO - Minimizing error metric: mape
INFO:flaml.automl:Minimizing error metric: mape
[flaml.automl: 11-18 07:46:20] {2870} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'prophet', 'arima', 'sarimax']
INFO:flaml.automl:List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'prophet', 'arima', 'sarimax']
[flaml.automl: 11-18 07:46:20] {3166} INFO - iteration 0, current learner lgbm
INFO:flaml.automl:iteration 0, current learner lgbm
[flaml.automl: 11-18 07:46:21] {3297} INFO - Estimated sufficient time budget=84993s. Estimated necessary 

In [16]:
print('Best ML leaner:', automl.best_estimator)
print('Best hyperparmeter config:', automl.best_config)
print(f'Best mape on validation data: {automl.best_loss}')
print(f'Training duration of best run: {automl.best_config_train_time}s')

Best ML leaner: rf
Best hyperparmeter config: {'n_estimators': 10, 'max_features': 1.0, 'max_leaves': 13, 'optimize_for_horizon': False, 'lags': 64, 'FLAML_sample_size': 94792}
Best mape on validation data: 0.10987484425193735
Training duration of best run: 20.847909688949585s


In [17]:
import pickle
with open('automl.pkl', 'wb') as f:
    pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)

In [None]:
flaml_y_pred = automl.predict(X_test)
print(f"Predicted labels\n{flaml_y_pred}")
print(f"True labels\n{y_test}")

In [19]:
from flaml.ml import sklearn_metric_loss_score
print('mape', '=', sklearn_metric_loss_score('mape', y_true=y_test, y_predict=flaml_y_pred))

mape = 0.09217681202274092


In [20]:
from sklearn.metrics import mean_squared_error

rms = mean_squared_error(y_test, flaml_y_pred, squared=False)

In [21]:
rms

247.34025245518433

In [39]:
test=pd.read_csv('/content/test_WudNWDM.csv')

In [25]:
# test=test.drop(['row_id'], axis=1)

In [40]:
ans=automl.predict(test.drop(['row_id'], axis=1))

In [41]:
test['energy']=ans

In [45]:
test[['row_id','energy']].to_csv('submission.csv',index=False)

In [37]:
pd.read_csv('/content/sample_submission_jn0a7vR.csv')

Unnamed: 0,row_id,energy
0,94993,1702.995014
1,94994,1702.995014
2,94995,1702.995014
3,94996,1702.995014
4,94997,1702.995014
...,...,...
26299,121292,1702.995014
26300,121293,1702.995014
26301,121294,1702.995014
26302,121295,1702.995014
