## Import

In [None]:
import pandas as pd
import numpy as np

## 데이터 불러오기

In [None]:
# Read the training and test tables from the local ./data directory.
train = pd.read_csv(filepath_or_buffer='./data/train.csv')
test = pd.read_csv(filepath_or_buffer='./data/test.csv')

## 입력 Feature와 Target(착과량) 분할

In [None]:
# Training features: everything except the 'ID' column (presumably a row
# identifier — confirm against the data) and the target column.
X_train = train.drop(columns=['ID', '착과량(int)'])
# Training target: the '착과량(int)' column.
y_train = train['착과량(int)']

# Only the 'ID' column is dropped from the test frame.
X_test = test.drop(columns='ID')

## AutoML(flaml)을 활용한 Model Selection

In [None]:
from flaml import AutoML

# Settings for the FLAML search: a regression task scored by MAE.
MODEL_TASK = "regression"
MODEL_METRIC = 'mae'
MODEL_TIME_BUDGET = 60*5  # FLAML's time_budget is given in seconds (5 minutes)

# Keyword arguments forwarded to AutoML.fit; the seed is fixed at 42.
params = dict(
    time_budget=MODEL_TIME_BUDGET,
    metric=MODEL_METRIC,
    task=MODEL_TASK,
    seed=42,
)

# Run the model/hyperparameter search on the training data.
auto_model = AutoML()
auto_model.fit(X_train, y_train, **params)

In [None]:
# Show the estimator held by the best model found by the search, followed by
# the configuration FLAML reports as best.
print('Best hyperparameter:', auto_model.model.estimator)
print('Best hyperparameter config:', auto_model.best_config)

## 학습 및 추론(4개 모델 앙상블)

In [None]:
from xgboost import XGBRFRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor

In [None]:
# First random forest member of the ensemble. The hyperparameter values look
# like they were copied from an AutoML run — TODO confirm their origin.
# random_state is pinned (42, the same seed used for the AutoML search and the
# XGBRFRegressor) so that re-running the notebook rebuilds the same forest and
# therefore the same submission.
RF1 = RandomForestRegressor(
    max_features=0.6346669492585846,
    max_leaf_nodes=26,
    n_estimators=14,
    n_jobs=-1,
    random_state=42,
)
RF1.fit(X_train, y_train)

In [None]:
# Second random forest member of the ensemble; it considers all features at
# each split (max_features=1.0). The hyperparameter values look like they were
# copied from an AutoML run — TODO confirm their origin.
# random_state is pinned (42, the same seed used for the AutoML search and the
# XGBRFRegressor) so that re-running the notebook rebuilds the same forest and
# therefore the same submission.
RF2 = RandomForestRegressor(
    max_features=1.0,
    max_leaf_nodes=22,
    n_estimators=16,
    n_jobs=-1,
    random_state=42,
)
RF2.fit(X_train, y_train)

In [None]:
# Extra-trees member of the ensemble. The hyperparameter values look like they
# were copied from an AutoML run — TODO confirm their origin.
# random_state is pinned (42, the same seed used for the AutoML search and the
# XGBRFRegressor) so that re-running the notebook rebuilds the same trees and
# therefore the same submission.
ET = ExtraTreesRegressor(
    max_features=0.9002433907979883,
    max_leaf_nodes=32,
    n_estimators=18,
    n_jobs=-1,
    random_state=42,
)
ET.fit(X_train, y_train)

In [None]:
# XGBoost random-forest regressor with library defaults, seeded with 42.
RFX = XGBRFRegressor(
    random_state=42,
)
RFX.fit(X=X_train, y=y_train)

In [None]:
# Equal-weight average of the four fitted models' test-set predictions,
# summed in the order RF1, RF2, ET, RFX.
ensemble_result = sum(
    member.predict(X_test) for member in (RF1, RF2, ET, RFX)
) / 4

In [None]:
# Load the sample submission, set its '착과량(int)' column to the ensemble
# predictions, and write the result without the index column.
# NOTE(review): assumes the sample submission rows are in the same order as
# test.csv — confirm.
submit = pd.read_csv(filepath_or_buffer='./data/sample_submission.csv')
submit['착과량(int)'] = ensemble_result

file_name = 'submit_final.csv'
submit.to_csv(path_or_buf=file_name, index=False)