In [None]:
import numpy as np
import pandas as pd

FLAML 的参数用法请参阅：https://github.com/microsoft/FLAML

In [None]:
!pip install flaml

In [None]:
from flaml import AutoML

In [None]:
# Read the three zip-compressed competition CSV files into DataFrames.
# The three loads are independent of each other.
submission = pd.read_csv(
    filepath_or_buffer='../input/mercedes-benz-greener-manufacturing/sample_submission.csv.zip'
)
test = pd.read_csv(
    filepath_or_buffer='../input/mercedes-benz-greener-manufacturing/test.csv.zip'
)
train = pd.read_csv(
    filepath_or_buffer='../input/mercedes-benz-greener-manufacturing/train.csv.zip'
)

In [None]:
# Print the (rows, columns) shape of each loaded frame, one labelled line each.
for label, frame in (
    ("\ntrain shape", train),
    ("\ntest shape", test),
    ("\nsubmission", submission),
):
    print(label, frame.shape)

# 使用label encoder进行转换

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns of `train` stored with the generic object dtype are label-encoded.
object_columns = [name for name in train.columns if train[name].dtype == 'object']

for name in object_columns:
    train_values = train[name].tolist()
    test_values = test[name].tolist()
    # Fit on the values of both frames together so that every label occurring
    # in either frame is known to the encoder before transforming.
    encoder = LabelEncoder().fit(train_values + test_values)
    # Replace the original columns with their integer codes (in place).
    train[name] = encoder.transform(train_values)
    test[name] = encoder.transform(test_values)

In [None]:
# Separate the regression target column 'y' from the remaining columns of `train`.
y = train['y']
X = train.drop(columns=['y'])

# Quick check of the resulting shapes and of the target's container type.
print(X.shape)
print(y.shape, type(y))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows. A fixed random_state makes the shuffle, and hence
# every model later fitted on train_x/train_y, reproducible across kernel
# restarts; without it each run produces a different split.
train_x, test_x, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(train_x.shape)
print(test_x.shape)
print(train_y.shape)
print(test_y.shape)

# 调用模型实例，训练模型，进行预测

In [None]:
# Settings forwarded to AutoML.fit as keyword arguments.
automl_settings = dict(
    time_budget=10,  # search budget, in seconds
    metric='r2',
    task='regression',
)

# Run the automated model search on the training split.
automl = AutoML()
automl.fit(X_train=train_x, y_train=train_y, **automl_settings)

# Sanity check: shape of the predictions made on the training features.
print(automl.predict(train_x).shape)

# Print the model object held by the fitted AutoML instance
# (nothing is exported or saved here).
print(automl.model)

In [None]:
# Summarise the outcome of the AutoML search.
# The search above uses metric='r2' on a regression task; FLAML minimises the
# loss 1 - r2 for that metric, so 1 - best_loss is the validation R^2 score,
# not an accuracy. The labels below are corrected accordingly (and typos fixed).
print('Best ML learner:', automl.best_estimator)
print('Best hyperparameter config:', automl.best_config)
print('Best r2 on validation data: {0:.4g}'.format(1 - automl.best_loss))
print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))

In [None]:
# Predict the target for the competition test frame with the AutoML model,
# store the predictions in the submission frame's 'y' column and write the
# frame to 'submission.csv' without the index column.
test_predict = automl.predict(test)
submission['y'] = test_predict
submission.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# Display the AutoML instance's best hyperparameter configuration as the
# cell output (bare last-line expression).
automl.best_config

# 将上述 [9]~[11] 的代码反复运行，获得多组参数，并将其中最好的参数拿来使用

In [None]:
from mlxtend.regressor import StackingCVRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

In [None]:
# LightGBM regressor configured with hyperparameters copied from a FLAML
# best_config.
# `log_max_bin` is a name from FLAML's search space, not a LightGBM parameter:
# FLAML converts it to max_bin = (1 << log_max_bin) - 1 before building the
# model. Passing `log_max_bin` straight to LGBMRegressor leaves it unrecognised,
# so the same conversion is applied here (log_max_bin=10 -> max_bin=1023).
lgbm = LGBMRegressor(
    n_estimators=40,
    num_leaves=46,
    min_child_samples=13,
    learning_rate=0.0955054451824824,
    max_bin=(1 << 10) - 1,
    colsample_bytree=1.0,
    reg_alpha=0.005626855491108521,
    reg_lambda=11.660799315054836,
)

In [None]:
# XGBoost regressor configured with hyperparameters copied from a FLAML
# best_config.
# `num_boost_round` was removed: it is an argument of the native
# xgboost.train() API, not of the scikit-learn wrapper, where the number of
# boosting rounds is given by n_estimators (already 2 here).
# NOTE(review): tree_method='gpu_hist' and gpu_id=0 require a GPU runtime, and
# newer XGBoost releases deprecate them in favour of device='cuda' with
# tree_method='hist' -- confirm against the installed XGBoost version.
xgboost = XGBRegressor(
    n_estimators=2,
    max_leaves=8,
    min_child_weight=1.888631728256368,
    learning_rate=1.0,
    subsample=0.86572740032985,
    colsample_bylevel=1.0,
    colsample_bytree=1.0,
    reg_alpha=0.0014067552771442214,
    tree_method='gpu_hist',
    gpu_id=0,
    reg_lambda=0.0182039394877554,
)

In [None]:
# Stacking ensemble: xgboost and lgbm are the base regressors and an XGBoost
# regressor with the same settings is the meta-regressor.
# use_features_in_secondary=True feeds the original features to the
# meta-regressor in addition to the base models' predictions.
# random_state fixes the shuffling of the internal cross-validation folds so
# that the stacked predictions are reproducible from run to run.
stack_gen = StackingCVRegressor(
    regressors=(xgboost, lgbm),
    meta_regressor=xgboost,
    use_features_in_secondary=True,
    random_state=42,
)

In [None]:
from datetime import datetime

In [None]:
# Announce the start of training, then fit the stacking ensemble on the whole
# labelled data (X, y), passed as NumPy arrays.
print('进行模型参数训练 START Fit')

fit_started_at = datetime.now()
print(fit_started_at, '对stack_gen集成器模型进行参数训练')
stack_gen_model = stack_gen.fit(X=np.array(X), y=np.array(y))

In [None]:
# Predictions of the fitted stacking ensemble for the competition test frame,
# which is converted to a NumPy array to match how the ensemble was fitted.
test_matrix = np.array(test)
stacking_predict = stack_gen_model.predict(test_matrix)

In [None]:
# Fit the standalone XGBoost model on the training split.
# NOTE(review): the stacking ensemble is fitted on the full X/y, whereas this
# model only sees train_x/train_y, and test_x/test_y are not used for
# evaluation anywhere in the visible notebook -- confirm this is intended.
xgboost.fit(X=train_x, y=train_y)

In [None]:
# Standalone XGBoost predictions for the competition test frame.
xgboost_predict = xgboost.predict(X=test)

In [None]:
# Fit the standalone LightGBM model on the training split.
# NOTE(review): as with the standalone XGBoost model, only train_x/train_y are
# used here while the stacking ensemble is fitted on the full X/y -- confirm.
lgbm.fit(X=train_x, y=train_y)

In [None]:
# Standalone LightGBM predictions for the competition test frame.
lgbm_predict = lgbm.predict(X=test)

# 以下三个模型的权重是通过几次实验“感觉”出来的，由此生成的 submission 文件的 private score 为 0.55279

In [None]:
# Weighted blend of the three prediction vectors (weights 0.2 / 0.5 / 0.3),
# stored in the submission frame's 'y' column and written to
# 'stacking_submission.csv' without the index column.
blended_predict = 0.2*stacking_predict + 0.5*xgboost_predict + 0.3*lgbm_predict
submission['y'] = blended_predict
submission.to_csv('stacking_submission.csv', index=False)