In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import optuna

# Columns removed from both frames before modelling.
drop_list = ['car name', 'horsepower', 'id']
# Columns used as model inputs by the cross-validation code.
feature_list = ['weight', 'model year']

train_df = pd.read_table('../input/train.tsv').drop(columns=drop_list)

test_df = pd.read_table('../input/test.tsv')
# Capture the ids first: 'id' is one of the columns dropped on the next line.
test_id = test_df['id'].values
test_df = test_df.drop(columns=drop_list)

print("train data shape:{}, test data shape:{}".format(train_df.shape, test_df.shape))

# Pairwise correlation of the remaining training columns, drawn as an annotated heatmap.
corr = train_df.corr()
sns.heatmap(corr, annot=True, square=True);

Using TensorFlow backend.
This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


train data shape:(199, 7), test data shape:(199, 6)


In [2]:
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

# Number of cross-validation folds; `objective` also divides by it when
# averaging the per-fold test predictions.
N = 5
# One splitter shared by every trial. shuffle=False is the scikit-learn default,
# written out so it is clear the folds are contiguous row ranges.
kf = KFold(n_splits=N, shuffle=False)

def create_model(params, train_data_set, test_data_set,
                 num_boost_round=200, early_stopping_rounds=10):
    """Train a LightGBM booster with early stopping on a held-out set.

    Args:
        params: LightGBM parameter dict passed straight to ``lgb.train``.
        train_data_set: ``lgb.Dataset`` used for fitting.
        test_data_set: ``lgb.Dataset`` used as the validation set that drives
            early stopping.
        num_boost_round: Maximum number of boosting iterations (default 200).
        early_stopping_rounds: Stop when the validation metric has not improved
            for this many consecutive rounds (default 10).

    Returns:
        Whatever ``lgb.train`` returns (the trained booster).
    """
    return lgb.train(
        params,
        train_data_set,
        num_boost_round=num_boost_round,
        valid_sets=[test_data_set],
        # The `early_stopping_rounds=` keyword of lgb.train was removed in
        # LightGBM 4.0; the callback form works on old and new releases alike.
        callbacks=[lgb.early_stopping(early_stopping_rounds)],
    )

def objective(trial):
    """Optuna objective: mean K-fold validation RMSE for one hyper-parameter sample.

    Samples LightGBM hyper-parameters from ``trial``, trains one model per fold
    of the module-level ``kf`` splitter on ``train_df[feature_list]`` against
    ``train_df['mpg']``, and scores each model on its held-out fold.

    Side effects:
        * Prints the RMSE of every fold.
        * Stores the fold-averaged prediction for ``test_df`` on the trial as
          the user attribute ``"test_pred"`` (a plain list), so it can be read
          back later from ``study.best_trial.user_attrs``.

    Args:
        trial: The ``optuna`` trial used to draw hyper-parameters.

    Returns:
        The arithmetic mean of the per-fold RMSE values (lower is better).
    """
    # Hyper-parameters to optimise.
    num_leaves = trial.suggest_int("num_leaves", 7, 40)
    learning_rate = trial.suggest_uniform("learning_rate", 0.001, 0.1)
    # NOTE(review): the LightGBM parameter docs constrain feature_fraction and
    # bagging_fraction to be strictly greater than 0; the lower bound of 0 here
    # is kept from the original search space -- consider raising it.
    feature_fraction = trial.suggest_uniform("feature_fraction", 0, 1)
    bagging_fraction = trial.suggest_uniform("bagging_fraction", 0, 1)
    bagging_freq = trial.suggest_int("bagging_freq", 0, 10)
    lambda_l2 = trial.suggest_uniform("lambda_l2", 0, 3)
    params = {
        'boosting_type': 'gbdt',
        'objective': 'regression_l2',
        'metric': 'RMSE',
        'num_leaves': num_leaves,
        'learning_rate': learning_rate,
        'feature_fraction': feature_fraction,
        'bagging_fraction': bagging_fraction,
        'bagging_freq': bagging_freq,
        'lambda_l2': lambda_l2,
    }

    # The test feature matrix does not depend on the fold, so build it once.
    test_data = test_df[feature_list].values
    test_pred = np.zeros(len(test_df))
    fold_rmse = []
    for index, (train, test) in enumerate(kf.split(train_df), start=1):
        fold_train = train_df.iloc[train]
        X_train = fold_train[feature_list].values
        y_train = fold_train['mpg'].values

        fold_valid = train_df.iloc[test]
        X_valid = fold_valid[feature_list].values
        y_valid = fold_valid['mpg'].values

        train_data_set = lgb.Dataset(X_train, y_train)
        valid_data_set = lgb.Dataset(X_valid, y_valid, reference=train_data_set)

        model = create_model(params, train_data_set, valid_data_set)
        pred_valid = model.predict(X_valid, num_iteration=model.best_iteration)

        # Each fold contributes 1/N of the averaged test prediction. The original
        # referenced an undefined `gbm` here, which raised NameError.
        test_pred += model.predict(test_data, num_iteration=model.best_iteration) / N

        fold_rmse.append(np.sqrt(mean_squared_error(y_valid, pred_valid)))
        print('Fold {} CV RMSE: {:.6}'.format(index, fold_rmse[-1]))

    # Keep the averaged test prediction instead of discarding it.
    trial.set_user_attr("test_pred", test_pred.tolist())

    # The study minimises this value: the mean RMSE over the validation folds.
    return sum(fold_rmse) / len(fold_rmse)

In [None]:
# `objective` returns an RMSE, so the study minimises it (this is also Optuna's default).
study = optuna.create_study(direction="minimize")
# Evaluate a fixed budget of 10 hyper-parameter samples.
study.optimize(func=objective, n_trials=10)