In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file below the Kaggle input directory,
# in os.walk order, then print them one per line.
input_files = [
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
]
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from lightgbm import LGBMRegressor
import random
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
import sklearn.model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the competition's train and test tables into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/tabular-playground-series-aug-2021/train.csv",
        "/kaggle/input/tabular-playground-series-aug-2021/test.csv",
    )
)

In [None]:
# Report (rows, columns) for both raw tables.
for label, frame in (("Train Set Shape: ", train), ("Test  Set Shape: ", test)):
    print(label, frame.shape)

In [None]:
# Features: every training column except the row id and the label.
data = train.drop(columns=['id', 'loss'])
# Label kept as a one-column DataFrame rather than a Series.
target = train[['loss']]
# Test features: the test table without its row id.
test_data = test.drop(columns='id')

In [None]:
# Report (rows, columns) for the feature frame and the label frame.
for label, frame in (("Data   Shape: ", data), ("Target Shape: ", target)):
    print(label, frame.shape)

In [None]:
# Names of all feature columns, as a plain Python list.
columns = list(data.columns)

In [None]:
# Min-max scale the features.
# The scaler is fitted on the training features only; the test features are
# then mapped with the SAME per-column min/max via `transform`. Re-fitting on
# the test set (as `fit_transform` would) gives train and test different
# scalings, so the values the model learned its splits on would no longer
# correspond to the values it is asked to predict on.
# Test values outside the training range will land slightly outside [0, 1].
scaler = MinMaxScaler()
data[columns] = scaler.fit_transform(data[columns])
test_data[columns] = scaler.transform(test_data[columns])

In [None]:
# Show the feature frame (after the scaling cell) as this cell's output.
data

In [None]:
def lgb_objective(trial, data=data, target=target):
    """Optuna objective: hold-out RMSE of a LightGBM regressor for one trial.

    Parameters
    ----------
    trial
        Optuna trial used to sample the hyperparameters below.
    data
        Feature frame. Defaults to the notebook-level ``data`` as it exists
        when this cell is executed (default arguments bind at definition time).
    target
        Label frame. Defaults to the notebook-level ``target``.

    Returns
    -------
    The RMSE of the fitted model on the 25% hold-out split (lower is better).

    Notes
    -----
    * L1/L2 regularisation is sampled once each, under LightGBM's primary names
      ``lambda_l1`` / ``lambda_l2``. ``reg_alpha`` / ``reg_lambda`` are
      documented aliases of those same parameters; sampling both names made
      Optuna tune two dimensions that LightGBM discards in favour of the
      primary names.
    * The same hold-out split drives early stopping and produces the returned
      score, so the score is somewhat optimistic.
    """
    # Fixed seed: every trial is scored on the identical split.
    X_train, X_test, y_train, y_test = train_test_split(
        data, target, test_size=0.25, random_state=123
    )

    params = {
        "metric": "RMSE",
        "boosting_type": "gbdt",
        'device': 'gpu',
        # log=True samples on a log scale (replaces the deprecated suggest_loguniform).
        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.3, log=True),
        "lambda_l1": trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 512),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.1, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.1, 1.0),
        "min_child_samples": trial.suggest_int("min_child_samples", 1, 100),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 15)
    }

    model = LGBMRegressor(**params, random_state=123)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=100, verbose=False)

    pred = model.predict(X_test)
    # squared=False makes mean_squared_error return the root (RMSE).
    rmse = mean_squared_error(y_test, pred, squared=False)

    return rmse

In [None]:
# Search for the hyperparameters that minimise the objective's RMSE over 100 trials.
study = optuna.create_study(direction='minimize')
study.optimize(func=lgb_objective, n_trials=100)

# Summarise the search: how many trials ran and which parameters won.
finished_trials = len(study.trials)
best_trial_params = study.best_trial.params
print('Number of finished trials:', finished_trials)
print('Best trial:', best_trial_params)

In [None]:
# Take the sampled hyperparameters of the best trial and show them.
lgb_params = study.best_trial.params
lgb_params

In [None]:
# Hard-coded hyperparameters for the final model; this replaces the value taken
# from the study (presumably the best trial of an earlier run - confirm).
# The former 'reg_alpha' / 'reg_lambda' entries are dropped: LightGBM documents
# them as aliases of 'lambda_l1' / 'lambda_l2', and when both names are given
# the primary name is used and the alias value is ignored, so those two
# entries had no effect on the trained model.
lgb_params = {
    'learning_rate': 0.10862226733015466,
    'lambda_l1': 0.6814009802818505,
    'lambda_l2': 2.3018129040169558e-08,
    'num_leaves': 19,
    'feature_fraction': 0.8543611093272531,
    'bagging_fraction': 0.7581993503617803,
    'min_child_samples': 69,
    'bagging_freq': 3,
    'device_type': 'gpu',
    'metric': 'rmse',
    'boosting_type': 'gbdt'}

In [None]:
# Build the final regressor from the hyperparameters held in lgb_params.
model = LGBMRegressor(**lgb_params)

In [None]:
# Fit on the full training features and label (no hold-out split here).
model.fit(data, target)

In [None]:
# Predict on the test features.
pred = model.predict(test_data)

In [None]:
# Show the predictions as this cell's output.
pred

In [None]:
# Assemble the submission table in one step: test row ids next to the predicted loss.
predictions = pd.DataFrame({"id": test["id"], 'loss': pred})

# Write it without the index column, then show it as the cell output.
predictions.to_csv('submission.csv', index=False, header=predictions.columns)
predictions