In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

import os
import time
import gc
import optuna
from optuna import Trial
from optuna.samplers import TPESampler
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [5]:
train = pd.read_pickle('train.pkl')
test = pd.read_pickle('test.pkl')

In [6]:
train.shape, test.shape

((4446966, 29), (1934174, 28))

In [18]:


train['headshotrate'] = train['kills']/train['headshotKills']
train['killStreakrate'] = train['killStreaks']/train['kills']
train['healthitems'] = train['heals'] + train['boosts']
train['totalDistance'] = train['rideDistance'] + train["walkDistance"] + train["swimDistance"]
train['headshotKills_over_kills'] = train['headshotKills'] / train['kills']
train['distance_over_weapons'] = train['totalDistance'] / train['weaponsAcquired']
train['walkDistance_over_heals'] = train['walkDistance'] / train['heals']
train['walkDistance_over_kills'] = train['walkDistance'] / train['kills']
train['killsPerWalkDistance'] = train['kills'] / train['walkDistance']
train["skill"] = train["headshotKills"] + train["roadKills"]

train[train == np.Inf] = np.NaN
train[train == np.NINF] = np.NaN

train.fillna(0, inplace=True)

train = train.drop(['Id', 'groupId', 'matchId'], axis=1)



Removing Na's From DF


In [28]:
matchType = train.matchType.unique()
train.matchType = train.matchType.map(match_dict)
matchtype_test = test.matchType.unique()
match_dict_test = {}
for i, each in enumerate(matchtype_test):
    match_dict_test[each] = i
test.matchType = test.matchType.map(match_dict_test)


In [29]:
X = train.drop('winPlacePerc', axis=1)
y = train['winPlacePerc']

In [30]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

In [34]:
# random sampler
sampler = TPESampler(seed=10)

# define function
def objective(trial):

    lgbm_param = {
        'objective': 'regression',
        'verbose': -1,
        'metric': 'mae', 
        'num_leaves': trial.suggest_int('num_leaves', 2, 1024, step=1, log=True), 
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.7, 1.0),
        'reg_alpha': trial.suggest_uniform('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_uniform('reg_lambda', 0.0, 10.0),
        'max_depth': trial.suggest_int('max_depth',3, 15),
        'learning_rate': trial.suggest_loguniform("learning_rate", 1e-8, 1e-3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 3000),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_loguniform('subsample', 0.4, 1),
    }

    # Generate model
    model_lgbm = LGBMRegressor(**lgbm_param)
    model_lgbm = model_lgbm.fit(X_train, y_train, eval_set=[(X_val, y_val)], 
                           verbose=0, early_stopping_rounds=25)
                           
    # * 평기 지표이다.
    # 원하는 평가 지표에 따라 사용하면 된다.                         
    MAE = mean_absolute_error(y_val, model_lgbm.predict(X_val))
    return MAE

optuna_lgbm = optuna.create_study(direction='minimize', sampler=sampler)

# * n_trials의 경우 optuna를 몇번 실행하여 hyper parameter를 찾을 것인지를 정한다.
# 50으로 설정해도 유의미한 값이 나온다.
optuna_lgbm.optimize(objective, n_trials=50)

[32m[I 2022-06-14 15:50:48,733][0m A new study created in memory with name: no-name-e340f519-aa69-450a-91fe-79b48f4154ff[0m
[32m[I 2022-06-14 16:00:36,361][0m Trial 0 finished with value: 0.2678315961675794 and parameters: {'num_leaves': 230, 'colsample_bytree': 0.7062255848078204, 'reg_alpha': 0.6336482349262754, 'reg_lambda': 7.488038825386118, 'max_depth': 9, 'learning_rate': 1.33040303714882e-07, 'n_estimators': 674, 'min_child_samples': 78, 'subsample': 0.46704202331689854}. Best is trial 0 with value: 0.2678315961675794.[0m
[32m[I 2022-06-14 16:08:48,842][0m Trial 1 finished with value: 0.2305204762112505 and parameters: {'num_leaves': 3, 'colsample_bytree': 0.9056079455103392, 'reg_alpha': 0.9533933461949365, 'reg_lambda': 0.039482663279144514, 'max_depth': 9, 'learning_rate': 0.00011563912803570738, 'n_estimators': 1876, 'min_child_samples': 74, 'subsample': 0.5226478358414336}. Best is trial 1 with value: 0.2305204762112505.[0m
[32m[I 2022-06-14 16:24:55,909][0m Tri

KeyboardInterrupt: 

In [None]:
lgbm_trial = optuna_lgbm.best_trial
lgbm_trial_params = lgbm_trial.params
print('Best Trial: score {},\nparams {}'.format(lgbm_trial.value, lgbm_trial_params))


In [None]:
lgbm = LGBMRegressor(**lgbm_trial_params)
lgbm_study = lgbm.fit(X_train, y_train)
