In [None]:
import numpy as np
import pandas as pd

import os
# Print the full path of every file found under /kaggle/input.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

In [None]:
# Load the training table; the first CSV column is used as the index.
train = pd.read_csv(
    '../input/pubg-finish-placement-prediction/train_V2.csv',
    index_col=0,
)

In [None]:
# Load the test table; the first CSV column is used as the index.
test = pd.read_csv(
    '../input/pubg-finish-placement-prediction/test_V2.csv',
    index_col=0,
)

In [None]:
import lightgbm as lgb
from sklearn.preprocessing import OrdinalEncoder

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [None]:
# Ordinal encoder for categorical columns; categories not seen during
# fitting are encoded as NaN instead of raising.
encoder = OrdinalEncoder(
    handle_unknown='use_encoded_value',
    unknown_value=np.nan,
)

In [None]:
# Fit the encoder on the matchType column (passed as an (n, 1) array, the
# 2-D layout the encoder expects) and overwrite the column with the codes.
train["matchType"] = encoder.fit_transform(
    train["matchType"].to_numpy()[:, np.newaxis]
)

In [None]:
# Rows whose target (winPlacePerc) is missing cannot supervise the model:
# filling the label with 0 would fabricate a "finished last" example.
# Drop those rows first, then zero-fill the remaining gaps in the features.
train = train.dropna(subset=['winPlacePerc']).fillna(0)

In [None]:
# Separate the predictors from the target column.
X = train.drop(columns=['winPlacePerc'])
y = train['winPlacePerc']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Shuffled 80/20 split of the full training frame (target column included),
# seeded so the split is reproducible.
df_train, df_val = train_test_split(
    train,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
# Name of the target column.
label = 'winPlacePerc'

# Columns fed to the model.
features = [
    'damageDealt',
    'walkDistance',
    'assists',
    'kills',
    'killPlace',
    'killPoints',
    'winPoints',
    'matchType',
]

# Subset of `features` that LightGBM should treat as categorical.
categorical_features = ['matchType']

In [None]:
# LightGBM dataset for the training split. free_raw_data=False keeps the
# raw frame attached to the Dataset after construction.
dtrain = lgb.Dataset(
    df_train[features],
    label=df_train[label],
    categorical_feature=categorical_features,
    feature_name=features,
    free_raw_data=False,
)

# LightGBM dataset for the validation split, built with the same settings.
dval = lgb.Dataset(
    df_val[features],
    label=df_val[label],
    categorical_feature=categorical_features,
    feature_name=features,
    free_raw_data=False,
)

In [None]:
# LightGBM training parameters used for both the single fit and the CV run.
params = {
    # Booster type and loss: gradient-boosted trees optimising / reporting MAE.
    'boosting_type': 'gbdt',
    'objective': 'mae',
    'metric': 'mae',
    # Tree shape.
    'num_leaves': 63,
    # NOTE(review): None presumably leaves LightGBM's default depth limit in
    # effect -- confirm against the installed version.
    'max_depth': None,
    'learning_rate': 0.1,
    'min_data_in_leaf': 10,
    # Column / row subsampling.
    'feature_fraction': 0.6,
    'bagging_fraction': 0.6,
    'bagging_freq': 5,
    'num_threads': 4,
    # Categorical-split settings (the commented-out keys are not set).
    #'cat_smooth': 10,
    'max_cat_threshold': 16,
    #'max_cat_to_onehot': 4,
}

In [None]:
# Fit the booster: at most 7500 rounds, evaluated on both the training and
# validation datasets, with early stopping after 25 rounds and a log line
# every 25 rounds.
model = lgb.train(
    params,
    dtrain,
    num_boost_round=7500,
    valid_sets=[dtrain, dval],
    early_stopping_rounds=25,
    verbose_eval=25,
)

In [None]:
# Predict the target for the validation rows using the trained booster.
y_pred = model.predict(data=df_val[features])

In [None]:
# Report hold-out error: mean absolute error first, then mean squared error.
print(
    'MAE:',
    mean_absolute_error(y_true=df_val[label], y_pred=y_pred),
)
print(
    'MSE:',
    mean_squared_error(y_true=df_val[label], y_pred=y_pred),
)

In [None]:
from sklearn.model_selection import KFold

In [None]:
def eval_lgb(params, train_set=None):
    """Run 3-fold LightGBM cross-validation with the given parameters.

    The supplied ``params`` are layered on top of a small set of defaults
    (gbdt booster, MAE objective and metric, 6 threads); any key present in
    ``params`` overrides the corresponding default.

    Parameters
    ----------
    params : dict
        LightGBM parameters to evaluate. Not modified by this function.
    train_set : lgb.Dataset, optional
        Dataset to cross-validate on. When omitted, the notebook-level
        ``dtrain`` is used, which matches the original behaviour.

    Returns
    -------
    The object returned by ``lgb.cv`` (the per-iteration evaluation history).
    """
    base_params = {
        'boosting_type': 'gbdt',
        'objective': 'mae',
        'metric': 'mae',
        'num_threads': 6
    }
    base_params.update(params)

    if train_set is None:
        # Fall back to the dataset built earlier in the notebook.
        train_set = dtrain

    results = lgb.cv(
        # Pass the merged dict: previously the raw `params` was passed, so the
        # defaults above were silently ignored.
        base_params,
        train_set,
        num_boost_round=10000,
        nfold=3,
        # Boolean flag (was the truthy string 'mae'): also report the metric
        # on the training folds.
        eval_train_metric=True,
        # Target is continuous, so folds are not stratified.
        stratified=False,
        verbose_eval=25,
        early_stopping_rounds=25)
    return results

In [None]:
# Cross-validate the parameter set defined earlier in the notebook.
result = eval_lgb(params=params)