<div style="font-size:35px;
            font-weight:bold;
            text-align:center">
    <a href="https://www.kaggle.com/c/tabular-playground-series-oct-2021/">Tabular Playground Series - Oct 2021</a>
</div>


# Import libraries

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import lightgbm as lgb
import catboost as cat
import xgboost as xgb

from warnings import filterwarnings
# Install a blanket "ignore" filter: no category is given, so every warning
# raised after this point is suppressed.
filterwarnings('ignore')

# Marker that the import cell finished running.
print('done!')

# Data

In [None]:
# Folder that holds the competition CSV files.
path = "../input/tabular-playground-series-oct-2021/"

# Map each dataset name to the location of its CSV file under `path`.
file_names = (
    ("train", "train.csv"),
    ("test", "test.csv"),
    ("sample", "sample_submission.csv"),
)
data = {name: path + file_name for name, file_name in file_names}

# Read the three frames in the same order as before: train, test, sample.
train, test, sample = (
    pd.read_csv(data[name]) for name in ("train", "test", "sample")
)


# Reduce memory usage

In [None]:
# This function helps to reduce memory usage:
# the data becomes smaller while keeping the same values
# (up to the precision of the smaller dtype).

def reduce_mem_usage(df, use_float16=True):
    """Downcast the columns of ``df`` in place to smaller dtypes and return it.

    Signed-integer and float NumPy columns are cast to the narrowest dtype of
    the same kind whose range contains the column's minimum and maximum
    (bounds are inclusive). Columns of ``object`` dtype become ``category``.
    Every other dtype (bool, unsigned int, datetime, category, pandas
    extension dtypes, ...) is left untouched, and no column is ever widened.

    Memory usage before and after, and the percentage saved, are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    use_float16 : bool, default True
        Allow float columns to be stored as ``float16``. The fit check only
        looks at min/max, so values that fit are still rounded to half
        precision; pass ``False`` to stop at ``float32``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after downcasting.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    # Candidate target dtypes, narrowest first.
    int_candidates = (np.int8, np.int16, np.int32)
    float_candidates = (np.float16, np.float32) if use_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy signed ints ('i') and floats ('f') are downcast.
        # Routing bool/unsigned/datetime/category columns through the float
        # path would either change their meaning or raise on comparison.
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'if':
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == 'i':
            candidates, limits = int_candidates, np.iinfo
        else:
            candidates, limits = float_candidates, np.finfo

        for candidate in candidates:
            # Candidates grow in size; once one is not smaller than the
            # current dtype there is nothing left to gain.
            if np.dtype(candidate).itemsize >= col_type.itemsize:
                break
            bounds = limits(candidate)
            # Inclusive bounds: a value equal to the dtype limit still fits.
            # NaN min/max (empty or all-NaN column) fails both comparisons,
            # so such columns keep their dtype.
            if c_min >= bounds.min and c_max <= bounds.max:
                df[col] = df[col].astype(candidate)
                break

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the ratio so a frame reporting zero bytes does not divide by zero.
    saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(saved_pct))

    return df

In [None]:
# Downcast both frames, train first and then test; each call prints its own
# before/after memory report.
train, test = (reduce_mem_usage(frame) for frame in (train, test))

# Feature engineering

In [None]:
TARGET = 'target'

# Names of the row-wise statistic columns created below.
# NOTE(review): 'men' looks like a typo for 'mean'; the name is kept as-is so
# anything that reads this column keeps working -- confirm before renaming.
ROW_STATS = ('min', 'max', 'std', 'men')

# Columns that are never raw model inputs: the id, the label, and the columns
# this notebook derives ('kfold' is added by the fit cell). Excluding the
# derived ones keeps this cell idempotent: re-running it no longer folds them
# back into the feature list or into the row statistics.
NON_FEATURES = ('id', TARGET, 'kfold') + ROW_STATS

features = [col for col in train.columns if col not in NON_FEATURES]


def add_row_stats(df, cols):
    """Add per-row min/max/std/mean of ``cols`` to ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that receives the 'min', 'max', 'std' and 'men' columns.
    cols : list of str
        Columns the statistics are computed over.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object.
    """
    # Select once so every statistic sees exactly the same input columns.
    values = df[cols]
    df['min'] = values.min(axis=1)
    df['max'] = values.max(axis=1)
    df['std'] = values.std(axis=1)
    df['men'] = values.mean(axis=1)
    return df


train = add_row_stats(train, features)
test = add_row_stats(test, features)

target = train[TARGET]

# Of the four row statistics, only the standard deviation is added to the
# model inputs.
features += ['std']


# Fit

In [None]:
%%time

params = {
    'n_estimators' : 10000,
    'learning_rate': 0.0472,
    
    'tree_method': 'gpu_hist',
    'gpu_id': 0,
    'predictor': 'gpu_predictor'    
}

train['kfold'] = -1

kf = KFold(n_splits=5, shuffle=True, random_state=1)

for fold, (trn_idx, val_idx) in enumerate(kf.split(X = train, y = target)):
    train.loc[val_idx, "kfold"] = fold

xtrain = train[train.kfold != 0].reset_index(drop=True)
xvalid = train[train.kfold == 0].reset_index(drop=True)

ytrain = xtrain[TARGET]
yvalid = xvalid[TARGET]

xtrain = xtrain[features]
xvalid = xvalid[features]

model = xgb.XGBClassifier(**params)

model.fit(xtrain, ytrain,
#          eval_set=[(xvalid, yvalid)],
#          early_stopping_rounds=200
    )

print('')

In [None]:
# Score the held-out fold. `[:, -1]` keeps the last probability column, i.e.
# the probability of the last class in the model's class order.
valid_proba = model.predict_proba(xvalid)
pred = valid_proba[:, -1]
auc = roc_auc_score(yvalid, pred)
print('auc: ', auc)

# Same last-column probability for the test rows; used for the submission.
test_proba = model.predict_proba(test[features])
test_pred = test_proba[:, -1]

# Create submission file

In [None]:
# Start from a copy of the sample submission and overwrite its prediction
# column. (Assumes the sample rows are in the same order as `test` -- confirm.)
output = sample.copy()
# Item assignment always writes the column; attribute assignment
# (`output.target = ...`) would only set a Python attribute, leaving the CSV
# without predictions, if the sample file had no 'target' column.
output['target'] = test_pred

output.to_csv('submission.csv', index=False)



<div style="font-size:35px;
            font-weight:bold;
            text-align:center">The End
</div>

![Upvote!](https://img.shields.io/badge/Upvote-If%20you%20like%20my%20work-07b3c8?style=for-the-badge&logo=kaggle)