 Adapted from the following Kaggle notebooks:
 https://www.kaggle.com/xhlulu/ieee-fraud-xgboost-with-gpu-fit-in-40s  
 https://www.kaggle.com/davidcairuz/feature-engineering-lightgbm

In [None]:
print('loading libs...')
import warnings
warnings.filterwarnings("ignore")
import os
import gc
import numpy as np
import pandas as pd
from sklearn import preprocessing
import xgboost as xgb
from tqdm import tqdm
from sklearn.model_selection import KFold
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
print('done')

In [None]:
%%time
# Read the four competition tables (each indexed by TransactionID) and the
# sample submission file from the Kaggle input directory.
print('loading data...')
_input_dir = '../input/ieee-fraud-detection'
_table_names = (
    'train_transaction',
    'test_transaction',
    'train_identity',
    'test_identity',
)
# The generator is consumed by tuple unpacking, so the files are read in the
# order listed above and bound to the matching variable names.
train_transaction, test_transaction, train_identity, test_identity = (
    pd.read_csv(f'{_input_dir}/{_name}.csv', index_col='TransactionID')
    for _name in _table_names
)
# The submission template keeps its default integer index.
sample_submission = pd.read_csv(f'{_input_dir}/sample_submission.csv')
print('done')


In [None]:
# Columns kept for modelling, in the order they are fed to the model.
features = [
    'TransactionDT', 'card1', 'TransactionAmt', 'card2', 'addr1',
    'P_emaildomain', 'D15', 'card5', 'C13', 'dist1',
    'D10', 'D4', 'id_02', 'D1', 'id_20',
    'C1', 'id_19', 'D2', 'id_31', 'D8',
    'C2', 'DeviceInfo', 'D11', 'C14', 'C6',
    'C11', 'R_emaildomain', 'C9', 'id_06', 'V313',
    'id_05', 'M4', 'D3', 'id_33', 'M6',
    'D5', 'dist2', 'V307', 'V310', 'M5',
    'id_01', 'card4', 'id_13', 'C5', 'D9',
    'card3', 'card6', 'id_30', 'V315', 'V314',
    'D14', 'C10', 'C8', 'V130', 'C12',
    'id_14', 'V312', 'V83', 'V87', 'V127',
    'V62', 'id_18', 'D6', 'V317', 'V308',
    'V320', 'ProductCD', 'V82', 'V76', 'V61',
    'M7', 'V53', 'V54', 'D13', 'V20',
    'M3', 'V55', 'V78', 'D12', 'V283',
    'M8', 'V45', 'V38', 'V75', 'M9',
    'V285', 'V309', 'V13', 'V311', 'V131',
    'V77', 'V291', 'V12', 'V37', 'V281',
    'V282', 'V19', 'V56', 'V35', 'V36',
]

# Parameter dictionary handed to the LightGBM training call later in the file.
params = dict(
    objective='binary',
    boosting_type='gbdt',
    metric='auc',
    n_jobs=-1,
    max_depth=-1,
    tree_learner='serial',
    n_estimators=5000,
    max_bin=255,
    verbose=-1,
    seed=1229,
    learning_rate=0.01,
    early_stopping_rounds=100,
    colsample_bytree=0.7,
    num_leaves=256,
    reg_alpha=0.35,
)

In [None]:
%%time
# Build the model inputs: join identity onto transactions, split off the
# target, keep only the selected feature columns, impute missing values with 0
# and label-encode every object-typed column.
print('merging data...')
# Left join on the TransactionID index: every transaction row is kept, and
# identity columns are NaN where no identity record exists.
train = train_transaction.merge(train_identity, how='left', left_index=True, right_index=True)
test = test_transaction.merge(test_identity, how='left', left_index=True, right_index=True)
del train_transaction, train_identity, test_transaction, test_identity

print('dropping target...')
y_train = train['isFraud'].copy()

# Select the modelling columns *before* imputing and encoding. Each of those
# steps works column by column, so the result is the same as selecting last,
# but only len(features) columns are copied/encoded instead of the whole
# merged frame. `features` does not contain 'isFraud', so the target is
# dropped by the selection itself.
print('selecting features...')
X_train = train[features]
X_test = test[features]
del train, test
gc.collect()

print('fillnas...')
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)
gc.collect()

print('Label Encoding...')
for col in features:
    if X_train[col].dtype == 'object' or X_test[col].dtype == 'object':
        # After fillna(0) an object column mixes the filler 0 with strings.
        # Cast to str explicitly so fit and both transforms see exactly the
        # same value space instead of relying on implicit NumPy coercion.
        train_values = X_train[col].astype(str)
        test_values = X_test[col].astype(str)
        encoder = preprocessing.LabelEncoder()
        # Fit on train and test together so categories that occur in only
        # one of the two frames still receive a code.
        encoder.fit(pd.concat([train_values, test_values]))
        X_train[col] = encoder.transform(train_values)
        X_test[col] = encoder.transform(test_values)
print('Done')

In [None]:
%%time
# K-fold cross-validated LightGBM training. Each fold's model scores its
# held-out rows (out-of-fold predictions) and the test set; test predictions
# are averaged over the folds and written to the submission file.
NFOLDS = 8
# KFold is used without shuffling, so each validation fold is a contiguous
# block of rows in X_train's existing order.
folds = KFold(n_splits=NFOLDS)
columns = X_train.columns
splits = folds.split(X_train, y_train)
y_preds = np.zeros(X_test.shape[0])
y_oof = np.zeros(X_train.shape[0])
score = 0

for fold_n, (train_index, valid_index) in enumerate(splits):
    # Index X_train directly: `columns` is X_train's full column set, so
    # re-selecting it would only copy the whole frame twice per fold.
    X_tr, X_val = X_train.iloc[train_index], X_train.iloc[valid_index]
    y_tr, y_val = y_train.iloc[train_index], y_train.iloc[valid_index]
    dtrain = lgb.Dataset(X_tr, label=y_tr)
    dvalid = lgb.Dataset(X_val, label=y_val)
    # The number of rounds and early stopping are taken from `params`
    # ('n_estimators', 'early_stopping_rounds').
    # NOTE(review): `verbose_eval` was removed from lgb.train in LightGBM 4.0;
    # on newer versions pass callbacks=[lgb.log_evaluation(200)] instead.
    clf = lgb.train(params, dtrain, valid_sets=[dtrain, dvalid], verbose_eval=200)
    y_pred_valid = clf.predict(X_val)
    y_oof[valid_index] = y_pred_valid
    # Compute the fold metric once and reuse it for logging and the mean.
    fold_auc = roc_auc_score(y_val, y_pred_valid)
    print(f"Fold {fold_n + 1} | AUC: {fold_auc}")
    score += fold_auc / NFOLDS
    y_preds += clf.predict(X_test) / NFOLDS
    # Release the per-fold frames and Dataset objects before the next fold
    # allocates its own.
    del X_tr, X_val, y_tr, y_val, dtrain, dvalid
    gc.collect()
print(f"\nMean AUC = {score}")
print(f"Out of folds AUC = {roc_auc_score(y_train, y_oof)}")

print('submission...')
# Positional assignment: assumes sample_submission rows are in the same order
# as X_test -- TODO confirm against the TransactionID column.
sample_submission['isFraud'] = y_preds
sample_submission.to_csv("submission_lgb.csv", index=False)
