In [1]:
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.preprocessing import LabelEncoder
from itertools import product
import lightgbm as lgb
from sklearn.metrics import f1_score, roc_auc_score
import gc
import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_columns', None)
pd.set_option('max_rows', None)
pd.set_option('max_colwidth', 200)

In [2]:
# count编码
def count_coding(df, fea_col):
    for f in fea_col:
        df[f + '_count'] = df[f].map(df[f].value_counts())
    return df

# 交叉特征统计
def cross_cat_num(df, num_col, cat_col):
    for f1 in tqdm(cat_col):
        g = df.groupby(f1, as_index=False)
        for f2 in tqdm(num_col):
            feat = g[f2].agg({
                '{}_{}_max'.format(f1, f2): 'max',
                '{}_{}_min'.format(f1, f2): 'min',
                '{}_{}_median'.format(f1, f2): 'median'
            })
            df = df.merge(feat, on=f1, how='left')
    return df

In [3]:
class MeanEncoder:
    def __init__(self, categorical_features, n_splits=10, target_type='classification', prior_weight_func=None):
        """
        :param categorical_features: list of str, the name of the categorical columns to encode
 
        :param n_splits: the number of splits used in mean encoding
 
        :param target_type: str, 'regression' or 'classification'
 
        :param prior_weight_func:
        a function that takes in the number of observations, and outputs prior weight
        when a dict is passed, the default exponential decay function will be used:
        k: the number of observations needed for the posterior to be weighted equally as the prior
        f: larger f --> smaller slope
        """
 
        self.categorical_features = categorical_features
        self.n_splits = n_splits
        self.learned_stats = {}
 
        if target_type == 'classification':
            self.target_type = target_type
            self.target_values = []
        else:
            self.target_type = 'regression'
            self.target_values = None
 
        if isinstance(prior_weight_func, dict):
            self.prior_weight_func = eval('lambda x: 1 / (1 + np.exp((x - k) / f))', dict(prior_weight_func, np=np))
        elif callable(prior_weight_func):
            self.prior_weight_func = prior_weight_func
        else:
            self.prior_weight_func = lambda x: 1 / (1 + np.exp((x - 2) / 1))
 
    @staticmethod
    def mean_encode_subroutine(X_train, y_train, X_test, variable, target, prior_weight_func):
        X_train = X_train[[variable]].copy()
        X_test = X_test[[variable]].copy()
 
        if target is not None:
            nf_name = '{}_pred_{}'.format(variable, target)
            X_train['pred_temp'] = (y_train == target).astype(int)  # classification
        else:
            nf_name = '{}_pred'.format(variable)
            X_train['pred_temp'] = y_train  # regression
        prior = X_train['pred_temp'].mean()
 
        col_avg_y = X_train.groupby(by=variable, axis=0)['pred_temp'].agg(['mean', 'size'])
        col_avg_y['size'] = prior_weight_func(col_avg_y['size'])
        col_avg_y[nf_name] = col_avg_y['size'] * prior + (1 - col_avg_y['size']) * col_avg_y['mean']
        col_avg_y.drop(['size', 'mean'], axis=1, inplace=True)
 
        nf_train = X_train.join(col_avg_y, on=variable)[nf_name].values
        nf_test = X_test.join(col_avg_y, on=variable).fillna(prior, inplace=False)[nf_name].values
 
        return nf_train, nf_test, prior, col_avg_y
 
    def fit_transform(self, X, y):
        """
        :param X: pandas DataFrame, n_samples * n_features
        :param y: pandas Series or numpy array, n_samples
        :return X_new: the transformed pandas DataFrame containing mean-encoded categorical features
        """
        X_new = X.copy()
        if self.target_type == 'classification':
            skf = StratifiedKFold(self.n_splits)
        else:
            skf = KFold(self.n_splits)
 
        if self.target_type == 'classification':
            self.target_values = sorted(set(y))
            self.learned_stats = {'{}_pred_{}'.format(variable, target): [] for variable, target in
                                  product(self.categorical_features, self.target_values)}
            for variable, target in product(self.categorical_features, self.target_values):
                nf_name = '{}_pred_{}'.format(variable, target)
                X_new.loc[:, nf_name] = np.nan
                for large_ind, small_ind in skf.split(X, y):
                    nf_large, nf_small, prior, col_avg_y = MeanEncoder.mean_encode_subroutine(
                        X_new.iloc[large_ind], y.iloc[large_ind], X_new.iloc[small_ind], variable, target, self.prior_weight_func)
                    X_new.iloc[small_ind, -1] = nf_small
                    self.learned_stats[nf_name].append((prior, col_avg_y))
        else:
            self.learned_stats = {'{}_pred'.format(variable): [] for variable in self.categorical_features}
            for variable in self.categorical_features:
                nf_name = '{}_pred'.format(variable)
                X_new.loc[:, nf_name] = np.nan
                for large_ind, small_ind in skf.split(X, y):
                    nf_large, nf_small, prior, col_avg_y = MeanEncoder.mean_encode_subroutine(
                        X_new.iloc[large_ind], y.iloc[large_ind], X_new.iloc[small_ind], variable, None, self.prior_weight_func)
                    X_new.iloc[small_ind, -1] = nf_small
                    self.learned_stats[nf_name].append((prior, col_avg_y))
        return X_new
 
    def transform(self, X):
        """
        :param X: pandas DataFrame, n_samples * n_features
        :return X_new: the transformed pandas DataFrame containing mean-encoded categorical features
        """
        X_new = X.copy()
 
        if self.target_type == 'classification':
            for variable, target in product(self.categorical_features, self.target_values):
                nf_name = '{}_pred_{}'.format(variable, target)
                X_new[nf_name] = 0
                for prior, col_avg_y in self.learned_stats[nf_name]:
                    X_new[nf_name] += X_new[[variable]].join(col_avg_y, on=variable).fillna(prior, inplace=False)[
                        nf_name]
                X_new[nf_name] /= self.n_splits
        else:
            for variable in self.categorical_features:
                nf_name = '{}_pred'.format(variable)
                X_new[nf_name] = 0
                for prior, col_avg_y in self.learned_stats[nf_name]:
                    X_new[nf_name] += X_new[[variable]].join(col_avg_y, on=variable).fillna(prior, inplace=False)[
                        nf_name]
                X_new[nf_name] /= self.n_splits
 
        return X_new

### 用户表

In [4]:
train_user = pd.read_hdf('../input/train_user.h5')
test_user = pd.read_hdf('../input/test_user.h5')

In [5]:
train_user.head()

Unnamed: 0,phone_no_m,city_name,county_name,idcard_cnt,label,arpu_202004
0,672ddbf02a5544d32e4ecc9433b1981bffe23bf912273a3a835f6cccb78b8ed7554e9ab0fbcd33d19eb6063ce00542dd223cc5cc83c68f07bcf933547b6776b1,绵阳,江油分公司,1,0,45.0
1,5e1272273e041e82cb275ae877710be98cdaf5b0a8f34de8d361f71d5268fa0851edffd3950e170df1e3846fcf90cc7cc8299be9139a2ac4b5c5e5121d832674,德阳,旌阳分公司,1,0,60.0
2,eaab3472ec87b076e69e6e8bb62b14341638fc63661a6c682d6add360a4332a8ad294d8470d64a73c6e53e8413f0ad93b9ea65afab717e58d312554f33553ee7,成都,金堂分公司,2,0,63.0
3,0ce1bb415704178bf44e9c9b431a39b083a132c8e6d99f3627cd4520064b93ec66a72d085a67f0f40eebb9f901072965073e398f04c4ae500a1db4dfb13a972c,成都,高新分公司,2,0,23.203125
4,28b87f35f63f65096a53e3a4c97eaffd4a6c43ffa7e92d7706b20520aa11fff0f4ce06203f66758ec0f0e70780e4e258aae6aa1f23439e115528e08eda71ed20,德阳,旌阳分公司,1,0,50.0


In [6]:
test_user.head()

Unnamed: 0,phone_no_m,city_name,county_name,idcard_cnt,arpu_202004
0,22d522340df77e2252c1a4d92b4bcb00d515e36f3ec6bf94e017b4ffd67e26520af18637ad91bde69abd831cc36cdc0c5af5e57bc280f95f79efef7bfa9c6926,成都,金牛分公司,1,9.0
1,5220d4b8429bdba3971a7b46a088c6b8fa6710f40607598ac1219daab6071510b3f66f78637420574dbb2d5488d44de9d4360a644bbe4ec45aaa4b7513bdc886,天府新区,天府直属部门,1,
2,7d19dd2b50ced56f03d23bf928cf34dc570a48525571a868ffdcdc53a865b7a41c386a6f1d6567dd4a710530b933308c77d1ef89f77b580d1983dadd3de162ea,成都,锦江分公司,1,9.0
3,134a4a591185c9d3788021896dcfc235e9e0a6a1e3f8a4fd3a9f9d9bf4a033e791b7199db980b0678020bdf689d719306a5c694ffa47b4ba2bec43ceda68dfc1,绵阳,江油分公司,1,138.75
4,2356dcd6759d50455ddaeed03c838843558e9182d5962f8a4c81a9a178063c49ab988750f795bb6c5462aee5dace2ad9fd18dbb1a6619dbb1e771e9b0f192da9,成都,青羊分公司,2,39.0


In [7]:
train_user.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6106 entries, 0 to 6105
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   phone_no_m   6106 non-null   object 
 1   city_name    6106 non-null   object 
 2   county_name  6106 non-null   object 
 3   idcard_cnt   6106 non-null   int8   
 4   label        6106 non-null   int8   
 5   arpu_202004  5369 non-null   float16
dtypes: float16(1), int8(2), object(3)
memory usage: 214.7+ KB


In [8]:
test_user.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2045 entries, 0 to 2044
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   phone_no_m   2045 non-null   object 
 1   city_name    2045 non-null   object 
 2   county_name  2045 non-null   object 
 3   idcard_cnt   2045 non-null   int8   
 4   arpu_202004  1869 non-null   float16
dtypes: float16(1), int8(1), object(3)
memory usage: 69.9+ KB


In [9]:
df_user = pd.concat([train_user, test_user])

In [10]:
# 号码量*消费值
df_user['idcard_cnt*arpu_202004'] = df_user['idcard_cnt'] * df_user['arpu_202004']

In [11]:
# count编码
count_list = ['city_name', 'county_name', 'idcard_cnt']

df_user = count_coding(df_user, count_list)

In [12]:
cross_cat = ['city_name', 'county_name']
cross_num = ['idcard_cnt']
df_user = cross_cat_num(df_user, cross_num, cross_cat)

  0%|                                                                                            | 0/2 [00:00<?, ?it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 111.40it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 111.50it/s][A
100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 77.13it/s]


### 电话表

In [13]:
train_voc = pd.read_hdf('../input/train_voc.h5')
test_voc = pd.read_hdf('../input/test_voc.h5')

In [14]:
train_voc.head()

Unnamed: 0,phone_no_m,opposite_no_m,calltype_id,start_datetime,call_dur,city_name,county_name,imei_m
9,b3994b4c46e39954cfe0cb8ebd2a79703a2ace6612fa18241509e8b2dfe9d75f503823b6df2d10180a9033ccd22390aaeda397037ca4cd26604d6db6e3470874,1b01124a6ad0443ad868757c6594a605a0c5179c2b5b57071d5cec8cd8c4c89d7598587f953f08887965b42d2e465d08388c5465fe43b67f1e9ca5509c5529f2,1,2020-03-22 21:38:40,175,,,90ba61111dad175330029e10da8264a86dfdb3a281ccb8556eca87a6f1af32e1d5ff4db4a7d4b11f760e4866de974933f247c4c389073f28634229889bfe18bf
10,b3994b4c46e39954cfe0cb8ebd2a79703a2ace6612fa18241509e8b2dfe9d75f503823b6df2d10180a9033ccd22390aaeda397037ca4cd26604d6db6e3470874,1b01124a6ad0443ad868757c6594a605a0c5179c2b5b57071d5cec8cd8c4c89d7598587f953f08887965b42d2e465d08388c5465fe43b67f1e9ca5509c5529f2,1,2020-03-23 18:05:41,33,,,90ba61111dad175330029e10da8264a86dfdb3a281ccb8556eca87a6f1af32e1d5ff4db4a7d4b11f760e4866de974933f247c4c389073f28634229889bfe18bf
219,c5502a6d3e4cabaf8c0e298c4aac693b6e80835d50d4e4f393d8426bd49c0d4d00c3959d2a2c8341410e57af28bee6dc5c80dbad7cf147018ec0ece7bfa0cf5f,f1b73c46a691c068bfd38253e20e882d4993c8d3e58016aeed804a8cdbf54542112e529ebf7fdd0e62cf3556571fb17b865d8c3d57c42be851d75c90f4df4603,1,2020-03-01 11:24:37,39,,,e1e1149b2c06e9972293c457293abb32ad05b0c82b3b64c3786896b0414c897b60a539329ce3b7df83ebee19ad20fee3971303edd776a593578f03d16ee8981a
222,c5502a6d3e4cabaf8c0e298c4aac693b6e80835d50d4e4f393d8426bd49c0d4d00c3959d2a2c8341410e57af28bee6dc5c80dbad7cf147018ec0ece7bfa0cf5f,5bc6d97a3017578edfa7979fa43653cd74bad827f18337c8c84728f79778939b6f939ea3aa2661907f2e79bcf70feb93adb79c3f7d4a934368c459d9b6aeb54e,1,2020-03-01 11:23:44,32,,,e1e1149b2c06e9972293c457293abb32ad05b0c82b3b64c3786896b0414c897b60a539329ce3b7df83ebee19ad20fee3971303edd776a593578f03d16ee8981a
276,2bdbc0a45d6228970b353d691e2ec229f0cf60790a1a39d5e01c0175b5e20cd2ca7d39aff8506741884fd7a4cd4fd9fdec5112ddb764d81c2983752c989f1078,c42eeddeff98feb87860441ea6548f8bda2a080c935e02c7a6ae30223cf37dfc2d16760224c79b93ea44a6d3fd77139e03a20f47608b9455372a96cb026f239e,2,2020-03-03 15:34:21,3,,,3e808a14fa2aa61524c70f262539d1c8532b8e3a8c7c87c863b3f8420750f1bf4a63aa900aef09620c1fae1a6a7fadf1f42e30477e24336fc5c2a8fae9b5de6e


In [15]:
test_voc.head()

Unnamed: 0,phone_no_m,opposite_no_m,calltype_id,start_datetime,call_dur,city_name,county_name,imei_m
0,b3dce36871f3e88164b18d4953b114163f008cb51c28fe932ae3c734f3b1d1e2853d63fb3fb52a09fd9d0997b64fe5796507d3b50768fe0dce23819c8b24729c,f87f526ee776ac8b6b28392620fbb6049af9eaadab7de2ee9d7c5a31d7404566b8a3c25f404f793e513448a8c5dad1940a597a488e6f165a1aad6b103cf40f59,1,2020-04-13 21:04:10,1909,,,685a0bfcd91b4ecec2ff35d656a35a4e4f9e1c7a92b15f5c3c0675fcf8b6c9adc35731ab15e91401f2367f1d9d2910f9e8829e2e5052c96bc800fcde6c505214
1,b3dce36871f3e88164b18d4953b114163f008cb51c28fe932ae3c734f3b1d1e2853d63fb3fb52a09fd9d0997b64fe5796507d3b50768fe0dce23819c8b24729c,f87f526ee776ac8b6b28392620fbb6049af9eaadab7de2ee9d7c5a31d7404566b8a3c25f404f793e513448a8c5dad1940a597a488e6f165a1aad6b103cf40f59,1,2020-04-13 18:32:50,2510,,,685a0bfcd91b4ecec2ff35d656a35a4e4f9e1c7a92b15f5c3c0675fcf8b6c9adc35731ab15e91401f2367f1d9d2910f9e8829e2e5052c96bc800fcde6c505214
2,b3dce36871f3e88164b18d4953b114163f008cb51c28fe932ae3c734f3b1d1e2853d63fb3fb52a09fd9d0997b64fe5796507d3b50768fe0dce23819c8b24729c,f87f526ee776ac8b6b28392620fbb6049af9eaadab7de2ee9d7c5a31d7404566b8a3c25f404f793e513448a8c5dad1940a597a488e6f165a1aad6b103cf40f59,1,2020-04-13 19:55:12,25,,,685a0bfcd91b4ecec2ff35d656a35a4e4f9e1c7a92b15f5c3c0675fcf8b6c9adc35731ab15e91401f2367f1d9d2910f9e8829e2e5052c96bc800fcde6c505214
3,7ec68a368fbbec3279a6a34847f7959623dbff4638351aa0c4ef3e6c719a4cce873e332d11c2c8f860a67edcc4bc3706da02976e26c218474c67a3ed5cdffed5,ed8280e5590e40e35a2af7d8708a0754540b0612600d5255b8b79e576d1543765e552e30cb50aae3f4d424e08e8104a848f021316949d72e20a9efe0c29eba7c,1,2020-04-20 14:11:44,276,,,a60e82a4383faa79ea972adc48686a729f689b7d1923bce82ca67c7deae306fec30004b30260b56e13d77f791271222594954cc4f88a65c3348b97a9eeb47ad2
4,7ec68a368fbbec3279a6a34847f7959623dbff4638351aa0c4ef3e6c719a4cce873e332d11c2c8f860a67edcc4bc3706da02976e26c218474c67a3ed5cdffed5,ba792e76c3e7ee47d2403083f6def313372ae450ddf0b946d2f3d237e05cd99e75265be9d91ce58bc149919c97e1355f56082da790dccb9cffb0d7e11c4fa7ed,2,2020-04-20 12:56:33,539,,,a60e82a4383faa79ea972adc48686a729f689b7d1923bce82ca67c7deae306fec30004b30260b56e13d77f791271222594954cc4f88a65c3348b97a9eeb47ad2


In [16]:
train_voc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 709144 entries, 9 to 5015413
Data columns (total 8 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   phone_no_m      709144 non-null  object
 1   opposite_no_m   709144 non-null  object
 2   calltype_id     709144 non-null  int8  
 3   start_datetime  709144 non-null  object
 4   call_dur        709144 non-null  int16 
 5   city_name       709144 non-null  object
 6   county_name     709144 non-null  object
 7   imei_m          709144 non-null  object
dtypes: int16(1), int8(1), object(6)
memory usage: 39.9+ MB


In [17]:
test_voc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 276522 entries, 0 to 276521
Data columns (total 8 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   phone_no_m      276522 non-null  object
 1   opposite_no_m   276522 non-null  object
 2   calltype_id     276522 non-null  int8  
 3   start_datetime  276522 non-null  object
 4   call_dur        276522 non-null  int16 
 5   city_name       276522 non-null  object
 6   county_name     276522 non-null  object
 7   imei_m          276522 non-null  object
dtypes: int16(1), int8(1), object(6)
memory usage: 15.6+ MB


In [18]:
train_voc.nunique()

phone_no_m          4823
opposite_no_m     281103
calltype_id            3
start_datetime    561615
call_dur            2653
city_name             22
county_name          193
imei_m              6025
dtype: int64

In [19]:
test_voc.nunique()

phone_no_m          1965
opposite_no_m     107683
calltype_id            3
start_datetime    253144
call_dur            2089
city_name             22
county_name          173
imei_m              2656
dtype: int64

In [20]:
df_voc = pd.concat([train_voc, test_voc])

In [21]:
df_voc['voc_day'] = df_voc['start_datetime'].astype('datetime64').dt.day
df_voc['voc_hour'] = df_voc['start_datetime'].astype('datetime64').dt.hour
df_voc['voc_dayofweek'] = df_voc['start_datetime'].astype('datetime64').dt.dayofweek

In [22]:
df_voc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 985666 entries, 9 to 276521
Data columns (total 11 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   phone_no_m      985666 non-null  object
 1   opposite_no_m   985666 non-null  object
 2   calltype_id     985666 non-null  int8  
 3   start_datetime  985666 non-null  object
 4   call_dur        985666 non-null  int16 
 5   city_name       985666 non-null  object
 6   county_name     985666 non-null  object
 7   imei_m          985666 non-null  object
 8   voc_day         985666 non-null  int64 
 9   voc_hour        985666 non-null  int64 
 10  voc_dayofweek   985666 non-null  int64 
dtypes: int16(1), int64(3), int8(1), object(6)
memory usage: 78.0+ MB


In [23]:
# 每天通话次数统计
df_voc['voc_day_count'] = df_voc.groupby(['phone_no_m', 'voc_day'])['phone_no_m'].transform('count')
df_voc['voc_day_count_max'] = df_voc.groupby('phone_no_m')['voc_day_count'].transform('max')
df_voc['voc_day_count_min'] = df_voc.groupby('phone_no_m')['voc_day_count'].transform('min')
df_voc['voc_day_count_mean'] = df_voc.groupby('phone_no_m')['voc_day_count'].transform('mean')
df_voc['voc_day_count_std'] = df_voc.groupby('phone_no_m')['voc_day_count'].transform('std')

del df_voc['voc_day_count']


# 每天与不同的人通话次数统计
df_voc['voc_day_diff_count'] = df_voc.groupby(['phone_no_m', 'opposite_no_m', 'voc_day'])['phone_no_m'].transform('count')
df_voc['voc_day_diff_count_max'] = df_voc.groupby('phone_no_m')['voc_day_diff_count'].transform('max')
df_voc['voc_day_diff_count_min'] = df_voc.groupby('phone_no_m')['voc_day_diff_count'].transform('min')
df_voc['voc_day_diff_count_mean'] = df_voc.groupby('phone_no_m')['voc_day_diff_count'].transform('mean')
df_voc['voc_day_diff_count_std'] = df_voc.groupby('phone_no_m')['voc_day_diff_count'].transform('std')

del df_voc['voc_day_diff_count']

In [24]:
# 一小时内通话统计量

df_voc['voc_hour_count'] = df_voc.groupby(['phone_no_m', 'voc_hour'])['phone_no_m'].transform('count')
df_voc['voc_hour_count_max'] = df_voc.groupby('phone_no_m')['voc_hour_count'].transform('max')
df_voc['voc_hour_count_min'] = df_voc.groupby('phone_no_m')['voc_hour_count'].transform('min')
df_voc['voc_hour_count_mean'] = df_voc.groupby('phone_no_m')['voc_hour_count'].transform('mean')
df_voc['voc_hour_count_std'] = df_voc.groupby('phone_no_m')['voc_hour_count'].transform('std')

del df_voc['voc_hour_count']


# 一小时内与不同的人通话统计量

df_voc['voc_hour_diff_count'] = df_voc.groupby(['phone_no_m', 'opposite_no_m', 'voc_hour'])['phone_no_m'].transform('count')
df_voc['voc_hour_diff_count_max'] = df_voc.groupby('phone_no_m')['voc_hour_diff_count'].transform('max')
df_voc['voc_hour_diff_count_min'] = df_voc.groupby('phone_no_m')['voc_hour_diff_count'].transform('min')
df_voc['voc_hour_diff_count_mean'] = df_voc.groupby('phone_no_m')['voc_hour_diff_count'].transform('mean')
df_voc['voc_hour_diff_count_std'] = df_voc.groupby('phone_no_m')['voc_hour_diff_count'].transform('std')

del df_voc['voc_hour_diff_count']

In [25]:
# 周几通话统计量

df_voc['voc_dayofweek_count'] = df_voc.groupby(['phone_no_m', 'voc_dayofweek'])['phone_no_m'].transform('count')
df_voc['voc_dayofweek_count_max'] = df_voc.groupby('phone_no_m')['voc_dayofweek'].transform('max')
df_voc['voc_dayofweek_count_min'] = df_voc.groupby('phone_no_m')['voc_dayofweek'].transform('min')
df_voc['voc_dayofweek_count_mean'] = df_voc.groupby('phone_no_m')['voc_dayofweek'].transform('mean')
df_voc['voc_dayofweek_count_std'] = df_voc.groupby('phone_no_m')['voc_dayofweek'].transform('std')

del df_voc['voc_dayofweek_count']

In [26]:
# 该月总的通话次数
df_voc['voc_count'] = df_voc.groupby('phone_no_m')['phone_no_m'].transform('count')

# 相互打电话次数
df_voc['voc_count_mutual'] = df_voc.groupby(['phone_no_m', 'opposite_no_m'])['phone_no_m'].transform('count')

### 短信表

In [27]:
train_sms = pd.read_hdf('../input/train_sms.h5')
test_sms = pd.read_hdf('../input/test_sms.h5')

In [28]:
train_sms = train_sms[train_sms['request_datetime'] >= '2020-03-01 00:00:00']

In [29]:
df_sms = pd.concat([train_sms, test_sms])

In [30]:
df_sms.rename(columns={'calltype_id': 'calltype_id_sms'}, inplace=True)

In [31]:
df_sms['sms_day'] = df_sms['request_datetime'].astype('datetime64').dt.day
df_sms['sms_hour'] = df_sms['request_datetime'].astype('datetime64').dt.hour
df_sms['sms_dayofweek'] = df_sms['request_datetime'].astype('datetime64').dt.dayofweek

In [32]:
# 一天内短信统计量

df_sms['sms_day_count'] = df_sms.groupby(['phone_no_m', 'sms_day'])['phone_no_m'].transform('count')
df_sms['sms_day_count_max'] = df_sms.groupby('phone_no_m')['sms_day_count'].transform('max')
df_sms['sms_day_count_min'] = df_sms.groupby('phone_no_m')['sms_day_count'].transform('min')
df_sms['sms_day_count_mean'] = df_sms.groupby('phone_no_m')['sms_day_count'].transform('mean')
df_sms['sms_day_count_std'] = df_sms.groupby('phone_no_m')['sms_day_count'].transform('std')

del df_sms['sms_day_count']


# 一天内与不同的人短信统计量
df_sms['sms_day_diff_count'] = df_sms.groupby(['phone_no_m', 'opposite_no_m', 'sms_day'])['phone_no_m'].transform('count')
df_sms['sms_day_diff_count_max'] = df_sms.groupby('phone_no_m')['sms_day_diff_count'].transform('max')
df_sms['sms_day_diff_count_min'] = df_sms.groupby('phone_no_m')['sms_day_diff_count'].transform('min')
df_sms['sms_day_diff_count_mean'] = df_sms.groupby('phone_no_m')['sms_day_diff_count'].transform('mean')
df_sms['sms_day_diff_count_std'] = df_sms.groupby('phone_no_m')['sms_day_diff_count'].transform('std')

del df_sms['sms_day_diff_count']

In [33]:
# 一小时内短信统计量

df_sms['sms_hour_count'] = df_sms.groupby(['phone_no_m', 'sms_hour'])['phone_no_m'].transform('count')
df_sms['sms_hour_count_max'] = df_sms.groupby('phone_no_m')['sms_hour_count'].transform('max')
df_sms['sms_hour_count_min'] = df_sms.groupby('phone_no_m')['sms_hour_count'].transform('min')
df_sms['sms_hour_count_mean'] = df_sms.groupby('phone_no_m')['sms_hour_count'].transform('mean')
df_sms['sms_hour_count_std'] = df_sms.groupby('phone_no_m')['sms_hour_count'].transform('std')

del df_sms['sms_hour_count']


# 一小时内与不同的人短信统计量
df_sms['sms_hour_diff_count'] = df_sms.groupby(['phone_no_m', 'opposite_no_m', 'sms_hour'])['phone_no_m'].transform('count')
df_sms['sms_hour_diff_count_max'] = df_sms.groupby('phone_no_m')['sms_hour_diff_count'].transform('max')
df_sms['sms_hour_diff_count_min'] = df_sms.groupby('phone_no_m')['sms_hour_diff_count'].transform('min')
df_sms['sms_hour_diff_count_mean'] = df_sms.groupby('phone_no_m')['sms_hour_diff_count'].transform('mean')
df_sms['sms_hour_diff_count_std'] = df_sms.groupby('phone_no_m')['sms_hour_diff_count'].transform('std')

del df_sms['sms_hour_diff_count']

In [34]:
# 周几短信统计量

df_sms['sms_dayofweek_count'] = df_sms.groupby(['phone_no_m', 'sms_dayofweek'])['phone_no_m'].transform('count')
df_sms['sms_dayofweek_count_max'] = df_sms.groupby('phone_no_m')['sms_dayofweek_count'].transform('max')
df_sms['sms_dayofweek_count_min'] = df_sms.groupby('phone_no_m')['sms_dayofweek_count'].transform('min')
df_sms['sms_dayofweek_count_mean'] = df_sms.groupby('phone_no_m')['sms_dayofweek_count'].transform('mean')
df_sms['sms_dayofweek_count_std'] = df_sms.groupby('phone_no_m')['sms_dayofweek_count'].transform('std')

del df_sms['sms_dayofweek_count']

In [35]:
# 该月总的短信次数
df_sms['sms_count'] = df_sms.groupby('phone_no_m')['phone_no_m'].transform('count')

# 相互发送短信次数
df_sms['sms_count_mutual'] = df_sms.groupby(['phone_no_m', 'opposite_no_m'])['phone_no_m'].transform('count')

### 应用表

In [36]:
train_app = pd.read_hdf('../input/train_app.h5')
test_app = pd.read_hdf('../input/test_app.h5')

In [37]:
train_app = train_app[train_app['month_id'] == '2020-03']

In [38]:
df_app = pd.concat([train_app, test_app])

In [39]:
# 用户流量统计

df_app['total_flow'] = df_app.groupby('phone_no_m')['flow'].transform('sum')
df_app['flow_max'] = df_app.groupby('phone_no_m')['flow'].transform('max')
df_app['flow_min'] = df_app.groupby('phone_no_m')['flow'].transform('min')
df_app['flow_mean'] = df_app.groupby('phone_no_m')['flow'].transform('mean')
df_app['flow_std'] = df_app.groupby('phone_no_m')['flow'].transform('std')

# 应用数
df_app['app_count'] = df_app.groupby('phone_no_m')['phone_no_m'].transform('count')

In [40]:
# 用户在每个app上的流量统计

df_app['busi_name_total_flow'] = df_app.groupby(['phone_no_m', 'busi_name'])['flow'].transform('sum')
df_app['busi_name_flow_max'] = df_app.groupby(['phone_no_m', 'busi_name'])['flow'].transform('max')
df_app['busi_name_flow_min'] = df_app.groupby(['phone_no_m', 'busi_name'])['flow'].transform('min')
df_app['busi_name_flow_mean'] = df_app.groupby(['phone_no_m', 'busi_name'])['flow'].transform('mean')
df_app['busi_name_flow_std'] = df_app.groupby(['phone_no_m', 'busi_name'])['flow'].transform('std')

### 合并数据

In [41]:
df_user.shape, df_voc.shape, df_sms.shape, df_app.shape

((8151, 16), (985666, 33), (1319506, 29), (512100, 15))

In [42]:
df_voc.columns.tolist()

['phone_no_m',
 'opposite_no_m',
 'calltype_id',
 'start_datetime',
 'call_dur',
 'city_name',
 'county_name',
 'imei_m',
 'voc_day',
 'voc_hour',
 'voc_dayofweek',
 'voc_day_count_max',
 'voc_day_count_min',
 'voc_day_count_mean',
 'voc_day_count_std',
 'voc_day_diff_count_max',
 'voc_day_diff_count_min',
 'voc_day_diff_count_mean',
 'voc_day_diff_count_std',
 'voc_hour_count_max',
 'voc_hour_count_min',
 'voc_hour_count_mean',
 'voc_hour_count_std',
 'voc_hour_diff_count_max',
 'voc_hour_diff_count_min',
 'voc_hour_diff_count_mean',
 'voc_hour_diff_count_std',
 'voc_dayofweek_count_max',
 'voc_dayofweek_count_min',
 'voc_dayofweek_count_mean',
 'voc_dayofweek_count_std',
 'voc_count',
 'voc_count_mutual']

In [43]:
df_sms.columns.tolist()

['phone_no_m',
 'opposite_no_m',
 'calltype_id_sms',
 'request_datetime',
 'sms_day',
 'sms_hour',
 'sms_dayofweek',
 'sms_day_count_max',
 'sms_day_count_min',
 'sms_day_count_mean',
 'sms_day_count_std',
 'sms_day_diff_count_max',
 'sms_day_diff_count_min',
 'sms_day_diff_count_mean',
 'sms_day_diff_count_std',
 'sms_hour_count_max',
 'sms_hour_count_min',
 'sms_hour_count_mean',
 'sms_hour_count_std',
 'sms_hour_diff_count_max',
 'sms_hour_diff_count_min',
 'sms_hour_diff_count_mean',
 'sms_hour_diff_count_std',
 'sms_dayofweek_count_max',
 'sms_dayofweek_count_min',
 'sms_dayofweek_count_mean',
 'sms_dayofweek_count_std',
 'sms_count',
 'sms_count_mutual']

In [44]:
df_app.columns.tolist()

['phone_no_m',
 'busi_name',
 'flow',
 'month_id',
 'total_flow',
 'flow_max',
 'flow_min',
 'flow_mean',
 'flow_std',
 'app_count',
 'busi_name_total_flow',
 'busi_name_flow_max',
 'busi_name_flow_min',
 'busi_name_flow_mean',
 'busi_name_flow_std']

In [45]:
df_voc = df_voc[['phone_no_m', 'calltype_id', 'call_dur', 'voc_day', 'voc_hour', 'voc_dayofweek',
                 'voc_day_count_max', 'voc_day_count_min', 'voc_day_count_mean', 'voc_day_count_std',
                 'voc_hour_count_max', 'voc_hour_count_min', 'voc_hour_count_mean', 'voc_hour_count_std',
                 'voc_dayofweek_count_max', 'voc_dayofweek_count_min', 'voc_dayofweek_count_mean', 'voc_dayofweek_count_std',
                 'voc_count', 'voc_count_mutual',
                 'voc_day_diff_count_max', 'voc_day_diff_count_min', 'voc_day_diff_count_mean', 'voc_day_diff_count_std',
                 'voc_hour_diff_count_max', 'voc_hour_diff_count_min', 'voc_hour_diff_count_mean', 'voc_hour_diff_count_std'
                ]].drop_duplicates(subset=['phone_no_m'])

In [46]:
df_sms = df_sms[['phone_no_m', 'calltype_id_sms', 'sms_day', 'sms_hour', 'sms_dayofweek',
                 'sms_day_count_max', 'sms_day_count_min', 'sms_day_count_mean', 'sms_day_count_std',
                 'sms_hour_count_max', 'sms_hour_count_min', 'sms_hour_count_mean', 'sms_hour_count_std',
                 'sms_dayofweek_count_max', 'sms_dayofweek_count_min', 'sms_dayofweek_count_mean', 'sms_dayofweek_count_std',
                 'sms_count', 'sms_count_mutual',
                 'sms_day_diff_count_max', 'sms_day_diff_count_min', 'sms_day_diff_count_mean', 'sms_day_diff_count_std',
                 'sms_hour_diff_count_max', 'sms_hour_diff_count_min', 'sms_hour_diff_count_mean', 'sms_hour_diff_count_std'
                ]].drop_duplicates(subset=['phone_no_m'])

In [47]:
df_app = df_app[['phone_no_m', 'busi_name', 'flow',
                 'total_flow', 'flow_max', 'flow_min', 'flow_mean', 'flow_std',
                 'app_count',
                 'busi_name_total_flow','busi_name_flow_max', 'busi_name_flow_min', 'busi_name_flow_mean', 'busi_name_flow_std'
                 ]].drop_duplicates(subset=['phone_no_m'])

In [48]:
df = pd.merge(df_user, df_voc, how='left', on='phone_no_m')
df = pd.merge(df, df_sms, how='left', on='phone_no_m')
df = pd.merge(df, df_app, how='left', on='phone_no_m')

In [49]:
del df_user, df_voc, df_sms, df_app
gc.collect()

20

In [50]:
lbl = LabelEncoder()

for f in tqdm(['city_name', 'county_name', 'busi_name']):
    df[f] = df[f].fillna('NA')
    df[f] = lbl.fit_transform(df[f].astype(str))

100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 300.83it/s]


In [51]:
df.columns

Index(['phone_no_m', 'city_name', 'county_name', 'idcard_cnt', 'label',
       'arpu_202004', 'idcard_cnt*arpu_202004', 'city_name_count',
       'county_name_count', 'idcard_cnt_count', 'city_name_idcard_cnt_max',
       'city_name_idcard_cnt_min', 'city_name_idcard_cnt_median',
       'county_name_idcard_cnt_max', 'county_name_idcard_cnt_min',
       'county_name_idcard_cnt_median', 'calltype_id', 'call_dur', 'voc_day',
       'voc_hour', 'voc_dayofweek', 'voc_day_count_max', 'voc_day_count_min',
       'voc_day_count_mean', 'voc_day_count_std', 'voc_hour_count_max',
       'voc_hour_count_min', 'voc_hour_count_mean', 'voc_hour_count_std',
       'voc_dayofweek_count_max', 'voc_dayofweek_count_min',
       'voc_dayofweek_count_mean', 'voc_dayofweek_count_std', 'voc_count',
       'voc_count_mutual', 'voc_day_diff_count_max', 'voc_day_diff_count_min',
       'voc_day_diff_count_mean', 'voc_day_diff_count_std',
       'voc_hour_diff_count_max', 'voc_hour_diff_count_min',
       'voc_ho

In [52]:
df_train = df[df.label.notna()]
df_test = df[df.label.isna()]

df_train.shape, df_test.shape

((6106, 82), (2045, 82))

In [53]:
from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split(df_train.drop('label', axis=1), df_train['label'], random_state=2020)

In [54]:
train_cols = [i for i in X_train if i not in ['phone_no_m', 'label']]

In [55]:
params = {'objective': 'binary',
          'boosting': 'gbdt',
          'metric': 'auc',
          'num_iterations': 1000000,
          'learning_rate': 0.1,
          'num_leaves': 31,
          'lambda_l1': 0,
          'lambda_l2': 1,
          'min_data_in_leaf': 20,
          'is_unbalance': True,
          'max_depth': -1,
          'seed': 2020}

In [56]:
use_cols = []
useless_cols = []

for i in train_cols:
    print(i)
    
    lgb_train = lgb.Dataset(X_train[[i]].values, y_train) 
    lgb_eval= lgb.Dataset(X_valid[[i]].values, y_valid, reference=lgb_train)
    lgb_test = lgb.train(params,
                         lgb_train,
                         num_boost_round=10000,
                         valid_sets=[lgb_eval, lgb_train],
                         early_stopping_rounds=100,
                         verbose_eval=1)
    
    auc = roc_auc_score(y_train, lgb_test.predict(X_train[[i]]))
    if auc > 0.5:
        use_cols.append(i)
    else:
        useless_cols.append(i)
    print('*' * 20)
    print('\n')

city_name
[1]	training's auc: 0.684712	valid_0's auc: 0.671773
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.684824	valid_0's auc: 0.671976
[3]	training's auc: 0.684824	valid_0's auc: 0.671976
[4]	training's auc: 0.684824	valid_0's auc: 0.671976
[5]	training's auc: 0.684824	valid_0's auc: 0.671976
[6]	training's auc: 0.684824	valid_0's auc: 0.671976
[7]	training's auc: 0.684824	valid_0's auc: 0.671976
[8]	training's auc: 0.684824	valid_0's auc: 0.671976
[9]	training's auc: 0.684824	valid_0's auc: 0.671976
[10]	training's auc: 0.684824	valid_0's auc: 0.671976
[11]	training's auc: 0.684824	valid_0's auc: 0.671976
[12]	training's auc: 0.684824	valid_0's auc: 0.671976
[13]	training's auc: 0.684824	valid_0's auc: 0.671976
[14]	training's auc: 0.684824	valid_0's auc: 0.671976
[15]	training's auc: 0.684824	valid_0's auc: 0.671976
[16]	training's auc: 0.684824	valid_0's auc: 0.671976
[17]	training's auc: 0.684824	valid_0's auc: 0.671976
[18]	training's auc

[147]	training's auc: 0.780472	valid_0's auc: 0.72224
[148]	training's auc: 0.780502	valid_0's auc: 0.722019
[149]	training's auc: 0.78053	valid_0's auc: 0.721998
[150]	training's auc: 0.780594	valid_0's auc: 0.722132
[151]	training's auc: 0.780603	valid_0's auc: 0.722014
[152]	training's auc: 0.780612	valid_0's auc: 0.72212
[153]	training's auc: 0.780618	valid_0's auc: 0.722041
[154]	training's auc: 0.780656	valid_0's auc: 0.722107
[155]	training's auc: 0.780662	valid_0's auc: 0.722065
[156]	training's auc: 0.780664	valid_0's auc: 0.722026
[157]	training's auc: 0.780654	valid_0's auc: 0.721904
[158]	training's auc: 0.780705	valid_0's auc: 0.722324
[159]	training's auc: 0.780698	valid_0's auc: 0.722185
[160]	training's auc: 0.78071	valid_0's auc: 0.722088
[161]	training's auc: 0.780734	valid_0's auc: 0.722222
[162]	training's auc: 0.780744	valid_0's auc: 0.722195
[163]	training's auc: 0.780762	valid_0's auc: 0.722423
[164]	training's auc: 0.780783	valid_0's auc: 0.722425
[165]	training

[87]	training's auc: 0.729252	valid_0's auc: 0.722696
[88]	training's auc: 0.729252	valid_0's auc: 0.722696
[89]	training's auc: 0.729252	valid_0's auc: 0.722696
[90]	training's auc: 0.729252	valid_0's auc: 0.722696
[91]	training's auc: 0.729252	valid_0's auc: 0.722696
[92]	training's auc: 0.729252	valid_0's auc: 0.722696
[93]	training's auc: 0.729252	valid_0's auc: 0.722696
[94]	training's auc: 0.729252	valid_0's auc: 0.722696
[95]	training's auc: 0.729252	valid_0's auc: 0.722696
[96]	training's auc: 0.729252	valid_0's auc: 0.722696
[97]	training's auc: 0.729252	valid_0's auc: 0.722696
[98]	training's auc: 0.729252	valid_0's auc: 0.722696
[99]	training's auc: 0.729252	valid_0's auc: 0.722696
[100]	training's auc: 0.729252	valid_0's auc: 0.722696
[101]	training's auc: 0.729252	valid_0's auc: 0.722696
[102]	training's auc: 0.729252	valid_0's auc: 0.722696
[103]	training's auc: 0.729252	valid_0's auc: 0.722696
[104]	training's auc: 0.729252	valid_0's auc: 0.722696
[105]	training's auc: 0

[78]	training's auc: 0.842528	valid_0's auc: 0.816598
[79]	training's auc: 0.842551	valid_0's auc: 0.816353
[80]	training's auc: 0.842661	valid_0's auc: 0.816385
[81]	training's auc: 0.842758	valid_0's auc: 0.816319
[82]	training's auc: 0.84281	valid_0's auc: 0.816245
[83]	training's auc: 0.842812	valid_0's auc: 0.815974
[84]	training's auc: 0.842921	valid_0's auc: 0.816061
[85]	training's auc: 0.84302	valid_0's auc: 0.816268
[86]	training's auc: 0.843086	valid_0's auc: 0.81604
[87]	training's auc: 0.843154	valid_0's auc: 0.816154
[88]	training's auc: 0.843243	valid_0's auc: 0.816073
[89]	training's auc: 0.843282	valid_0's auc: 0.816263
[90]	training's auc: 0.843319	valid_0's auc: 0.816127
[91]	training's auc: 0.84336	valid_0's auc: 0.816036
[92]	training's auc: 0.843504	valid_0's auc: 0.816098
[93]	training's auc: 0.843658	valid_0's auc: 0.816088
[94]	training's auc: 0.8437	valid_0's auc: 0.816092
[95]	training's auc: 0.84377	valid_0's auc: 0.816017
[96]	training's auc: 0.843829	valid

[123]	training's auc: 0.755303	valid_0's auc: 0.713897
[124]	training's auc: 0.755303	valid_0's auc: 0.713897
[125]	training's auc: 0.755298	valid_0's auc: 0.713894
[126]	training's auc: 0.755303	valid_0's auc: 0.713897
[127]	training's auc: 0.755298	valid_0's auc: 0.713894
[128]	training's auc: 0.755307	valid_0's auc: 0.713855
[129]	training's auc: 0.755313	valid_0's auc: 0.713822
[130]	training's auc: 0.755316	valid_0's auc: 0.713716
[131]	training's auc: 0.755321	valid_0's auc: 0.713673
[132]	training's auc: 0.755317	valid_0's auc: 0.713716
[133]	training's auc: 0.755321	valid_0's auc: 0.713673
[134]	training's auc: 0.755321	valid_0's auc: 0.713673
[135]	training's auc: 0.755317	valid_0's auc: 0.713716
[136]	training's auc: 0.755321	valid_0's auc: 0.713673
[137]	training's auc: 0.755321	valid_0's auc: 0.713673
[138]	training's auc: 0.755323	valid_0's auc: 0.713762
[139]	training's auc: 0.755323	valid_0's auc: 0.713762
[140]	training's auc: 0.755323	valid_0's auc: 0.713762
[141]	trai

[133]	training's auc: 0.729202	valid_0's auc: 0.722798
[134]	training's auc: 0.729202	valid_0's auc: 0.722798
[135]	training's auc: 0.729202	valid_0's auc: 0.722798
[136]	training's auc: 0.729202	valid_0's auc: 0.722798
[137]	training's auc: 0.729202	valid_0's auc: 0.722798
[138]	training's auc: 0.729202	valid_0's auc: 0.722798
[139]	training's auc: 0.729202	valid_0's auc: 0.722798
[140]	training's auc: 0.729202	valid_0's auc: 0.722798
[141]	training's auc: 0.729202	valid_0's auc: 0.722798
[142]	training's auc: 0.729202	valid_0's auc: 0.722798
[143]	training's auc: 0.729202	valid_0's auc: 0.722798
[144]	training's auc: 0.729202	valid_0's auc: 0.722798
[145]	training's auc: 0.729202	valid_0's auc: 0.722798
[146]	training's auc: 0.729202	valid_0's auc: 0.722798
[147]	training's auc: 0.729202	valid_0's auc: 0.722798
[148]	training's auc: 0.729202	valid_0's auc: 0.722798
[149]	training's auc: 0.729202	valid_0's auc: 0.722798
[150]	training's auc: 0.729202	valid_0's auc: 0.722798
[151]	trai

[86]	training's auc: 0.585862	valid_0's auc: 0.576556
[87]	training's auc: 0.585862	valid_0's auc: 0.576556
[88]	training's auc: 0.585862	valid_0's auc: 0.576556
[89]	training's auc: 0.585862	valid_0's auc: 0.576556
[90]	training's auc: 0.585862	valid_0's auc: 0.576556
[91]	training's auc: 0.585862	valid_0's auc: 0.576556
[92]	training's auc: 0.585862	valid_0's auc: 0.576556
[93]	training's auc: 0.585862	valid_0's auc: 0.576556
[94]	training's auc: 0.585862	valid_0's auc: 0.576556
[95]	training's auc: 0.585862	valid_0's auc: 0.576556
[96]	training's auc: 0.585862	valid_0's auc: 0.576556
[97]	training's auc: 0.585862	valid_0's auc: 0.576556
[98]	training's auc: 0.585862	valid_0's auc: 0.576556
[99]	training's auc: 0.585862	valid_0's auc: 0.576556
[100]	training's auc: 0.585862	valid_0's auc: 0.576556
[101]	training's auc: 0.585862	valid_0's auc: 0.576556
Early stopping, best iteration is:
[1]	training's auc: 0.585862	valid_0's auc: 0.576556
********************


city_name_idcard_cnt_me

[74]	training's auc: 0.638736	valid_0's auc: 0.627342
[75]	training's auc: 0.638736	valid_0's auc: 0.627342
[76]	training's auc: 0.638736	valid_0's auc: 0.627342
[77]	training's auc: 0.638736	valid_0's auc: 0.627342
[78]	training's auc: 0.638736	valid_0's auc: 0.627342
[79]	training's auc: 0.638736	valid_0's auc: 0.627342
[80]	training's auc: 0.638736	valid_0's auc: 0.627342
[81]	training's auc: 0.638736	valid_0's auc: 0.627342
[82]	training's auc: 0.638736	valid_0's auc: 0.627342
[83]	training's auc: 0.638736	valid_0's auc: 0.627342
[84]	training's auc: 0.638736	valid_0's auc: 0.627342
[85]	training's auc: 0.638736	valid_0's auc: 0.627342
[86]	training's auc: 0.638736	valid_0's auc: 0.627342
[87]	training's auc: 0.638736	valid_0's auc: 0.627342
[88]	training's auc: 0.638736	valid_0's auc: 0.627342
[89]	training's auc: 0.638736	valid_0's auc: 0.627342
[90]	training's auc: 0.638736	valid_0's auc: 0.627342
[91]	training's auc: 0.638736	valid_0's auc: 0.627342
[92]	training's auc: 0.63873

[55]	training's auc: 0.610386	valid_0's auc: 0.594856
[56]	training's auc: 0.610386	valid_0's auc: 0.594856
[57]	training's auc: 0.610386	valid_0's auc: 0.594856
[58]	training's auc: 0.610386	valid_0's auc: 0.594856
[59]	training's auc: 0.610386	valid_0's auc: 0.594856
[60]	training's auc: 0.610386	valid_0's auc: 0.594856
[61]	training's auc: 0.610386	valid_0's auc: 0.594856
[62]	training's auc: 0.610386	valid_0's auc: 0.594856
[63]	training's auc: 0.610386	valid_0's auc: 0.594856
[64]	training's auc: 0.610386	valid_0's auc: 0.594856
[65]	training's auc: 0.610386	valid_0's auc: 0.594856
[66]	training's auc: 0.610386	valid_0's auc: 0.594856
[67]	training's auc: 0.610386	valid_0's auc: 0.594856
[68]	training's auc: 0.610386	valid_0's auc: 0.594856
[69]	training's auc: 0.610386	valid_0's auc: 0.594856
[70]	training's auc: 0.610386	valid_0's auc: 0.594856
[71]	training's auc: 0.610386	valid_0's auc: 0.594856
[72]	training's auc: 0.610386	valid_0's auc: 0.594856
[73]	training's auc: 0.61038

[3]	training's auc: 0.838929	valid_0's auc: 0.787079
[4]	training's auc: 0.840119	valid_0's auc: 0.790451
[5]	training's auc: 0.840256	valid_0's auc: 0.789837
[6]	training's auc: 0.841841	valid_0's auc: 0.787631
[7]	training's auc: 0.842453	valid_0's auc: 0.787847
[8]	training's auc: 0.842631	valid_0's auc: 0.787326
[9]	training's auc: 0.842683	valid_0's auc: 0.787365
[10]	training's auc: 0.842917	valid_0's auc: 0.787637
[11]	training's auc: 0.84383	valid_0's auc: 0.787302
[12]	training's auc: 0.844723	valid_0's auc: 0.788795
[13]	training's auc: 0.845007	valid_0's auc: 0.787784
[14]	training's auc: 0.845803	valid_0's auc: 0.789681
[15]	training's auc: 0.846306	valid_0's auc: 0.78832
[16]	training's auc: 0.8467	valid_0's auc: 0.78908
[17]	training's auc: 0.846908	valid_0's auc: 0.789394
[18]	training's auc: 0.847065	valid_0's auc: 0.78904
[19]	training's auc: 0.847271	valid_0's auc: 0.78922
[20]	training's auc: 0.847361	valid_0's auc: 0.789756
[21]	training's auc: 0.847646	valid_0's au

[217]	training's auc: 0.861892	valid_0's auc: 0.792771
[218]	training's auc: 0.861871	valid_0's auc: 0.792837
[219]	training's auc: 0.861909	valid_0's auc: 0.7929
[220]	training's auc: 0.861902	valid_0's auc: 0.792908
[221]	training's auc: 0.8619	valid_0's auc: 0.792984
[222]	training's auc: 0.861903	valid_0's auc: 0.792889
[223]	training's auc: 0.861912	valid_0's auc: 0.792815
[224]	training's auc: 0.861923	valid_0's auc: 0.792825
[225]	training's auc: 0.861954	valid_0's auc: 0.792887
[226]	training's auc: 0.861942	valid_0's auc: 0.792823
[227]	training's auc: 0.861972	valid_0's auc: 0.792784
[228]	training's auc: 0.861985	valid_0's auc: 0.792821
[229]	training's auc: 0.862002	valid_0's auc: 0.792889
[230]	training's auc: 0.862016	valid_0's auc: 0.792906
Early stopping, best iteration is:
[130]	training's auc: 0.860054	valid_0's auc: 0.793463
********************


voc_day
[1]	training's auc: 0.837525	valid_0's auc: 0.81743
Training until validation scores don't improve for 100 rounds

********************


voc_dayofweek
[1]	training's auc: 0.809443	valid_0's auc: 0.792452
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.809443	valid_0's auc: 0.792452
[3]	training's auc: 0.809443	valid_0's auc: 0.792452
[4]	training's auc: 0.809443	valid_0's auc: 0.792452
[5]	training's auc: 0.809443	valid_0's auc: 0.792452
[6]	training's auc: 0.809443	valid_0's auc: 0.792452
[7]	training's auc: 0.809443	valid_0's auc: 0.792452
[8]	training's auc: 0.809443	valid_0's auc: 0.792452
[9]	training's auc: 0.809443	valid_0's auc: 0.792452
[10]	training's auc: 0.809443	valid_0's auc: 0.792452
[11]	training's auc: 0.809443	valid_0's auc: 0.792452
[12]	training's auc: 0.809443	valid_0's auc: 0.792452
[13]	training's auc: 0.809443	valid_0's auc: 0.792452
[14]	training's auc: 0.809443	valid_0's auc: 0.792452
[15]	training's auc: 0.809443	valid_0's auc: 0.792452
[16]	training's auc: 0.809443	valid_0's auc: 0.792452
[17]	training's auc: 0.809443	valid_0's auc: 0

[46]	training's auc: 0.905599	valid_0's auc: 0.877674
[47]	training's auc: 0.905621	valid_0's auc: 0.877422
[48]	training's auc: 0.905625	valid_0's auc: 0.877441
[49]	training's auc: 0.905621	valid_0's auc: 0.877422
[50]	training's auc: 0.905625	valid_0's auc: 0.877437
[51]	training's auc: 0.905633	valid_0's auc: 0.877422
[52]	training's auc: 0.905671	valid_0's auc: 0.877056
[53]	training's auc: 0.905833	valid_0's auc: 0.877257
[54]	training's auc: 0.905841	valid_0's auc: 0.877197
[55]	training's auc: 0.905855	valid_0's auc: 0.877265
[56]	training's auc: 0.905855	valid_0's auc: 0.877254
[57]	training's auc: 0.905855	valid_0's auc: 0.877254
[58]	training's auc: 0.905866	valid_0's auc: 0.877281
[59]	training's auc: 0.905863	valid_0's auc: 0.877236
[60]	training's auc: 0.905871	valid_0's auc: 0.87728
[61]	training's auc: 0.905871	valid_0's auc: 0.87728
[62]	training's auc: 0.905875	valid_0's auc: 0.877291
[63]	training's auc: 0.905888	valid_0's auc: 0.8772
[64]	training's auc: 0.905889	va

[8]	training's auc: 0.922214	valid_0's auc: 0.890772
[9]	training's auc: 0.922657	valid_0's auc: 0.891751
[10]	training's auc: 0.923295	valid_0's auc: 0.893248
[11]	training's auc: 0.923687	valid_0's auc: 0.893801
[12]	training's auc: 0.923875	valid_0's auc: 0.893483
[13]	training's auc: 0.924522	valid_0's auc: 0.893345
[14]	training's auc: 0.924822	valid_0's auc: 0.893362
[15]	training's auc: 0.925296	valid_0's auc: 0.893624
[16]	training's auc: 0.925466	valid_0's auc: 0.893172
[17]	training's auc: 0.925639	valid_0's auc: 0.893662
[18]	training's auc: 0.925871	valid_0's auc: 0.894294
[19]	training's auc: 0.92625	valid_0's auc: 0.894447
[20]	training's auc: 0.927065	valid_0's auc: 0.89471
[21]	training's auc: 0.927232	valid_0's auc: 0.895138
[22]	training's auc: 0.927383	valid_0's auc: 0.895058
[23]	training's auc: 0.927624	valid_0's auc: 0.894924
[24]	training's auc: 0.927715	valid_0's auc: 0.894912
[25]	training's auc: 0.927891	valid_0's auc: 0.894859
[26]	training's auc: 0.928045	va

[37]	training's auc: 0.908558	valid_0's auc: 0.875546
[38]	training's auc: 0.908628	valid_0's auc: 0.875529
[39]	training's auc: 0.908773	valid_0's auc: 0.875562
[40]	training's auc: 0.908828	valid_0's auc: 0.875396
[41]	training's auc: 0.909215	valid_0's auc: 0.875097
[42]	training's auc: 0.909261	valid_0's auc: 0.875043
[43]	training's auc: 0.909325	valid_0's auc: 0.875097
[44]	training's auc: 0.90939	valid_0's auc: 0.875078
[45]	training's auc: 0.909466	valid_0's auc: 0.874832
[46]	training's auc: 0.909671	valid_0's auc: 0.875279
[47]	training's auc: 0.909786	valid_0's auc: 0.875351
[48]	training's auc: 0.909875	valid_0's auc: 0.874968
[49]	training's auc: 0.910001	valid_0's auc: 0.874759
[50]	training's auc: 0.910081	valid_0's auc: 0.87453
[51]	training's auc: 0.910181	valid_0's auc: 0.874246
[52]	training's auc: 0.910339	valid_0's auc: 0.874201
[53]	training's auc: 0.910395	valid_0's auc: 0.873826
[54]	training's auc: 0.910506	valid_0's auc: 0.873773
[55]	training's auc: 0.910579	

********************


voc_hour_count_min
[1]	training's auc: 0.838763	valid_0's auc: 0.823697
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.838763	valid_0's auc: 0.823697
[3]	training's auc: 0.838763	valid_0's auc: 0.823697
[4]	training's auc: 0.838763	valid_0's auc: 0.823697
[5]	training's auc: 0.838763	valid_0's auc: 0.823697
[6]	training's auc: 0.838763	valid_0's auc: 0.823697
[7]	training's auc: 0.839446	valid_0's auc: 0.826286
[8]	training's auc: 0.839446	valid_0's auc: 0.826286
[9]	training's auc: 0.839446	valid_0's auc: 0.826286
[10]	training's auc: 0.839446	valid_0's auc: 0.826286
[11]	training's auc: 0.839446	valid_0's auc: 0.826286
[12]	training's auc: 0.839446	valid_0's auc: 0.826286
[13]	training's auc: 0.839446	valid_0's auc: 0.826286
[14]	training's auc: 0.839446	valid_0's auc: 0.826286
[15]	training's auc: 0.839446	valid_0's auc: 0.826286
[16]	training's auc: 0.839446	valid_0's auc: 0.826286
[17]	training's auc: 0.839446	valid_0's a

[75]	training's auc: 0.901689	valid_0's auc: 0.84984
[76]	training's auc: 0.90172	valid_0's auc: 0.849725
[77]	training's auc: 0.901796	valid_0's auc: 0.849419
[78]	training's auc: 0.901911	valid_0's auc: 0.849338
[79]	training's auc: 0.901946	valid_0's auc: 0.849406
[80]	training's auc: 0.902127	valid_0's auc: 0.849002
[81]	training's auc: 0.902227	valid_0's auc: 0.849325
[82]	training's auc: 0.902268	valid_0's auc: 0.849209
[83]	training's auc: 0.902309	valid_0's auc: 0.849101
[84]	training's auc: 0.902495	valid_0's auc: 0.848502
[85]	training's auc: 0.9025	valid_0's auc: 0.848362
[86]	training's auc: 0.902583	valid_0's auc: 0.8488
[87]	training's auc: 0.902621	valid_0's auc: 0.848792
[88]	training's auc: 0.902636	valid_0's auc: 0.848726
[89]	training's auc: 0.902723	valid_0's auc: 0.848333
[90]	training's auc: 0.902731	valid_0's auc: 0.848401
[91]	training's auc: 0.902817	valid_0's auc: 0.848271
[92]	training's auc: 0.902898	valid_0's auc: 0.848246
[93]	training's auc: 0.90295	valid

[1]	training's auc: 0.825545	valid_0's auc: 0.816535
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.825545	valid_0's auc: 0.816535
[3]	training's auc: 0.825576	valid_0's auc: 0.816557
[4]	training's auc: 0.825576	valid_0's auc: 0.816557
[5]	training's auc: 0.825576	valid_0's auc: 0.816557
[6]	training's auc: 0.825576	valid_0's auc: 0.816557
[7]	training's auc: 0.825576	valid_0's auc: 0.816557
[8]	training's auc: 0.825576	valid_0's auc: 0.816557
[9]	training's auc: 0.825576	valid_0's auc: 0.816557
[10]	training's auc: 0.825576	valid_0's auc: 0.816557
[11]	training's auc: 0.825576	valid_0's auc: 0.816557
[12]	training's auc: 0.825578	valid_0's auc: 0.816617
[13]	training's auc: 0.825578	valid_0's auc: 0.816617
[14]	training's auc: 0.825578	valid_0's auc: 0.816617
[15]	training's auc: 0.825578	valid_0's auc: 0.816617
[16]	training's auc: 0.825578	valid_0's auc: 0.816617
[17]	training's auc: 0.825578	valid_0's auc: 0.816617
[18]	training's auc: 0.825578

[91]	training's auc: 0.879864	valid_0's auc: 0.824433
[92]	training's auc: 0.879995	valid_0's auc: 0.824361
[93]	training's auc: 0.880056	valid_0's auc: 0.824539
[94]	training's auc: 0.880184	valid_0's auc: 0.82434
[95]	training's auc: 0.880269	valid_0's auc: 0.824192
[96]	training's auc: 0.880364	valid_0's auc: 0.824395
[97]	training's auc: 0.880489	valid_0's auc: 0.824291
[98]	training's auc: 0.880474	valid_0's auc: 0.824053
[99]	training's auc: 0.880542	valid_0's auc: 0.823985
[100]	training's auc: 0.880661	valid_0's auc: 0.824086
[101]	training's auc: 0.88079	valid_0's auc: 0.824074
[102]	training's auc: 0.88096	valid_0's auc: 0.824212
[103]	training's auc: 0.881048	valid_0's auc: 0.82433
[104]	training's auc: 0.881145	valid_0's auc: 0.824402
[105]	training's auc: 0.881173	valid_0's auc: 0.824441
[106]	training's auc: 0.88127	valid_0's auc: 0.824601
[107]	training's auc: 0.881254	valid_0's auc: 0.82445
[108]	training's auc: 0.881364	valid_0's auc: 0.824431
Early stopping, best iter

[94]	training's auc: 0.883559	valid_0's auc: 0.824854
[95]	training's auc: 0.88361	valid_0's auc: 0.824642
[96]	training's auc: 0.883718	valid_0's auc: 0.824599
[97]	training's auc: 0.88378	valid_0's auc: 0.824388
[98]	training's auc: 0.883805	valid_0's auc: 0.824468
[99]	training's auc: 0.883856	valid_0's auc: 0.824368
[100]	training's auc: 0.883931	valid_0's auc: 0.824329
[101]	training's auc: 0.883968	valid_0's auc: 0.824258
Early stopping, best iteration is:
[1]	training's auc: 0.868108	valid_0's auc: 0.840093
********************


voc_count_mutual
[1]	training's auc: 0.862369	valid_0's auc: 0.853989
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.862463	valid_0's auc: 0.854614
[3]	training's auc: 0.86294	valid_0's auc: 0.853384
[4]	training's auc: 0.863126	valid_0's auc: 0.853507
[5]	training's auc: 0.863392	valid_0's auc: 0.853578
[6]	training's auc: 0.863411	valid_0's auc: 0.853692
[7]	training's auc: 0.863545	valid_0's auc: 0.85385
[8]	train

[74]	training's auc: 0.831052	valid_0's auc: 0.833784
[75]	training's auc: 0.831051	valid_0's auc: 0.833799
[76]	training's auc: 0.831052	valid_0's auc: 0.833784
[77]	training's auc: 0.831052	valid_0's auc: 0.833784
[78]	training's auc: 0.831052	valid_0's auc: 0.833784
[79]	training's auc: 0.831052	valid_0's auc: 0.833784
[80]	training's auc: 0.831052	valid_0's auc: 0.833784
[81]	training's auc: 0.831052	valid_0's auc: 0.833784
[82]	training's auc: 0.831052	valid_0's auc: 0.833784
[83]	training's auc: 0.831052	valid_0's auc: 0.833784
[84]	training's auc: 0.831052	valid_0's auc: 0.833784
[85]	training's auc: 0.831052	valid_0's auc: 0.833784
[86]	training's auc: 0.831052	valid_0's auc: 0.833784
[87]	training's auc: 0.831052	valid_0's auc: 0.833784
[88]	training's auc: 0.831052	valid_0's auc: 0.833784
[89]	training's auc: 0.831052	valid_0's auc: 0.833784
[90]	training's auc: 0.831052	valid_0's auc: 0.833784
[91]	training's auc: 0.831052	valid_0's auc: 0.833784
[92]	training's auc: 0.83105

[78]	training's auc: 0.888777	valid_0's auc: 0.8458
[79]	training's auc: 0.888934	valid_0's auc: 0.845855
[80]	training's auc: 0.889007	valid_0's auc: 0.845857
[81]	training's auc: 0.889087	valid_0's auc: 0.845794
[82]	training's auc: 0.88937	valid_0's auc: 0.845156
[83]	training's auc: 0.889391	valid_0's auc: 0.845175
[84]	training's auc: 0.889447	valid_0's auc: 0.845251
[85]	training's auc: 0.889535	valid_0's auc: 0.845137
[86]	training's auc: 0.889668	valid_0's auc: 0.845003
[87]	training's auc: 0.88976	valid_0's auc: 0.845213
[88]	training's auc: 0.889812	valid_0's auc: 0.845167
[89]	training's auc: 0.889811	valid_0's auc: 0.845317
[90]	training's auc: 0.889835	valid_0's auc: 0.845162
[91]	training's auc: 0.889915	valid_0's auc: 0.845257
[92]	training's auc: 0.889976	valid_0's auc: 0.845197
[93]	training's auc: 0.890025	valid_0's auc: 0.845198
[94]	training's auc: 0.890033	valid_0's auc: 0.845206
[95]	training's auc: 0.890096	valid_0's auc: 0.84515
[96]	training's auc: 0.89014	vali

[41]	training's auc: 0.834657	valid_0's auc: 0.807613
[42]	training's auc: 0.834657	valid_0's auc: 0.807613
[43]	training's auc: 0.834657	valid_0's auc: 0.807613
[44]	training's auc: 0.834657	valid_0's auc: 0.807613
[45]	training's auc: 0.834657	valid_0's auc: 0.807613
[46]	training's auc: 0.834657	valid_0's auc: 0.807613
[47]	training's auc: 0.834657	valid_0's auc: 0.807613
[48]	training's auc: 0.834658	valid_0's auc: 0.807625
[49]	training's auc: 0.834658	valid_0's auc: 0.807625
[50]	training's auc: 0.834659	valid_0's auc: 0.807602
[51]	training's auc: 0.834659	valid_0's auc: 0.807602
[52]	training's auc: 0.834659	valid_0's auc: 0.807602
[53]	training's auc: 0.834794	valid_0's auc: 0.807966
[54]	training's auc: 0.834794	valid_0's auc: 0.807966
[55]	training's auc: 0.834918	valid_0's auc: 0.808377
[56]	training's auc: 0.834918	valid_0's auc: 0.808377
[57]	training's auc: 0.834923	valid_0's auc: 0.808065
[58]	training's auc: 0.834923	valid_0's auc: 0.808065
[59]	training's auc: 0.83492

[5]	training's auc: 0.88216	valid_0's auc: 0.834333
[6]	training's auc: 0.88298	valid_0's auc: 0.834627
[7]	training's auc: 0.883234	valid_0's auc: 0.834847
[8]	training's auc: 0.883663	valid_0's auc: 0.835062
[9]	training's auc: 0.883935	valid_0's auc: 0.835898
[10]	training's auc: 0.884259	valid_0's auc: 0.834586
[11]	training's auc: 0.884942	valid_0's auc: 0.834755
[12]	training's auc: 0.885328	valid_0's auc: 0.834785
[13]	training's auc: 0.885625	valid_0's auc: 0.835107
[14]	training's auc: 0.885842	valid_0's auc: 0.835181
[15]	training's auc: 0.885968	valid_0's auc: 0.834605
[16]	training's auc: 0.886473	valid_0's auc: 0.835216
[17]	training's auc: 0.886676	valid_0's auc: 0.834677
[18]	training's auc: 0.886816	valid_0's auc: 0.834203
[19]	training's auc: 0.886938	valid_0's auc: 0.834551
[20]	training's auc: 0.887363	valid_0's auc: 0.834833
[21]	training's auc: 0.887538	valid_0's auc: 0.835074
[22]	training's auc: 0.887835	valid_0's auc: 0.83448
[23]	training's auc: 0.888154	valid_

[28]	training's auc: 0.873668	valid_0's auc: 0.818205
[29]	training's auc: 0.873742	valid_0's auc: 0.818075
[30]	training's auc: 0.873877	valid_0's auc: 0.817728
[31]	training's auc: 0.873941	valid_0's auc: 0.817931
[32]	training's auc: 0.873963	valid_0's auc: 0.818084
[33]	training's auc: 0.874181	valid_0's auc: 0.81762
[34]	training's auc: 0.874577	valid_0's auc: 0.817797
[35]	training's auc: 0.874851	valid_0's auc: 0.817971
[36]	training's auc: 0.875014	valid_0's auc: 0.818372
[37]	training's auc: 0.875287	valid_0's auc: 0.818196
[38]	training's auc: 0.875301	valid_0's auc: 0.817984
[39]	training's auc: 0.875605	valid_0's auc: 0.817514
[40]	training's auc: 0.875705	valid_0's auc: 0.817638
[41]	training's auc: 0.875712	valid_0's auc: 0.817412
[42]	training's auc: 0.875785	valid_0's auc: 0.817803
[43]	training's auc: 0.876048	valid_0's auc: 0.817684
[44]	training's auc: 0.876115	valid_0's auc: 0.817677
[45]	training's auc: 0.876489	valid_0's auc: 0.816691
[46]	training's auc: 0.876567

[99]	training's auc: 0.803176	valid_0's auc: 0.794565
[100]	training's auc: 0.803176	valid_0's auc: 0.794565
[101]	training's auc: 0.803176	valid_0's auc: 0.794565
Early stopping, best iteration is:
[1]	training's auc: 0.803176	valid_0's auc: 0.794565
********************


sms_day
[1]	training's auc: 0.83682	valid_0's auc: 0.819007
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.836816	valid_0's auc: 0.819904
[3]	training's auc: 0.836942	valid_0's auc: 0.819411
[4]	training's auc: 0.836942	valid_0's auc: 0.819411
[5]	training's auc: 0.836954	valid_0's auc: 0.819403
[6]	training's auc: 0.836954	valid_0's auc: 0.819403
[7]	training's auc: 0.836965	valid_0's auc: 0.819479
[8]	training's auc: 0.836965	valid_0's auc: 0.819479
[9]	training's auc: 0.836967	valid_0's auc: 0.819413
[10]	training's auc: 0.836977	valid_0's auc: 0.819568
[11]	training's auc: 0.836977	valid_0's auc: 0.819568
[12]	training's auc: 0.836979	valid_0's auc: 0.819268
[13]	training's a

[97]	training's auc: 0.810443	valid_0's auc: 0.805939
[98]	training's auc: 0.810443	valid_0's auc: 0.805939
[99]	training's auc: 0.810443	valid_0's auc: 0.805939
[100]	training's auc: 0.810443	valid_0's auc: 0.805939
[101]	training's auc: 0.810443	valid_0's auc: 0.805939
Early stopping, best iteration is:
[1]	training's auc: 0.810397	valid_0's auc: 0.806184
********************


sms_dayofweek
[1]	training's auc: 0.798064	valid_0's auc: 0.788722
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.798064	valid_0's auc: 0.788722
[3]	training's auc: 0.798064	valid_0's auc: 0.788722
[4]	training's auc: 0.798064	valid_0's auc: 0.788722
[5]	training's auc: 0.798064	valid_0's auc: 0.788722
[6]	training's auc: 0.798064	valid_0's auc: 0.788722
[7]	training's auc: 0.798064	valid_0's auc: 0.788722
[8]	training's auc: 0.798064	valid_0's auc: 0.788722
[9]	training's auc: 0.798064	valid_0's auc: 0.788722
[10]	training's auc: 0.798064	valid_0's auc: 0.788722
[11]	train

[96]	training's auc: 0.88053	valid_0's auc: 0.859554
[97]	training's auc: 0.880529	valid_0's auc: 0.859504
[98]	training's auc: 0.880528	valid_0's auc: 0.859539
[99]	training's auc: 0.880535	valid_0's auc: 0.859517
[100]	training's auc: 0.88054	valid_0's auc: 0.859564
[101]	training's auc: 0.880546	valid_0's auc: 0.859502
[102]	training's auc: 0.880571	valid_0's auc: 0.859393
[103]	training's auc: 0.880579	valid_0's auc: 0.859324
Early stopping, best iteration is:
[3]	training's auc: 0.878358	valid_0's auc: 0.861444
********************


sms_day_count_min
[1]	training's auc: 0.847347	valid_0's auc: 0.83904
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.847388	valid_0's auc: 0.838887
[3]	training's auc: 0.847388	valid_0's auc: 0.838887
[4]	training's auc: 0.847388	valid_0's auc: 0.838887
[5]	training's auc: 0.847388	valid_0's auc: 0.838887
[6]	training's auc: 0.847388	valid_0's auc: 0.838887
[7]	training's auc: 0.847388	valid_0's auc: 0.838887
[8]	t

[69]	training's auc: 0.908457	valid_0's auc: 0.871901
[70]	training's auc: 0.90858	valid_0's auc: 0.871833
[71]	training's auc: 0.908644	valid_0's auc: 0.871728
[72]	training's auc: 0.908655	valid_0's auc: 0.871744
[73]	training's auc: 0.908786	valid_0's auc: 0.871322
[74]	training's auc: 0.908879	valid_0's auc: 0.871217
[75]	training's auc: 0.908987	valid_0's auc: 0.87133
[76]	training's auc: 0.909074	valid_0's auc: 0.871072
[77]	training's auc: 0.909109	valid_0's auc: 0.870956
[78]	training's auc: 0.909155	valid_0's auc: 0.870673
[79]	training's auc: 0.909211	valid_0's auc: 0.870906
[80]	training's auc: 0.909224	valid_0's auc: 0.870664
[81]	training's auc: 0.909324	valid_0's auc: 0.87079
[82]	training's auc: 0.909414	valid_0's auc: 0.870692
[83]	training's auc: 0.909465	valid_0's auc: 0.870618
[84]	training's auc: 0.9095	valid_0's auc: 0.870312
[85]	training's auc: 0.909518	valid_0's auc: 0.870248
[86]	training's auc: 0.909587	valid_0's auc: 0.870144
[87]	training's auc: 0.909643	val

[71]	training's auc: 0.853702	valid_0's auc: 0.812195
[72]	training's auc: 0.853756	valid_0's auc: 0.812299
[73]	training's auc: 0.853816	valid_0's auc: 0.812177
[74]	training's auc: 0.853818	valid_0's auc: 0.812462
[75]	training's auc: 0.853804	valid_0's auc: 0.812321
[76]	training's auc: 0.853899	valid_0's auc: 0.812305
[77]	training's auc: 0.853872	valid_0's auc: 0.811963
[78]	training's auc: 0.853939	valid_0's auc: 0.812261
[79]	training's auc: 0.853988	valid_0's auc: 0.812104
[80]	training's auc: 0.854054	valid_0's auc: 0.812222
[81]	training's auc: 0.854096	valid_0's auc: 0.812152
[82]	training's auc: 0.854113	valid_0's auc: 0.812237
[83]	training's auc: 0.854124	valid_0's auc: 0.81221
[84]	training's auc: 0.854155	valid_0's auc: 0.812241
[85]	training's auc: 0.854161	valid_0's auc: 0.812179
[86]	training's auc: 0.854193	valid_0's auc: 0.811736
[87]	training's auc: 0.854236	valid_0's auc: 0.811531
[88]	training's auc: 0.854227	valid_0's auc: 0.811734
[89]	training's auc: 0.854295

[162]	training's auc: 0.810571	valid_0's auc: 0.796536
[163]	training's auc: 0.810571	valid_0's auc: 0.796536
[164]	training's auc: 0.810571	valid_0's auc: 0.796536
[165]	training's auc: 0.810571	valid_0's auc: 0.796536
[166]	training's auc: 0.810571	valid_0's auc: 0.796536
[167]	training's auc: 0.810571	valid_0's auc: 0.796536
[168]	training's auc: 0.810571	valid_0's auc: 0.796536
[169]	training's auc: 0.810571	valid_0's auc: 0.796536
[170]	training's auc: 0.810571	valid_0's auc: 0.796536
[171]	training's auc: 0.810571	valid_0's auc: 0.796536
[172]	training's auc: 0.810571	valid_0's auc: 0.796536
[173]	training's auc: 0.810571	valid_0's auc: 0.796536
[174]	training's auc: 0.810571	valid_0's auc: 0.796536
[175]	training's auc: 0.810571	valid_0's auc: 0.796536
[176]	training's auc: 0.810571	valid_0's auc: 0.796536
[177]	training's auc: 0.810571	valid_0's auc: 0.796536
[178]	training's auc: 0.810571	valid_0's auc: 0.796536
[179]	training's auc: 0.810571	valid_0's auc: 0.796536
[180]	trai

[27]	training's auc: 0.863257	valid_0's auc: 0.80822
[28]	training's auc: 0.863318	valid_0's auc: 0.807973
[29]	training's auc: 0.863569	valid_0's auc: 0.807756
[30]	training's auc: 0.863887	valid_0's auc: 0.808274
[31]	training's auc: 0.864014	valid_0's auc: 0.808174
[32]	training's auc: 0.864236	valid_0's auc: 0.808282
[33]	training's auc: 0.864473	valid_0's auc: 0.808507
[34]	training's auc: 0.864754	valid_0's auc: 0.808106
[35]	training's auc: 0.864866	valid_0's auc: 0.807972
[36]	training's auc: 0.865007	valid_0's auc: 0.807744
[37]	training's auc: 0.865263	valid_0's auc: 0.80774
[38]	training's auc: 0.865488	valid_0's auc: 0.80772
[39]	training's auc: 0.865641	valid_0's auc: 0.807377
[40]	training's auc: 0.865787	valid_0's auc: 0.807027
[41]	training's auc: 0.865871	valid_0's auc: 0.806646
[42]	training's auc: 0.865989	valid_0's auc: 0.806643
[43]	training's auc: 0.866051	valid_0's auc: 0.80627
[44]	training's auc: 0.866148	valid_0's auc: 0.80618
[45]	training's auc: 0.866201	val

[73]	training's auc: 0.859322	valid_0's auc: 0.804328
[74]	training's auc: 0.859326	valid_0's auc: 0.804249
[75]	training's auc: 0.859374	valid_0's auc: 0.804191
[76]	training's auc: 0.859402	valid_0's auc: 0.804292
[77]	training's auc: 0.859438	valid_0's auc: 0.804038
[78]	training's auc: 0.859463	valid_0's auc: 0.804319
[79]	training's auc: 0.859489	valid_0's auc: 0.804286
[80]	training's auc: 0.859575	valid_0's auc: 0.80439
[81]	training's auc: 0.859601	valid_0's auc: 0.804195
[82]	training's auc: 0.859596	valid_0's auc: 0.804299
[83]	training's auc: 0.859614	valid_0's auc: 0.804392
[84]	training's auc: 0.859661	valid_0's auc: 0.804137
[85]	training's auc: 0.859678	valid_0's auc: 0.80419
[86]	training's auc: 0.859724	valid_0's auc: 0.804194
[87]	training's auc: 0.859762	valid_0's auc: 0.804005
[88]	training's auc: 0.859815	valid_0's auc: 0.804087
[89]	training's auc: 0.859831	valid_0's auc: 0.804145
[90]	training's auc: 0.859944	valid_0's auc: 0.804087
[91]	training's auc: 0.859924	

[28]	training's auc: 0.86937	valid_0's auc: 0.823972
[29]	training's auc: 0.86964	valid_0's auc: 0.824119
[30]	training's auc: 0.869953	valid_0's auc: 0.823195
[31]	training's auc: 0.870201	valid_0's auc: 0.823336
[32]	training's auc: 0.870378	valid_0's auc: 0.823387
[33]	training's auc: 0.870381	valid_0's auc: 0.822934
[34]	training's auc: 0.870696	valid_0's auc: 0.822152
[35]	training's auc: 0.870882	valid_0's auc: 0.821697
[36]	training's auc: 0.8711	valid_0's auc: 0.821118
[37]	training's auc: 0.871146	valid_0's auc: 0.821496
[38]	training's auc: 0.871435	valid_0's auc: 0.821724
[39]	training's auc: 0.871774	valid_0's auc: 0.821891
[40]	training's auc: 0.87201	valid_0's auc: 0.821668
[41]	training's auc: 0.872034	valid_0's auc: 0.821127
[42]	training's auc: 0.872212	valid_0's auc: 0.821108
[43]	training's auc: 0.872298	valid_0's auc: 0.820974
[44]	training's auc: 0.87242	valid_0's auc: 0.820854
[45]	training's auc: 0.872611	valid_0's auc: 0.821129
[46]	training's auc: 0.872678	vali

[5]	training's auc: 0.859295	valid_0's auc: 0.810432
[6]	training's auc: 0.859753	valid_0's auc: 0.810487
[7]	training's auc: 0.859687	valid_0's auc: 0.811417
[8]	training's auc: 0.861937	valid_0's auc: 0.811633
[9]	training's auc: 0.862261	valid_0's auc: 0.8118
[10]	training's auc: 0.862761	valid_0's auc: 0.812302
[11]	training's auc: 0.863286	valid_0's auc: 0.811449
[12]	training's auc: 0.863516	valid_0's auc: 0.810951
[13]	training's auc: 0.863633	valid_0's auc: 0.810974
[14]	training's auc: 0.863755	valid_0's auc: 0.81136
[15]	training's auc: 0.864049	valid_0's auc: 0.811081
[16]	training's auc: 0.864391	valid_0's auc: 0.810807
[17]	training's auc: 0.864737	valid_0's auc: 0.810922
[18]	training's auc: 0.865056	valid_0's auc: 0.811057
[19]	training's auc: 0.865488	valid_0's auc: 0.810531
[20]	training's auc: 0.865772	valid_0's auc: 0.810806
[21]	training's auc: 0.866093	valid_0's auc: 0.810755
[22]	training's auc: 0.866404	valid_0's auc: 0.809787
[23]	training's auc: 0.866621	valid_

[52]	training's auc: 0.860651	valid_0's auc: 0.81509
[53]	training's auc: 0.860584	valid_0's auc: 0.815115
[54]	training's auc: 0.860799	valid_0's auc: 0.814923
[55]	training's auc: 0.86089	valid_0's auc: 0.81498
[56]	training's auc: 0.860879	valid_0's auc: 0.81488
[57]	training's auc: 0.860984	valid_0's auc: 0.815
[58]	training's auc: 0.861144	valid_0's auc: 0.814756
[59]	training's auc: 0.861269	valid_0's auc: 0.814719
[60]	training's auc: 0.861359	valid_0's auc: 0.814251
[61]	training's auc: 0.861387	valid_0's auc: 0.814191
[62]	training's auc: 0.861413	valid_0's auc: 0.81444
[63]	training's auc: 0.861603	valid_0's auc: 0.814454
[64]	training's auc: 0.861841	valid_0's auc: 0.814152
[65]	training's auc: 0.861754	valid_0's auc: 0.814436
[66]	training's auc: 0.861804	valid_0's auc: 0.814353
[67]	training's auc: 0.861914	valid_0's auc: 0.813976
[68]	training's auc: 0.861945	valid_0's auc: 0.81391
[69]	training's auc: 0.862125	valid_0's auc: 0.813676
[70]	training's auc: 0.862018	valid_0

[17]	training's auc: 0.791498	valid_0's auc: 0.788461
[18]	training's auc: 0.791498	valid_0's auc: 0.788461
[19]	training's auc: 0.791498	valid_0's auc: 0.788461
[20]	training's auc: 0.791498	valid_0's auc: 0.788461
[21]	training's auc: 0.791498	valid_0's auc: 0.788461
[22]	training's auc: 0.791498	valid_0's auc: 0.788461
[23]	training's auc: 0.791498	valid_0's auc: 0.788461
[24]	training's auc: 0.791498	valid_0's auc: 0.788461
[25]	training's auc: 0.791498	valid_0's auc: 0.788461
[26]	training's auc: 0.791498	valid_0's auc: 0.788461
[27]	training's auc: 0.791498	valid_0's auc: 0.788461
[28]	training's auc: 0.791498	valid_0's auc: 0.788461
[29]	training's auc: 0.791498	valid_0's auc: 0.788461
[30]	training's auc: 0.791498	valid_0's auc: 0.788461
[31]	training's auc: 0.791498	valid_0's auc: 0.788461
[32]	training's auc: 0.791498	valid_0's auc: 0.788461
[33]	training's auc: 0.791498	valid_0's auc: 0.788461
[34]	training's auc: 0.791498	valid_0's auc: 0.788461
[35]	training's auc: 0.79149

[19]	training's auc: 0.90401	valid_0's auc: 0.862616
[20]	training's auc: 0.904043	valid_0's auc: 0.862667
[21]	training's auc: 0.904135	valid_0's auc: 0.862564
[22]	training's auc: 0.904264	valid_0's auc: 0.86223
[23]	training's auc: 0.904264	valid_0's auc: 0.862325
[24]	training's auc: 0.904524	valid_0's auc: 0.861455
[25]	training's auc: 0.904701	valid_0's auc: 0.86138
[26]	training's auc: 0.904784	valid_0's auc: 0.861624
[27]	training's auc: 0.904941	valid_0's auc: 0.861738
[28]	training's auc: 0.905098	valid_0's auc: 0.862265
[29]	training's auc: 0.905201	valid_0's auc: 0.862323
[30]	training's auc: 0.905421	valid_0's auc: 0.862032
[31]	training's auc: 0.905574	valid_0's auc: 0.862305
[32]	training's auc: 0.905767	valid_0's auc: 0.86299
[33]	training's auc: 0.905934	valid_0's auc: 0.86285
[34]	training's auc: 0.906	valid_0's auc: 0.862459
[35]	training's auc: 0.906137	valid_0's auc: 0.862348
[36]	training's auc: 0.906271	valid_0's auc: 0.862352
[37]	training's auc: 0.90641	valid_0

[7]	training's auc: 0.89432	valid_0's auc: 0.865445
********************


sms_hour_diff_count_max
[1]	training's auc: 0.853876	valid_0's auc: 0.833573
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.854616	valid_0's auc: 0.839588
[3]	training's auc: 0.854558	valid_0's auc: 0.839681
[4]	training's auc: 0.85468	valid_0's auc: 0.839765
[5]	training's auc: 0.854673	valid_0's auc: 0.840047
[6]	training's auc: 0.854739	valid_0's auc: 0.840389
[7]	training's auc: 0.854789	valid_0's auc: 0.840025
[8]	training's auc: 0.855314	valid_0's auc: 0.838885
[9]	training's auc: 0.85658	valid_0's auc: 0.839309
[10]	training's auc: 0.856791	valid_0's auc: 0.840745
[11]	training's auc: 0.856865	valid_0's auc: 0.840957
[12]	training's auc: 0.856902	valid_0's auc: 0.840866
[13]	training's auc: 0.856937	valid_0's auc: 0.840879
[14]	training's auc: 0.856943	valid_0's auc: 0.84096
[15]	training's auc: 0.856964	valid_0's auc: 0.840965
[16]	training's auc: 0.857057	valid_0's a

[112]	training's auc: 0.791327	valid_0's auc: 0.788248
[113]	training's auc: 0.791327	valid_0's auc: 0.788248
[114]	training's auc: 0.791327	valid_0's auc: 0.788248
[115]	training's auc: 0.791327	valid_0's auc: 0.788248
[116]	training's auc: 0.791327	valid_0's auc: 0.788248
[117]	training's auc: 0.791327	valid_0's auc: 0.788248
[118]	training's auc: 0.791327	valid_0's auc: 0.788248
[119]	training's auc: 0.791327	valid_0's auc: 0.788248
[120]	training's auc: 0.791327	valid_0's auc: 0.788248
[121]	training's auc: 0.791327	valid_0's auc: 0.788248
[122]	training's auc: 0.791327	valid_0's auc: 0.788248
[123]	training's auc: 0.791327	valid_0's auc: 0.788248
[124]	training's auc: 0.791327	valid_0's auc: 0.788248
[125]	training's auc: 0.791327	valid_0's auc: 0.788248
Early stopping, best iteration is:
[25]	training's auc: 0.791327	valid_0's auc: 0.788248
********************


sms_hour_diff_count_mean
[1]	training's auc: 0.86233	valid_0's auc: 0.834887
Training until validation scores don't im

[107]	training's auc: 0.876969	valid_0's auc: 0.82826
[108]	training's auc: 0.876992	valid_0's auc: 0.828341
Early stopping, best iteration is:
[8]	training's auc: 0.863944	valid_0's auc: 0.837382
********************


busi_name
[1]	training's auc: 0.790282	valid_0's auc: 0.790065
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.790282	valid_0's auc: 0.790065
[3]	training's auc: 0.790282	valid_0's auc: 0.790065
[4]	training's auc: 0.790282	valid_0's auc: 0.790065
[5]	training's auc: 0.790282	valid_0's auc: 0.790065
[6]	training's auc: 0.790282	valid_0's auc: 0.790065
[7]	training's auc: 0.790282	valid_0's auc: 0.790065
[8]	training's auc: 0.790282	valid_0's auc: 0.790065
[9]	training's auc: 0.790282	valid_0's auc: 0.790065
[10]	training's auc: 0.790282	valid_0's auc: 0.790065
[11]	training's auc: 0.790282	valid_0's auc: 0.790065
[12]	training's auc: 0.790282	valid_0's auc: 0.790065
[13]	training's auc: 0.790735	valid_0's auc: 0.790065
[14]	training's

[28]	training's auc: 0.83575	valid_0's auc: 0.800747
[29]	training's auc: 0.835975	valid_0's auc: 0.800899
[30]	training's auc: 0.836209	valid_0's auc: 0.800842
[31]	training's auc: 0.836374	valid_0's auc: 0.80096
[32]	training's auc: 0.836518	valid_0's auc: 0.801137
[33]	training's auc: 0.836642	valid_0's auc: 0.801009
[34]	training's auc: 0.836769	valid_0's auc: 0.800801
[35]	training's auc: 0.836869	valid_0's auc: 0.800448
[36]	training's auc: 0.837036	valid_0's auc: 0.800042
[37]	training's auc: 0.837258	valid_0's auc: 0.800258
[38]	training's auc: 0.837461	valid_0's auc: 0.799732
[39]	training's auc: 0.837571	valid_0's auc: 0.799886
[40]	training's auc: 0.837652	valid_0's auc: 0.80034
[41]	training's auc: 0.837741	valid_0's auc: 0.799782
[42]	training's auc: 0.837904	valid_0's auc: 0.799585
[43]	training's auc: 0.838002	valid_0's auc: 0.799577
[44]	training's auc: 0.838081	valid_0's auc: 0.799866
[45]	training's auc: 0.838133	valid_0's auc: 0.799403
[46]	training's auc: 0.83829	va

[250]	training's auc: 0.844212	valid_0's auc: 0.801103
[251]	training's auc: 0.844219	valid_0's auc: 0.80112
[252]	training's auc: 0.844223	valid_0's auc: 0.80107
[253]	training's auc: 0.844224	valid_0's auc: 0.801147
[254]	training's auc: 0.844221	valid_0's auc: 0.801052
[255]	training's auc: 0.844219	valid_0's auc: 0.800998
[256]	training's auc: 0.844227	valid_0's auc: 0.801136
[257]	training's auc: 0.844238	valid_0's auc: 0.801169
[258]	training's auc: 0.844233	valid_0's auc: 0.801167
[259]	training's auc: 0.844237	valid_0's auc: 0.801093
[260]	training's auc: 0.844237	valid_0's auc: 0.80112
[261]	training's auc: 0.844244	valid_0's auc: 0.801047
[262]	training's auc: 0.84425	valid_0's auc: 0.801014
[263]	training's auc: 0.84426	valid_0's auc: 0.801085
[264]	training's auc: 0.844259	valid_0's auc: 0.801056
[265]	training's auc: 0.844268	valid_0's auc: 0.80101
[266]	training's auc: 0.844263	valid_0's auc: 0.801027
[267]	training's auc: 0.844264	valid_0's auc: 0.801019
[268]	training's

[49]	training's auc: 0.83303	valid_0's auc: 0.800363
[50]	training's auc: 0.833067	valid_0's auc: 0.800608
[51]	training's auc: 0.83319	valid_0's auc: 0.800502
[52]	training's auc: 0.833225	valid_0's auc: 0.800538
[53]	training's auc: 0.833381	valid_0's auc: 0.800405
[54]	training's auc: 0.833436	valid_0's auc: 0.800294
[55]	training's auc: 0.833501	valid_0's auc: 0.800309
[56]	training's auc: 0.833603	valid_0's auc: 0.800471
[57]	training's auc: 0.833731	valid_0's auc: 0.800208
[58]	training's auc: 0.833753	valid_0's auc: 0.800194
[59]	training's auc: 0.833805	valid_0's auc: 0.800235
[60]	training's auc: 0.833883	valid_0's auc: 0.800146
[61]	training's auc: 0.83397	valid_0's auc: 0.800192
[62]	training's auc: 0.833995	valid_0's auc: 0.80002
[63]	training's auc: 0.834031	valid_0's auc: 0.800045
[64]	training's auc: 0.834087	valid_0's auc: 0.799921
[65]	training's auc: 0.834115	valid_0's auc: 0.800054
[66]	training's auc: 0.834181	valid_0's auc: 0.799891
[67]	training's auc: 0.834275	va

[75]	training's auc: 0.831929	valid_0's auc: 0.794723
[76]	training's auc: 0.832018	valid_0's auc: 0.794678
[77]	training's auc: 0.832115	valid_0's auc: 0.794856
[78]	training's auc: 0.83214	valid_0's auc: 0.79494
[79]	training's auc: 0.832246	valid_0's auc: 0.794872
[80]	training's auc: 0.832292	valid_0's auc: 0.794829
[81]	training's auc: 0.832456	valid_0's auc: 0.795002
[82]	training's auc: 0.83246	valid_0's auc: 0.794961
[83]	training's auc: 0.832526	valid_0's auc: 0.794813
[84]	training's auc: 0.832578	valid_0's auc: 0.795032
[85]	training's auc: 0.832695	valid_0's auc: 0.794819
[86]	training's auc: 0.832744	valid_0's auc: 0.794784
[87]	training's auc: 0.832765	valid_0's auc: 0.794737
[88]	training's auc: 0.832834	valid_0's auc: 0.794637
[89]	training's auc: 0.83294	valid_0's auc: 0.794443
[90]	training's auc: 0.833036	valid_0's auc: 0.794238
[91]	training's auc: 0.83305	valid_0's auc: 0.794283
[92]	training's auc: 0.833124	valid_0's auc: 0.794375
[93]	training's auc: 0.833194	val

[125]	training's auc: 0.836152	valid_0's auc: 0.784455
[126]	training's auc: 0.836178	valid_0's auc: 0.784581
[127]	training's auc: 0.836226	valid_0's auc: 0.78459
[128]	training's auc: 0.836313	valid_0's auc: 0.78458
[129]	training's auc: 0.83632	valid_0's auc: 0.784526
[130]	training's auc: 0.83635	valid_0's auc: 0.784652
[131]	training's auc: 0.836355	valid_0's auc: 0.784569
[132]	training's auc: 0.836398	valid_0's auc: 0.784693
[133]	training's auc: 0.8364	valid_0's auc: 0.784798
[134]	training's auc: 0.836423	valid_0's auc: 0.784629
[135]	training's auc: 0.836442	valid_0's auc: 0.784767
[136]	training's auc: 0.836458	valid_0's auc: 0.784777
[137]	training's auc: 0.836475	valid_0's auc: 0.785028
[138]	training's auc: 0.836516	valid_0's auc: 0.78503
[139]	training's auc: 0.836545	valid_0's auc: 0.784995
[140]	training's auc: 0.836585	valid_0's auc: 0.784908
[141]	training's auc: 0.836593	valid_0's auc: 0.784897
[142]	training's auc: 0.836608	valid_0's auc: 0.784856
[143]	training's 

[274]	training's auc: 0.837547	valid_0's auc: 0.785672
[275]	training's auc: 0.837548	valid_0's auc: 0.785711
[276]	training's auc: 0.837551	valid_0's auc: 0.785693
[277]	training's auc: 0.837554	valid_0's auc: 0.785668
[278]	training's auc: 0.837553	valid_0's auc: 0.785709
[279]	training's auc: 0.837556	valid_0's auc: 0.785697
[280]	training's auc: 0.837555	valid_0's auc: 0.785695
[281]	training's auc: 0.837556	valid_0's auc: 0.785705
[282]	training's auc: 0.837569	valid_0's auc: 0.785732
[283]	training's auc: 0.837567	valid_0's auc: 0.785703
[284]	training's auc: 0.837567	valid_0's auc: 0.785734
[285]	training's auc: 0.837573	valid_0's auc: 0.785714
[286]	training's auc: 0.837576	valid_0's auc: 0.785763
[287]	training's auc: 0.837573	valid_0's auc: 0.785767
[288]	training's auc: 0.837578	valid_0's auc: 0.785796
[289]	training's auc: 0.83758	valid_0's auc: 0.785838
[290]	training's auc: 0.837586	valid_0's auc: 0.785819
[291]	training's auc: 0.837586	valid_0's auc: 0.785844
[292]	train

[428]	training's auc: 0.837645	valid_0's auc: 0.786506
[429]	training's auc: 0.837645	valid_0's auc: 0.786469
[430]	training's auc: 0.837645	valid_0's auc: 0.786566
[431]	training's auc: 0.837646	valid_0's auc: 0.786634
[432]	training's auc: 0.837644	valid_0's auc: 0.786648
[433]	training's auc: 0.837646	valid_0's auc: 0.786619
[434]	training's auc: 0.837646	valid_0's auc: 0.7866
[435]	training's auc: 0.837646	valid_0's auc: 0.786672
[436]	training's auc: 0.837646	valid_0's auc: 0.786662
[437]	training's auc: 0.837647	valid_0's auc: 0.786637
[438]	training's auc: 0.837647	valid_0's auc: 0.7866
[439]	training's auc: 0.837647	valid_0's auc: 0.786571
[440]	training's auc: 0.837646	valid_0's auc: 0.78659
[441]	training's auc: 0.837647	valid_0's auc: 0.786617
[442]	training's auc: 0.837646	valid_0's auc: 0.78661
[443]	training's auc: 0.837647	valid_0's auc: 0.786629
[444]	training's auc: 0.837647	valid_0's auc: 0.786652
[445]	training's auc: 0.837646	valid_0's auc: 0.786524
[446]	training's

[578]	training's auc: 0.837656	valid_0's auc: 0.786964
[579]	training's auc: 0.837657	valid_0's auc: 0.786993
[580]	training's auc: 0.837657	valid_0's auc: 0.786962
[581]	training's auc: 0.837657	valid_0's auc: 0.786898
[582]	training's auc: 0.837657	valid_0's auc: 0.786943
[583]	training's auc: 0.837657	valid_0's auc: 0.78695
[584]	training's auc: 0.837657	valid_0's auc: 0.7869
[585]	training's auc: 0.837657	valid_0's auc: 0.78689
[586]	training's auc: 0.837657	valid_0's auc: 0.786983
[587]	training's auc: 0.837657	valid_0's auc: 0.786972
[588]	training's auc: 0.837657	valid_0's auc: 0.78702
[589]	training's auc: 0.837657	valid_0's auc: 0.787022
[590]	training's auc: 0.837657	valid_0's auc: 0.786962
[591]	training's auc: 0.837657	valid_0's auc: 0.786954
[592]	training's auc: 0.837657	valid_0's auc: 0.786952
[593]	training's auc: 0.837657	valid_0's auc: 0.786991
[594]	training's auc: 0.837657	valid_0's auc: 0.78709
[595]	training's auc: 0.837657	valid_0's auc: 0.787045
[596]	training's

[738]	training's auc: 0.837671	valid_0's auc: 0.787243
[739]	training's auc: 0.837671	valid_0's auc: 0.787221
[740]	training's auc: 0.837671	valid_0's auc: 0.787206
[741]	training's auc: 0.837671	valid_0's auc: 0.787299
[742]	training's auc: 0.837671	valid_0's auc: 0.787183
[743]	training's auc: 0.837671	valid_0's auc: 0.78726
[744]	training's auc: 0.837672	valid_0's auc: 0.787216
[745]	training's auc: 0.837671	valid_0's auc: 0.787185
[746]	training's auc: 0.83767	valid_0's auc: 0.787268
[747]	training's auc: 0.83767	valid_0's auc: 0.787243
[748]	training's auc: 0.837673	valid_0's auc: 0.787252
[749]	training's auc: 0.837675	valid_0's auc: 0.787246
[750]	training's auc: 0.837673	valid_0's auc: 0.787208
[751]	training's auc: 0.837673	valid_0's auc: 0.787306
[752]	training's auc: 0.837674	valid_0's auc: 0.787248
[753]	training's auc: 0.837674	valid_0's auc: 0.787177
[754]	training's auc: 0.837674	valid_0's auc: 0.787237
[755]	training's auc: 0.837674	valid_0's auc: 0.787154
[756]	trainin

[891]	training's auc: 0.837681	valid_0's auc: 0.787589
[892]	training's auc: 0.837681	valid_0's auc: 0.787607
[893]	training's auc: 0.837681	valid_0's auc: 0.787525
[894]	training's auc: 0.837682	valid_0's auc: 0.787521
[895]	training's auc: 0.837681	valid_0's auc: 0.787469
[896]	training's auc: 0.837682	valid_0's auc: 0.78743
[897]	training's auc: 0.837682	valid_0's auc: 0.787473
[898]	training's auc: 0.837682	valid_0's auc: 0.787448
[899]	training's auc: 0.837682	valid_0's auc: 0.787421
[900]	training's auc: 0.83768	valid_0's auc: 0.787399
[901]	training's auc: 0.83768	valid_0's auc: 0.787504
[902]	training's auc: 0.83768	valid_0's auc: 0.787512
[903]	training's auc: 0.837681	valid_0's auc: 0.787577
[904]	training's auc: 0.837681	valid_0's auc: 0.787661
[905]	training's auc: 0.837681	valid_0's auc: 0.787686
[906]	training's auc: 0.837681	valid_0's auc: 0.787612
[907]	training's auc: 0.837682	valid_0's auc: 0.787672
[908]	training's auc: 0.837682	valid_0's auc: 0.787641
[909]	training

[68]	training's auc: 0.815688	valid_0's auc: 0.770277
[69]	training's auc: 0.815774	valid_0's auc: 0.770499
[70]	training's auc: 0.815828	valid_0's auc: 0.770567
[71]	training's auc: 0.815922	valid_0's auc: 0.770594
[72]	training's auc: 0.816027	valid_0's auc: 0.770625
[73]	training's auc: 0.816108	valid_0's auc: 0.770523
[74]	training's auc: 0.816168	valid_0's auc: 0.770557
[75]	training's auc: 0.816181	valid_0's auc: 0.770557
[76]	training's auc: 0.816203	valid_0's auc: 0.770428
[77]	training's auc: 0.816281	valid_0's auc: 0.770333
[78]	training's auc: 0.816334	valid_0's auc: 0.77024
[79]	training's auc: 0.816366	valid_0's auc: 0.770223
[80]	training's auc: 0.816449	valid_0's auc: 0.77019
[81]	training's auc: 0.816482	valid_0's auc: 0.770354
[82]	training's auc: 0.81654	valid_0's auc: 0.770372
[83]	training's auc: 0.816593	valid_0's auc: 0.770387
[84]	training's auc: 0.81662	valid_0's auc: 0.770399
[85]	training's auc: 0.816698	valid_0's auc: 0.770445
[86]	training's auc: 0.816725	va

[64]	training's auc: 0.838215	valid_0's auc: 0.793039
[65]	training's auc: 0.83826	valid_0's auc: 0.792824
[66]	training's auc: 0.838272	valid_0's auc: 0.792903
[67]	training's auc: 0.83837	valid_0's auc: 0.792617
[68]	training's auc: 0.838367	valid_0's auc: 0.792905
[69]	training's auc: 0.838461	valid_0's auc: 0.792797
[70]	training's auc: 0.838516	valid_0's auc: 0.792603
[71]	training's auc: 0.83858	valid_0's auc: 0.792559
[72]	training's auc: 0.838656	valid_0's auc: 0.792586
[73]	training's auc: 0.838692	valid_0's auc: 0.792661
[74]	training's auc: 0.838792	valid_0's auc: 0.792298
[75]	training's auc: 0.83881	valid_0's auc: 0.792396
[76]	training's auc: 0.838866	valid_0's auc: 0.792344
[77]	training's auc: 0.838917	valid_0's auc: 0.792013
[78]	training's auc: 0.838958	valid_0's auc: 0.791833
[79]	training's auc: 0.838949	valid_0's auc: 0.79199
[80]	training's auc: 0.838993	valid_0's auc: 0.792032
[81]	training's auc: 0.839058	valid_0's auc: 0.792167
[82]	training's auc: 0.839068	val

[42]	training's auc: 0.829737	valid_0's auc: 0.769376
[43]	training's auc: 0.82978	valid_0's auc: 0.769463
[44]	training's auc: 0.829859	valid_0's auc: 0.769656
[45]	training's auc: 0.829904	valid_0's auc: 0.769481
[46]	training's auc: 0.82997	valid_0's auc: 0.769386
[47]	training's auc: 0.830047	valid_0's auc: 0.769346
[48]	training's auc: 0.830178	valid_0's auc: 0.769544
[49]	training's auc: 0.830293	valid_0's auc: 0.769515
[50]	training's auc: 0.83031	valid_0's auc: 0.769422
[51]	training's auc: 0.830353	valid_0's auc: 0.769508
[52]	training's auc: 0.830394	valid_0's auc: 0.769582
[53]	training's auc: 0.830415	valid_0's auc: 0.769511
[54]	training's auc: 0.830655	valid_0's auc: 0.769689
[55]	training's auc: 0.830813	valid_0's auc: 0.769708
[56]	training's auc: 0.830891	valid_0's auc: 0.769496
[57]	training's auc: 0.830984	valid_0's auc: 0.769532
[58]	training's auc: 0.830977	valid_0's auc: 0.769542
[59]	training's auc: 0.831125	valid_0's auc: 0.769389
[60]	training's auc: 0.831174	v

[23]	training's auc: 0.76123	valid_0's auc: 0.733446
[24]	training's auc: 0.761348	valid_0's auc: 0.733082
[25]	training's auc: 0.761437	valid_0's auc: 0.733188
[26]	training's auc: 0.761538	valid_0's auc: 0.7333
[27]	training's auc: 0.76163	valid_0's auc: 0.733061
[28]	training's auc: 0.761756	valid_0's auc: 0.733364
[29]	training's auc: 0.761828	valid_0's auc: 0.733397
[30]	training's auc: 0.761856	valid_0's auc: 0.733204
[31]	training's auc: 0.761848	valid_0's auc: 0.733241
[32]	training's auc: 0.762019	valid_0's auc: 0.733123
[33]	training's auc: 0.762084	valid_0's auc: 0.733119
[34]	training's auc: 0.762145	valid_0's auc: 0.733124
[35]	training's auc: 0.762216	valid_0's auc: 0.732867
[36]	training's auc: 0.762313	valid_0's auc: 0.732907
[37]	training's auc: 0.762406	valid_0's auc: 0.732978
[38]	training's auc: 0.762401	valid_0's auc: 0.733066
[39]	training's auc: 0.762508	valid_0's auc: 0.733089
[40]	training's auc: 0.762595	valid_0's auc: 0.733066
[41]	training's auc: 0.762669	va

In [None]:
print(use_cols)

In [None]:
print(useless_cols)

In [55]:
lgb_train = lgb.Dataset(X_train[train_cols].values, y_train) 

lgb_eval= lgb.Dataset(X_valid[train_cols].values, y_valid, reference=lgb_train)  

print('Start training...')

# train

lgb_val_0 = lgb.train(params,
                      lgb_train,
                      num_boost_round=10000,
                      valid_sets=[lgb_eval, lgb_train],
                      early_stopping_rounds=100,
                      verbose_eval=10)

Start training...
Training until validation scores don't improve for 100 rounds
[10]	training's auc: 0.979093	valid_0's auc: 0.934074
[20]	training's auc: 0.99165	valid_0's auc: 0.94308
[30]	training's auc: 0.997122	valid_0's auc: 0.947472
[40]	training's auc: 0.99889	valid_0's auc: 0.948817
[50]	training's auc: 0.999451	valid_0's auc: 0.94998
[60]	training's auc: 0.999703	valid_0's auc: 0.950674
[70]	training's auc: 0.999819	valid_0's auc: 0.950188
[80]	training's auc: 0.999896	valid_0's auc: 0.949726
[90]	training's auc: 0.999942	valid_0's auc: 0.949097
[100]	training's auc: 0.999967	valid_0's auc: 0.949828
[110]	training's auc: 0.999975	valid_0's auc: 0.949701
[120]	training's auc: 0.99999	valid_0's auc: 0.94942
[130]	training's auc: 0.999998	valid_0's auc: 0.949205
[140]	training's auc: 1	valid_0's auc: 0.949046
[150]	training's auc: 1	valid_0's auc: 0.949377
[160]	training's auc: 1	valid_0's auc: 0.949639
Early stopping, best iteration is:
[64]	training's auc: 0.999746	valid_0's a

In [None]:
lgb_train_all = lgb.Dataset(df_train[train_cols].values, df_train['label'])   

print('Start training...')

# train

lgb_model = lgb.train(params,
                      lgb_train_all,
                      num_boost_round=lgb_val_0.best_iteration + 20)

Start training...


In [None]:
df_train['prob'] = lgb_model.predict(df_train[train_cols])
df_train['pred'] = np.where(df_train['prob'] > 0.5, 1, 0)

f1 = np.round(f1_score(df_train['label'], df_train['pred']), 4)
auc = roc_auc_score(df_train['label'], df_train['prob'])

print('f1: ', f1)
print('auc: ', auc)

In [None]:
df_test['label'] = np.where(lgb_model.predict(df_test[train_cols]) > 0.5, 1, 0)
df_test[['phone_no_m', 'label']].to_csv('../sub/sub_{}_{}.csv'.format(time.strptime('%Y%m%d', f1)), index=False)