In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier
import logging

In [2]:
import pickle
def save_obj(obj, name ):
    """Pickle ``obj`` to ``obj/<name>.pkl`` relative to the working directory.

    Args:
        obj: Any picklable object.
        name: File stem; ``'.pkl'`` is appended and the file is placed
            under the ``obj/`` folder.

    Raises:
        OSError: If the target directory or file cannot be created/written.
        pickle.PicklingError: If ``obj`` cannot be pickled.
    """
    path = 'obj/'+ name + '.pkl'
    # The target folder is not created anywhere else in the notebook, so make
    # sure it exists; otherwise open() raises FileNotFoundError on a fresh
    # checkout.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name):
    """Return the object stored in ``obj/<name>.pkl``.

    Args:
        name: File stem; ``'.pkl'`` is appended and the file is looked up
            under the ``obj/`` folder.

    Returns:
        Whatever object the pickle file contains.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    pickle_path = 'obj/' + name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

In [3]:
# Log line layout: timestamp, level, originating module, then the message.
log_fmt = "[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
logging.basicConfig(format=log_fmt, level=logging.INFO)

import warnings
# Suppress every warning for the rest of the session. This also hides pandas
# warnings raised later when columns are assigned on filtered frames.
warnings.filterwarnings('ignore')
def extract_day(s):
    """Parse the day number out of each timestamp string in ``s``.

    Each value is split on ``'-'``; the first piece has its leading character
    dropped and the rest is converted to ``int`` (e.g. ``'D3838-H12'`` gives
    ``3838``).

    Args:
        s: pandas Series of timestamp strings.

    Returns:
        Series of integers, one per input element.
    """
    def _day_of(stamp):
        day_token = stamp.split('-')[0]
        return int(day_token[1:])

    return s.apply(_day_of)
def extract_hour(s):
    """Parse the hour number out of each timestamp string in ``s``.

    Each value is split on ``'-'``; the second piece has its leading character
    dropped and the rest is converted to ``int`` (e.g. ``'D3838-H12'`` gives
    ``12``).

    Args:
        s: pandas Series of timestamp strings.

    Returns:
        Series of integers, one per input element.
    """
    def _hour_of(stamp):
        hour_token = stamp.split('-')[1]
        return int(hour_token[1:])

    return s.apply(_hour_of)

# Folder (relative to the working directory) that every read_csv call in this
# notebook reads its raw tab-separated files from.
base_path = 'data_set_0926'

In [4]:
# Load the invitation records: tab-separated, no header row.
train = pd.read_csv(f'{base_path}/invite_info_0926.txt', sep='\t', header=None)
train.columns = ['qid', 'uid', 'dt', 'label']
logging.info("invite %s", train.shape)

# Load the evaluation invitations: same layout without the label column.
test = pd.read_csv(f'{base_path}/invite_info_evaluate_1_0926.txt', sep='\t', header=None)
test.columns = ['qid', 'uid', 'dt']
logging.info("test %s", test.shape)

# Keep an untouched copy of the evaluation rows for the submission file, and
# remember its length so later cells can assert that merges did not change
# the number of rows.
sub = test.copy()
sub_size = len(sub)

# Split the raw timestamp string into integer day and hour columns.
train['day'] = extract_day(train['dt'])
train['hour'] = extract_hour(train['dt'])
test['day'] = extract_day(test['dt'])
test['hour'] = extract_hour(test['dt'])

# The raw timestamp is dropped once day/hour have been derived.
del train['dt'], test['dt']

[2019-12-17 00:06:56,328] INFO in <ipython-input-4-b836fd75a8ca>: invite (9489162, 4)
[2019-12-17 00:06:59,491] INFO in <ipython-input-4-b836fd75a8ca>: test (1141683, 3)


In [5]:
# Load the question table: tab-separated, no header row.
ques = pd.read_csv(f'{base_path}/question_info_0926.txt', header=None, sep='\t')
ques.columns = ['qid', 'q_dt', 'title_t1', 'title_t2', 'desc_t1', 'desc_t2', 'topic']
# The title/description columns are not used by this notebook; drop them.
del ques['title_t1'], ques['title_t2'], ques['desc_t1'], ques['desc_t2']
logging.info("ques %s", ques.shape)

# Derive integer day/hour of the question from its timestamp, then drop the
# raw string.
ques['q_day'] = extract_day(ques['q_dt'])
ques['q_hour'] = extract_hour(ques['q_dt'])
del ques['q_dt']

[2019-12-17 00:08:03,760] INFO in <ipython-input-5-5996cc9bc826>: ques (1829900, 3)


In [6]:
# Load the answer table: tab-separated, no header row.
ans = pd.read_csv(f'{base_path}/answer_info_0926.txt', header=None, sep='\t')
ans.columns = ['aid', 'qid', 'uid', 'ans_dt','ans_t1','ans_t2','is_good','is_rec','is_dest','has_img',
               'has_video', 'word_count', 'reci_cheer', 'reci_uncheer', 'reci_comment', 'reci_mark', 'reci_tks',
               'reci_xxx', 'reci_no_help', 'reci_dis']
# The two answer-text columns are not used by this notebook; drop them.
del ans['ans_t1'], ans['ans_t2']
logging.info("ans %s", ans.shape)

# Derive integer day/hour of the answer from its timestamp, then drop the raw
# string.
ans['a_day'] = extract_day(ans['ans_dt'])
ans['a_hour'] = extract_hour(ans['ans_dt'])
del ans['ans_dt']

# Attach each answer's question columns (topic, q_day, q_hour). pd.merge
# defaults to an inner join, so answers whose qid is absent from `ques` are
# dropped here.
ans = pd.merge(ans, ques, on='qid')
# `ques` is released here; a later cell reloads it from disk.
del ques

[2019-12-17 00:13:25,342] INFO in <ipython-input-6-ac73b4ea1d3f>: ans (4513735, 18)


In [7]:
# Number of days between the question being asked and the answer being posted.
ans['diff_qa_days'] = ans['a_day'] - ans['q_day']

# Time-window split.
# NOTE(review): train_start, train_end and val_end are not referenced by any
# other cell visible in this notebook.
train_start = 3838
train_end = 3867

val_start = 3868
val_end = 3874

# Last day of the training-label window (the original note read "marks the
# start of the training set"). label_start is likewise not referenced in the
# visible cells.
label_end = 3867
label_start = label_end - 6

# Invitation-history feature window for the training labels: 3838..3860.
train_label_feature_end = label_end - 7
train_label_feature_start = train_label_feature_end - 22

# Answer-history feature window for the training labels: 3810..3860.
train_ans_feature_end = label_end - 7
train_ans_feature_start = train_ans_feature_end - 50

# Invitation-history feature window for the evaluation rows: 3845..3867.
val_label_feature_end = val_start - 1
val_label_feature_start = val_label_feature_end - 22

# Answer-history feature window for the evaluation rows: 3817..3867.
val_ans_feature_end = val_start - 1
val_ans_feature_start = val_ans_feature_end - 50

# Both feature frames below are boolean-mask selections of `train`.
train_label_feature = train[(train['day'] >= train_label_feature_start) & (train['day'] <= train_label_feature_end)]
logging.info("train_label_feature %s", train_label_feature.shape)

val_label_feature = train[(train['day'] >= val_label_feature_start) & (train['day'] <= val_label_feature_end)]
logging.info("val_label_feature %s", val_label_feature.shape)

# Rows used as training labels: every invitation after the training feature
# window (day > 3860).
train_label = train[(train['day'] > train_label_feature_end)]

logging.info("train feature start %s end %s, label start %s end %s", train_label_feature['day'].min(),
             train_label_feature['day'].max(), train_label['day'].min(), train_label['day'].max())

# The message says "test feature", but the values logged come from
# val_label_feature (features) and test (labels).
logging.info("test feature start %s end %s, label start %s end %s", val_label_feature['day'].min(),
             val_label_feature['day'].max(), test['day'].min(), test['day'].max())

# Select the answer rows that fall inside each answer feature window.
# (Original note: "3807~3874". NOTE(review): the constants above evaluate to
# 3810..3860 and 3817..3867 — confirm which range was intended.)
train_ans_feature = ans[(ans['a_day'] >= train_ans_feature_start) & (ans['a_day'] <= train_ans_feature_end)]

val_ans_feature = ans[(ans['a_day'] >= val_ans_feature_start) & (ans['a_day'] <= val_ans_feature_end)]

logging.info("train ans feature %s, start %s end %s", train_ans_feature.shape, train_ans_feature['a_day'].min(),
             train_ans_feature['a_day'].max())

logging.info("val ans feature %s, start %s end %s", val_ans_feature.shape, val_ans_feature['a_day'].min(),
             val_ans_feature['a_day'].max())

# Answer columns that extract_feature1 aggregates (sum/max/mean) per user and
# per question.
fea_cols = ['is_good', 'is_rec', 'is_dest', 'has_img', 'has_video', 'word_count',
            'reci_cheer', 'reci_uncheer', 'reci_comment', 'reci_mark', 'reci_tks',
            'reci_xxx', 'reci_no_help', 'reci_dis', 'diff_qa_days']

[2019-12-17 00:19:32,778] INFO in <ipython-input-7-c0b3e11102a1>: train_label_feature (6895493, 5)
[2019-12-17 00:19:33,830] INFO in <ipython-input-7-c0b3e11102a1>: val_label_feature (7583553, 5)
[2019-12-17 00:19:34,363] INFO in <ipython-input-7-c0b3e11102a1>: train feature start 3838 end 3860, label start 3861 end 3867
[2019-12-17 00:19:34,410] INFO in <ipython-input-7-c0b3e11102a1>: test feature start 3845 end 3867, label start 3868 end 3874
[2019-12-17 00:19:41,167] INFO in <ipython-input-7-c0b3e11102a1>: train ans feature (3700178, 23), start 3810 end 3860
[2019-12-17 00:19:41,207] INFO in <ipython-input-7-c0b3e11102a1>: val ans feature (3992334, 23), start 3817 end 3867


In [9]:
print(ans.columns)

Index(['aid', 'qid', 'uid', 'is_good', 'is_rec', 'is_dest', 'has_img',
       'has_video', 'word_count', 'reci_cheer', 'reci_uncheer', 'reci_comment',
       'reci_mark', 'reci_tks', 'reci_xxx', 'reci_no_help', 'reci_dis',
       'a_day', 'a_hour', 'topic', 'q_day', 'q_hour', 'diff_qa_days'],
      dtype='object')


In [41]:
def to_array(df,x):
    """Return the values of the column at position ``x`` as a Python list.

    Args:
        df: pandas DataFrame.
        x: Zero-based positional index of the column (so ``x`` selects the
            ``x + 1``-th column).

    Returns:
        A list with one entry per row, in row order.
    """
    return list(df.iloc[:, x].values)

In [45]:
uid_list = to_array(ans,2)

In [16]:
qid_list = to_array(ans,1)

In [17]:
topic_list = to_array(ans,19)

In [20]:
len(topic_list)

4513735

In [21]:
topic_list

['T381,T8211,T3144,T4936,T823',
 'T381,T8211,T3144,T4936,T823',
 'T381,T8211,T3144,T4936,T823',
 'T381,T8211,T3144,T4936,T823',
 'T5490,T2180,T17098',
 'T5490,T2180,T17098',
 'T5490,T2180,T17098',
 'T5490,T2180,T17098',
 'T5490,T2180,T17098',
 'T5490,T2180,T17098',
 'T8716,T10196,T8767',
 'T8716,T10196,T8767',
 'T8716,T10196,T8767',
 'T8716,T10196,T8767',
 'T258',
 'T258',
 'T258',
 'T258',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T95,T545',
 'T1144,T545,T258,T95',
 'T1144,T545,T258,T95',
 'T1144,T545,T258,T95',
 'T1144,T545,T258,T95',
 'T1144,T545,T258,T95',
 'T1144,T545,T258,T95',
 'T1144,T545,T258,T95',
 'T1144,T545,T258,T95',
 'T106,T95,T1515',
 'T106,T95,T1515',
 'T106,T95,T1515',
 'T248,T591,T687,T11258,T12762',
 'T248,T591,T687,T11258,T12762',
 'T24

In [22]:
def static(usrList,tpcList):
    """Count, per user, how often each topic occurs across that user's rows.

    Args:
        usrList: Iterable of user ids, one per row.
        tpcList: Iterable of comma-separated topic strings aligned with
            ``usrList``. The literal ``'-1'`` marks a row without topics and
            is skipped.

    Returns:
        dict mapping user id -> dict mapping topic id -> occurrence count.
        Users whose rows are all ``'-1'`` do not appear in the result.
    """
    member_topic = {}
    for user, topicraw in zip(usrList, tpcList):
        if topicraw == '-1':
            continue
        counts = member_topic.setdefault(user, {})
        # Count every occurrence the same way for new and known users. The
        # previous first-seen branch used dict(zip(...)), which collapsed a
        # topic repeated within one row to a count of 1.
        for topic in topicraw.split(","):
            counts[topic] = counts.get(topic, 0) + 1
    return member_topic

In [23]:
member_topic = static(uid_list,topic_list)

In [24]:
# Reload the question table (the earlier `ques` frame was deleted after being
# merged into `ans`). Only the qid and topic columns are kept this time.
ques = pd.read_csv(f'{base_path}/question_info_0926.txt', header=None, sep='\t')
ques.columns = ['qid', 'q_dt', 'title_t1', 'title_t2', 'desc_t1', 'desc_t2', 'topic']
del ques['title_t1'], ques['title_t2'], ques['desc_t1'], ques['desc_t2']
del ques['q_dt']

In [25]:
print(ques.columns)

Index(['qid', 'topic'], dtype='object')


In [26]:
QUESTION_list = to_array(ques,0)
TOPIC_list = to_array(ques,1)

In [28]:
len(TOPIC_list)

1829900

In [76]:
# Build a mapping: question id -> set of topic ids.
# Questions whose topic field is '-1' are kept, with an empty set.
ques_topic = {}
# The loop variables are deliberately NOT named `ques`/`topic`: reusing `ques`
# here replaced the `ques` DataFrame with a plain string, so re-running the
# cell that calls to_array(ques, ...) afterwards would fail.
for question_id, topic_field in zip(QUESTION_list,TOPIC_list):
    topics = [] if topic_field == '-1' else topic_field.split(",")
    if question_id in ques_topic:
        # A question id appeared more than once: report it and merge topics.
        print("问题重复")
        ques_topic[question_id].update(topics)
    else:
        ques_topic[question_id] = set(topics)

In [77]:
save_obj(ques_topic,"ques_topic") # ques_topic: dict -> set; holds only the topics each question involves
save_obj(member_topic,"member_topic")# member_topic: dict -> dict; holds how many times each topic occurred per user

In [8]:
print(train_label.columns)

Index(['qid', 'uid', 'label', 'day', 'hour'], dtype='object')


In [36]:
print(train_label.iloc[0,0])

AttributeError: 'str' object has no attribute 'type'

In [46]:
train_qid_list = to_array(train_label,0)
train_uid_list = to_array(train_label,1)

In [80]:
len(UQscore)

2593669

In [78]:
def score(useridlist,quesidlist):
    """Score each (user, question) pair from the topic statistics.

    Reads the module-level ``member_topic`` (user -> topic -> count, as built
    by ``static``) and ``ques_topic`` (question -> set of topics).

    Per pair the score is:
      * 0 when the user is missing from ``member_topic`` or the question is
        missing from ``ques_topic``;
      * 3 when the question has an empty topic set;
      * otherwise the sum of the user's counts over the question's topics
        (topics the user has no count for contribute nothing).

    Args:
        useridlist: Iterable of user ids.
        quesidlist: Iterable of question ids aligned with ``useridlist``.

    Returns:
        list of integer scores, one per pair, in input order.
    """
    scores = []
    for userid, quesid in zip(useridlist, quesidlist):
        # Unknown user or unknown question: nothing to match against.
        if userid not in member_topic or quesid not in ques_topic:
            scores.append(0)
            continue

        question_topics = ques_topic[quesid]
        if not question_topics:
            # Question carries no topics: fixed score of 3.
            scores.append(3)
            continue

        user_counts = member_topic[userid]
        scores.append(sum(user_counts[t] for t in question_topics if t in user_counts))
    return scores

In [79]:
UQscore = score(train_uid_list,train_qid_list)

In [68]:
train_label['UQscore'] = UQscore

In [69]:
print(test.columns)

Index(['qid', 'uid', 'day', 'hour'], dtype='object')


In [70]:
test_qid_list = to_array(test,0)
test_uid_list = to_array(test,1)

In [71]:
UQscore_test = score(test_uid_list,test_qid_list)

In [74]:
len(test_uid_list)

1141683

In [75]:
test['UQscore'] = UQscore_test

In [None]:
# NOTE(review): despite the `test_` prefix, these lists are taken from
# train_label_feature (columns 0 and 1) and overwrite the lists built from
# `test` in an earlier cell. This cell has no execution count — confirm
# whether it is still needed.
test_qid_list = to_array(train_label_feature,0)
test_uid_list = to_array(train_label_feature,1)

In [93]:
def UQSCORE(df,x,y):
    """Compute the user-question topic score for every row and store it in ``df``.

    The result is written in place to the ``'UQscore'`` column of ``df``.

    Args:
        df: DataFrame holding a question-id column and a user-id column.
        x: Positional index of the QUESTION-id column.
        y: Positional index of the USER-id column.

    Returns:
        None. ``df`` is modified in place.
    """
    Q_LST = to_array(df,x)
    U_LST = to_array(df,y)
    # Both lists come from columns of the same frame, so their lengths always
    # match. The former length guard could never fail and its silent `return`
    # branch was unreachable, so it is dropped. The status message is kept so
    # the cell output is unchanged.
    print("等长不需填充")
    # NOTE(review): when `df` is itself a filtered selection of another frame,
    # pandas may warn about assigning to a copy (warnings are globally
    # silenced in this notebook) — confirm the column lands where expected.
    df['UQscore'] = score(U_LST,Q_LST)
    return

In [92]:
len(train_label_feature.iloc[:,1])

6895493

In [94]:
UQSCORE(train_label_feature,0,1)

等长不需填充


In [97]:
print(val_label_feature.columns)

Index(['qid', 'uid', 'label', 'day', 'hour', 'UQscore'], dtype='object')


In [96]:
UQSCORE(val_label_feature,0,1)

等长不需填充


In [98]:
del ques_topic,member_topic

In [113]:
def extract_feature1(target, label_feature, ans_feature):
    """Left-join historical invitation and answer statistics onto ``target``.

    Columns are appended in this order:
      1. per-question, then per-user statistics of ``label`` (mean, sum, std,
         count) computed from ``label_feature``;
      2. per-question, then per-user answer counts from ``ans_feature``;
      3. for every column in the module-level ``fea_cols``: per-user, then
         per-question sum/max/mean from ``ans_feature``.

    Args:
        target: Frame with ``qid`` and ``uid`` columns to enrich.
        label_feature: Invitation history with ``qid``, ``uid``, ``label``.
        ans_feature: Answer history with ``aid``, ``qid``, ``uid`` and every
            column listed in ``fea_cols``.

    Returns:
        A new frame: ``target`` plus the feature columns. Keys that have no
        history get missing values, because every merge is a left join.
    """
    def _join_stats(frame, stats, key, names):
        # Flatten the grouped result, rename its value columns, then left-join.
        flat = stats.reset_index()
        flat.columns = [key] + names
        return pd.merge(frame, flat, on=key, how='left')

    # Invitation outcome statistics: question first, then user.
    label_aggs = ['mean', 'sum', 'std', 'count']
    for key, prefix in (('qid', 'q'), ('uid', 'u')):
        grouped = label_feature.groupby(key)['label'].agg(label_aggs)
        target = _join_stats(target, grouped, key,
                             [f'{prefix}_inv_{agg}' for agg in label_aggs])

    # Number of answers per question, then per user.
    for key, prefix in (('qid', 'q'), ('uid', 'u')):
        answer_counts = ans_feature.groupby(key)['aid'].count()
        target = _join_stats(target, answer_counts, key, [f'{prefix}_ans_count'])

    # Aggregates of each answer attribute: user first, then question.
    answer_aggs = ['sum', 'max', 'mean']
    for col in fea_cols:
        for key, prefix in (('uid', 'u'), ('qid', 'q')):
            grouped = ans_feature.groupby(key)[col].agg(answer_aggs)
            target = _join_stats(target, grouped, key,
                                 [f'{prefix}_{col}_{agg}' for agg in answer_aggs])
        logging.info("extract %s", col)
    return target

In [102]:
# NOTE(review): the train_label call below is commented out, yet a later cell
# logs train_label with 112 columns, which suggests it was run at some point.
# On a fresh Restart & Run All, train_label would not receive these features —
# confirm and re-enable if needed.
# train_label = extract_feature1(train_label, train_label_feature, train_ans_feature)
test = extract_feature1(test, val_label_feature, val_ans_feature)

[2019-12-17 02:44:05,991] INFO in <ipython-input-101-01eadaa08d0c>: extract is_good
[2019-12-17 02:44:20,950] INFO in <ipython-input-101-01eadaa08d0c>: extract is_rec
[2019-12-17 02:44:35,987] INFO in <ipython-input-101-01eadaa08d0c>: extract is_dest
[2019-12-17 02:44:51,044] INFO in <ipython-input-101-01eadaa08d0c>: extract has_img
[2019-12-17 02:45:07,295] INFO in <ipython-input-101-01eadaa08d0c>: extract has_video
[2019-12-17 02:45:26,420] INFO in <ipython-input-101-01eadaa08d0c>: extract word_count
[2019-12-17 02:45:43,610] INFO in <ipython-input-101-01eadaa08d0c>: extract reci_cheer
[2019-12-17 02:45:59,738] INFO in <ipython-input-101-01eadaa08d0c>: extract reci_uncheer
[2019-12-17 02:46:18,948] INFO in <ipython-input-101-01eadaa08d0c>: extract reci_comment
[2019-12-17 02:46:35,847] INFO in <ipython-input-101-01eadaa08d0c>: extract reci_mark
[2019-12-17 02:46:52,977] INFO in <ipython-input-101-01eadaa08d0c>: extract reci_tks
[2019-12-17 02:47:11,241] INFO in <ipython-input-101-01e

In [103]:
# Feature extraction finished.
logging.info("train shape %s, test shape %s", train_label.shape, test.shape)
# The left joins must not have changed the number of evaluation rows.
assert len(test) == sub_size

# Load the user (member) table: tab-separated, no header row.
user = pd.read_csv(f'{base_path}/member_info_0926.txt', header=None, sep='\t')
user.columns = ['uid', 'gender', 'creat_keyword', 'level', 'hot', 'reg_type', 'reg_plat', 'freq', 'uf_b1', 'uf_b2',
                'uf_b3', 'uf_b4', 'uf_b5', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5', 'score', 'follow_topic',
                'inter_topic']
# The two topic-list columns are not used as model features; drop them.
del user['follow_topic'], user['inter_topic']
logging.info("user %s", user.shape)

# Drop columns that hold a single distinct value (they carry no information).
unq = user.nunique()
logging.info("user unq %s", unq)

for x in unq[unq == 1].index:
    del user[x]
    logging.info('del unq==1 %s', x)

# Label-encode every remaining string (object dtype) column except uid.
t = user.dtypes
cats = [x for x in t[t == 'object'].index if x not in ['follow_topic', 'inter_topic', 'uid']]
logging.info("user cat %s", cats)

for d in cats:
    lb = LabelEncoder()
    user[d] = lb.fit_transform(user[d])
    logging.info('encode %s', d)

# Integer-encode question ids; the encoder is fitted on the union of the
# training and evaluation ids so both can be transformed.
q_lb = LabelEncoder()
q_lb.fit(list(train_label['qid'].astype(str).values) + list(test['qid'].astype(str).values))
train_label['qid_enc'] = q_lb.transform(train_label['qid'])
test['qid_enc'] = q_lb.transform(test['qid'])

# Integer-encode user ids; the encoder is fitted on the user table, so every
# uid in train_label/test has to be present there for transform to succeed.
u_lb = LabelEncoder()
u_lb.fit(user['uid'])
train_label['uid_enc'] = u_lb.transform(train_label['uid'])
test['uid_enc'] = u_lb.transform(test['uid'])

# Attach the user attributes to both frames.
train_label = pd.merge(train_label, user, on='uid', how='left')
test = pd.merge(test, user, on='uid', how='left')
logging.info("train shape %s, test shape %s", train_label.shape, test.shape)

# Stack training rows on top of evaluation rows so the encodings below are
# computed over both; the first len(train_label) rows are the training rows.
data = pd.concat((train_label, test), axis=0, sort=True)
del test

# Count encoding.
count_fea = ['uid_enc', 'qid_enc', 'gender', 'freq', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5']
for feat in count_fea:
    col_name = '{}_count'.format(feat)
    # First pass: frequency of each value.
    data[col_name] = data[feat].map(data[feat].value_counts().astype(int))
    # Values seen fewer than twice are collapsed into one bucket (-1, which
    # becomes 0 after the +1 shift applied to the whole column).
    data.loc[data[col_name] < 2, feat] = -1
    data[feat] += 1
    # Second pass: recompute frequencies after collapsing, then min-max scale.
    data[col_name] = data[feat].map(data[feat].value_counts().astype(int))
    data[col_name] = (data[col_name] - data[col_name].min()) / (data[col_name].max() - data[col_name].min())

# (Placeholder from the original: "number of times a question was answered" —
# no code follows it.)

# Shrink memory: downcast 64-bit integer and float columns to 32-bit.
t = data.dtypes
for x in t[t == 'int64'].index:
    data[x] = data[x].astype('int32')

for x in t[t == 'float64'].index:
    data[x] = data[x].astype('float32')

# Day number modulo 7 (the name suggests a day-of-week feature).
data['wk'] = data['day'] % 7

# Every column except the label, the raw ids and the raw day is a feature.
feature_cols = [x for x in data.columns if x not in ('label', 'uid', 'qid', 'dt', 'day')]
# (Placeholder from the original: "target encoding" — nothing implemented.)
logging.info("feature size %s", len(feature_cols))

[2019-12-17 02:53:33,608] INFO in <ipython-input-103-567af4893ef3>: train shape (2593669, 112), test shape (1141683, 111)
[2019-12-17 02:53:47,232] INFO in <ipython-input-103-567af4893ef3>: user (1931654, 19)
[2019-12-17 02:53:51,347] INFO in <ipython-input-103-567af4893ef3>: user unq uid              1931654
gender                 3
creat_keyword          1
level                  1
hot                    1
reg_type               1
reg_plat               1
freq                   5
uf_b1                  2
uf_b2                  2
uf_b3                  2
uf_b4                  2
uf_b5                  2
uf_c1               2561
uf_c2                291
uf_c3                428
uf_c4               1556
uf_c5                  2
score                732
dtype: int64
[2019-12-17 02:53:51,406] INFO in <ipython-input-103-567af4893ef3>: del unq==1 creat_keyword
[2019-12-17 02:53:51,418] INFO in <ipython-input-103-567af4893ef3>: del unq==1 level
[2019-12-17 02:53:51,419] INFO in <ipython-input

In [104]:
# Undo the earlier concat: the first len(train_label) rows of `data` are the
# training rows, the remainder are the evaluation rows.
X_train_all = data.iloc[:len(train_label)][feature_cols]
y_train_all = data.iloc[:len(train_label)]['label']
test = data.iloc[len(train_label):]
del data
# The evaluation rows must still line up with the submission frame.
assert len(test) == sub_size

logging.info("train shape %s, test shape %s", train_label.shape, test.shape)

fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Only the first of the five folds is used: the loop exits right after
# binding train_idx / val_idx.
for index, (train_idx, val_idx) in enumerate(fold.split(X=X_train_all, y=y_train_all)):
    break

X_train, X_val, y_train, y_val = X_train_all.iloc[train_idx][feature_cols], X_train_all.iloc[val_idx][feature_cols], \
                                 y_train_all.iloc[train_idx], \
                                 y_train_all.iloc[val_idx]
# Release the large intermediates that are no longer needed.
del train_label, X_train_all

[2019-12-17 03:01:35,377] INFO in <ipython-input-104-bbd36cef4c67>: train shape (2593669, 127), test shape (1141683, 137)


In [112]:
# Binary LightGBM classifier trained on GPU.
# NOTE(review): gpu_platform_id / gpu_device_id are specific to the machine
# this was run on; adjust (or drop device="gpu") elsewhere.
# NOTE(review): `silent` here and `early_stopping_rounds` in fit() may not be
# accepted by newer LightGBM releases — confirm against the installed version.
model_lgb = LGBMClassifier(learning_rate=0.05,n_estimators=700, n_jobs=-1, 
                           objective='binary', seed=1000, silent=True, 
                           device="gpu",gpu_platform_id = 0,gpu_device_id = 2,
                           max_bin=63,min_child_samples=128)

# Fit on the training fold and report logloss and AUC on the held-out fold,
# with an early-stopping patience of 10 rounds.
model_lgb.fit(X_train, y_train,
              eval_metric=['logloss', 'auc'],
              eval_set=[(X_val, y_val)],
              early_stopping_rounds=10)

# Positive-class probability for every evaluation row.
sub['label'] = model_lgb.predict_proba(test[feature_cols])[:, 1]

# Submission file: tab-separated, no header, no index.
sub.to_csv('result781.txt', index=None, header=None, sep='\t')

# Persist the fitted model as obj/model.model_lgb_781.pkl.
save_obj(model_lgb,'model.model_lgb_781')

[1]	valid_0's auc: 0.836028	valid_0's binary_logloss: 0.425656
Training until validation scores don't improve for 10 rounds
[2]	valid_0's auc: 0.838512	valid_0's binary_logloss: 0.414124
[3]	valid_0's auc: 0.843415	valid_0's binary_logloss: 0.404366
[4]	valid_0's auc: 0.844364	valid_0's binary_logloss: 0.395628
[5]	valid_0's auc: 0.848432	valid_0's binary_logloss: 0.388112
[6]	valid_0's auc: 0.84985	valid_0's binary_logloss: 0.381268
[7]	valid_0's auc: 0.854282	valid_0's binary_logloss: 0.375064
[8]	valid_0's auc: 0.855068	valid_0's binary_logloss: 0.369524
[9]	valid_0's auc: 0.856683	valid_0's binary_logloss: 0.364332
[10]	valid_0's auc: 0.857744	valid_0's binary_logloss: 0.359668
[11]	valid_0's auc: 0.862475	valid_0's binary_logloss: 0.355177
[12]	valid_0's auc: 0.864106	valid_0's binary_logloss: 0.351076
[13]	valid_0's auc: 0.865583	valid_0's binary_logloss: 0.347246
[14]	valid_0's auc: 0.866243	valid_0's binary_logloss: 0.343804
[15]	valid_0's auc: 0.867387	valid_0's binary_logloss

[129]	valid_0's auc: 0.897445	valid_0's binary_logloss: 0.270891
[130]	valid_0's auc: 0.897519	valid_0's binary_logloss: 0.270781
[131]	valid_0's auc: 0.897594	valid_0's binary_logloss: 0.270675
[132]	valid_0's auc: 0.897696	valid_0's binary_logloss: 0.270554
[133]	valid_0's auc: 0.897766	valid_0's binary_logloss: 0.270449
[134]	valid_0's auc: 0.897833	valid_0's binary_logloss: 0.270355
[135]	valid_0's auc: 0.897883	valid_0's binary_logloss: 0.270286
[136]	valid_0's auc: 0.897933	valid_0's binary_logloss: 0.270215
[137]	valid_0's auc: 0.89799	valid_0's binary_logloss: 0.270143
[138]	valid_0's auc: 0.89803	valid_0's binary_logloss: 0.270074
[139]	valid_0's auc: 0.89811	valid_0's binary_logloss: 0.269957
[140]	valid_0's auc: 0.898172	valid_0's binary_logloss: 0.269872
[141]	valid_0's auc: 0.898217	valid_0's binary_logloss: 0.269799
[142]	valid_0's auc: 0.898263	valid_0's binary_logloss: 0.269741
[143]	valid_0's auc: 0.898324	valid_0's binary_logloss: 0.269651
[144]	valid_0's auc: 0.89840

[256]	valid_0's auc: 0.902377	valid_0's binary_logloss: 0.263868
[257]	valid_0's auc: 0.902429	valid_0's binary_logloss: 0.263799
[258]	valid_0's auc: 0.902449	valid_0's binary_logloss: 0.263769
[259]	valid_0's auc: 0.902473	valid_0's binary_logloss: 0.263742
[260]	valid_0's auc: 0.902505	valid_0's binary_logloss: 0.263695
[261]	valid_0's auc: 0.902524	valid_0's binary_logloss: 0.263667
[262]	valid_0's auc: 0.902545	valid_0's binary_logloss: 0.263636
[263]	valid_0's auc: 0.902572	valid_0's binary_logloss: 0.263604
[264]	valid_0's auc: 0.902585	valid_0's binary_logloss: 0.263587
[265]	valid_0's auc: 0.902597	valid_0's binary_logloss: 0.263568
[266]	valid_0's auc: 0.902629	valid_0's binary_logloss: 0.263523
[267]	valid_0's auc: 0.90265	valid_0's binary_logloss: 0.263495
[268]	valid_0's auc: 0.902668	valid_0's binary_logloss: 0.263468
[269]	valid_0's auc: 0.9027	valid_0's binary_logloss: 0.263422
[270]	valid_0's auc: 0.902719	valid_0's binary_logloss: 0.263387
[271]	valid_0's auc: 0.90273

[383]	valid_0's auc: 0.904499	valid_0's binary_logloss: 0.260951
[384]	valid_0's auc: 0.904505	valid_0's binary_logloss: 0.260942
[385]	valid_0's auc: 0.90452	valid_0's binary_logloss: 0.260919
[386]	valid_0's auc: 0.904527	valid_0's binary_logloss: 0.260912
[387]	valid_0's auc: 0.904531	valid_0's binary_logloss: 0.260904
[388]	valid_0's auc: 0.904543	valid_0's binary_logloss: 0.260888
[389]	valid_0's auc: 0.904553	valid_0's binary_logloss: 0.26087
[390]	valid_0's auc: 0.904561	valid_0's binary_logloss: 0.260861
[391]	valid_0's auc: 0.904568	valid_0's binary_logloss: 0.260849
[392]	valid_0's auc: 0.904573	valid_0's binary_logloss: 0.260843
[393]	valid_0's auc: 0.90458	valid_0's binary_logloss: 0.260832
[394]	valid_0's auc: 0.90461	valid_0's binary_logloss: 0.260791
[395]	valid_0's auc: 0.904623	valid_0's binary_logloss: 0.260775
[396]	valid_0's auc: 0.904629	valid_0's binary_logloss: 0.260768
[397]	valid_0's auc: 0.904634	valid_0's binary_logloss: 0.260759
[398]	valid_0's auc: 0.904641

[510]	valid_0's auc: 0.905589	valid_0's binary_logloss: 0.259452
[511]	valid_0's auc: 0.905592	valid_0's binary_logloss: 0.259448
[512]	valid_0's auc: 0.905594	valid_0's binary_logloss: 0.259447
[513]	valid_0's auc: 0.905599	valid_0's binary_logloss: 0.259441
[514]	valid_0's auc: 0.905609	valid_0's binary_logloss: 0.259427
[515]	valid_0's auc: 0.905612	valid_0's binary_logloss: 0.259424
[516]	valid_0's auc: 0.905617	valid_0's binary_logloss: 0.259418
[517]	valid_0's auc: 0.905619	valid_0's binary_logloss: 0.259415
[518]	valid_0's auc: 0.905628	valid_0's binary_logloss: 0.259402
[519]	valid_0's auc: 0.905645	valid_0's binary_logloss: 0.25938
[520]	valid_0's auc: 0.905648	valid_0's binary_logloss: 0.259376
[521]	valid_0's auc: 0.90565	valid_0's binary_logloss: 0.259373
[522]	valid_0's auc: 0.905654	valid_0's binary_logloss: 0.259367
[523]	valid_0's auc: 0.905659	valid_0's binary_logloss: 0.259359
[524]	valid_0's auc: 0.905664	valid_0's binary_logloss: 0.259352
[525]	valid_0's auc: 0.9056

[637]	valid_0's auc: 0.906311	valid_0's binary_logloss: 0.258498
[638]	valid_0's auc: 0.906316	valid_0's binary_logloss: 0.258492
[639]	valid_0's auc: 0.906318	valid_0's binary_logloss: 0.258489
[640]	valid_0's auc: 0.906321	valid_0's binary_logloss: 0.258483
[641]	valid_0's auc: 0.906334	valid_0's binary_logloss: 0.258462
[642]	valid_0's auc: 0.90634	valid_0's binary_logloss: 0.258455
[643]	valid_0's auc: 0.90634	valid_0's binary_logloss: 0.258454
[644]	valid_0's auc: 0.906358	valid_0's binary_logloss: 0.258426
[645]	valid_0's auc: 0.906362	valid_0's binary_logloss: 0.258419
[646]	valid_0's auc: 0.906369	valid_0's binary_logloss: 0.258411
[647]	valid_0's auc: 0.906375	valid_0's binary_logloss: 0.258401
[648]	valid_0's auc: 0.906381	valid_0's binary_logloss: 0.258394
[649]	valid_0's auc: 0.906383	valid_0's binary_logloss: 0.258391
[650]	valid_0's auc: 0.906384	valid_0's binary_logloss: 0.25839
[651]	valid_0's auc: 0.906397	valid_0's binary_logloss: 0.258369
[652]	valid_0's auc: 0.90639

In [None]:
# NOTE(review): the training cell in this notebook saves
# 'model.model_lgb_781', while this loads 'model.model_lgb_777' — confirm
# which artifact is intended.
model = load_obj("model.model_lgb_777")