In [2]:
import pandas as pd
import numpy as np

# Show every column and every row when a DataFrame is rendered (no truncation).
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

# NumPy: print floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)


def _three_decimals(value):
    """Format a float with exactly three decimals (used as the pandas float formatter)."""
    return '%.3f' % value


# pandas: render floats with three decimals instead of scientific notation.
pd.set_option('display.float_format', _three_decimals)

In [3]:
from sklearn.model_selection import KFold
import lightgbm as lgb
from sklearn import metrics

In [6]:
# Load the training table and the hold-out test table (paths are relative to the
# working directory). Both frames are later expected to carry a `loan_status`
# label column, which the cross-validation cells drop from the features.
train = pd.read_csv('final/train_final.csv')
test = pd.read_csv('final/test_final.csv')

In [7]:
def getLabel(x, threshold=0.5):
    """Convert predicted probabilities into hard 0/1 class labels.

    Parameters
    ----------
    x : array-like
        Predicted probabilities (NumPy array, pandas Series, list, or scalar).
    threshold : float, default 0.5
        Cut-off; values greater than or equal to it become 1, all others 0.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 labels with the same shape as ``x``.
    """
    # np.asarray lets plain Python lists through; comparing a list to a float
    # directly would raise TypeError.
    return np.where(np.asarray(x) >= threshold, 1, 0)

## baseline

In [8]:
# Baseline: 5-fold cross-validation of a LightGBM binary classifier with default
# hyper-parameters on the original features. Every fold's model is also scored
# on the hold-out `test` frame.
train_copy = train.copy()
folds = KFold(n_splits=5, shuffle=True, random_state=42)
loss_train_list = []
loss_eval_list = []
loss_test_list = []

# The test features/labels do not depend on the fold, so split them once.
X_test = test.drop(columns=['loan_status'])
y_test = test['loan_status']

for train_ix, eval_ix in folds.split(train_copy):
    # KFold.split yields positional row numbers, so select with .iloc; .loc is
    # label-based and only matches while the frame has a default 0..n-1 index.
    train_df = train_copy.iloc[train_ix]
    eval_df = train_copy.iloc[eval_ix]
    X_train, y_train = train_df.drop(columns=['loan_status']), train_df['loan_status']
    X_eval, y_eval = eval_df.drop(columns=['loan_status']), eval_df['loan_status']

    # NOTE(review): `verbose=` in fit() was removed in LightGBM 4.0; on newer
    # versions use callbacks=[lgb.log_evaluation(20)] instead.
    clf = lgb.LGBMClassifier(boosting_type='gbdt',
                             objective="binary",
                             metric='binary_logloss',
                             ).fit(X_train, y_train,
                                   eval_names=["train", "val"],
                                   eval_set=[(X_train, y_train), (X_eval, y_eval)],
                                   eval_metric='binary_logloss',
                                   verbose=20)

    # Column 1 of predict_proba is the probability of the positive class.
    ytrain = clf.predict_proba(X_train)[:, 1]
    yeval = clf.predict_proba(X_eval)[:, 1]
    ytest = clf.predict_proba(X_test)[:, 1]

    loss_train = metrics.log_loss(y_train, ytrain)
    loss_eval = metrics.log_loss(y_eval, yeval)
    loss_test = metrics.log_loss(y_test, ytest)
    print('train logloss',loss_train, 'eval logloss',loss_eval, 'test logloss',loss_test )

    loss_train_list.append(loss_train)
    loss_eval_list.append(loss_eval)
    loss_test_list.append(loss_test)

    # Hard labels for the per-class precision/recall reports.
    ytrainL = getLabel(ytrain)
    yevalL = getLabel(yeval)
    ytestL = getLabel(ytest)
    print("train classification report")
    print(metrics.classification_report(y_train, ytrainL))
    print("**********")
    print("val classification report")
    print(metrics.classification_report(y_eval, yevalL))
    print("**********")
    print("target classification report")
    print(metrics.classification_report(y_test, ytestL))
    print("**************************************")

[20]	train's binary_logloss: 0.204217	val's binary_logloss: 0.213604
[40]	train's binary_logloss: 0.17979	val's binary_logloss: 0.199146
[60]	train's binary_logloss: 0.168875	val's binary_logloss: 0.198759
[80]	train's binary_logloss: 0.160312	val's binary_logloss: 0.198661
[100]	train's binary_logloss: 0.153623	val's binary_logloss: 0.199181
train logloss 0.15362316367975015 eval logloss 0.19918081167439325 test logloss 0.1993030345536531
train classification report
              precision    recall  f1-score   support

           0       0.84      0.85      0.84      8149
           1       0.96      0.96      0.96     31851

    accuracy                           0.94     40000
   macro avg       0.90      0.90      0.90     40000
weighted avg       0.94      0.94      0.94     40000

**********
val classification report
              precision    recall  f1-score   support

           0       0.79      0.82      0.81      2063
           1       0.95      0.94      0.95      7937



In [9]:
# Fold-averaged log-loss on the train / validation / test splits.
mean_train_loss, mean_eval_loss, mean_test_loss = (
    np.mean(fold_losses)
    for fold_losses in (loss_train_list, loss_eval_list, loss_test_list)
)
print('avg train loss', mean_train_loss, 'avg eval loss',
      mean_eval_loss, 'avg test loss', mean_test_loss)

avg train loss 0.1542849733180826 avg eval loss 0.19903233375734924 avg test loss 0.19976589398637914


In [10]:
# Tuned baseline: the same 5-fold CV as the baseline, with hand-tuned LightGBM
# hyper-parameters. Each fold also prints its gain-based feature importances,
# and the top-30 feature names of every fold are accumulated in `key_cols`.

train_copy = train.copy()
folds = KFold(n_splits=5, shuffle=True, random_state=42)
loss_train_list = []
loss_eval_list = []
loss_test_list = []
key_cols = []

# The test features/labels do not depend on the fold, so split them once.
X_test = test.drop(columns=['loan_status'])
y_test = test['loan_status']

for train_ix, eval_ix in folds.split(train_copy):
    # KFold.split yields positional row numbers, so select with .iloc; .loc is
    # label-based and only matches while the frame has a default 0..n-1 index.
    train_df = train_copy.iloc[train_ix]
    eval_df = train_copy.iloc[eval_ix]
    X_train, y_train = train_df.drop(columns=['loan_status']), train_df['loan_status']
    X_eval, y_eval = eval_df.drop(columns=['loan_status']), eval_df['loan_status']

    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
    # (default 0), so subsample=0.7 is probably inert here - confirm.
    # NOTE(review): `verbose=` in fit() was removed in LightGBM 4.0; on newer
    # versions use callbacks=[lgb.log_evaluation(20)] instead.
    clf = lgb.LGBMClassifier(boosting_type='gbdt',
                             objective="binary",
                             metric='binary_logloss',
                             colsample_bytree=0.7,
                             learning_rate=0.02,
                             n_estimators=300,
                             num_leaves=31,
                             subsample=0.7,
                             importance_type='gain'
                             ).fit(X_train, y_train,
                                   eval_names=["train", "val"],
                                   eval_set=[(X_train, y_train), (X_eval, y_eval)],
                                   eval_metric='binary_logloss',
                                   verbose=20)

    # Column 1 of predict_proba is the probability of the positive class.
    ytrain = clf.predict_proba(X_train)[:, 1]
    yeval = clf.predict_proba(X_eval)[:, 1]
    ytest = clf.predict_proba(X_test)[:, 1]

    loss_train = metrics.log_loss(y_train, ytrain)
    loss_eval = metrics.log_loss(y_eval, yeval)
    loss_test = metrics.log_loss(y_test, ytest)
    print('train logloss',loss_train, 'eval logloss',loss_eval, 'test logloss',loss_test )

    loss_train_list.append(loss_train)
    loss_eval_list.append(loss_eval)
    loss_test_list.append(loss_test)

    # Hard labels for the per-class precision/recall reports.
    ytrainL = getLabel(ytrain)
    yevalL = getLabel(yeval)
    ytestL = getLabel(ytest)
    print("train classification report")
    print(metrics.classification_report(y_train, ytrainL))
    print("**********")
    print("val classification report")
    print(metrics.classification_report(y_eval, yevalL))
    print("**********")
    print("target classification report")
    print(metrics.classification_report(y_test, ytestL))
    print("**************************************")

    # Rank features by importance. sort_values replaces sort_index(by=...),
    # whose `by` keyword was removed in pandas 1.0.
    feature_importance = pd.DataFrame(
        {'feature_name': clf.booster_.feature_name(),
         'importance': clf.feature_importances_}
    ).sort_values(by='importance', ascending=False)
    print(feature_importance[:30])
    # Remember this fold's 30 most important features.
    key_cols.extend(feature_importance[:30]['feature_name'])

[20]	train's binary_logloss: 0.357411	val's binary_logloss: 0.36073
[40]	train's binary_logloss: 0.287821	val's binary_logloss: 0.29192
[60]	train's binary_logloss: 0.247249	val's binary_logloss: 0.252545
[80]	train's binary_logloss: 0.223145	val's binary_logloss: 0.229981
[100]	train's binary_logloss: 0.208243	val's binary_logloss: 0.216939
[120]	train's binary_logloss: 0.19877	val's binary_logloss: 0.209185
[140]	train's binary_logloss: 0.19253	val's binary_logloss: 0.204743
[160]	train's binary_logloss: 0.187994	val's binary_logloss: 0.202065
[180]	train's binary_logloss: 0.184568	val's binary_logloss: 0.20052
[200]	train's binary_logloss: 0.181662	val's binary_logloss: 0.199447
[220]	train's binary_logloss: 0.179092	val's binary_logloss: 0.198802
[240]	train's binary_logloss: 0.176879	val's binary_logloss: 0.198553
[260]	train's binary_logloss: 0.17463	val's binary_logloss: 0.19836
[280]	train's binary_logloss: 0.172598	val's binary_logloss: 0.198172
[300]	train's binary_logloss: 0



[20]	train's binary_logloss: 0.35706	val's binary_logloss: 0.363682
[40]	train's binary_logloss: 0.287565	val's binary_logloss: 0.294952
[60]	train's binary_logloss: 0.247003	val's binary_logloss: 0.255152
[80]	train's binary_logloss: 0.223051	val's binary_logloss: 0.232118
[100]	train's binary_logloss: 0.208251	val's binary_logloss: 0.218376
[120]	train's binary_logloss: 0.198762	val's binary_logloss: 0.210212
[140]	train's binary_logloss: 0.192619	val's binary_logloss: 0.205448
[160]	train's binary_logloss: 0.188139	val's binary_logloss: 0.20249
[180]	train's binary_logloss: 0.184826	val's binary_logloss: 0.200793
[200]	train's binary_logloss: 0.181936	val's binary_logloss: 0.199645
[220]	train's binary_logloss: 0.179387	val's binary_logloss: 0.198943
[240]	train's binary_logloss: 0.176947	val's binary_logloss: 0.198626
[260]	train's binary_logloss: 0.174801	val's binary_logloss: 0.19842
[280]	train's binary_logloss: 0.172747	val's binary_logloss: 0.198204
[300]	train's binary_loglos



[20]	train's binary_logloss: 0.357549	val's binary_logloss: 0.360881
[40]	train's binary_logloss: 0.287451	val's binary_logloss: 0.293632
[60]	train's binary_logloss: 0.246584	val's binary_logloss: 0.255047
[80]	train's binary_logloss: 0.222463	val's binary_logloss: 0.233006
[100]	train's binary_logloss: 0.207528	val's binary_logloss: 0.220108
[120]	train's binary_logloss: 0.198014	val's binary_logloss: 0.212371
[140]	train's binary_logloss: 0.191859	val's binary_logloss: 0.207972
[160]	train's binary_logloss: 0.187383	val's binary_logloss: 0.205325
[180]	train's binary_logloss: 0.183929	val's binary_logloss: 0.203772
[200]	train's binary_logloss: 0.181071	val's binary_logloss: 0.202927
[220]	train's binary_logloss: 0.17855	val's binary_logloss: 0.202442
[240]	train's binary_logloss: 0.176162	val's binary_logloss: 0.202209
[260]	train's binary_logloss: 0.173934	val's binary_logloss: 0.201986
[280]	train's binary_logloss: 0.171773	val's binary_logloss: 0.202077
[300]	train's binary_logl



[20]	train's binary_logloss: 0.35446	val's binary_logloss: 0.354022
[40]	train's binary_logloss: 0.28677	val's binary_logloss: 0.287059
[60]	train's binary_logloss: 0.246045	val's binary_logloss: 0.247024
[80]	train's binary_logloss: 0.222493	val's binary_logloss: 0.224442
[100]	train's binary_logloss: 0.20826	val's binary_logloss: 0.211357
[120]	train's binary_logloss: 0.199223	val's binary_logloss: 0.203654
[140]	train's binary_logloss: 0.193501	val's binary_logloss: 0.199381
[160]	train's binary_logloss: 0.189286	val's binary_logloss: 0.196532
[180]	train's binary_logloss: 0.185928	val's binary_logloss: 0.194834
[200]	train's binary_logloss: 0.183069	val's binary_logloss: 0.193817
[220]	train's binary_logloss: 0.180676	val's binary_logloss: 0.193366
[240]	train's binary_logloss: 0.178244	val's binary_logloss: 0.192899
[260]	train's binary_logloss: 0.175939	val's binary_logloss: 0.192553
[280]	train's binary_logloss: 0.173869	val's binary_logloss: 0.192421
[300]	train's binary_loglos



[20]	train's binary_logloss: 0.354933	val's binary_logloss: 0.352202
[40]	train's binary_logloss: 0.286898	val's binary_logloss: 0.286469
[60]	train's binary_logloss: 0.245884	val's binary_logloss: 0.247594
[80]	train's binary_logloss: 0.222089	val's binary_logloss: 0.225788
[100]	train's binary_logloss: 0.207726	val's binary_logloss: 0.213193
[120]	train's binary_logloss: 0.19862	val's binary_logloss: 0.205884
[140]	train's binary_logloss: 0.19283	val's binary_logloss: 0.201863
[160]	train's binary_logloss: 0.188604	val's binary_logloss: 0.199354
[180]	train's binary_logloss: 0.185251	val's binary_logloss: 0.197755
[200]	train's binary_logloss: 0.182436	val's binary_logloss: 0.196617
[220]	train's binary_logloss: 0.179994	val's binary_logloss: 0.196038
[240]	train's binary_logloss: 0.177692	val's binary_logloss: 0.195737
[260]	train's binary_logloss: 0.175595	val's binary_logloss: 0.195611
[280]	train's binary_logloss: 0.173738	val's binary_logloss: 0.195307
[300]	train's binary_loglo



In [11]:
# Keep the tuned baseline's fold-averaged losses for the final comparison.
baseline_train_avgloss = np.mean(loss_train_list)
baseline_eval_avgloss = np.mean(loss_eval_list)
baseline_test_avgloss = np.mean(loss_test_list)

print('avg train loss', baseline_train_avgloss, 'avg eval loss',
      baseline_eval_avgloss, 'avg test loss', baseline_test_avgloss)

avg train loss 0.17104548110422396 avg eval loss 0.19713644072532674 avg test loss 0.19770312170552207


## 计算特征：将特征重要性排名前30的特征两两组合，生成新特征

In [12]:
# Distinct feature names collected in `key_cols` (duplicates across folds removed).
set(key_cols)

{'continuous_annual_inc',
 'continuous_delinq_2yrs',
 'continuous_dti',
 'continuous_fico_range_high',
 'continuous_fico_range_low',
 'continuous_funded_amnt',
 'continuous_funded_amnt_inv',
 'continuous_inq_last_6mths',
 'continuous_installment',
 'continuous_int_rate',
 'continuous_last_fico_range_high',
 'continuous_last_fico_range_low',
 'continuous_loan_amnt',
 'continuous_mths_since_last_delinq',
 'continuous_mths_since_last_major_derog',
 'continuous_mths_since_last_record',
 'continuous_open_acc',
 'continuous_pub_rec',
 'discrete_addr_state_11_one_hot',
 'discrete_addr_state_14_one_hot',
 'discrete_addr_state_15_one_hot',
 'discrete_addr_state_36_one_hot',
 'discrete_addr_state_37_one_hot',
 'discrete_addr_state_3_one_hot',
 'discrete_addr_state_43_one_hot',
 'discrete_addr_state_4_one_hot',
 'discrete_addr_state_9_one_hot',
 'discrete_emp_length_12_one_hot',
 'discrete_emp_length_1_one_hot',
 'discrete_emp_length_7_one_hot',
 'discrete_grade_2_one_hot',
 'discrete_home_owners

In [13]:
# Continuous features selected for pairwise arithmetic combination
# (one name per line; order matters for the generated column order).
continoue_cols = """
continuous_annual_inc
continuous_delinq_2yrs
continuous_dti
continuous_fico_range_high
continuous_fico_range_low
continuous_funded_amnt
continuous_funded_amnt_inv
continuous_inq_last_6mths
continuous_installment
continuous_int_rate
continuous_last_fico_range_high
continuous_last_fico_range_low
continuous_loan_amnt
continuous_mths_since_last_delinq
continuous_mths_since_last_major_derog
continuous_mths_since_last_record
continuous_open_acc
continuous_pub_rec
""".split()

# One-hot (0/1) features selected for pairwise crossing.
discrete_cols = """
discrete_addr_state_11_one_hot
discrete_addr_state_14_one_hot
discrete_addr_state_15_one_hot
discrete_addr_state_36_one_hot
discrete_addr_state_37_one_hot
discrete_addr_state_3_one_hot
discrete_addr_state_43_one_hot
discrete_addr_state_4_one_hot
discrete_addr_state_9_one_hot
discrete_emp_length_12_one_hot
discrete_emp_length_1_one_hot
discrete_emp_length_7_one_hot
discrete_grade_2_one_hot
discrete_home_ownership_1_one_hot
discrete_home_ownership_2_one_hot
discrete_home_ownership_3_one_hot
discrete_purpose_1_one_hot
discrete_purpose_3_one_hot
discrete_purpose_5_one_hot
discrete_sub_grade_18_one_hot
discrete_sub_grade_25_one_hot
discrete_sub_grade_3_one_hot
discrete_term_1_one_hot
discrete_term_2_one_hot
""".split()

In [18]:
def _add_pairwise_numeric_features(df, cols):
    """Return ``df`` extended with product, ratio and difference columns.

    For every unordered pair ``(a, b)`` taken from ``cols`` in list order,
    three columns are appended, in this order: ``new_a*b``, ``new_a/b`` and
    ``new_a-b``. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``cols``.
    cols : list of str
        Numeric columns to combine pairwise.

    Returns
    -------
    pandas.DataFrame
        A new frame: the original columns followed by the generated ones.
    """
    generated = {}
    for pos, left in enumerate(cols):
        for right in cols[pos + 1:]:
            generated['new_' + left + '*' + right] = df[left] * df[right]
            # The denominator is shifted by 1 so a zero in `right` does not
            # divide by zero (a value of -1 would still produce inf).
            generated['new_' + left + '/' + right] = df[left] / (df[right] + 1)
            generated['new_' + left + '-' + right] = df[left] - df[right]
    # Attach all new columns in one concat; inserting hundreds of columns one
    # by one fragments the frame and triggers pandas' PerformanceWarning.
    return pd.concat([df, pd.DataFrame(generated, index=df.index)], axis=1)


train_copy = train.copy()
test_copy = test.copy()
print('原始数据',train_copy.shape, test_copy.shape)

# Continuous features: apply the identical transformation to train and test.
train_copy = _add_pairwise_numeric_features(train_copy, continoue_cols)
test_copy = _add_pairwise_numeric_features(test_copy, continoue_cols)

原始数据 (50000, 146) (50000, 146)


In [19]:
# Inspect the distinct values taken by each selected discrete column.
for col_name in discrete_cols:
    distinct_values = set(train_copy[col_name])
    print(distinct_values)

{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}
{0, 1}


In [21]:
def _cross_binary_features(df, cols, encoding):
    """Return ``df`` extended with a label-encoded cross of every column pair.

    For every unordered pair ``(a, b)`` taken from ``cols`` in list order, the
    string forms of the two values are concatenated (e.g. 1 and 0 -> ``'10'``)
    and mapped through ``encoding``; the result is stored as ``new_a*b``.
    Combinations missing from ``encoding`` become NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``cols``.
    cols : list of str
        Discrete columns to cross pairwise.
    encoding : dict
        Maps the concatenated value string to its integer label.

    Returns
    -------
    pandas.DataFrame
        A new frame: the original columns followed by the crossed ones.
    """
    crossed = {}
    for pos, left in enumerate(cols):
        # Vectorised string conversion; the Series keeps df's index, so the
        # result lines up row for row even on a non-default index.
        left_as_str = df[left].astype(str)
        for right in cols[pos + 1:]:
            pair_code = left_as_str + df[right].astype(str)
            crossed['new_' + left + '*' + right] = pair_code.map(encoding)
    # Drop columns left over from a previous run of this cell so that
    # re-running it replaces them instead of duplicating them.
    stale = [name for name in crossed if name in df.columns]
    base = df.drop(columns=stale) if stale else df
    return pd.concat([base, pd.DataFrame(crossed, index=df.index)], axis=1)


# Discrete features: cross every pair and label-encode the four 0/1 combinations.
label_dict = {'00':1,'01':2,'10':3,'11':4}
train_copy = _cross_binary_features(train_copy, discrete_cols, label_dict)
test_copy = _cross_binary_features(test_copy, discrete_cols, label_dict)

print('生成特征后',train_copy.shape, test_copy.shape)

生成特征后 (50000, 881) (50000, 881)


In [22]:
# Re-run the default-parameter 5-fold CV on the frame that now includes the
# generated features, to see how they affect the losses.
folds = KFold(n_splits=5, shuffle=True, random_state=42)
params = {'objective': 'binary',
          'metric': 'binary_logloss'
          }

loss_train_list = []
loss_eval_list = []
loss_test_list = []

# The test features/labels do not depend on the fold, so split them once.
# test_copy carries the same generated columns as train_copy.
X_test = test_copy.drop(columns=['loan_status'])
y_test = test_copy['loan_status']

for train_ix, eval_ix in folds.split(train_copy):
    # KFold.split yields positional row numbers, so select with .iloc; .loc is
    # label-based and only matches while the frame has a default 0..n-1 index.
    train_df = train_copy.iloc[train_ix]
    eval_df = train_copy.iloc[eval_ix]
    X_train, y_train = train_df.drop(columns=['loan_status']), train_df['loan_status']
    X_eval, y_eval = eval_df.drop(columns=['loan_status']), eval_df['loan_status']

    # `params` supplies objective and metric (previously defined but unused).
    # NOTE(review): `verbose=` in fit() was removed in LightGBM 4.0; on newer
    # versions use callbacks=[lgb.log_evaluation(20)] instead.
    clf = lgb.LGBMClassifier(boosting_type='gbdt',
                             **params
                             ).fit(X_train, y_train,
                                   eval_names=["train", "val"],
                                   eval_set=[(X_train, y_train), (X_eval, y_eval)],
                                   eval_metric='binary_logloss',
                                   verbose=20)

    # Column 1 of predict_proba is the probability of the positive class.
    ytrain = clf.predict_proba(X_train)[:, 1]
    yeval = clf.predict_proba(X_eval)[:, 1]
    ytest = clf.predict_proba(X_test)[:, 1]

    loss_train = metrics.log_loss(y_train, ytrain)
    loss_eval = metrics.log_loss(y_eval, yeval)
    loss_test = metrics.log_loss(y_test, ytest)
    print('train logloss',loss_train, 'eval logloss',loss_eval, 'test logloss',loss_test )

    loss_train_list.append(loss_train)
    loss_eval_list.append(loss_eval)
    loss_test_list.append(loss_test)

    # Hard labels for the per-class precision/recall reports.
    ytrainL = getLabel(ytrain)
    yevalL = getLabel(yeval)
    ytestL = getLabel(ytest)
    print("train classification report")
    print(metrics.classification_report(y_train, ytrainL))
    print("**********")
    print("val classification report")
    print(metrics.classification_report(y_eval, yevalL))
    print("**********")
    print("target classification report")
    # Labels come from test_copy, the same frame the predictions were made on.
    print(metrics.classification_report(y_test, ytestL))
    print("**************************************")

[20]	train's binary_logloss: 0.197769	val's binary_logloss: 0.212984
[40]	train's binary_logloss: 0.169381	val's binary_logloss: 0.199744
[60]	train's binary_logloss: 0.155281	val's binary_logloss: 0.199561
[80]	train's binary_logloss: 0.144669	val's binary_logloss: 0.20017
[100]	train's binary_logloss: 0.135981	val's binary_logloss: 0.201093
train logloss 0.13598108765305827 eval logloss 0.20109302846309723 test logloss 0.1998971445069125
train classification report
              precision    recall  f1-score   support

           0       0.86      0.87      0.86      8149
           1       0.97      0.96      0.96     31851

    accuracy                           0.94     40000
   macro avg       0.91      0.91      0.91     40000
weighted avg       0.94      0.94      0.94     40000

**********
val classification report
              precision    recall  f1-score   support

           0       0.79      0.82      0.80      2063
           1       0.95      0.94      0.95      7937



In [23]:
print('avg train loss', np.mean(loss_train_list),'avg eval loss', 
      np.mean(loss_eval_list),'avg test loss', np.mean(loss_test_list))

avg train loss 0.13670733000129073 avg eval loss 0.19967843736832624 avg test loss 0.20000226782533354


In [None]:
# Tuned 5-fold CV on the frame with generated features. Each fold prints its
# gain-based feature importances so the generated features can be compared
# with the original ones; top-30 names per fold are accumulated in `key_cols`.
folds = KFold(n_splits=5, shuffle=True, random_state=42)
loss_train_list = []
loss_eval_list = []
loss_test_list = []
key_cols = []

# The test features/labels do not depend on the fold, so split them once.
# test_copy carries the same generated columns as train_copy.
X_test = test_copy.drop(columns=['loan_status'])
y_test = test_copy['loan_status']

for train_ix, eval_ix in folds.split(train_copy):
    # KFold.split yields positional row numbers, so select with .iloc; .loc is
    # label-based and only matches while the frame has a default 0..n-1 index.
    train_df = train_copy.iloc[train_ix]
    eval_df = train_copy.iloc[eval_ix]
    X_train, y_train = train_df.drop(columns=['loan_status']), train_df['loan_status']
    X_eval, y_eval = eval_df.drop(columns=['loan_status']), eval_df['loan_status']

    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
    # (default 0), so subsample=0.65 is probably inert here - confirm.
    # NOTE(review): `verbose=` in fit() was removed in LightGBM 4.0; on newer
    # versions use callbacks=[lgb.log_evaluation(20)] instead.
    clf = lgb.LGBMClassifier(boosting_type='gbdt',
                             objective="binary",
                             metric='binary_logloss',
                             colsample_bytree=0.7,
                             learning_rate=0.02,
                             n_estimators=300,
                             num_leaves=20,
                             subsample=0.65,
                             importance_type='gain'
                             ).fit(X_train, y_train,
                                   eval_names=["train", "val"],
                                   eval_set=[(X_train, y_train), (X_eval, y_eval)],
                                   eval_metric='binary_logloss',
                                   verbose=20)

    # Column 1 of predict_proba is the probability of the positive class.
    ytrain = clf.predict_proba(X_train)[:, 1]
    yeval = clf.predict_proba(X_eval)[:, 1]
    ytest = clf.predict_proba(X_test)[:, 1]

    loss_train = metrics.log_loss(y_train, ytrain)
    loss_eval = metrics.log_loss(y_eval, yeval)
    # Labels come from test_copy, the same frame the predictions were made on.
    loss_test = metrics.log_loss(y_test, ytest)
    print('train logloss',loss_train, 'eval logloss',loss_eval, 'test logloss',loss_test )

    loss_train_list.append(loss_train)
    loss_eval_list.append(loss_eval)
    loss_test_list.append(loss_test)

    # Hard labels for the per-class precision/recall reports.
    ytrainL = getLabel(ytrain)
    yevalL = getLabel(yeval)
    ytestL = getLabel(ytest)
    print("train classification report")
    print(metrics.classification_report(y_train, ytrainL))
    print("**********")
    print("val classification report")
    print(metrics.classification_report(y_eval, yevalL))
    print("**********")
    print("target classification report")
    print(metrics.classification_report(y_test, ytestL))
    print("**************************************")

    # Rank features by importance. sort_values replaces sort_index(by=...),
    # whose `by` keyword was removed in pandas 1.0.
    feature_importance = pd.DataFrame(
        {'feature_name': clf.booster_.feature_name(),
         'importance': clf.feature_importances_}
    ).sort_values(by='importance', ascending=False)
    print(feature_importance[:30])
    # Remember this fold's 30 most important features.
    key_cols.extend(feature_importance[:30]['feature_name'])

[20]	train's binary_logloss: 0.349334	val's binary_logloss: 0.352442
[40]	train's binary_logloss: 0.279204	val's binary_logloss: 0.283373
[60]	train's binary_logloss: 0.240793	val's binary_logloss: 0.246453
[80]	train's binary_logloss: 0.218335	val's binary_logloss: 0.2256
[100]	train's binary_logloss: 0.204663	val's binary_logloss: 0.213744
[120]	train's binary_logloss: 0.19599	val's binary_logloss: 0.206881
[140]	train's binary_logloss: 0.190136	val's binary_logloss: 0.202907
[160]	train's binary_logloss: 0.185921	val's binary_logloss: 0.200687
[180]	train's binary_logloss: 0.182574	val's binary_logloss: 0.199592
[200]	train's binary_logloss: 0.179775	val's binary_logloss: 0.1989
[220]	train's binary_logloss: 0.177254	val's binary_logloss: 0.198387
[240]	train's binary_logloss: 0.174889	val's binary_logloss: 0.198275
[260]	train's binary_logloss: 0.172839	val's binary_logloss: 0.198185
[280]	train's binary_logloss: 0.170959	val's binary_logloss: 0.198096
[300]	train's binary_logloss:



[20]	train's binary_logloss: 0.348956	val's binary_logloss: 0.355489
[40]	train's binary_logloss: 0.278844	val's binary_logloss: 0.286251
[60]	train's binary_logloss: 0.24058	val's binary_logloss: 0.248861
[80]	train's binary_logloss: 0.218201	val's binary_logloss: 0.227333
[100]	train's binary_logloss: 0.204564	val's binary_logloss: 0.214869
[120]	train's binary_logloss: 0.19589	val's binary_logloss: 0.207554
[140]	train's binary_logloss: 0.190047	val's binary_logloss: 0.203123
[160]	train's binary_logloss: 0.185872	val's binary_logloss: 0.200531
[180]	train's binary_logloss: 0.182624	val's binary_logloss: 0.199008
[200]	train's binary_logloss: 0.179868	val's binary_logloss: 0.198302
[220]	train's binary_logloss: 0.177316	val's binary_logloss: 0.197889
[240]	train's binary_logloss: 0.175049	val's binary_logloss: 0.197522
[260]	train's binary_logloss: 0.173003	val's binary_logloss: 0.197353
[280]	train's binary_logloss: 0.171155	val's binary_logloss: 0.197164
[300]	train's binary_loglo



[20]	train's binary_logloss: 0.349509	val's binary_logloss: 0.353045
[40]	train's binary_logloss: 0.278917	val's binary_logloss: 0.285577
[60]	train's binary_logloss: 0.240389	val's binary_logloss: 0.249407
[80]	train's binary_logloss: 0.217901	val's binary_logloss: 0.229077
[100]	train's binary_logloss: 0.204242	val's binary_logloss: 0.217341
[120]	train's binary_logloss: 0.195607	val's binary_logloss: 0.210605
[140]	train's binary_logloss: 0.189824	val's binary_logloss: 0.206552
[160]	train's binary_logloss: 0.185688	val's binary_logloss: 0.204259
[180]	train's binary_logloss: 0.182457	val's binary_logloss: 0.202963
[200]	train's binary_logloss: 0.179729	val's binary_logloss: 0.202127
[220]	train's binary_logloss: 0.177314	val's binary_logloss: 0.201612
[240]	train's binary_logloss: 0.17518	val's binary_logloss: 0.201293
[260]	train's binary_logloss: 0.173099	val's binary_logloss: 0.201156
[280]	train's binary_logloss: 0.171231	val's binary_logloss: 0.201077
[300]	train's binary_logl



[20]	train's binary_logloss: 0.350646	val's binary_logloss: 0.350176
[40]	train's binary_logloss: 0.280517	val's binary_logloss: 0.280547
[60]	train's binary_logloss: 0.242237	val's binary_logloss: 0.243117
[80]	train's binary_logloss: 0.219892	val's binary_logloss: 0.22178
[100]	train's binary_logloss: 0.206402	val's binary_logloss: 0.209339
[120]	train's binary_logloss: 0.197851	val's binary_logloss: 0.202034
[140]	train's binary_logloss: 0.192028	val's binary_logloss: 0.197506
[160]	train's binary_logloss: 0.187863	val's binary_logloss: 0.194945
[180]	train's binary_logloss: 0.184581	val's binary_logloss: 0.193504
[200]	train's binary_logloss: 0.181834	val's binary_logloss: 0.192575
[220]	train's binary_logloss: 0.179358	val's binary_logloss: 0.192077
[240]	train's binary_logloss: 0.177125	val's binary_logloss: 0.191775
[260]	train's binary_logloss: 0.175097	val's binary_logloss: 0.191596
[280]	train's binary_logloss: 0.173121	val's binary_logloss: 0.19158
[300]	train's binary_loglo



[20]	train's binary_logloss: 0.351115	val's binary_logloss: 0.347911
[40]	train's binary_logloss: 0.280657	val's binary_logloss: 0.28013
[60]	train's binary_logloss: 0.242141	val's binary_logloss: 0.243631
[80]	train's binary_logloss: 0.219629	val's binary_logloss: 0.222899
[100]	train's binary_logloss: 0.205947	val's binary_logloss: 0.210958
[120]	train's binary_logloss: 0.197223	val's binary_logloss: 0.204197
[140]	train's binary_logloss: 0.191391	val's binary_logloss: 0.200207


In [None]:
# Keep the new-feature run's fold-averaged losses for the final comparison.
newfeature_train_avgloss = np.mean(loss_train_list)
newfeature_eval_avgloss = np.mean(loss_eval_list)
newfeature_test_avgloss = np.mean(loss_test_list)

print('avg train loss', newfeature_train_avgloss, 'avg eval loss',
      newfeature_eval_avgloss, 'avg test loss', newfeature_test_avgloss)

## 结果对比

In [None]:
# Loss comparison: signed change in fold-averaged log-loss, computed as
# (new-feature run) - (tuned baseline). A negative value means the generated
# features lowered the loss. The previous np.linalg.norm of a scalar is just
# the absolute value, which hid whether the loss went up or down.
# NOTE(review): the two runs also differ in num_leaves and subsample, so the
# delta is not purely a feature effect.

print('train loss 差异：', newfeature_train_avgloss - baseline_train_avgloss)
print('eval loss 差异：', newfeature_eval_avgloss - baseline_eval_avgloss)
print('test loss 差异：', newfeature_test_avgloss - baseline_test_avgloss)