In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import lightgbm as lgb

In [2]:
def auc_select(X_train, y_train, X_valid, y_valid, cols, threshold=0.52):
    """
    Single-feature screening based on validation AUC.

    For every name in ``cols`` a LightGBM binary classifier is trained on that
    one column only (up to 1000 rounds, early stopping after 50 rounds) and
    its best AUC on the validation split is recorded. Features whose AUC is
    strictly greater than ``threshold`` are reported as useful, all others as
    useless.

    @param X_train: training feature frame; indexed as ``X_train[[name]]``.
    @param y_train: training labels passed to ``lgb.Dataset``.
    @param X_valid: validation feature frame; indexed as ``X_valid[[name]]``.
    @param y_valid: validation labels passed to ``lgb.Dataset``.
    @param cols: iterable of feature names to evaluate one at a time.
    @param threshold: AUC cutoff; a feature must exceed it to count as useful.
    @return: tuple ``(useful_dict, useless_dict, useful_cols, useless_cols)``
             where the dicts map feature name -> best validation AUC and the
             lists hold the corresponding keys in evaluation order.
    """
    lgb_params = dict(
        objective='binary',
        boosting='gbdt',
        metric='auc',
        learning_rate=0.1,
        num_leaves=31,
        lambda_l1=0,
        lambda_l2=1,
        num_threads=23,
        min_data_in_leaf=20,
        first_metric_only=True,
        is_unbalance=True,
        max_depth=-1,
        seed=2020,
    )
    useful_dict, useless_dict = {}, {}

    for feature in cols:
        print(feature)
        # Each dataset holds exactly one column so the score is attributable
        # to this feature alone.
        single_train = lgb.Dataset(X_train[[feature]].values, y_train)
        single_valid = lgb.Dataset(
            X_valid[[feature]].values, y_valid, reference=single_train
        )
        booster = lgb.train(
            lgb_params,
            single_train,
            # The validation set is listed first, so it is reported as
            # 'valid_0' in best_score.
            valid_sets=[single_valid, single_train],
            num_boost_round=1000,
            early_stopping_rounds=50,
            verbose_eval=500
        )
        print('*' * 10)
        valid_auc = booster.best_score['valid_0']['auc']
        print(valid_auc)

        target = useful_dict if valid_auc > threshold else useless_dict
        target[feature] = valid_auc

    return useful_dict, useless_dict, list(useful_dict), list(useless_dict)


def correlation(df, useful_dict, threshold=0.98):
    """
    Choose one feature to drop from every highly correlated feature pair.

    For each pair of columns whose absolute pairwise correlation (as computed
    by ``DataFrame.corr``) is strictly greater than ``threshold``, the column
    with the lower score in ``useful_dict`` is marked for removal; on a tie
    the column that appears earlier in ``df`` is marked. Every pair is
    examined, including pairs in which one column is already marked.

    Columns of ``df`` that have no entry in ``useful_dict`` cannot be ranked,
    so they are left out of the comparison instead of raising ``KeyError``.

    @param df: DataFrame holding the candidate feature columns.
    @param useful_dict: mapping of column name -> score (higher is better).
    @param threshold: absolute-correlation cutoff; pairs above it are pruned.
    @return: set of column names to drop.
    """
    col_corr = set()

    # Only scored columns can take part in the "keep the better one" decision.
    scored_cols = [name for name in df.columns if name in useful_dict]
    if len(scored_cols) < 2:
        return col_corr

    corr_matrix = df[scored_cols].corr()
    names = list(corr_matrix.columns)
    # Plain NumPy lookups avoid the per-element overhead of DataFrame.iloc in
    # the O(n^2) pair loop. NaN correlations compare False and are skipped.
    abs_corr = corr_matrix.abs().to_numpy()

    for i in range(len(names)):
        for j in range(i):
            if abs_corr[i, j] > threshold:
                later, earlier = names[i], names[j]
                if useful_dict[later] >= useful_dict[earlier]:
                    col_corr.add(earlier)
                else:
                    col_corr.add(later)
    return col_corr

In [3]:
# Hard-coded column lists that are removed from the training frame before
# modelling. NOTE(review): the names suggest these came from a PSI (population
# stability index) check — confirm against the notebook that produced them.
psi_drop = [
    'tlsVersion_map_count',
    'tlsVersion_bytesOut_max',
    'tlsVersion_bytesOut_median',
    'tlsVersion_bytesOut_mean',
    'tlsVersion_bytesOut_std',
    'tlsVersion_bytesOut_max_min',
    'tlsVersion_bytesOut_quantile_25',
    'tlsVersion_bytesOut_quantile_75',
    'tlsVersion_bytesIn_median',
    'tlsVersion_bytesIn_mean',
    'tlsVersion_bytesIn_std',
    'tlsVersion_bytesIn_quantile_25',
    'tlsVersion_bytesIn_quantile_75',
    'tlsVersion_pktsIn_median',
    'tlsVersion_pktsIn_mean',
    'tlsVersion_pktsIn_std',
    'tlsVersion_pktsIn_quantile_75',
    'tlsVersion_pktsOut_max',
    'tlsVersion_pktsOut_mean',
    'tlsVersion_pktsOut_sum',
    'tlsVersion_pktsOut_skew',
    'tlsVersion_pktsOut_std',
    'tlsVersion_pktsOut_max_min',
    'tlsIssuerDn_bytesIn_mean',
    'tlsVersion_map_bytesOut_skew',
    'tlsVersion_map_bytesOut_std',
    'tlsVersion_map_bytesIn_median',
    'tlsVersion_map_bytesIn_std',
    'tlsVersion_map_pktsIn_skew',
    'tlsVersion_map_pktsIn_std',
    'tlsVersion_map_pktsIn_max_min',
    'tlsVersion_map_pktsOut_max',
    'tlsVersion_map_pktsOut_median',
    'tlsVersion_map_pktsOut_skew',
]

# Second list; it contains every entry of psi_drop plus the raw identifier
# columns (addresses, ports, TLS fields).
psi_error = [
    'tlsVersion_pktsIn_median',
    'destAddress',
    'tlsVersion_map_bytesIn_std',
    'tlsVersion_bytesIn_quantile_25',
    'tlsVersion_map_pktsIn_std',
    'tlsSni',
    'tlsVersion_bytesOut_max_min',
    'tlsVersion_bytesIn_median',
    'tlsVersion_map_count',
    'tlsVersion_pktsIn_mean',
    'tlsVersion_pktsOut_max_min',
    'tlsVersion_pktsIn_quantile_75',
    'tlsVersion_pktsOut_std',
    'tlsVersion_map_bytesOut_std',
    'tlsVersion_map_bytesIn_median',
    'tlsVersion_bytesIn_quantile_75',
    'tlsVersion_bytesOut_quantile_25',
    'tlsIssuerDn',
    'tlsVersion',
    'tlsIssuerDn_bytesIn_mean',
    'tlsSubject',
    'tlsVersion_bytesOut_std',
    'tlsVersion_map_pktsOut_skew',
    'tlsVersion_map_pktsIn_skew',
    'tlsVersion_pktsOut_max',
    'tlsVersion_pktsOut_mean',
    'tlsVersion_bytesOut_max',
    'tlsVersion_bytesOut_quantile_75',
    'tlsVersion_pktsOut_sum',
    'tlsVersion_bytesIn_mean',
    'tlsVersion_map_bytesOut_skew',
    'srcAddress',
    'tlsVersion_map_pktsOut_max',
    'tlsVersion_pktsOut_skew',
    'tlsVersion_bytesIn_std',
    'tlsVersion_bytesOut_mean',
    'tlsVersion_pktsIn_std',
    'tlsVersion_bytesOut_median',
    'tlsVersion_map_pktsIn_max_min',
    'destAddressPort',
    'srcAddressPort',
    'tlsVersion_map_pktsOut_median',
]

# Plain concatenation: names present in both lists appear twice in to_drop.
to_drop = [*psi_drop, *psi_error]

In [4]:
# Load the train and test feature tables from the working directory.
train = pd.read_csv(filepath_or_buffer='fe_train.csv')
test = pd.read_csv(filepath_or_buffer='fe_test_1.csv')

# Submission frame; its 'label' column is filled in after the final model
# has scored the test set.
sub = pd.read_csv(filepath_or_buffer='sub.csv')

In [5]:
# Remove the listed columns from the training frame in place, then detach the
# target column so that `train` holds features only and `y` holds the labels.
train.drop(columns=to_drop, inplace=True)
y = train.pop('label')

In [6]:
# Restrict the test frame to the columns that remain in train, in the same
# order, so both frames share one feature layout.
cols = list(train.columns)
test = test[cols]

In [7]:
# Feature selection and LightGBM training pipeline:
#   1. hold out a validation split
#   2. screen features one by one on validation AUC
#   3. prune highly correlated features
#   4. train a model, drop the low-importance tail, retrain
#   5. refit on all training rows and write the submission file


def _best_f1_threshold(y_true, scores):
    """Return ``(cutoff, f1)`` for the cutoff in [0.1, 1) (step 0.01) with the best F1.

    The first cutoff that reaches the maximum wins. The search starts below
    any reachable F1 so a cutoff is always returned, even when every
    candidate scores F1 == 0; this avoids an unbound (or stale, on a second
    search) threshold.
    """
    best_cutoff, best_f1 = None, -1.0
    for cutoff in np.arange(0.1, 1, 0.01):
        f1 = np.round(f1_score(y_true, np.where(scores > cutoff, 1, 0)), 5)
        if f1 > best_f1:
            best_cutoff, best_f1 = cutoff, f1
    return best_cutoff, best_f1


def _report_validation(y_true, scores, cutoff):
    """Print the cutoff, the validation F1 and the predicted positive rate; return the hard labels."""
    print('threshold: ', cutoff)
    hard_labels = np.where(scores > cutoff, 1, 0)
    print('Valid F1: ', np.round(f1_score(y_true, hard_labels), 5))
    print('Valid mean label: ', np.mean(hard_labels))
    return hard_labels


# --- 1. Hold out 25% of the rows for validation (fixed seed). ---------------
X_train, X_valid, y_train, y_valid = train_test_split(train, y, test_size=0.25, random_state=2020)

print('y_train mean: ', y_train.mean())
print('y_valid mean: ', y_valid.mean())

# --- 2. Single-feature AUC screening. ---------------------------------------
used_cols = [i for i in cols if i not in ['eventId', 'label']]

useful_dict, useless_dict, useful_cols, useless_cols = auc_select(X_train, y_train, X_valid, y_valid, used_cols, threshold=0.52)
print('AUC drop features: \n', useless_cols)

train = train[useful_cols]
X_train = X_train[useful_cols]
X_valid = X_valid[useful_cols]
test = test[useful_cols]

# --- 3. Correlation pruning (computed on the training split only). ----------
col_corr = correlation(X_train, useful_dict, threshold=0.98)
print('Correlation drop features: \n', col_corr)

# Non-inplace drops: these frames were produced by column selection, and an
# in-place drop on such a frame can emit SettingWithCopy warnings.
corr_drop_cols = list(col_corr)
train = train.drop(columns=corr_drop_cols)
X_train = X_train.drop(columns=corr_drop_cols)
X_valid = X_valid.drop(columns=corr_drop_cols)
test = test.drop(columns=corr_drop_cols)

used_cols = X_train.columns.to_list()

# --- 4a. First model: used to rank features by gain. ------------------------
train_dataset = lgb.Dataset(X_train, y_train)
valid_dataset = lgb.Dataset(X_valid, y_valid, reference=train_dataset)

params = {
    'objective': 'binary',
    'boosting': 'gbdt',
    'metric': 'auc',
    'learning_rate': 0.1,
    'num_leaves': 31,
    'lambda_l1': 0,
    'lambda_l2': 1,
    'num_threads': 23,
    'min_data_in_leaf': 20,
    'first_metric_only': True,
    'is_unbalance': True,
    'max_depth': -1,
    'seed': 2020
}
# NOTE(review): `early_stopping_rounds` / `verbose_eval` are accepted by
# lgb.train in the LightGBM release this notebook was run with; newer
# releases are believed to expose them as callbacks instead — confirm the
# installed version before upgrading.
valid_model = lgb.train(
    params,
    train_dataset,
    valid_sets=[train_dataset, valid_dataset],
    early_stopping_rounds=200,
    num_boost_round=100000,
    verbose_eval=300
)
pred = valid_model.predict(X_valid)

threshold, f1_best = _best_f1_threshold(y_valid, pred)
y_valid_pred = _report_validation(y_valid, pred, threshold)

# --- 4b. Export feature importance and drop the low-importance tail. --------
importance = valid_model.feature_importance(importance_type='gain')
feature_name = valid_model.feature_name()

df_importance = pd.DataFrame({
    'feature_name': feature_name,
    'importance': importance
}).sort_values(by='importance', ascending=False)

df_importance['normalized_importance'] = df_importance['importance'] / df_importance['importance'].sum()
df_importance['cumulative_importance'] = np.cumsum(df_importance['normalized_importance'])
# Everything past the point where cumulative gain exceeds 99% is discarded.
record_low_importance = df_importance[df_importance['cumulative_importance'] > 0.99]
# Kept under its own name so the `to_drop` list built earlier in the notebook
# is not overwritten by this cell.
low_importance_cols = list(record_low_importance['feature_name'])
print('to drop: \n', low_importance_cols)
print('len(to_drop): \n', len(low_importance_cols))
# df_importance.to_csv('imp.csv', index=False)

print('Before train.shape', train.shape)
train = train.drop(columns=low_importance_cols)
X_train = X_train.drop(columns=low_importance_cols)
X_valid = X_valid.drop(columns=low_importance_cols)
test = test.drop(columns=low_importance_cols)
print('After train.shape', train.shape)

# --- 4c. Second model on the reduced feature set. ---------------------------
train_dataset = lgb.Dataset(X_train, y_train)
valid_dataset = lgb.Dataset(X_valid, y_valid, reference=train_dataset)
all_dataset = lgb.Dataset(train, y, reference=train_dataset)

valid_model_2 = lgb.train(
    params,
    train_dataset,
    valid_sets=[train_dataset, valid_dataset],
    early_stopping_rounds=200,
    num_boost_round=100000,
    verbose_eval=300
)
pred_2 = valid_model_2.predict(X_valid)

threshold, f1_best = _best_f1_threshold(y_valid, pred_2)
y_valid_pred = _report_validation(y_valid, pred_2, threshold)


# --- 5. Refit on all rows for the round count found above, then predict. ----
train_model = lgb.train(
    params,
    all_dataset,
    num_boost_round=valid_model_2.best_iteration
)
# The cutoff tuned on the validation split is reused for the test set.
y_test_pred = np.where(train_model.predict(test) > threshold, 1, 0)

print('Test mean label: ', np.mean(y_test_pred))

sub['label'] = y_test_pred
sub.to_csv('机器不学习原子弹也不学习_eta_submission_1028.csv', index=False)

y_train mean:  0.09
y_valid mean:  0.09363636363636364
srcPort
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[4]	training's auc: 0.930248	valid_0's auc: 0.935111
Evaluated only: auc
**********
0.9351113534779094
destPort
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[118]	training's auc: 0.644376	valid_0's auc: 0.624224
Evaluated only: auc
**********
0.6242243234558043
bytesOut
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[31]	training's auc: 0.921089	valid_0's auc: 0.911637
Evaluated only: auc
**********
0.911637241822555
bytesIn
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[10]	training's auc: 0.853758	valid_0's auc: 0.857264
Evaluated only: auc
**********
0.8572644146030324
pktsIn
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[14]	trai

Early stopping, best iteration is:
[155]	training's auc: 0.90751	valid_0's auc: 0.903204
Evaluated only: auc
**********
0.9032037861156285
srcAddress_bytesOut_quantile_25
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[122]	training's auc: 0.950159	valid_0's auc: 0.929296
Evaluated only: auc
**********
0.9292958487111821
srcAddress_bytesOut_quantile_75
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[87]	training's auc: 0.924233	valid_0's auc: 0.913402
Evaluated only: auc
**********
0.9134017586740805
srcAddress_bytesIn_count
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[412]	training's auc: 0.868121	valid_0's auc: 0.889393
Evaluated only: auc
**********
0.8893926439512713
srcAddress_bytesIn_max
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[157]	training's auc: 0.860396	valid_0's auc: 0.851

Early stopping, best iteration is:
[54]	training's auc: 0.881417	valid_0's auc: 0.87352
Evaluated only: auc
**********
0.8735197826489176
srcAddress_pktsOut_quantile_75
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[153]	training's auc: 0.867168	valid_0's auc: 0.852827
Evaluated only: auc
**********
0.8528270247636113
destAddress_bytesOut_count
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[27]	training's auc: 0.877894	valid_0's auc: 0.893326
Evaluated only: auc
**********
0.8933261921687392
destAddress_bytesOut_max
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[178]	training's auc: 0.915689	valid_0's auc: 0.915864
Evaluated only: auc
**********
0.9158644866638751
destAddress_bytesOut_min
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[34]	training's auc: 0.956225	valid_0's auc: 0.960089
Ev

Early stopping, best iteration is:
[36]	training's auc: 0.902541	valid_0's auc: 0.895559
Evaluated only: auc
**********
0.8955589097389255
destAddress_pktsIn_quantile_75
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[78]	training's auc: 0.87264	valid_0's auc: 0.857052
Evaluated only: auc
**********
0.857052127255553
destAddress_pktsOut_count
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[27]	training's auc: 0.877894	valid_0's auc: 0.893326
Evaluated only: auc
**********
0.8933261921687392
destAddress_pktsOut_max
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[38]	training's auc: 0.851028	valid_0's auc: 0.854588
Evaluated only: auc
**********
0.854587646434449
destAddress_pktsOut_min
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[9]	training's auc: 0.868489	valid_0's auc: 0.871158
Evaluated 

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[37]	training's auc: 0.746003	valid_0's auc: 0.732783
Evaluated only: auc
**********
0.732783398739909
tlsVersion_pktsOut_quantile_75
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[37]	training's auc: 0.746003	valid_0's auc: 0.732783
Evaluated only: auc
**********
0.732783398739909
tlsSubject_bytesOut_count
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	training's auc: 0.971016	valid_0's auc: 0.971879
Evaluated only: auc
**********
0.9718789377842265
tlsSubject_bytesOut_max
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[232]	training's auc: 0.98296	valid_0's auc: 0.975857
Evaluated only: auc
**********
0.9758572805796029
tlsSubject_bytesOut_min
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7

Early stopping, best iteration is:
[53]	training's auc: 0.956706	valid_0's auc: 0.953511
Evaluated only: auc
**********
0.9535110184923703
tlsSubject_pktsOut_count
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	training's auc: 0.971016	valid_0's auc: 0.971879
Evaluated only: auc
**********
0.9718789377842265
tlsSubject_pktsOut_max
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[50]	training's auc: 0.973811	valid_0's auc: 0.966229
Evaluated only: auc
**********
0.9662285886786574
tlsSubject_pktsOut_min
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	training's auc: 0.892605	valid_0's auc: 0.894097
Evaluated only: auc
**********
0.8940968536677996
tlsSubject_pktsOut_median
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[23]	training's auc: 0.941159	valid_0's auc: 0.940028
Evaluated only: 

Early stopping, best iteration is:
[177]	training's auc: 0.989439	valid_0's auc: 0.981886
Evaluated only: auc
**********
0.9818856569709128
tlsIssuerDn_pktsIn_min
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[51]	training's auc: 0.877774	valid_0's auc: 0.87531
Evaluated only: auc
**********
0.8753098129339475
tlsIssuerDn_pktsIn_median
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[329]	training's auc: 0.973004	valid_0's auc: 0.965044
Evaluated only: auc
**********
0.9650438694724952
tlsIssuerDn_pktsIn_mean
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[172]	training's auc: 0.989875	valid_0's auc: 0.981276
Evaluated only: auc
**********
0.9812756716752198
tlsIssuerDn_pktsIn_sum
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[15]	training's auc: 0.990556	valid_0's auc: 0.987914
Evaluated onl

Early stopping, best iteration is:
[54]	training's auc: 0.943202	valid_0's auc: 0.939261
Evaluated only: auc
**********
0.9392612789825788
tlsSni_bytesIn_median
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[97]	training's auc: 0.943136	valid_0's auc: 0.940183
Evaluated only: auc
**********
0.9401828787332872
tlsSni_bytesIn_mean
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[85]	training's auc: 0.935363	valid_0's auc: 0.93208
Evaluated only: auc
**********
0.9320795395896427
tlsSni_bytesIn_sum
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[129]	training's auc: 0.928078	valid_0's auc: 0.935871
Evaluated only: auc
**********
0.935871497989113
tlsSni_bytesIn_skew
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[156]	training's auc: 0.901215	valid_0's auc: 0.910733
Evaluated only: auc
**********

Early stopping, best iteration is:
[1]	training's auc: 0.505352	valid_0's auc: 0.506018
Evaluated only: auc
**********
0.5060180541624875
tlsVersion_map_bytesOut_mean
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's auc: 0.505352	valid_0's auc: 0.506018
Evaluated only: auc
**********
0.5060180541624875
tlsVersion_map_bytesOut_sum
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's auc: 0.505352	valid_0's auc: 0.506018
Evaluated only: auc
**********
0.5060180541624875
tlsVersion_map_bytesOut_nunique
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's auc: 0.505352	valid_0's auc: 0.506018
Evaluated only: auc
**********
0.5060180541624875
tlsVersion_map_bytesOut_max_min
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's auc: 0.505352	valid_0's auc: 0.50

Early stopping, best iteration is:
[31]	training's auc: 0.921089	valid_0's auc: 0.911637
Evaluated only: auc
**********
0.911637241822555
srcAddressPort_bytesOut_min
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[31]	training's auc: 0.921089	valid_0's auc: 0.911637
Evaluated only: auc
**********
0.911637241822555
srcAddressPort_bytesOut_median
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[31]	training's auc: 0.921089	valid_0's auc: 0.911637
Evaluated only: auc
**********
0.911637241822555
srcAddressPort_bytesOut_mean
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[31]	training's auc: 0.921089	valid_0's auc: 0.911637
Evaluated only: auc
**********
0.911637241822555
srcAddressPort_bytesOut_sum
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[45]	training's auc: 0.924585	valid_0's auc: 0.915732

Early stopping, best iteration is:
[5]	training's auc: 0.796766	valid_0's auc: 0.788401
Evaluated only: auc
**********
0.78840073618915
srcAddressPort_pktsOut_median
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[5]	training's auc: 0.796766	valid_0's auc: 0.788401
Evaluated only: auc
**********
0.78840073618915
srcAddressPort_pktsOut_mean
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[5]	training's auc: 0.796766	valid_0's auc: 0.788401
Evaluated only: auc
**********
0.78840073618915
srcAddressPort_pktsOut_sum
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	training's auc: 0.801246	valid_0's auc: 0.8015
Evaluated only: auc
**********
0.8014996445647622
srcAddressPort_pktsOut_skew
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	training's auc: 0.5	valid_0's auc: 0.5
Evaluated only: auc
*

Early stopping, best iteration is:
[9]	training's auc: 0.897247	valid_0's auc: 0.897397
Evaluated only: auc
**********
0.8973972402644828
destAddressPort_pktsIn_median
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[6]	training's auc: 0.874655	valid_0's auc: 0.866191
Evaluated only: auc
**********
0.8661913897030898
destAddressPort_pktsIn_mean
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[99]	training's auc: 0.927657	valid_0's auc: 0.919214
Evaluated only: auc
**********
0.9192135630191546
destAddressPort_pktsIn_sum
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[107]	training's auc: 0.92678	valid_0's auc: 0.918223
Evaluated only: auc
**********
0.9182232133293083
destAddressPort_pktsIn_skew
Training until validation scores don't improve for 50 rounds
[500]	training's auc: 0.910042	valid_0's auc: 0.919727
Early stopping, best iteration is:

Early stopping, best iteration is:
[23]	training's auc: 0.857502	valid_0's auc: 0.838428
Evaluated only: auc
**********
0.8384275155563778
pktsIn_pktsOut_add
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[18]	training's auc: 0.855708	valid_0's auc: 0.843564
Evaluated only: auc
**********
0.8435644798473089
pktsIn_pktsOut_subtract
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[294]	training's auc: 0.878804	valid_0's auc: 0.873059
Evaluated only: auc
**********
0.8730593722916322
pktsIn_pktsOutc_multiply
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[25]	training's auc: 0.856834	valid_0's auc: 0.842748
Evaluated only: auc
**********
0.8427478552161338
bytesOut_bytesIn_ratio
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[45]	training's auc: 0.84788	valid_0's auc: 0.841905
Evaluated only: auc


Training until validation scores don't improve for 200 rounds
[300]	training's auc: 1	valid_1's auc: 0.999965
Early stopping, best iteration is:
[288]	training's auc: 1	valid_1's auc: 0.999966
Evaluated only: auc
threshold:  0.13
Valid F1:  0.9913
Valid mean label:  0.09454545454545454
to drop: 
 ['destAddress_pktsOut_nunique', 'tlsSni_bytesIn_skew', 'tlsSubject_bytesIn_skew', 'bytesIn_pktsIn_ratio', 'srcAddress_pktsOut_max', 'srcAddress_bytesIn_median', 'tlsSni_bytesIn_min', 'srcAddress_bytesIn_max', 'tlsIssuerDn_pktsIn_median', 'tlsIssuerDn_bytesOut_skew', 'srcAddress_pktsOut_min', 'tlsSni_bytesOut_mean', 'destPort', 'tlsIssuerDn_bytesOut_median', 'srcAddress_pktsIn_max', 'destAddressPort_bytesIn_skew', 'tlsIssuerDn_pktsOut_mean', 'tlsSubject_bytesIn_median', 'tlsSubject_bytesOut_min', 'tlsSni_bytesOut_skew', 'pktsOut_bytesOut_ratio', 'destAddress_pktsIn_mean', 'srcAddress_gp_destAddress_nunique_rate', 'destAddress_bytesOut_median', 'tlsSubject_pktsIn_skew', 'tlsSni_pktsIn_max_min', 