# neptune.yaml (130 lines, 115 loc, 3.01 KB)
# Note: the repository this file comes from was archived by its owner on
# Jun 22, 2022 and is now read-only.
# Experiment configuration for the home-credit-default-risk project.
# Top-level keys: project, name, tags, metric, exclude, parameters.
# ORGANIZATION and every YOUR/PATH/... value are placeholders that must be
# replaced with real values before this file is used.
project: ORGANIZATION/home-credit

name: home-credit-default-risk
tags: [solution-4, dev]

# Tracked metric: the 'ROC_AUC' channel, with the goal of maximizing it.
metric:
  channel: 'ROC_AUC'
  goal: maximize

# NOTE(review): presumably files and directories left out of the uploaded
# source snapshot -- confirm against the consuming tool.
exclude:
  - output
  - notebooks
  - neptune.log
  - offline_job.log
  - .git
  - .github
  - .idea
  - .ipynb_checkpoints

# All experiment parameters live under this single mapping.
# Booleans are written as `True` here and are kept that way on purpose.
# NOTE(review): list-valued parameters are stored as quoted strings, so the
# consumer presumably parses values itself -- confirm before normalizing
# `True` to `true` or changing any quoting.
parameters:
  # Data (placeholder paths to the input CSV files and the working directory)
  train_filepath: YOUR/PATH/TO/application_train.csv
  test_filepath: YOUR/PATH/TO/application_test.csv
  bureau_balance_filepath: YOUR/PATH/TO/bureau_balance.csv
  bureau_filepath: YOUR/PATH/TO/bureau.csv
  credit_card_balance_filepath: YOUR/PATH/TO/credit_card_balance.csv
  installments_payments_filepath: YOUR/PATH/TO/installments_payments.csv
  POS_CASH_balance_filepath: YOUR/PATH/TO/POS_CASH_balance.csv
  previous_application_filepath: YOUR/PATH/TO/previous_application.csv
  sample_submission_filepath: YOUR/PATH/TO/sample_submission.csv
  experiment_directory: YOUR/PATH/WORKDIR

  # Kaggle
  # NOTE(review): kaggle_api looks like a 0/1 flag -- confirm.
  kaggle_api: 0
  kaggle_message: 'solution-4'

  # Data preparation
  n_cv_splits: 5
  validation_size: 0.2
  stratified_cv: True
  shuffle: 1

  # Execution
  clean_experiment_directory_before_training: 1
  num_workers: 1
  verbose: 1

  # Preprocessing
  fill_missing: True
  fill_value: 0

  # Feature Extraction
  # The two period lists below are strings, not YAML sequences.
  installments__last_k_trend_periods: '[10, 50, 100, 500]'
  installments__last_k_agg_periods: '[1, 5, 10, 50, 100, 500]'
  application_aggregation__use_diffs_only: True
  use_nan_count: True

  # Light GBM
  # NOTE(review): *_random_search_runs is presumably the number of random
  # search trials, with 0 disabling the search -- confirm.
  lgbm_random_search_runs: 0
  lgbm__device: cpu  # alternatives noted by the author: gpu, cpu
  lgbm__boosting_type: gbdt
  lgbm__objective: binary
  lgbm__metric: auc
  lgbm__number_boosting_rounds: 5000
  lgbm__early_stopping_rounds: 100
  lgbm__learning_rate: 0.1
  lgbm__max_bin: 300
  lgbm__max_depth: -1
  lgbm__num_leaves: 35
  lgbm__min_child_samples: 50
  lgbm__subsample: 1.0
  lgbm__subsample_freq: 1
  lgbm__colsample_bytree: 0.2
  lgbm__min_gain_to_split: 0.5
  lgbm__reg_lambda: 100.0
  lgbm__reg_alpha: 0.0
  lgbm__scale_pos_weight: 1

  # XGBoost
  xgb_random_search_runs: 0
  xgb__booster: gbtree
  xgb__tree_method: hist  # alternatives noted by the author: gpu_hist, auto, hist
  xgb__objective: binary:logistic
  xgb__eval_metric: auc
  xgb__nrounds: 10000
  xgb__early_stopping_rounds: 100
  xgb__eta: 0.001
  xgb__max_leaves: 40
  xgb__max_depth: 16
  xgb__max_bin: 255
  xgb__subsample: 0.5
  xgb__colsample_bytree: 0.5
  xgb__colsample_bylevel: 1
  xgb__min_child_weight: 4
  xgb__lambda: 0.001
  xgb__alpha: 0.001
  xgb__scale_pos_weight: 1

  # Random forest
  rf_random_search_runs: 0
  rf__n_estimators: 500
  rf__criterion: gini
  rf__max_features: 0.2
  rf__min_samples_split: 10
  rf__min_samples_leaf: 5
  rf__class_weight: 1

  # Logistic regression
  lr_random_search_runs: 0
  lr__penalty: l1
  lr__tol: 0.00001
  lr__C: 1
  lr__fit_intercept: 1
  lr__class_weight: 1
  lr__solver: liblinear
  lr__max_iter: 10000

  # SVC
  svc_random_search_runs: 0
  svc__kernel: rbf
  svc__C: 1
  svc__degree: 5
  svc__gamma: auto
  svc__coef0: 0.0
  svc__probability: True
  svc__tol: 0.00001
  svc__max_iter: -1

  # Postprocessing
  aggregation_method: rank_mean