# exp.conf — experiment configurations (HOCON format)
# Base configuration shared by every experiment; all other configs inherit it via ${basic}.
basic {
# Set to true to force CPU-only execution.
no_cuda = false
# Encoder model (HuggingFace id or local path); overridden by the *_sapbert_*_layers variants below.
transformer = cambridgeltl/SapBERT-from-PubMedBERT-fulltext
# NOTE(review): presumably "log every N steps" / "evaluate every N epochs" — confirm against the trainer.
report_frequency = 100
epoch_evaluation_frequency = 5
gradient_accumulation_steps = 1
# When true, project encoder outputs to feature_size (kept false here; see cg_with_online_kd).
feature_proj = false
# 768 matches the hidden size of BERT-base-style encoders; lightweight_basic lowers it to 256.
feature_size = 768
gradient_checkpointing = false
# Maximum tokenized sequence length (mention-level inputs, hence short).
max_length = 25
# Augment training with synthetic data (enabled by cg_basic_with_synthetic_data).
use_synthetic_train = false
# Learning-Related Configs
epochs = 50
batch_size = 256
# Separate learning rates: one for the transformer body, one for the task head.
transformer_learning_rate = 5e-05
task_learning_rate = 0.0001
max_grad_norm = 1.0
# Loss Function Related Configs
# NOTE(review): pos/neg scaling, threshold, and lambda weighting of loss terms — semantics defined by the loss implementation; verify there.
loss_scale_pos = 2
loss_scale_neg = 50
loss_thresh = 0.5
loss_lambda = 0.2
#
# Feature toggles, all off by default; enabled by the specialized configs below.
online_kd = false
gradual_unfreezing = false
lightweight = false
}
# Configs for Candidate Generator
cg_basic = ${basic} {
hard_negatives_training = false
# Settings below were used for hard-negatives runs; kept for reference.
#max_hard_candidates = 10
#gradient_accumulation_steps = 16
#batch_size = 16
}
# Candidate generator trained with synthetic data enabled.
cg_basic_with_synthetic_data = ${cg_basic} {
use_synthetic_train = true
}
# Candidate generators backed by reduced-depth SapBERT checkpoints.
# NOTE(review): local filesystem paths — only resolvable on machines with /shared/nas mounted.
cg_sapbert_3_layers = ${cg_basic} {
transformer = /shared/nas/data/m1/tuanml/biolinking/initial_models/sapbert_3_layers
}
cg_sapbert_6_layers = ${cg_basic} {
transformer = /shared/nas/data/m1/tuanml/biolinking/initial_models/sapbert_6_layers
}
cg_sapbert_9_layers = ${cg_basic} {
transformer = /shared/nas/data/m1/tuanml/biolinking/initial_models/sapbert_9_layers
}
# Configs for Rerankers
reranker_basic = ${basic} {
# Smaller batches than the candidate generator; gradient accumulation keeps the
# effective batch size at 16 * 4 = 64.
batch_size = 16
# Number of candidates to rerank per mention.
topk = 20
include_context = false
gradient_accumulation_steps = 4
}
# Context-aware reranker: longer inputs, so batch size is halved and
# accumulation doubled (effective batch size stays 8 * 8 = 64).
reranker_basic_with_context = ${reranker_basic} {
include_context = true
batch_size = 8
gradient_accumulation_steps = 8
}
# Rerankers backed by reduced-depth SapBERT checkpoints.
# NOTE(review): local filesystem paths — only resolvable with /shared/nas mounted.
reranker_sapbert_3_layers = ${reranker_basic} {
transformer = /shared/nas/data/m1/tuanml/biolinking/initial_models/sapbert_3_layers
}
reranker_sapbert_6_layers = ${reranker_basic} {
transformer = /shared/nas/data/m1/tuanml/biolinking/initial_models/sapbert_6_layers
}
reranker_sapbert_9_layers = ${reranker_basic} {
transformer = /shared/nas/data/m1/tuanml/biolinking/initial_models/sapbert_9_layers
}
# Configs for Candidate Generator with Online KD
cg_with_online_kd = ${cg_basic} {
online_kd = true
feature_proj = false
# NOTE(review): presumably adds per-branch FFNN heads for the KD branches — confirm in the model code.
enable_branch_ffnns = true
feature_size = 768
gradual_unfreezing = false
# KD runs train longer than the 50-epoch base.
epochs = 80
}
# The three variants differ only in the KD loss term used (and whether the
# encoder is unfrozen gradually during training).
cg_with_online_kd_none = ${cg_with_online_kd} {
kd_loss_term_type = none
}
cg_with_online_kd_individual = ${cg_with_online_kd} {
kd_loss_term_type = individual
gradual_unfreezing = true
}
cg_with_online_kd_relational = ${cg_with_online_kd} {
kd_loss_term_type = relational
gradual_unfreezing = true
}
# Configs for Dummy Candidates Generator
dummy = ${basic}{
# NOTE(review): only config that pins a dataset; other configs presumably take it
# from the command line — confirm against the runner.
dataset = ncbi-disease
}
# Configs for Exit Predictors
ep_basic = ${basic} {
# NOTE(review): distance threshold for the exit decision — semantics defined by
# the exit-predictor implementation; verify there.
dist_threshold = 0.6
}
# Configs for lightweight models
# These replace the transformer with a CNN student over frozen pretrained embeddings;
# the tokenizer is still the SapBERT tokenizer so vocabularies match.
lightweight_basic = ${cg_basic}{
lightweight = true
gradient_accumulation_steps = 1
tokenizer = cambridgeltl/SapBERT-from-PubMedBERT-fulltext
embedding_configs_fp = student_configs/embeddings.json
# NOTE(review): local filesystem path — only resolvable with /shared/nas mounted.
embedding_saved_path = /shared/nas/data/m1/tuanml/biolinking/initial_embeddings/pubmedbert/embedding.pt
# Higher task LR and more epochs than the transformer configs (training from scratch).
task_learning_rate = 0.001
batch_size = 256
epochs = 100
# CNN encoder variant: none | vdcnn | cnn_text (set by the configs below).
cnn_type = none
feature_size = 256
}
# Very Deep CNN student.
lightweight_vdcnn = ${lightweight_basic} {
cnn_type = vdcnn
vdcnn_cnn_depth = 12
vdcnn_cnn_kernel_size = 3
vdcnn_cnn_padding = 1
vdcnn_dropout = 0.2 # Change to larger number for smaller datasets
}
# TextCNN-style student with configurable pooling over token features.
lightweight_cnn_text = ${lightweight_basic} {
cnn_type = cnn_text
cnn_text_depth = 4
cnn_text_dropout = 0.25
pooling_type = max
}
# Pooling ablations: max (default above) vs. attention vs. mean.
lightweight_cnn_text_with_attention_pooling = ${lightweight_cnn_text} {
pooling_type = attention
}
lightweight_cnn_text_with_mean_pooling = ${lightweight_cnn_text} {
pooling_type = mean
}
# Pretraining configs for the lightweight students: large batches, few epochs,
# and no hard-negative mining.
pretraining_lightweight_vdcnn = ${lightweight_vdcnn} {
batch_size = 1024
epochs = 5
hard_negatives_training = false
}
pretraining_lightweight_cnn_text = ${lightweight_cnn_text} {
batch_size = 1024
epochs = 5
hard_negatives_training = false
}