# audioset_sed.yaml
# Global setting (not part of any section below).
DEVICE: cpu # device used for training
# Model selection. Child keys are nested under MODEL so that NAME does not
# collide with the NAME keys of the other sections.
MODEL:
  NAME: cnn14decisionlevelmax  # name of the model you are using
  # NOTE(review): presumably a path to pretrained weights; empty here - confirm
  # how the loader treats an empty string.
  PRETRAINED: ''
# Dataset and audio front-end settings. Child keys are nested under DATASET
# so that NAME does not collide with the NAME keys of the other sections.
DATASET:
  NAME: audioset  # dataset name
  ROOT: ''  # dataset root path (left empty in this file)
  METRIC: mAP
  SAMPLE_RATE: 32000  # presumably Hz - confirm against the loader
  AUDIO_LENGTH: 5  # presumably seconds - confirm against the loader
  WIN_LENGTH: 1024
  HOP_LENGTH: 320
  N_MELS: 64
  FMIN: 50
  FMAX: 14000
# Augmentation settings, nested under AUG.
# NOTE(review): kept as a top-level section; confirm the config consumer does
# not expect AUG to live under DATASET.
AUG:
  MIXUP: 0.0
  MIXUP_ALPHA: 10
  SMOOTHING: 0.1
  TIME_MASK: 96
  FREQ_MASK: 24
# Training-loop settings, nested under TRAIN.
TRAIN:
  EPOCHS: 100  # number of epochs to train
  EVAL_INTERVAL: 10  # interval to evaluate the model during training
  BATCH_SIZE: 16  # batch size used to train
  # loss function name (ce, bce, bcelogits, label_smooth, soft_target)
  LOSS: bcelogits
  AMP: true  # use Automatic Mixed Precision training or not
  DDP: false
  # output folder name used for saving the trained model and logs
  SAVE_DIR: 'output'
# Optimizer settings. Child keys are nested under OPTIMIZER so that NAME does
# not collide with the NAME keys of the other sections.
OPTIMIZER:
  NAME: adamw
  LR: 0.0001  # initial learning rate used in optimizer
  WEIGHT_DECAY: 0.001  # decay rate used in optimizer
# Learning-rate scheduler settings. Child keys are nested under SCHEDULER so
# that NAME does not collide with the NAME keys of the other sections.
SCHEDULER:
  NAME: steplr
  # NOTE(review): presumably positional scheduler arguments (step size, decay
  # factor) - confirm the order against the code that builds the scheduler.
  PARAMS: [30, 0.1]
# Inference settings, nested under TEST.
TEST:
  MODE: file  # inference mode (file, mic)
  # audio file name (not used when MODE is mic)
  # NOTE(review): 'assests' may be a misspelling of 'assets'; left unchanged
  # because it must match the directory name on disk - confirm.
  FILE: 'assests/test.wav'
  MODEL_PATH: 'checkpoints/cnn14_decisionlevelmax.pth'  # trained model path
  THRESHOLD: 0.2
  PLOT: false