-
Notifications
You must be signed in to change notification settings - Fork 0
/
conifg.py
79 lines (63 loc) · 2.58 KB
/
conifg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Dataset configuration for the Yes/No (binary) answer task -- this is the
# active setup. A commented-out single-word variant follows below; enable
# exactly one of the two.
data_config = {
    # CSV containing audio URLs, Questions, Answers, filenames
    'train_metadata_path': 'dataset/metadata/binary_train_eng_majority.csv',
    'val_metadata_path': 'dataset/metadata/binary_val_eng_majority.csv',
    'test_metadata_path': 'dataset/metadata/binary_test_eng_majority.csv',
    'output_classes_file': 'dataset/metadata/output_classes.json',
    # path to store downloaded data
    'data_dir': 'dataset/audio_files',
    'feat_dir': 'dataset/features',
    'pre_trained_word_embeddings_file': 'dataset/word_embedding/cc.en.300.vec',
    # reused by model_config as its 'audio_input_size'
    'audio_embedding_size': 512,
}
# Single-word answer setup: to use it, uncomment the block below and comment out the active data_config above.
# data_config = {
# 'train_metadata_path': 'dataset/metadata/single_word_train.csv', # CSV containing audio URLs, Questions, Answers,filenames
# 'val_metadata_path': 'dataset/metadata/single_word_val.csv',
# 'test_metadata_path': 'dataset/metadata/single_word_test.csv',
# 'output_classes_file': 'dataset/metadata/output_classes.json',
# 'data_dir': 'dataset/audio_files', # path to store downloaded data
# 'feat_dir': 'dataset/features',
# 'pre_trained_word_embeddings_file': 'dataset/word_embedding/wiki-news-300d-1M.vec',
# 'audio_embedding_size': 512
# }
# Model and training hyper-parameters.
# Further down in this file, 'n_classes' and 'dense1_input' are added to this
# dict, and the LSTM hidden sizes may be overridden depending on the task.
model_config = {
    # --- general params ---
    'net_type': 'aquanet',
    'output_dir': '.',

    # --- learning params ---
    'learning_rate': 0.001,
    'batch_size': 1,
    'num_workers': 8,
    'num_epochs': 100,

    # --- audio network ---
    # input width follows the audio embedding size declared in data_config
    'audio_input_size': data_config['audio_embedding_size'],
    'audio_lstm_n_layers': 2,
    'audio_lstm_hidden_size': 128,
    'audio_bidirectional': True,
    'audio_lstm_dropout': 0.2,

    # --- NLP network ---
    # pretrained embedding size from fasttext
    'text_input_size': 300,
    'text_lstm_n_layers': 2,
    'text_lstm_hidden_size': 128,
    'text_bidirectional': True,
    'text_lstm_dropout': 0.2,

    # --- classification ---
    'n_dense1_units': 256,
    'n_dense2_units': 128,
}
# Task-specific settings, chosen from the training-metadata path: when the
# path contains 'binary', n_classes is 1; otherwise n_classes is 828 and both
# LSTM hidden sizes are raised to 512.
# NOTE(review): indentation was lost in the copy this was restored from; the
# two hidden-size overrides are assumed to belong to the non-binary branch --
# confirm against the original file.
# NOTE(review): 828 is presumably the number of entries in
# 'output_classes_file' -- verify.
if 'binary' not in data_config['train_metadata_path']:
    model_config.update({
        'n_classes': 828,
        'audio_lstm_hidden_size': 512,
        'text_lstm_hidden_size': 512,
    })
else:
    model_config['n_classes'] = 1
# Input width of the first dense layer: the sum of the audio and text LSTM
# hidden sizes, each counted twice when that branch is bidirectional.
dense1_input = sum(
    model_config[size_key] * (2 if model_config[flag_key] else 1)
    for flag_key, size_key in (
        ('audio_bidirectional', 'audio_lstm_hidden_size'),
        ('text_bidirectional', 'text_lstm_hidden_size'),
    )
)
model_config['dense1_input'] = dense1_input