-
Notifications
You must be signed in to change notification settings - Fork 2
/
preprocess.py
37 lines (27 loc) · 1.17 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from datasets.preprocess import get_cm_protocols, get_dataset_annotation, random_split_train_dev, create_non_label_eval_json
import pathlib
import json
# Locations of the ASVspoof2019 LA protocol files and of the raw data,
# relative to the directory the script is run from.
LA_PRO_DIR = '../data/LA/ASVspoof2019_LA_cm_protocols'
PRO_FILES = ('ASVspoof2019.LA.cm.train.trn.txt',
             'ASVspoof2019.LA.cm.dev.trl.txt',
             'ASVspoof2019.LA.cm.eval.trl.txt')
# NOTE(review): main() creates 'processed_data' but passes '2021_data/' as
# save_dir -- confirm against get_dataset_annotation which one is intended.
SAVE_DIR = '2021_data/'
DATA_DIR = '../data/'


def parse_args(argv=None):
    """Parse the command-line options of the preprocessing script.

    Args:
        argv: Argument strings to parse. ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        A dict with the single key ``'data_type'``, whose value is either
        ``'labeled'`` (the default, matching the previously hard-coded
        choice) or ``'unlabeled'``.

    Raises:
        SystemExit: Raised by argparse on ``--help`` or invalid arguments.
    """
    # Local import keeps the file's top-level import block untouched.
    import argparse

    parser = argparse.ArgumentParser(
        description='Preprocess the ASVspoof2019 LA data.')
    parser.add_argument('--data_type',
                        choices=('labeled', 'unlabeled'),
                        default='labeled',
                        help="which kind of data to process "
                             "(default: 'labeled')")
    return vars(parser.parse_args(argv))


def main(argv=None):
    """Run the preprocessing step selected by ``--data_type``.

    Args:
        argv: Argument strings forwarded to :func:`parse_args`; ``None``
            means "use the process command line".
    """
    args = parse_args(argv)
    print('----Start to Process Data -----')

    if args['data_type'] == 'labeled':
        print('Start to process labeled data:')
        pathlib.Path('processed_data').mkdir(parents=True, exist_ok=True)
        split_features = get_cm_protocols(pro_dir=LA_PRO_DIR,
                                          pro_files=PRO_FILES)
        get_dataset_annotation(split_features,
                               data_dir=DATA_DIR,
                               save_dir=SAVE_DIR)
        # Called with its defaults, exactly as before.
        random_split_train_dev()
    else:
        # Processing of unlabeled data is not implemented yet.
        print('TODO: ')


if __name__ == '__main__':
    main()