-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_preprocessing.py
112 lines (91 loc) · 3.29 KB
/
run_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import time
'''
preprocessing method ["info","org","days_test","slice"]
info: just load and show info
org: from gru4rec (last day => test set)
org_min_date: from gru4rec (last day => test set) but from a minimal date onwards
days_test: adapted from gru4rec (last N days => test set)
slice: new (create multiple train-test-combinations with a window approach)
buys: load buys and save file to prepared
'''
import sys
from pathlib import Path
import yaml
import importlib
import traceback
import os
def main( conf ):
    '''
    Run the preprocessing described by a single YAML configuration file.

    If conf points to an existing file, the file is parsed and handed to
    run_file. Afterwards the interpreter is terminated, also when run_file
    failed (the traceback is printed in that case). If conf is not an
    existing file, a "File not found" message is printed and the function
    returns normally.
        --------
        conf: string
            Path to the YAML configuration file.
    '''
    print( 'Checking {}'.format( conf ) )

    conf_path = Path( conf )
    if not conf_path.is_file():
        print( 'File not found: ' + conf )
        return

    print( 'Loading file' )
    # The context manager closes the file even if parsing raises.
    with open( str(conf_path) ) as stream:
        # NOTE(review): safe_load replaces the bare yaml.load(stream) call.
        # PyYAML >= 6 rejects yaml.load without an explicit Loader, and the
        # old default loader could build arbitrary Python objects from the
        # file. Confirm that no configuration relies on python-specific tags.
        c = yaml.safe_load( stream )
    print( 'processing config ' + conf )

    try:
        run_file( c )
        print( 'finished config ' + conf )
    except (KeyboardInterrupt, SystemExit):
        print( 'manually aborted config ' + conf )
        raise
    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        print( 'error for config ', conf_path )
        traceback.print_exc()

    # sys.exit instead of the interactive-only exit() helper from site.
    sys.exit()
def run_file( conf ):
    '''
    Execute one preprocessing job for an already parsed configuration.

    The preprocessing module is obtained via load_preprocessor, the raw data
    is loaded and filtered, and the resulting data is passed to the method
    selected by conf['type'] together with the output path.
        --------
        conf: dict
            Parsed configuration. Entries read here:
            conf['data']['folder'] + conf['data']['prefix'] is the input path,
            conf['output']['folder'] + conf['data']['prefix'] is the output path,
            conf['filter'] are keyword arguments for filter_data,
            conf['params'] are keyword arguments for the selected method,
            conf['type'] is 'single' (split_data), 'window' (slice_data) or
            the name of any other callable attribute of the module.
    '''
    #include preprocessing
    preprocessor = load_preprocessor( conf )

    source = conf['data']['folder'] + conf['data']['prefix']
    target = conf['output']['folder'] + conf['data']['prefix']

    #load data from raw and transform
    data = preprocessor.load_data( source )
    if isinstance( data, tuple ):
        # Only the first element is processed; further elements are dropped.
        data = data[0]

    data = preprocessor.filter_data( data, **conf['filter'] )

    ensure_dir( target )

    #call method according to config
    if conf['type'] == 'single':
        preprocessor.split_data( data, target, **conf['params'] )
    elif conf['type'] == 'window':
        preprocessor.slice_data( data, target, **conf['params'] )
    elif hasattr( preprocessor, conf['type'] ):
        # Any other type names a method of the module. It receives the same
        # output path as the built-in types (previously undefined names were
        # passed here, which raised a NameError).
        method_to_call = getattr( preprocessor, conf['type'] )
        method_to_call( data, target, **conf['params'] )
    else:
        print( 'preprocessing type not supported' )
def load_preprocessor( conf ):
    '''
    Import the preprocessing module selected by the configuration
        --------
        conf : dict
            Configuration whose 'preprocessor' entry holds the module suffix.
            The imported module is preprocessing.preprocess_<suffix>.
    '''
    suffix = conf['preprocessor']
    module_name = 'preprocessing.preprocess_' + suffix
    return importlib.import_module( module_name )
def ensure_dir(file_path):
    '''
    Create all directories in the file_path if non-existent.
        --------
        file_path : string
            Path to a file. Only the directory part of the path is created;
            a path without a directory part is left alone.
    '''
    directory = os.path.dirname(file_path)
    # dirname is '' for a bare file name: there is nothing to create, and
    # os.makedirs('') would raise FileNotFoundError.
    if directory and not os.path.exists(directory):
        # exist_ok guards against the directory appearing between the
        # existence check and the creation.
        os.makedirs(directory, exist_ok=True)
if __name__ == '__main__':
    # Script entry point: exactly one command line argument is expected,
    # the preprocessing configuration, which is forwarded to main.
    cli_args = sys.argv[1:]
    if len( cli_args ) != 1:
        print( 'Preprocessing configuration expected.' )
    else:
        main( cli_args[0] )