2
2
from __future__ import absolute_import
3
3
4
4
import datetime
5
- import os
6
5
import re
7
6
from uuid import uuid4
8
7
9
8
from croniter import croniter
10
- from django .conf import settings
11
9
import gridfs
12
10
from mongoengine .fields import GridFSProxy
11
+ from nbconvert .preprocessors .execute import ExecutePreprocessor
13
12
from nbformat import read as nbread , write as nbwrite
14
- from nbformat .v4 .nbbase import nbformat
15
- from runipy .notebook_runner import NotebookRunner
16
13
from six import StringIO , BytesIO
17
14
import yaml
18
15
16
+ from omegajobs .tasks import run_omegaml_job
19
17
from omegaml import signals
20
18
from omegaml .documents import Metadata
21
19
from omegaml .store import OmegaStore
22
- from omegaml .tasks import run_omegaml_job
23
20
from omegaml .util import settings as omega_settings
24
21
25
22
@@ -47,29 +44,42 @@ def _fs(self):
47
44
return self .store .fs
48
45
49
46
def collection (self , name ):
47
+ if not name .endswith ('.ipynb' ):
48
+ name += '.ipynb'
50
49
return self .store .collection (name )
51
50
52
51
def drop (self , name ):
52
+ if not name .endswith ('.ipynb' ):
53
+ name += '.ipynb'
53
54
return self .store .drop (name )
54
55
55
56
def metadata (self , name ):
57
+ if not name .endswith ('.ipynb' ):
58
+ name += '.ipynb'
56
59
return self .store .metadata (name )
57
60
61
+ def exists (self , name ):
62
+ if not name .endswith ('.ipynb' ):
63
+ name += '.ipynb'
64
+ return len (self .store .list (name )) > 0
65
+
58
66
def put (self , obj , name , attributes = None ):
59
67
"""
60
68
Store a NotebookNode
61
69
62
70
:param obj: the NotebookNode to store
63
71
:param name: the name of the notebook
64
72
"""
73
+ if not name .endswith ('.ipynb' ):
74
+ name += '.ipynb'
65
75
sbuf = StringIO ()
66
76
bbuf = BytesIO ()
67
77
# nbwrite expects string, fs.put expects bytes
68
78
nbwrite (obj , sbuf , version = 4 )
69
79
sbuf .seek (0 )
70
80
bbuf .write (sbuf .getvalue ().encode ('utf8' ))
71
81
bbuf .seek (0 )
72
- # see if we have a file alredy , if so replace the gridfile
82
+ # see if we have a file already , if so replace the gridfile
73
83
meta = self .store .metadata (name )
74
84
if not meta :
75
85
filename = uuid4 ().hex
@@ -88,6 +98,8 @@ def get(self, name):
88
98
"""
89
99
Retrieve a notebook and return a NotebookNode
90
100
"""
101
+ if not name .endswith ('.ipynb' ):
102
+ name += '.ipynb'
91
103
meta = self .store .metadata (name )
92
104
if meta :
93
105
try :
@@ -127,48 +139,12 @@ def list(self, jobfilter='.*', raw=False):
127
139
job_list = self .store .list (regexp = jobfilter , raw = raw )
128
140
return job_list
129
141
130
- def run (self , nb_file ):
131
- """
132
- run the notebook on the runtime cluster
133
- """
134
- from omegaml .tasks import run_omegaml_job
135
- result = run_omegaml_job .delay (nb_file )
136
- signals .job_run .send (sender = None , name = nb_file )
137
- return result .get ()
138
-
139
- def open_notebook (self , nb_filename ):
140
- """
141
- Reads and returns a notebook
142
- """
143
- try :
144
- # for version 3
145
- notebook = nbread (open (nb_filename ), as_version = 3 )
146
- except Exception :
147
- # for version 4
148
- notebook = nbread (open (nb_filename ), as_version = 4 )
149
- except Exception :
150
- raise ValueError (
151
- "Notebook {0} do not match any applicable versions!" .format (
152
- nb_filename ))
153
- return notebook
154
-
155
142
def get_notebook_config (self , nb_filename ):
156
143
"""
157
144
returns the omegaml script config on
158
145
the notebook's first cell
159
146
"""
160
- gfs = self .get_fs ()
161
- try :
162
- # nb_filename = 'job_'+nb_file+'.ipynb'
163
- outf = gfs .get_last_version (nb_filename )
164
- with open (nb_filename , 'wb' ) as nb_file :
165
- nb_file .write (outf .read ())
166
- except gridfs .errors .NoFile :
167
- raise gridfs .errors .NoFile (
168
- "Notebook {0} does not exist in collection '{1}'" .format (
169
- nb_filename , self .defaults .OMEGA_NOTEBOOK_COLLECTION ))
170
-
171
- notebook = self .open_notebook (nb_filename )
147
+ notebook = self .get (nb_filename )
172
148
config_cell = notebook .get ('worksheets' )[0 ].get ('cells' )[0 ]
173
149
yaml_conf = '\n ' .join (
174
150
[re .sub ('#' , '' , x , 1 ) for x in str (
@@ -188,78 +164,49 @@ def get_notebook_config(self, nb_filename):
188
164
189
165
return yaml_conf .get ("omegaml.script" )
190
166
191
- def run_notebook (self , nb_filename ):
167
+ def run (self , name ):
168
+ """
169
+ Run a job immediately
170
+
171
+ The job is run and the results are stored in the given filename
172
+
173
+ :param name: the name of the jobfile
174
+ :return: the metadata of the job
192
175
"""
193
- run the job immediately.
176
+ return self .run_notebook (name )
177
+
178
+ def run_notebook (self , name ):
179
+ """
180
+ run a given notebook immediately.
194
181
the job parameter is the name of the job script as in ipynb.
195
182
Inserts and returns the Metadata document for the job.
196
183
"""
197
- from pycloudfs import S3Helper
198
- gfs = self .get_fs ()
199
- # FIXME get the notebook from mongo store without storing locally
200
- config = self .get_notebook_config (nb_filename )
201
- # nb_filename = 'job_'+nb_file+'.ipynb'
202
- # FIXME this only works because get_notebook_config stored the file
203
- # locally
204
- notebook = self .open_notebook (nb_filename )
205
- r = NotebookRunner (notebook )
206
- r .run_notebook (skip_exceptions = True )
207
- filename , ext = os .path .splitext (nb_filename )
184
+ notebook = self .get (name )
185
+ meta_job = self .metadata (name )
208
186
ts = datetime .datetime .now ().strftime ('%s' )
209
- result_nb = 'result' + filename .lstrip ('job' ) + '_{0}.ipynb' .format (ts )
210
- nbwrite (r .nb , open (result_nb , 'w' ,), version = 3 )
211
- # store results
212
- s3file = {}
213
- fileid = None
214
- if config .get ('results-store' ) == 's3' :
215
- AWS_ACCESS_KEY_ID = os .environ .get (
216
- 'AWS_ACCESS_KEY_ID' , getattr (
217
- settings , 'AWS_ACCESS_KEY_ID' ))
218
- AWS_SECRET_ACCESS_KEY = os .environ .get (
219
- 'AWS_SECRET_ACCESS_KEY' , getattr (
220
- settings , 'AWS_SECRET_ACCESS_KEY' ))
221
- bucket = os .environ .get ('AWS_TEST_BUCKET' , 'shrebo' )
222
- path = 'ipynb_results'
223
- s3 = S3Helper (
224
- bucket = bucket ,
225
- path = path ,
226
- aws_access_key_id = AWS_ACCESS_KEY_ID ,
227
- aws_secret_access_key = AWS_SECRET_ACCESS_KEY )
228
- s3file = dict (
229
- bucket = bucket ,
230
- prefix = path ,
231
- name = result_nb )
232
- s3 .upload_file (result_nb )
233
- if config .get ('results-store' ) == 'gridfs' :
234
- with open (result_nb , 'rb' ) as fin :
235
- fileid = gfs .put (fin , filename = os .path .basename (result_nb ))
236
- os .remove (result_nb ) if os .path .isfile (result_nb ) else None
237
- # shutdown the ipython kernel
238
- r .shutdown_kernel ()
239
- # check if this job was scheduled earlier
187
+ # execute
240
188
try :
241
- metadata = Metadata .objects .get (
242
- name = nb_filename , kind = Metadata .OMEGAML_RUNNING_JOBS )
243
- metadata .gridfile = GridFSProxy (
244
- grid_id = fileid ,
245
- collection_name = self .defaults .OMEGA_NOTEBOOK_COLLECTION )
246
- metadata .attributes ['state' ] = 'EXECUTED'
247
- metadata .s3file = s3file
248
- metadata .save ()
249
- # FIXME return only at function end, same below
250
- return metadata
251
- except Metadata .DoesNotExist :
252
- attrs = {}
253
- attrs ['config' ] = config
254
- attrs ['state' ] = 'EXECUTED'
255
- return Metadata (
256
- name = nb_filename ,
257
- kind = Metadata .OMEGAML_RUNNING_JOBS ,
258
- s3file = s3file ,
259
- gridfile = GridFSProxy (
260
- grid_id = fileid ,
261
- collection_name = self .defaults .OMEGA_NOTEBOOK_COLLECTION ),
262
- attributes = attrs ).save ()
189
+ ep = ExecutePreprocessor ()
190
+ ep .preprocess (notebook , {'metadata' : {'path' : '/' }})
191
+ except Exception as e :
192
+ status = str (e )
193
+ else :
194
+ status = 'OK'
195
+ # record results
196
+ meta_results = self .put (
197
+ notebook , 'results/{name}_{ts}' .format (** locals ()))
198
+ meta_results .attributes ['source_job' ] = name
199
+ meta_results .save ()
200
+ job_results = meta_job .attributes .get ('job_results' , [])
201
+ job_results .append (meta_results .name )
202
+ meta_job .attributes ['job_results' ] = job_results
203
+ # record final job status
204
+ job_runs = meta_job .attributes .get ('job_runs' , {})
205
+ job_runs [ts ] = status
206
+ meta_job .attributes ['job_runs' ] = job_runs
207
+ meta_job .save ()
208
+
209
+ return meta_job
263
210
264
211
def schedule (self , nb_file ):
265
212
"""
@@ -279,7 +226,7 @@ def schedule(self, nb_file):
279
226
iter_next = croniter (interval , now )
280
227
run_at = iter_next .get_next (datetime .datetime )
281
228
next_run_time = iter_next .get_next (datetime .datetime )
282
- from omegaml .tasks import schedule_omegaml_job
229
+ from omegajobs .tasks import schedule_omegaml_job
283
230
kwargs = dict (
284
231
config = config ,
285
232
run_at = run_at ,
@@ -309,31 +256,3 @@ def get_status(self, job):
309
256
"""
310
257
returns list of Metadata objects for this job
311
258
"""
312
- # FIXME this should use the store.metadata
313
- return Metadata .objects .filter (name = job , kind__in = Metadata .KINDS )
314
-
315
- def get_result (self , job ):
316
- """
317
- returns the result gridfile object for the respective Metadata
318
- """
319
- fs = self .get_fs (self .defaults .OMEGA_NOTEBOOK_COLLECTION )
320
- if isinstance (job , Metadata ):
321
- return fs .get (job .gridfile .grid_id )
322
-
323
- try :
324
- metadata = Metadata .objects .order_by (
325
- '-created' ).filter (name = job ).first ()
326
- if not metadata :
327
- raise Metadata .DoesNotExist
328
- return fs .get (metadata .gridfile .grid_id )
329
- except Metadata .DoesNotExist :
330
- try :
331
- collection = self .get_collection ('metadata' )
332
- doc = collection .find_one ({'attributes.task_id' : job })
333
- metadata = Metadata .objects .get (gridfile = doc .get ('gridfile' ))
334
- if not metadata :
335
- raise Exception
336
- return fs .get (metadata .gridfile .grid_id )
337
- except Exception :
338
- raise Metadata .DoesNotExist (
339
- 'No job found related to the name or task id: {0}' .format (job ))
0 commit comments