# nbjob.jobdb
Routines for interacting with the MongoDB job database

In [1]:
try:
    # When this file is run as part of the nbjob package, relative imports are allowed
    from .structs import Struct, impl, init
    from . import ipyparallel_display
except SystemError:
    # This file is a Jupyter notebook precisely to allow opening it in Jupyter
    # during development. Don't use relative imports here
    import notebook_import_hooks
    from nbjob_dev.structs import Struct, impl, init
    import nbjob_dev.ipyparallel_display

Only nbjob developers should see this message.


### Core data structures

Objects in JobDB fall into five categories:
* Entries - linked list corresponding to the source of a job, plus any checkpoints
* Jobs
* Analyses
* Analysis_entries - results of analyses run. These are not exposed directly, but can be accessed through the `Analysis` class
* fs - There is a GridFS filesystem that stores arbitrary binary objects, such as dumps of model parameters

In [2]:
from pymongo import MongoClient
from bson.objectid import ObjectId
import gridfs

In [662]:
class JobDB(metaclass=Struct):
    """
    Handle on the MongoDB database that backs nbjob.

    Bundles the client, the database object, the four core collections, a
    GridFS bucket and an in-memory cache of entry histories.
    """
    def __init__(self, db=None):
        """
        Wrap `db`. When no database is supplied, connect to MongoDB on
        localhost and use that client's database named `db`.
        """
        if db is None:
            # Connect to the database on localhost
            self.client = MongoClient()
            self.db = self.client.db
        else:
            self.db = db
            self.client = db.client

        # Key collections
        self.entries = self.db.entries_test
        self.jobs = self.db.jobs_test
        self.analysis_entries = self.db.analysis_entries_test
        self.analyses = self.db.analyses_test

        # Bucket for arbitrary binary objects
        self.fs = gridfs.GridFSBucket(self.db)

        # Entry history cache
        # Walking a long entry history is slow, so Entry.history stores
        # some of its results here, keyed by Entry.
        self.entry_history_cache = {}

In [705]:
class Entry(object):
    """
    'entries' are a linked list of checkpoints and snippets of executable code.
    They are the core datastructure for storing source code in the system.

    An Entry object holds only the database wrapper and the entry's `_id`;
    the `prev`, `type`, `names` and `src` properties query the `entries`
    collection on every access.
    """
    
    TYPE_SRC = 'src'
    TYPE_CHECKPOINT = 'checkpoint'
    TYPE_BINARY = 'binary' # deprecated, use 'file' instead
    TYPE_FILE = 'file'
    valid_types = [TYPE_SRC, TYPE_CHECKPOINT, TYPE_BINARY, TYPE_FILE]

    # Pieces of the placeholder text that `history` emits for a run of
    # non-source entries. The text is parsed again later, so keep it stable.
    _OMITTED_PREFIX = '#Omitted '
    _OMITTED_SUFFIX = ' checkpoints'
    
    def __init__(self, jobdb, _id=None):
        """
        Get or create an entry in the database
        
        If _id is None, creates a new root entry (of type 'checkpoint')
        Otherwise, refers to an existing entry; a str _id is converted to an
        ObjectId and nothing is read from the database until a property is used
        """
        self.jobdb = jobdb
        
        if _id is None:
            res = self.jobdb.entries.insert_one({
                    'type' : Entry.TYPE_CHECKPOINT,
                    'names' : [],
                    'prev' : None
                })
            self._id = res.inserted_id
        else:
            if isinstance(_id, str):
                _id = ObjectId(_id)
            self._id = _id
            
    def __eq__(self, other):
        return type(other) is type(self) and other._id == self._id and other.jobdb == self.jobdb
    
    def __hash__(self):
        return hash(type(self)) ^ hash(self._id)
    
    @property
    def prev(self):
        """The preceding Entry in the linked list, or None for a root entry."""
        prev_id = self.jobdb.entries.find_one({'_id' : self._id}, {'prev' : True})['prev']
        if prev_id is None:
            return None
        else:
            return Entry(self.jobdb, prev_id)
    
    @property
    def type(self):
        """The stored entry type; asserted to be one of `valid_types`."""
        res = self.jobdb.entries.find_one({'_id' : self._id}, {'type' : True})['type']
        assert (res in Entry.valid_types)
        return res
    
    @property
    def names(self):
        """The stored `names` list, returned as a set."""
        res = self.jobdb.entries.find_one({'_id' : self._id}, {'names' : True})['names']
        res = set(res)
        return res
    
    @property
    def src(self):
        """The source text of a 'src' entry; the empty string for other types."""
        if self.type != Entry.TYPE_SRC:
            return ""
        res = self.jobdb.entries.find_one({'_id' : self._id}, {'src' : True})['src']
        return res
    
    # All types
    def new(self, *args, **kwargs):
        """
        Append entries after this one and return the last entry created.

        Only `new(history)` is implemented, where history is a list of
        (names, src) pairs; any other first argument raises NotImplementedError.
        """
        arg1, *_ = args
        if isinstance(arg1, list):
            return self._new_history(*args, **kwargs)
        else:
            raise NotImplementedError()

    def _new_history(self, history):
        """Chain one 'src' entry per (names, src) pair and return the last one."""
        current = self
        for names, src in history:
            current = current._new_entry(type=Entry.TYPE_SRC, names=names, src=src)
        return current
    
    def _new_entry(self, type, names=None, src=""):
        """
        Insert a new entry whose `prev` is this entry and return it.

        type  -- Entry.TYPE_SRC or Entry.TYPE_CHECKPOINT
        names -- set of names (must be empty or omitted for checkpoints)
        src   -- source text, only stored for 'src' entries

        Raises ValueError for any other type or for arguments of the wrong kind.
        """
        if names is None:
            # Fresh set per call instead of a shared mutable default
            names = set()

        d = {}
        if type == Entry.TYPE_SRC:
            if not (isinstance(names, set) and isinstance(src, str)):
                raise ValueError("names must be a set and src must be a str")
            d['type'] = type
            d['names'] = list(names)
            d['src'] = src
            d['prev'] = self._id
        elif type == Entry.TYPE_CHECKPOINT:
            d['type'] = type
            if names != set():
                raise ValueError("checkpoint entries cannot carry names")
            d['names'] = []
            d['prev'] = self._id
        else:
            raise ValueError("Invalid type")
            
        new_id = self.jobdb.entries.insert_one(d).inserted_id
        return Entry(self.jobdb, new_id)
    
    def checkpoint(self):
        """Append a 'checkpoint' entry after this one and return it."""
        return self._new_entry(type=Entry.TYPE_CHECKPOINT)
    
    def __repr__(self):
        if self._id is None:
            return "Entry()"
        else:
            return repr(self._id).replace('ObjectId', 'Entry')

    @staticmethod
    def _omitted_marker(names, count):
        """Build the (names, text) placeholder for `count` skipped checkpoints."""
        return (names, '#Omitted {} checkpoints'.format(count))

    @staticmethod
    def _omitted_count(text):
        """
        Return the count encoded in an '#Omitted N checkpoints' placeholder,
        or None when `text` is not exactly such a placeholder.
        """
        prefix, suffix = Entry._OMITTED_PREFIX, Entry._OMITTED_SUFFIX
        if not isinstance(text, str):
            return None
        if not (text.startswith(prefix) and text.endswith(suffix)):
            return None
        digits = text[len(prefix):len(text) - len(suffix)]
        return int(digits) if digits.isdecimal() else None
        
    @property
    def history(self):
        """
        List of (names, src) pairs from the oldest entry up to this one.

        'src' entries contribute (names, src). Each run of checkpoint, binary
        and file entries that is followed by a newer 'src' entry, or that ends
        the walk at a cached entry, is collapsed into a single
        (names, '#Omitted N checkpoints') placeholder, where N counts the
        checkpoints and names is the union of the binary/file names. A run
        that reaches the root without meeting a 'src' entry is dropped.

        Long results are stored in `jobdb.entry_history_cache`; when the walk
        reaches a cached entry the cached tail is reused. Cached lists are
        never modified in place.
        """
        cache = self.jobdb.entry_history_cache
        # One query per entry, limited to the fields the walk needs
        fields = {'type': True, 'names': True, 'src': True, 'prev': True}

        res = []  # built newest-first, reversed before returning
        current = self
        num_checkpoints = 0
        checkpoint_names = set()
        cache_result = False
        
        while current is not None:
            if current in cache:
                tail = cache[current]
                if tail and (num_checkpoints or checkpoint_names):
                    # Fold the pending run into the cached leading
                    # placeholder if there is one, otherwise emit it as-is
                    cached_count = Entry._omitted_count(tail[0][1])
                    if cached_count is not None:
                        num_checkpoints += cached_count
                        checkpoint_names = checkpoint_names | tail[0][0]
                        tail = tail[1:]
                    if num_checkpoints > 30:
                        cache_result = True
                    res.append(Entry._omitted_marker(checkpoint_names, num_checkpoints))
                res.extend(tail)
                break

            doc = self.jobdb.entries.find_one({'_id' : current._id}, fields)
            entry_type = doc['type']
            assert (entry_type in Entry.valid_types)

            if entry_type == Entry.TYPE_SRC:
                if num_checkpoints or checkpoint_names:
                    if num_checkpoints > 30:
                        cache_result = True
                    res.append(Entry._omitted_marker(checkpoint_names, num_checkpoints))
                    num_checkpoints = 0
                    checkpoint_names = set()
                res.append((set(doc['names']), doc['src']))
            elif entry_type == Entry.TYPE_CHECKPOINT:
                num_checkpoints += 1
            else:
                # TYPE_BINARY / TYPE_FILE: only their names are reported
                checkpoint_names |= set(doc['names'])

            prev_id = doc['prev']
            current = None if prev_id is None else Entry(self.jobdb, prev_id)
        
        if cache_result or len(res) > 50:
            cache[self] = res
        
        return res[::-1]

In [653]:
class Job(metaclass=Struct):
    """
    Handle on one record of the `jobs` collection.

    Stores only the database wrapper and the record's `_id`; every property
    queries the database when it is read.
    """
    def __init__(self, jobdb, _id):
        """Refer to the job with the given `_id` (a str is converted to ObjectId)."""
        job_id = ObjectId(_id) if isinstance(_id, str) else _id
        self.jobdb = jobdb
        self._id = job_id
        
    def __eq__(self, other):
        if type(other) is not type(self):
            return False
        return other._id == self._id and other.jobdb == self.jobdb
    
    def __hash__(self):
        return hash(type(self)) ^ hash(self._id)
    
    @property
    def entry(self):
        """The Entry referenced by this job's `entry` field."""
        # NOTE(review): if the stored entry id is None, Entry(jobdb, None)
        # inserts a brand-new root entry -- confirm whether that is intended.
        record = self.jobdb.jobs.find_one({'_id': self._id}, {'entry' : True})
        return Entry(self.jobdb, record['entry'])
    
    @property
    def info(self):
        """
        The full job record as a dict, with `entry` wrapped in an Entry
        (unless it is None) and `tags` defaulting to an empty list.
        """
        record = self.jobdb.jobs.find_one({'_id': self._id})
        entry_id = record['entry']
        if entry_id is not None:
            record['entry'] = Entry(self.jobdb, entry_id)

        record.setdefault('tags', [])
        return record
    
    @property
    def analyses(self):
        """Lazily yield an Analysis for every analysis record attached to this job."""
        matching = self.jobdb.analyses.find({'job': self._id}, {'_id' : True})
        for analysis_record in matching:
            yield Analysis(self.jobdb, analysis_record['_id'])

In [654]:
@impl(Job)
def rename(self, new_name):
    """Store `new_name` in the `name` field of this job's record."""
    selector = {'_id': self._id}
    change = {'$set': {'name': new_name}}
    self.jobdb.jobs.update_one(selector, change)

In [657]:
@impl(Job)
def change_tags(self, new_tags):
    """Replace the `tags` field of this job's record with `new_tags`."""
    selector = {'_id': self._id}
    change = {'$set': {'tags': new_tags}}
    self.jobdb.jobs.update_one(selector, change)

In [96]:
class Analysis(metaclass=Struct):
    """
    Handle on one record of the `analyses` collection.

    Stores only the database wrapper and the record's `_id`; every property
    queries the database when it is read.
    """
    def __init__(self, jobdb, _id):
        """Refer to the analysis with the given `_id` (a str is converted to ObjectId)."""
        analysis_id = ObjectId(_id) if isinstance(_id, str) else _id
        self.jobdb = jobdb
        self._id = analysis_id
        
    def __eq__(self, other):
        if type(other) is not type(self):
            return False
        return other._id == self._id and other.jobdb == self.jobdb
    
    def __hash__(self):
        return hash(type(self)) ^ hash(self._id)
       
    @property
    def job(self):
        """The Job this analysis belongs to."""
        record = self.jobdb.analyses.find_one({'_id': self._id}, {'job' : True})
        return Job(self.jobdb, record['job'])
    
    @property
    def info(self):
        """The full analysis record as a dict, with `job` wrapped in a Job."""
        record = self.jobdb.analyses.find_one({'_id': self._id})
        record['job'] = Job(self.jobdb, record['job'])
        return record
    
    @property
    def results(self):
        """
        Lazily yield the result records of this analysis.

        Starts at the record named by `analysis_entry` and follows the `prev`
        links. Each yielded dict has `_id` and `prev` removed, and a non-None
        `checkpoint` value wrapped in an Entry.
        """
        next_id = self.info['analysis_entry']
        while next_id is not None:
            record = self.jobdb.analysis_entries.find_one({'_id': next_id})
            next_id = record.pop('prev')
            del record['_id']
            if record.get('checkpoint') is not None:
                record['checkpoint'] = Entry(self.jobdb, record['checkpoint'])
            yield record

In [457]:
@impl(Analysis)
def rename(self, new_name):
    """Store `new_name` in the `name` field of this analysis record."""
    selector = {'_id': self._id}
    change = {'$set': {'name': new_name}}
    self.jobdb.analyses.update_one(selector, change)

### Process management

In [130]:
import os, signal, psutil

In [655]:
@impl(Job)
def check_alive(self):
    """
    Check if job is alive.
    
    A job whose record has a `stop_time` is reported as not alive. Otherwise
    the recorded pid is looked up with psutil; if no such process exists the
    job record is updated accordingly (`stop_time` 'unknown', `status` 'Dead').

    Returns True if the job is considered alive, False otherwise.
    """
    info = self.info
    
    if info['stop_time'] is not None:
        return False

    # Use the record fetched above: a second `self.info` read would cost
    # another query and could come from a different snapshot of the record.
    # NOTE(review): the pid is looked up on the machine running this code,
    # while the record also stores a hostname -- confirm jobs are only
    # checked from the host that started them.
    alive = psutil.pid_exists(info['pid'])
    if not alive:
        self.jobdb.jobs.update_one({'_id': self._id},
                                   {'$set': {'stop_time': 'unknown',
                                             'status': 'Dead'}})
    return alive

In [132]:
@impl(Analysis)
def check_alive(self):
    """
    Check if an analyzer is alive.
    
    An analysis whose record has a `stop_time` is reported as not alive.
    Otherwise the recorded pid is looked up with psutil; if no such process
    exists the record is updated accordingly (`stop_time` 'unknown',
    `status` 'Dead').

    Returns True if the analyzer is considered alive, False otherwise.
    """
    info = self.info
    
    if info['stop_time'] is not None:
        return False

    # Use the record fetched above: a second `self.info` read would cost
    # another query and could come from a different snapshot of the record.
    # NOTE(review): the pid is looked up on the machine running this code --
    # confirm analyzers are only checked from the host that started them.
    alive = psutil.pid_exists(info['pid'])
    if not alive:
        self.jobdb.analyses.update_one({'_id': self._id},
                                       {'$set': {'stop_time': 'unknown',
                                                 'status': 'Dead'}})
    return alive

In [656]:
@impl(Job)
def kill(self):
    """
    Send SIGINT to the job's process if the job is still alive.

    Does nothing when `check_alive` reports the job as dead.
    """
    pid = self.info['pid']

    # Call the method on the instance: the module-level name `check_alive`
    # is defined twice in this file (for Job and for Analysis), so a bare
    # call may not reach the Job implementation.
    if not self.check_alive():
        return

    try:
        os.kill(pid, signal.SIGINT)
    except ProcessLookupError:
        # The process exited between the check and the signal; run the
        # check again so the record is marked as dead.
        self.check_alive()

In [138]:
@impl(Analysis)
def kill(self):
    """
    Send SIGINT to the analyzer's process if the analyzer is still alive.

    Does nothing when `check_alive` reports the analyzer as dead.
    """
    pid = self.info['pid']

    # Call the method on the instance: the module-level name `check_alive`
    # is defined twice in this file (for Job and for Analysis), so a bare
    # call is ambiguous.
    if not self.check_alive():
        return

    try:
        os.kill(pid, signal.SIGINT)
    except ProcessLookupError:
        # The process exited between the check and the signal; run the
        # check again so the record is marked as dead.
        self.check_alive()

### Diffing entries

In [None]:
import difflib

In [188]:
def unified_diff(a, b):
    """
    Yield the lines of a unified diff between the histories of two entries.

    The source texts of each history are joined with newlines, split back
    into lines (keeping line endings) and passed to difflib.unified_diff.

    Raises ValueError (when iteration starts) if either argument is not an
    Entry.
    """
    if not isinstance(a, Entry) or not isinstance(b, Entry):
        raise ValueError("unified_diff expects two Entry objects")

    src_a = [item[1] for item in a.history]
    src_b = [item[1] for item in b.history]

    # The previous implementation built a SequenceMatcher and popped its
    # matching blocks from the end. The last block is always the zero-size
    # terminator, so it only ever diffed the two complete histories; do
    # that directly.
    yield from difflib.unified_diff('\n'.join(src_a).splitlines(True),
                                    '\n'.join(src_b).splitlines(True))

# Attach the diff generator to Entry so it can be called either as
# `Entry.unified_diff(a, b)` or as `a.unified_diff(b)`.
Entry.unified_diff = unified_diff

## Dashboard code

In [192]:
import ipywidgets as ipw

In [244]:
class EntrySource(ipw.FlexBox):
    """Widget that shows the source history of an entry, one <pre> block per item."""
    def __init__(self, entry):
        super().__init__()
        self.entry = entry
        self.refresh()
    
    def refresh(self):
        """Rebuild the HTML from the entry's current history."""
        from html import escape

        # Escape the source text so characters such as '<' and '&' in the
        # code are shown literally instead of being parsed as markup.
        blocks = ["<pre>" + escape(src) + "</pre>" for (_, src) in self.entry.history]
        self.children = (ipw.HTML("".join(blocks)),)

#EntrySource(job.entry)

In [251]:
class EntryDiff(ipw.FlexBox):
    """Widget that shows a colour-coded unified diff between two entries."""
    def __init__(self, entry_a, entry_b):
        super().__init__()
        self.entry_a = entry_a
        self.entry_b = entry_b
        self.refresh()
    
    def refresh(self):
        """Recompute the diff and rebuild the HTML."""
        from html import escape

        style = "border:none; padding:1px; margin:0px;"
        # First character of a diff line -> colour used to render it
        colours = (('-', 'red'), ('+', 'green'), ('@', 'purple'))

        pieces = []
        for line in Entry.unified_diff(self.entry_a, self.entry_b):
            # Escape the diff text so source characters such as '<' are
            # displayed literally rather than parsed as markup.
            text = escape(line)
            for prefix, colour in colours:
                if line.startswith(prefix):
                    text = '<span style="color:{}">{}</span>'.format(colour, text)
                    break
            pieces.append('<pre style="{}">{}</pre>'.format(style, text))
        self.children = (ipw.HTML("".join(pieces)),)

#EntryDiff(job.entry, job2.entry)

In [707]:
class JobInfo(ipw.FlexBox):
    """
    Widget showing the details of one job: id, editable name and tags,
    status, the parameters found in its source history and, while the job
    is alive, machine information and a kill button.
    """
    def __init__(self, job):
        super().__init__()
        self.job = job
        self.refresh()
        
    def refresh(self):
        """Re-read the job from the database and rebuild all child widgets."""
        from html import escape

        job_alive = self.job.check_alive() # this can modify info, so do it first
        info = self.job.info
        job_history = self.job.entry.history
        # Escaped copy of the record for everything interpolated into HTML
        safe_info = {key: escape(str(value)) for key, value in info.items()}
        
        children = []
        
        children.append(ipw.HTML("""
            <h3> Job Information </h3>
            <p> Id: <span style="font-family:monospace">{_id}</span></p>
        """.format(**safe_info)))
        
        self.name_field = ipw.Text(value=info['name'])
        rename_button = ipw.Button(description='Rename')
        rename_button.on_click(self.rename)
        
        children.append(ipw.HBox((
                    ipw.HTML("Name:"),
                    self.name_field,
                    rename_button,
                )))
        
        self.tags_field = ipw.Text(value=' '.join(info['tags']))
        change_tags_button = ipw.Button(description='Change')
        change_tags_button.on_click(self.change_tags)
        
        children.append(ipw.HBox((
                    ipw.HTML('Tags:'),
                    self.tags_field,
                    change_tags_button,
                )))
        
        # TODO: non-hacky way finding number of checkpoints
        # The count is read back from the text of the last history item,
        # which is an '#Omitted N checkpoints' placeholder when the history
        # ends in checkpoints. An empty history has nothing to parse.
        num_checkpoints = 'unknown'
        if job_history:
            s = job_history[-1][1].split('Omitted')
            if len(s) == 2:
                words = s[1].split()
                if words:
                    num_checkpoints = words[0]
        
        children.append(ipw.HTML("""
            <h4> Status </h4>
            <p> {status} </p>
            <p> {start_time} &#8212; {stop_time} </p>
            <p> Number of checkpoints: {num_checkpoints} </p>
        """.format(num_checkpoints=escape(str(num_checkpoints)), **safe_info)))
        
        
        children.append(ipw.HTML("""
            <h4> Parameters </h4>
        """))
        
        # History items whose source mentions '_ParamContainer' are treated
        # as parameter definitions of the form `<name>.<var> = <value>`
        parameter_srcs = [x for x in job_history if '_ParamContainer' in x[1]]
        for names, src in parameter_srcs:
            if not names:
                # No name to look for in the source text
                continue
            name = list(names)[0]
            prefix = name + '.'
            pairs = []
            for line in src.splitlines():
                if not line.startswith(prefix):
                    continue
                var, sep, value = line[len(prefix):].partition('=')
                if not sep:
                    # e.g. a method call on the container, not an assignment
                    continue
                pairs.append((var, value))

            children.append(ipw.HBox((
                        ipw.HTML('<p style="font-family:monospace;">{}.</p>'.format(escape(name))),
                        ipw.HTML(
                            "\n".join(['<p style="font-family:monospace">{}={}</p>'.format(
                                           escape(var), escape(value))
                                       for var, value in sorted(pairs)])
                        )
                    )))
        
        if job_alive:
            children.append(ipw.HTML("""
                <h4> Machine info </h4>
                <p> User: {user} </p>
                <p> Hostname: {hostname} </p>
                <p> PID: {pid} </p>
            """.format(**safe_info)))
            
            kill_button = ipw.Button(description='Kill')
            kill_button.on_click(self.kill_job)
            children.append(kill_button)
        
        
        # Display
        self.children = children
    
    def rename(self, *_):
        """Button callback: store the text of the name field as the job name."""
        new_name = self.name_field.value
        self.job.rename(new_name)
        self.refresh()
        
    def change_tags(self, *_):
        """Button callback: store the whitespace-separated tags of the tags field."""
        # split() without an argument ignores repeated/leading/trailing
        # whitespace, so no empty-string tag is ever stored
        new_tags = sorted(set(self.tags_field.value.split()))
        self.job.change_tags(new_tags)
        self.refresh()
    
    def kill_job(self, *_):
        """Button callback: ask the job to stop."""
        self.job.kill()


#JobInfo(job)

In [458]:
class AnalysisInfo(ipw.FlexBox):
    """
    Widget showing the details of one analysis: ids, editable name, status,
    the variables present in its results and, while the analyzer is alive,
    machine information and a kill button.
    """
    def __init__(self, analysis):
        super().__init__()
        self.analysis = analysis
        self.refresh()
        
    def refresh(self):
        """Re-read the analysis from the database and rebuild all child widgets."""
        from html import escape

        analysis_alive = self.analysis.check_alive() # this can modify info, so do it first
        info = self.analysis.info
        # Escaped copy of the record for everything interpolated into HTML;
        # `job` stays an object because the template reads its `_id` attribute
        safe_info = {key: escape(str(value)) for key, value in info.items()}
        safe_info['job'] = info['job']
        
        results = list(self.analysis.results)
        # Every key that occurs in any result, minus the bookkeeping fields
        keys = sorted(set().union(*results) - {'_id', 'checkpoint', 'prev'})
        
        children = []
        
        children.append(ipw.HTML("""
            <h3> Analysis Information </h3>
            <p> Id: <span style="font-family:monospace">{_id}</span></p>
            <p> Job id: <span style="font-family:monospace">{job._id}</span></p>
        """.format(**safe_info)))
        
        self.name_field = ipw.Text(value=info['name'])
        rename_button = ipw.Button(description='Rename')
        rename_button.on_click(self.rename)
        
        children.append(ipw.HBox((
                    ipw.HTML("Name:"),
                    self.name_field,
                    rename_button,
                )))
        
        children.append(ipw.HTML("""
            <h4> Status </h4>
            <p> {status} </p>
            <p> {start_time} &#8212; {stop_time} </p>
            <p> Number of checkpoints: {num_checkpoints} </p>
        """.format(num_checkpoints=len(results), **safe_info)))
        
        # One line per variable with the value types seen in the last 50
        # results of the list (a missing key counts as NoneType)
        var_lines = ["<h4> Variables </h4>"]
        for key in keys:
            type_names = sorted({type(x.get(key, None)).__name__ for x in results[-50:]})
            var_lines.append("""<p style="font-family:monospace">{} : {}</p>""".format(
                escape(str(key)), ", ".join(type_names)))
        
        children.append(ipw.HTML("".join(var_lines)))
        
        if analysis_alive:
            children.append(ipw.HTML("""
                <h4> Machine info </h4>
                <p> User: {user} </p>
                <p> Hostname: {hostname} </p>
                <p> PID: {pid} </p>
            """.format(**safe_info)))
            
            # The caption is passed as the `description` keyword, like every
            # other Button in this file
            kill_button = ipw.Button(description='Kill')
            kill_button.on_click(self.kill_analysis)
            children.append(kill_button)
        
        
        # Display
        self.children = children
    
    def rename(self, *_):
        """Button callback: store the text of the name field as the analysis name."""
        new_name = self.name_field.value
        self.analysis.rename(new_name)
        self.refresh()
    
    def kill_analysis(self, *_):
        """Button callback: ask the analyzer to stop."""
        self.analysis.kill()


#AnalysisInfo(analysis)

In [338]:
class Listings(ipw.FlexBox):
    """
    Vertical list with one row per item: a hide button, a status line and
    two action buttons ('2nd' and 'info').

    on_hide(item, row) is called by the hide button, calc_status(item)
    supplies the status text, and on_second(item) / on_info(item) are called
    by the two action buttons.
    """
    def __init__(self, iterable, on_hide, calc_status, on_second, on_info):
        super().__init__(orientation='vertical')

        # Each button carries its item (and row) as attributes; the shared
        # handlers below read them back from the clicked button.
        def hide_clicked(button):
            return on_hide(button._item, button._listing)

        def second_clicked(button):
            return on_second(button._item)

        def info_clicked(button):
            return on_info(button._item)

        rows = []
        for entry in iterable:
            row = ipw.HBox()
            close_btn = ipw.Button(description='X', button_style='danger')
            label = ipw.HTML("""<p style="width:12em; overflow:hidden; font-family:monospace">{}</p>""".format(
                calc_status(entry)))
            label.margin = "5px"
            second_btn = ipw.Button(description = '2nd')
            details_btn = ipw.Button(description = 'info')
            row.children += (close_btn, label, second_btn, details_btn)
            row.padding = "2px"

            close_btn._item = entry
            close_btn._listing = row
            close_btn.on_click(hide_clicked)
            second_btn._item = entry
            second_btn.on_click(second_clicked)
            details_btn._item = entry
            details_btn.on_click(info_clicked)
            rows.append(row)
        
        self.children = rows

# Demo cell: a listing of three plain strings. The hide callback closes the
# row widget, the status text is the item itself, and the other two buttons
# do nothing.
Listings(['a', 'b', 'c'],
             lambda _, x:(None, x.close()),
             lambda x: x,
             lambda x:None,
             lambda x:None)

In [438]:
class SourceTab(ipw.FlexBox):
    """
    Tab showing the source of a primary job, optionally as a diff against a
    secondary job picked from a dropdown of previously seen jobs.
    """
    def __init__(self, primary, secondary=None):
        super().__init__()
        self.primary = primary
        self.secondary = secondary
        # Jobs offered in the "Compare with" dropdown
        self.suggestions = [] if secondary is None else [secondary]
        
        self.refresh()
    
    def refresh(self):
        """Rebuild the header, the dropdown and the contents area."""
        if self.primary is None:
            self.children = [ipw.HTML("No job selected")]
            return

        # The primary job itself is never offered for comparison
        options = [('None', None)]
        for candidate in self.suggestions:
            if not (candidate == self.primary):
                options.append((candidate.info['name'], candidate))
        
        if self.secondary is None:
            selected_label = 'None'
        else:
            assert self.secondary in self.suggestions
            selected_label = self.secondary.info['name']
        
        self.picker = ipw.Dropdown(options=options, selected_label=selected_label)
        
        header = ipw.HTML("""
            <h3> Job Source </h3>
            <p> Id: <span style="font-family:monospace">{_id}</span></p>
            <p> Name: {name} </p>
        """.format(**self.primary.info))
        
        compare_row = ipw.HBox((
                    ipw.HTML('Compare with:'),
                    self.picker
                ))
        
        self.contents_proxy = ipw.Proxy(None)
        
        self.children = [header, compare_row, self.contents_proxy]
        
        self.picker.on_trait_change(self.picker_update)
        self.refresh_contents()
        
    def picker_update(self, *_):
        """Dropdown callback: switch the comparison job when the selection changed."""
        chosen = self.picker.value
        if chosen != self.secondary:
            self.secondary = chosen
            self.refresh_contents()
    
    def refresh_contents(self):
        """Show the plain source, or the diff from the secondary to the primary job."""
        if self.secondary is None:
            view = EntrySource(self.primary.entry)
        else:
            view = EntryDiff(self.secondary.entry, self.primary.entry)
        self.contents_proxy.child = view
            
    def set_primary(self, val):
        """Show `val` as the primary job and clear the comparison."""
        if val == self.primary:
            return
        
        # Remember the outgoing primary so it can be picked for comparison
        if self.primary and self.primary not in self.suggestions:
            self.suggestions.append(self.primary)
        self.primary = val
        self.secondary = None
        
        self.refresh()
    
    def set_secondary(self, val):
        """Compare the primary job against `val`, adding it to the suggestions."""
        if val == self.secondary:
            return
        
        if val not in self.suggestions:
            self.suggestions.append(val)
        
        self.secondary = val
        
        self.refresh()

# Demo cell: with no primary job the tab only shows "No job selected".
SourceTab(None)
#SourceTab(job, job2)

In [450]:
class AnalysisTab(ipw.FlexBox):
    """
    Tab with two columns for one job: the left column lists the job's
    analyses under a 'Job info' button, the right column shows either the
    selected analysis or the job itself.
    """
    def __init__(self, job=None, analysis=None):
        """
        job      -- the job to show, or None for an empty tab
        analysis -- analysis selected initially, or None to show the job info
        """
        super().__init__()
        self.job = job
        # Honour the constructor argument (it used to be ignored)
        self.analysis = analysis
        
        self.refresh()
        
    def refresh(self):
        """Rebuild the whole layout, then fill in both columns."""
        if self.job is None:
            self.children = [ipw.HTML("No job selected")]
            return
        
        panel_1 = ipw.VBox()
        panel_1.width = "50%"
        panel_2 = ipw.VBox()
        panel_2.width = "50%"
        
        self.column_1 = ipw.Proxy(None)
        self.column_2 = ipw.Proxy(None)
        
        job_info_button = ipw.Button(description='Job info')
        job_info_button.on_click(self.on_job_info_button)
        
        panel_1.children = [job_info_button, self.column_1]
        panel_2.children = [self.column_2]
        
        container = ipw.HBox([panel_1, panel_2])
        self.children = [container]
        
        self.refresh_contents()
        
    def refresh_contents(self):
        """Refill the analysis list and the detail column of the existing layout."""
        if self.job is None:
            # Nothing to show; the columns only exist once a job was laid out
            return

        self.column_1.child = Listings(self.job.analyses,
             on_hide = lambda a, w: None,
             calc_status = lambda a: a.info['name'],
             on_second = self.on_analysis_second,
             on_info = self.on_analysis_info,
            )
            
        if self.analysis is not None:
            self.column_2.child = AnalysisInfo(self.analysis)
        else:
            self.column_2.child = JobInfo(self.job)
    
    def on_analysis_info(self, a):
        """Listing callback: show analysis `a` in the detail column."""
        if a != self.analysis:
            self.analysis = a
            self.refresh_contents()
    
    def on_analysis_second(self, a):
        """Listing callback: publish `a` as `__main__.analysis`, then show it."""
        # Makes the picked analysis available under the name `analysis` in
        # the __main__ namespace
        import __main__
        __main__.analysis = a
        self.on_analysis_info(a)
        
    def on_job_info_button(self, *_):
        """Button callback: go back from an analysis to the job details."""
        self.analysis = None
        self.refresh_contents()
        
    def set_job(self, job):
        """Show `job` (or an empty tab for None) and clear the selected analysis."""
        if self.job == job:
            return

        # Going from or to "no job" changes the layout itself, so it needs
        # a full refresh; otherwise only the column contents change.
        needs_layout = self.job is None or job is None
        self.job = job
        self.analysis = None
        if needs_layout:
            self.refresh()
        else:
            self.refresh_contents()
        
# Demo cell: with no job the tab only shows "No job selected".
AnalysisTab(None)

In [470]:
import re

In [601]:
def run_jobs_query(db, query):
    """
    Run `query` against the jobs collection of `db`.

    Returns a list of Job objects on success. If the query raises, the
    error message is returned as a str instead, so callers must check the
    type of the result.
    """
    try:
        # Only the ids are needed to build the Job handles
        records = list(db.jobs.find(query, {'_id': True}))
    except Exception as e:
        return "{}".format(e)
    
    return [Job(db, record['_id']) for record in records]

In [716]:
def find_jobs(db, id='', name='', tags='', only_running=False, exclude_status=''):
    """
    Search the jobs collection and return `(jobs, message)`.

    id             -- job id (str or ObjectId); when given, all other terms
                      are ignored
    name           -- regular expression matched against the job name
    tags           -- whitespace-separated tags that must all be present
    only_running   -- keep only jobs without a stop time that are still alive
    exclude_status -- regular expression; jobs whose status matches are dropped

    `jobs` is a list of Job objects (empty on any error) and `message` is a
    human-readable summary or error description. Invalid ids and invalid
    regular expressions are reported through the message, not raised.
    """
    if not (id or name or tags or only_running or exclude_status):
        return [], "Please specify a search term"

    if id:
        if not isinstance(id, ObjectId):
            try:
                id = ObjectId(id)
            except Exception:
                return [], "Invalid id"

        res = run_jobs_query(db, {'_id': id})
        if isinstance(res, str):
            return [], res

        msg = "Found job matching id" if res else "Id not found"
        return res, msg

    query = {}
    if name:
        try:
            query['name'] = re.compile(name)
        except re.error as e:
            return [], "Invalid name pattern: {}".format(e)

    if tags:
        query['tags'] = {'$all': tags.split()}

    if only_running:
        query['stop_time'] = None

    if exclude_status:
        try:
            query['status'] = {'$not': re.compile(exclude_status)}
        except re.error as e:
            return [], "Invalid status pattern: {}".format(e)

    res = run_jobs_query(db, query)
    if isinstance(res, str):
        return [], res

    if only_running:
        # A missing stop time is not proof of life; check each process
        res = [x for x in res if x.check_alive()]

    return res, "Found {} results".format(len(res))

In [649]:
class JobSearch(ipw.FlexBox):
    """
    Search form for jobs with a results list underneath.

    on_job_info(job) and on_job_second(job) are called by the 'info' and
    '2nd' buttons of each result row.
    """
    def __init__(self, jobdb, on_job_info=lambda j: None, on_job_second=lambda j: None):
        super().__init__()
        self.jobdb = jobdb
        self.on_job_info = on_job_info
        self.on_job_second = on_job_second
        self.refresh()
    
    def refresh(self):
        """Create the (empty) form fields, the search button and the result areas."""
        self.id_field = ipw.Text()
        self.name_field = ipw.Text()
        self.tags_field = ipw.Text()
        self.only_running_box = ipw.Checkbox()
        self.exclude_status_field = ipw.Text()
        
        self.search_button = ipw.Button(description='Search')
        self.status_area = ipw.HTML()
        self.results_area = ipw.Proxy(None)
        
        text_fields = (self.id_field, self.name_field, self.tags_field, self.exclude_status_field)
        for field in text_fields:
            field.width = '66%'
        
        self.search_button.on_click(self.do_search)
        
        def caption(txt):
            # Right-aligned, fixed-width label so the fields line up
            return ipw.HTML('<p style="width:7em; text-align:right">{}</p>'.format(txt))

        def row(txt, *widgets):
            return ipw.HBox((caption(txt),) + widgets)
        
        self.children = [
            row('Id:', self.id_field),
            row('Name:', self.name_field),
            row('Tags:', self.tags_field),
            row('', self.only_running_box, ipw.HTML('&nbsp; Only running')),
            row('Exclude status:', self.exclude_status_field),
            ipw.HTML('<hr style="width:100%"/>'),
            row('', self.search_button),
            self.status_area,
            self.results_area
        ]
    
    def do_search(self, *_):
        """Button callback: run the search and show the message and the results."""
        criteria = dict(
            id = self.id_field.value,
            name = self.name_field.value,
            tags = self.tags_field.value,
            only_running = self.only_running_box.value,
            exclude_status = self.exclude_status_field.value,
        )
        jobs, msg = find_jobs(self.jobdb, **criteria)
        
        self.status_area.value = msg

        listing = Listings(jobs,
                           on_hide = lambda j, w: w.close(),
                           calc_status = lambda j: j.info['name'],
                           on_second = self.on_job_second,
                           on_info = self.on_job_info,
                          )
        self.results_area.child = listing
        
#JobSearch(jobdb)    

In [648]:
class Dashboard(ipw.FlexBox):
    """
    Top-level widget: a job search panel on the left and, on the right,
    'Analysis' and 'Source' tabs for the selected job.
    """
    def __init__(self, jobdb):
        super().__init__(orientation='horizontal')
        self.jobdb = jobdb
        
        self.refresh()
        
    def refresh(self):
        """Create the search panel and both tabs from scratch."""
        search_panel = ipw.VBox()
        search_panel.width = "33%"
        detail_panel = ipw.VBox()
        detail_panel.width = "66%"
        
        # 'info' on a search result selects the primary job, '2nd' the
        # job to compare against
        self.job_search = JobSearch(self.jobdb, on_job_second=self.set_secondary, on_job_info=self.set_primary)
        self.analysis_tab = AnalysisTab(None)
        self.source_tab = SourceTab(None)
        
        tabs = ipw.Tab([self.analysis_tab, self.source_tab])
        for index, title in enumerate(('Analysis', 'Source')):
            tabs.set_title(index, title)
        
        search_panel.children = [self.job_search]
        detail_panel.children = [tabs]
        
        self.children = [search_panel, detail_panel]
        
    def set_primary(self, job):
        """Show `job` in both the source tab and the analysis tab."""
        self.source_tab.set_primary(job)
        self.analysis_tab.set_job(job)
    
    def set_secondary(self, job):
        """Use `job` as the comparison job of the source tab."""
        self.source_tab.set_secondary(job)
#Dashboard(db)

In [651]:
def make_default_dashboard():
    """Return a Dashboard backed by a JobDB created with its default arguments."""
    return Dashboard(JobDB())

In [None]:
# The magic string below signals to the notebook importing machinery that
# the examples section below should not be imported into any other modules

#NBIMPORT_STOP

In [708]:
Dashboard(jobdb)

In [27]:
jobdb = JobDB()

In [102]:
job = Job(jobdb, '56301716006f87241d481184')

In [139]:
job2 = Job(jobdb, '563ee5ef006f872846cb6fd1')

In [144]:
import difflib

In [164]:
s = difflib.SequenceMatcher(a=_153, b=_154)

In [165]:
s.get_matching_blocks()

[Match(a=1, b=1, size=2),
 Match(a=6, b=4, size=2),
 Match(a=24, b=15, size=1),
 Match(a=26, b=18, size=1),
 Match(a=32, b=25, size=1),
 Match(a=35, b=27, size=0)]

In [166]:
_165[0]

Match(a=1, b=1, size=2)

In [167]:
_166.a

1

In [187]:
# Scratch version of the diffing code, run on two earlier cell outputs
# (`_153` / `_154` are IPython output-history names -- presumably the two
# lists of source strings; verify against the notebook).
a = _153
b = _154
s = difflib.SequenceMatcher(a=a, b=b)

starta = 0
startb = 0
matches = s.get_matching_blocks()
non_matches = []
while True:
    # pop() takes the LAST block; get_matching_blocks() ends with a
    # zero-size terminator, so the loop stops after one pass that spans
    # both complete lists.
    match = matches.pop()
    enda = match.a
    endb = match.b
    non_matches.append((a[starta:enda], b[startb:endb]))
    if match.size == 0:
        break
    starta = enda + match.size
    startb = endb + match.size
    
# Print a unified diff for each collected pair of slices
for va, vb in non_matches:
    for line in difflib.unified_diff('\n'.join(va).splitlines(True), '\n'.join(vb).splitlines(True)):
        print(line, end='')

--- 
+++ 
@@ -2,21 +2,19 @@
 class _ParamContainer(object): pass
 hparams = _ParamContainer()
 
+hparams.batch_size = 10
+
+hparams.max_epochs = 40
+
+hparams.reverse_input = True
+
+hparams.dim_hidden = 400
+
 hparams.learning_rate = 0.097
 
-hparams.max_epochs = 20
-
-hparams.reverse_input = True
-
-hparams.dim_embeddings = 10
-
-hparams.patience = 500000
-
-hparams.dim_hidden = 50
-
-hparams.batch_size = 1
-
-hparams.frequency = 40
+hparams.dim_embeddings = 400
+
+hparams.frequency = 4000
 
 # Imports for both remote engines and locally
 from theano import *
@@ -31,11 +29,8 @@
 import stat, subprocess
 from collections import OrderedDict
 floatX = theano.config.floatX
-theano.config.optimizer = 'fast_compile'
 theano.config.mode = 'FAST_RUN'
-#theano.config.mode = 'DebugMode'
-theano.config.DebugMode.check_py = False
-theano.config.DebugMode.check_strides = False
+theano.config.optimizer = 'fast_run'
 DATA_PREFIX = os.path.expanduser('~/data')
 def make_shared(datasets, x_cast=None,

In [121]:
job.info

{'_id': ObjectId('56301716006f87241d481184'),
 'entry': Entry('56304fc3006f87241d48125c'),
 'hostname': 'tomato',
 'name': '2015-10-27T17:30:14-nikita@tomato:9245',
 'pid': 9245,
 'start_time': datetime.datetime(2015, 10, 27, 17, 30, 14, 421000),
 'status': 'Dead',
 'stop_time': 'unknown',
 'user': 'nikita'}

In [120]:
job.check_alive()

False

In [105]:
psutil.pid_exists(9245)

False

In [135]:
list(job.analyses)[-1]

<__main__.Analysis at 0x7fcb90112f28>

In [136]:
_135.check_alive()

False