In [None]:
# default_exp deps
# hide
# Base name of this notebook (without extension); used below to build its path.
_FNAME='deps'

import unittest
from unittest import mock
from nbdev.export import notebook2script
import os
# Shared TestCase instance so the test cells can call assert* methods without a test runner.
TESTCASE = unittest.TestCase()
# `_dh` is supplied by IPython (directory history); `_dh[0]` is presumably the
# directory the kernel started in — this line fails outside an IPython session.
_nbpath = os.path.join(_dh[0], _FNAME+'.ipynb')

In [None]:
#export 
import os
import sys
import logging
logger = logging.getLogger()
import collections
import argparse
import yaml
from dvcrecord.utils import maybe_yaml

In [None]:
#export

# Command line flag that switches recording on (registered by add_dvcr_commands).
DVC_RECORD_STRING = '--dvc_record'
# Command line flag that requests a dry run (registered by add_dvcr_commands).
DVC_DRY_RUN = '--dvc_dryrun'
# Flags that control dvcrecord itself.  NOTE(review): not used in this notebook;
# presumably consumers strip these from the command they record — confirm against callers.
DO_NOT_INCLUDE_IN_PIPELINE = [DVC_RECORD_STRING, DVC_DRY_RUN]

def add_dvcr_commands(parser):
    '''
    Attach the dvcrecord command line options to parser and return it.
    Params:
        parser: argparse.ArgumentParser
            The parser to extend.  It is modified in place.
    Adds two boolean flags (stored as `dvc_record` and `dvc_dryrun`) and the
    repeatable `--dvc_dep` option (stored as the list `dvc_dep`).
    '''
    boolean_flags = (
        (DVC_RECORD_STRING, 'dvc_record',
         'Include this flag to record the dvc params entries'),
        (DVC_DRY_RUN, 'dvc_dryrun',
         'Include this flag to not actually write the dvc.yaml but just display the params that would be written'),
    )
    for flag, dest, help_text in boolean_flags:
        parser.add_argument(flag, action='store_true', dest=dest, help=help_text)

    # Each occurrence appends one path.  Because of nargs='?', a bare
    # `--dvc_dep` with no value appends None, and the attribute stays None
    # when the option is never given.
    parser.add_argument('--dvc_dep', nargs='?', action='append',
                        help='Filepaths to the dependency inputs')
    return parser

def make_parser(parser=None):
    '''
    Return a parser that understands the dvcrecord options.
    Params:
        parser: argparse.ArgumentParser
            Optional existing parser to extend.  When omitted (or falsy) a
            fresh stock parser is created.
    '''
    if not parser:
        parser = argparse.ArgumentParser(description='Stock parser for dvcrecord commands')
    return add_dvcr_commands(parser)

In [None]:
def test_parser():
    '''The stock dvcrecord flags coexist with options added by the calling script.'''
    combined = make_parser()
    # a script-specific option layered on top of the stock parser
    combined.add_argument("--athing", type=int)

    with_flag = combined.parse_args(['--athing=10', '--dvc_record'])
    TESTCASE.assertEqual(with_flag.athing, 10)
    TESTCASE.assertTrue(with_flag.dvc_record)

    # the store_true flag is False when it is left off the command line
    without_flag = combined.parse_args([])
    TESTCASE.assertFalse(without_flag.dvc_record)
test_parser()

In [None]:
def test_dependency_cli():
    '''Repeating --dvc_dep collects every path, in command line order.'''
    cli_args = ['--dvc_dep', 'first.txt' ,'--dvc_dep', 'second.txt']
    parsed = make_parser().parse_args(cli_args)
    TESTCASE.assertEqual(parsed.dvc_dep, ['first.txt', 'second.txt'])

test_dependency_cli()

In [None]:
#export

class SearchStringNotFound(Exception):
    '''Raised by find_in_list when no element contains the search string.'''

def find_in_list(l, search_string):
    '''
    Return the first element of l that contains search_string.
    Params:
        l: list of str
            The candidates, searched in order.
        search_string: str
            Substring to look for (tested with `search_string in element`).
    Raises:
        SearchStringNotFound if no element contains search_string.
    '''
    # Stop at the first hit; there is no need to collect every match
    # when only the first one is returned.
    for entry in l:
        if search_string in entry:
            return entry
    raise SearchStringNotFound("{ss} not found in {l}".format(ss=search_string, l=l))

In [None]:
# Substring match: 'abc' comes before the exact match 'c', so it is the one returned.
TESTCASE.assertEqual(find_in_list(['abc', '123', 'c'], 'c'), 'abc')

# No element contains the search string, so the dedicated exception is raised.
with TESTCASE.assertRaises(SearchStringNotFound):
    find_in_list(['a','b'], 'c')

In [None]:
#export

class NoArgumentNamed(Exception):
    '''
    Error type for a missing named argument.
    NOTE(review): not raised anywhere in this notebook; confirm where it is used.
    '''

class MustSpecifyIndex(Exception):
    '''
    Raised by Dependency.from_previous_output when a stage has several outputs
    and neither an index nor a search_string was given to choose one.
    '''

class Dependency:
    '''
    Collects the file dependencies of a pipeline stage.

    Dependencies come from three places: paths registered explicitly with
    `register` / `register_sourcecode`, outputs of an earlier stage looked up
    in a pipeline file with `from_previous_output`, and paths read from a
    parsed command line with `from_cli`.  `render` merges the registered paths
    with the command line ones.
    '''
    def __init__(self, namespace=None, pipefile=None):
        '''
        Params:
            namespace:
                Optional parsed command line (e.g. an argparse.Namespace) that
                `from_cli` reads when it is not given one explicitly.
            pipefile: str
                Optional default path of the pipeline file used by
                `from_previous_output`.
        '''
        self.namespace = namespace
        self.deps = []
        self.pipefile = pipefile

    def register(self, *args):
        '''
        Join the path components in args with os.path.join, record the result
        as a dependency (a path is never recorded twice) and return it.
        '''
        fpath = os.path.join(*args)
        if fpath not in self.deps:
            self.deps.append(fpath)
        return fpath

    def from_cli(self, namespace=None, arg_name='dvc_dep'):
        '''
        Return the dependency paths given on the command line as a list.
        Params:
            namespace:
                Optional parsed command line.  Defaults to the one given to
                the constructor.
            arg_name: str
                Name of the namespace attribute holding the paths.
        Always returns a list; it is empty when there is no namespace, the
        attribute is missing, or the option was never given.
        '''
        namespace = namespace or self.namespace
        if not namespace:
            return []
        try:
            cli_deps = getattr(namespace, arg_name)
        except AttributeError:
            logger.info("Looked for dvc file dependencies in an argument named '{}' but it wasnt in the command line".format(arg_name))
            return []
        # argparse leaves an `append` option at None when it never appears
        if cli_deps is None:
            return []
        # a single path must not be iterated character by character
        if isinstance(cli_deps, str):
            return [cli_deps]
        # with nargs='?' a bare flag (no value) appends None; drop those entries
        return [dep for dep in cli_deps if dep is not None]

    def from_previous_output(self, stage:str, pipefile:str=None, index:int=None, search_string:str=None):
        '''
        Retrieve the name of the file of a previous output and register it as a
        dependency.  If the stage has only a single output it will be returned.
        Otherwise you must give either the index or a substring to match.
        Params:
            stage: str
                The name of the stage to pull the output from
            pipefile: str
                Optional path to the pipeline file.  e.g. dvc.yaml
                Defaults to the pipefile given to the constructor.
            index: int
                Optional list index.  Give if the stage has several outputs and you want to specify which one
            search_string: str
                Optional.  Search for this string in the previous outputs.  Used to specify which of several previous outputs you want from a given stage.
                Takes precedence over index.
        Raises:
            TypeError if no pipefile was given here or to the constructor.
            MustSpecifyIndex if the stage does not have exactly one output and
                neither index nor search_string was given.
            SearchStringNotFound (from find_in_list) if no output contains search_string.
        '''
        key = 'outs'
        pipefile = pipefile or self.pipefile
        if pipefile is None:
            # open(None) would raise the same exception type with an unhelpful message
            raise TypeError("No pipefile given: pass pipefile= here or to Dependency()")

        with open(pipefile) as p:
            pipeline = yaml.safe_load(p)

        fnames = pipeline['stages'][stage][key]
        if search_string:
            fpath = find_in_list(fnames, search_string)
        elif len(fnames)==1:
            fpath = fnames[0]
        elif index is None:
            # also covers a stage with no outputs, which used to fail on fnames[None]
            raise MustSpecifyIndex("Must give an index or search_string.  Possible files: {}".format(fnames))
        else:
            fpath = fnames[index]
        return self.register(fpath)

    def register_sourcecode(self, command=None):
        '''
        Register the script being run as a dependency and return its path.
        Params:
            command: list of str
                Optional command line whose first element is the script.
                Defaults to a copy of sys.argv.
        '''
        command = command or sys.argv[:]
        return self.register(command[0])

    def render(self, as_yaml=False, *args, **kwargs):
        '''
        Return the registered dependencies followed by the command line ones.
        Params:
            as_yaml: bool
                Passed through to maybe_yaml (presumably selects YAML text
                over the plain list — see dvcrecord.utils.maybe_yaml).
            *args, **kwargs:
                Forwarded to `from_cli`.
        '''
        # copy so that rendering never mutates the registered list
        returnme = self.deps[:]

        deps_in_cli = self.from_cli(*args, **kwargs)
        if deps_in_cli:
            returnme.extend(deps_in_cli)
        return maybe_yaml(returnme, as_yaml=as_yaml)


In [None]:
import argparse
from tempfile import TemporaryDirectory

from dvcrecord.utils import write_yaml

def test_dep():
    '''Command line dependencies and registered files are combined by render.'''
    cli_args = ['--dvc_dep', 'first.txt' ,'--dvc_dep', 'second.txt']
    parsed = make_parser().parse_args(cli_args)
    dep = Dependency(namespace=parsed)
    TESTCASE.assertEqual(dep.from_cli(), ['first.txt', 'second.txt'])

    # a namespace without the dvc_dep attribute yields no command line dependencies
    TESTCASE.assertEqual(dep.from_cli(argparse.Namespace()), [])

    with TemporaryDirectory() as tempdir:
        depfile = dep.register(tempdir, 'testdep.yaml')
        write_yaml({'try':'me'}, tempdir, 'testdep.yaml')
        # opening fails unless the registered path is an existing, readable file
        with open(depfile) as f:
            f.read()
        # registered paths come first, command line paths after
        expected = [depfile, 'first.txt', 'second.txt']
        TESTCASE.assertEqual(dep.render(as_yaml=False), expected)
test_dep()

In [None]:
def test_from_previous():
    '''Outputs of an earlier stage can be selected by index or by substring.'''
    pipeline = yaml.safe_load('''
    stages:
      previous_stage:
        cmd: python previous.py
        outs:
        - first.txt
        - second.txt
    ''')
    with TemporaryDirectory() as tempdir:
        #write a pipeline file
        pipefile = write_yaml(pipeline, folder=tempdir, fname='dvc.yaml')
        dep = Dependency(pipefile=pipefile)
        # Two outputs and no selector: the call itself must raise.  (An
        # assertEqual wrapped around it could never be evaluated.)
        with TESTCASE.assertRaises(MustSpecifyIndex):
            dep.from_previous_output(stage='previous_stage')
        # index=0 is a valid selector even though 0 is falsy
        TESTCASE.assertEqual(dep.from_previous_output(stage='previous_stage', index=0), 'first.txt')
        TESTCASE.assertEqual(dep.from_previous_output(stage='previous_stage', index=1), 'second.txt')
        TESTCASE.assertEqual(dep.from_previous_output(stage='previous_stage', search_string="second"), 'second.txt')
        # every selected output was registered, each only once
        TESTCASE.assertEqual(dep.deps, ['first.txt', 'second.txt'])

test_from_previous()

In [None]:
# Run nbdev's export on this notebook (path built in the first cell).
notebook2script(_nbpath)

Converted deps.ipynb.
