In [1]:
import factory

In [None]:
# Design sketch: declare factories, sources, targets and case collectors in one Scenario(...) call.
# NOTE(review): Scenario, CaseCollector, pipe, pemi, pt, StudentFactory and ClassFactory are not
# defined or imported above this cell in the visible notebook -- confirm where they come from.
with Scenario(
    name='must be unique?',
    pipe=pipe,
    runner=pipe.flow,
    factories={
        'students': StudentFactory,
        'classes': ClassFactory
    },
    sources={
        'main': pipe.sources['main'],
        'student_ids': pipe.pipes['student_ids'].targets['main']
    },
    targets={
        'main': pipe.targets['main'],
        'create_students': pipe.pipes['create_students'].sources['main']
    },
    target_case_collectors={
        # Reference StudentFactory directly: `factories` is only a keyword argument of this same
        # call and no such variable is defined in the visible notebook, so `factories['students']`
        # would raise NameError.
        'main': CaseCollector(subject_field='student_id', factory=StudentFactory, factory_field='external_id')
    }
) as scenario:

    # Example rows for the mocked 'student_ids' source. The placeholders are filled from the
    # student factory's fields; `.field(...)` is assumed to return something indexable by
    # case number -- TODO confirm.
    # Earlier idea for `stu`: self.scenario.case_keys.cache('student_ids', 'student_id')
    ex_student_ids = pemi.data.Table(
        '''
        | external_id | uuid         | merged_into_student_id |
        | -           | -            | -                      |
        | {stu[1]}    | {stuUUID[1]} |                        |
        | {stu[2]}    | {stuUUID[2]} |                        |
        '''.format(
            stu=scenario.factories['students'].field('student_id'),
            stuUUID=scenario.factories['students'].field('student_uuid')
        ),
        # Earlier idea: self.pipe.pipes['lkp_student_uuids'].pipes['student_ids'].targets['main'].schema
        schema=scenario.sources['student_ids'].schema
    )

    # Background steps for the scenario (only collected in a list in this sketch).
    background = [
        pt.when.example_for_source(scenario.sources['student_ids'], ex_student_ids),
    ]


In [None]:
def scenario_factory(pipe):
    """Sketch of a helper that builds a ``Scenario`` bound to ``pipe``.

    Args:
        pipe: The pipe under test; forwarded as the ``pipe`` keyword argument.

    Returns:
        Whatever ``Scenario(pipe=pipe)`` returns.
    """
    return Scenario(
        pipe=pipe,
        # '...' -- placeholder for the remaining Scenario arguments (still to be designed).
        # It was a positional argument after a keyword argument, which is a SyntaxError.
    )

In [None]:
# If I could just make case keys more understandable, that would be an improvement.
#  I'm having trouble figuring out how to fully get rid of the need to define sources and targets,
#    mostly because the sources still need to be defined so they can be mocked
#    and the targets need to be mocked so we can relate the records to the case key factory


# Maybe I could auto-mock sources and targets, and put the schema in the mocked object, then reference sources/targets as scenario.sources['blah'].schema
# Then just replace case keys with key factories

In [None]:
# A Pemi factory should generate cached, case-specific records to use in tests
#  Would like to reuse functionality provided by factoryboy
#  Need to combine this concept with CaseKeyTracker


In [None]:
class ItkBatchStudentPipeScenario(PipeScenario):
    """Design sketch of a class-based scenario declaration for ``ItkBatchStudentPipe``.

    The class body declares the pipe under test, the record factories, the sources and
    targets to mock, and how target records relate back to factory-generated keys.
    NOTE(review): PipeScenario, app, mock_pipe, TargetCollectorPipe, CaseCollector,
    StudentFactory and ClassFactory are not defined above this cell in the visible
    notebook -- confirm where they come from.
    """
    __name__ = 'ItkBatchStudentPipeScenario'

    # The pipe under test, built without the 'to_update' / 'to_create' schema fields.
    pipe = app.pipes.itk_batch_student.ItkBatchStudentPipe(
        organization_uuid='orgUUID1',
        schema=app.pipes.itk_batch_student.SOURCE_SCHEMA.select(
            lambda field: field.name not in ('to_update', 'to_create')
        )
    )

    factories = {
        'students': StudentFactory,
        'classes': ClassFactory
    }

    # Q: How do I reference a source for a scenario, without necessarily having to explicitly define them?
    #     The only real sources I have are the pipe.sources, all others are impure pipes that have to be mocked.
    #     Maybe I shouldn't think about the ImpurePipe targets as actual sources, but as mocked pipes that generate data according to some factory.
    # Q: How do I declare that a particular target is associated with keys generated by a factory?
    #     Sometimes I'm going to be testing the actual targets of the pipe, other times, the sources of impure pipes.
    # Q: How do I mock out the behavior of an intermediate Impure pipe (like a lookup)?

    # Mock a target pipe with a case collector pipe
    mock_pipe(pipe.pipes['create_students'], TargetCollectorPipe, {'main': ['student_id']})
    # But then, what about the pipe targets?
    #   We could connect the targets to target collectors; but that seems like a lot of extra work
    #   I want this to be auto-discoverable when possible, and customizable when needed

    # Maybe I still have to define the sources, but they get auto-mocked with some default mocked pipe
    sources = {
        'main': pipe.sources['main'],
        'student_ids': pipe.pipes['student_ids'].targets['main']
    }
    # Followup Q: How would I define custom pipe mocks?  Do I need to?

    targets = {
        'main': pipe.targets['main'],
        'create_students': pipe.pipes['create_students'].sources['main']
    }

    case_collectors = {
        'main': CaseCollector(subject_field='student_id', factory=factories['students'], factory_field='external_id')
    }

    # (data subject, key fields) pairs. Each entry still needs to be associated with the right factory.
    # The commas between entries are required: without them Python tries to call the
    # first tuple with the next one as its arguments.
    keys = [
        (pipe.pipes['create_students'].sources['main'], ['student_id']),
        (pipe.pipes['create_student_classes'].sources['main'], ['student_uuid', 'class_id']),
        (pipe.pipes['create_classes'].sources['main'], ['class_id']),
    ]

    def background(self):
        """Mock the 'main' output of the ``student_ids`` pipe (sketch)."""
        # `pipe` is a class attribute, so inside a method it is reached through `self`.
        # `with` is a reserved word and cannot be written as a literal keyword argument,
        # so it is passed by unpacking a dict.
        # NOTE(review): mock_pipe is called with a different argument shape in the class
        # body above -- its signature is still undecided.
        mock_pipe(self.pipe.pipes['student_ids'], 'main', **{'with': 'blah'})
    
    
# Open the scenario and one (still empty) case inside it.
with ItkBatchStudentPipeScenario() as scenario, \
        scenario.case('it builds student creates') as case:
    pass


In [None]:
# Alternative sketch: keep pt.Scenario as it is and describe the mocking in a separate PipeFactory.
# NOTE(review): app, PipeFactory, pt, StudentFactory and ClassFactory are not defined above
# this cell in the visible notebook -- confirm where they come from.
pipe = app.pipes.itk_batch_student.ItkBatchStudentPipe(
    organization_uuid='orgUUID1',
    schema=app.pipes.itk_batch_student.SOURCE_SCHEMA.select(
        lambda field: field.name not in ('to_update', 'to_create')
    )
)

#### Missing something here....
mock_pipe = PipeFactory(
    pipe=pipe,
    factories={
        'students': StudentFactory,
        'classes': ClassFactory
    },
    # (data subject, key fields) pairs. Each entry still needs to be associated with the right factory.
    # The commas between entries are required: without them Python tries to call the
    # first tuple with the next one as its arguments.
    keys=[
        (pipe.pipes['create_students'].sources['main'], ['student_id']),
        (pipe.pipes['create_student_classes'].sources['main'], ['student_uuid', 'class_id']),
        (pipe.pipes['create_classes'].sources['main'], ['class_id']),
    ]
)

with pt.Scenario('ItkBatchStudentPipe', pipe) as scenario:
    with scenario.case('it builds student creates') as case:
        pass


In [None]:
# Should I create a Factory Builder that can use a schema to build a factory?
#  So, basically replace pemi.data.Table? <- Would have to support multi-record Factories
#  I still need a concept of a pemi.data.Table so I can do matches example comparisons...... or maybe that could be a whole separate functionality


In [15]:
import uuid

class PemiModel:
    """Minimal model object that keeps its keyword arguments on ``self.params``."""

    def __init__(self, **params):
        """Store every keyword argument, unchanged, as the ``params`` dict."""
        self.params = params

class StudentFactory(factory.Factory):
    """factory_boy factory that builds student records as plain dicts."""

    class Meta:
        # Build plain dicts instead of instances of a model class.
        model = dict

    # Sequence-numbered ids: 'stu0', 'stu1', ...
    student_id = factory.Sequence('stu{}'.format)
    # A new random UUID for every built record.
    student_uuid = factory.LazyFunction(uuid.uuid4)
    
    
# Build five students and print each generated record.
for i in range(5):
    print(StudentFactory())

{'student_id': 'stu0', 'student_uuid': UUID('3cfbbfd9-842f-46dc-8e30-2c8bd80ee870')}
{'student_id': 'stu1', 'student_uuid': UUID('37cdcb8e-63b3-44d5-98d0-05e814b6c789')}
{'student_id': 'stu2', 'student_uuid': UUID('53e3391c-41b9-4390-a898-b069a0ca0e95')}
{'student_id': 'stu3', 'student_uuid': UUID('5be09257-9655-4c9d-a106-044084d3fbd5')}
{'student_id': 'stu4', 'student_uuid': UUID('b69cb77c-0857-4073-98d5-6d1f1d5b9aa5')}


In [19]:
class FactoryCache:
    """Memoise the results of a zero-argument factory by lookup key.

    The first lookup of a key calls the wrapped factory and stores what it returns;
    later lookups of the same key return that stored object.
    """

    def __init__(self, factory):
        """Remember the factory callable and start with an empty cache."""
        self.factory = factory
        self.cache = {}

    def __getitem__(self, value):
        """Return the object cached under ``value``, building it on first access."""
        if value in self.cache:
            return self.cache[value]
        # First time this key is seen: build one object and remember it.
        built = self.factory()
        self.cache[value] = built
        return built

# Wrap StudentFactory in a cache and look up keys 0-4; each new key builds one student.
students = FactoryCache(StudentFactory)
for i in range(5):
    print(students[i])

{'student_id': 'stu15', 'student_uuid': UUID('aafec53f-e56b-4309-906a-4698f0c5f5ce')}
{'student_id': 'stu16', 'student_uuid': UUID('04b78e00-bbaa-49f2-a4ff-e0b101255aa2')}
{'student_id': 'stu17', 'student_uuid': UUID('cf4f8272-4bc8-47d5-b417-fec55d099fc3')}
{'student_id': 'stu18', 'student_uuid': UUID('6e21bc1c-20b2-4657-afaf-8fd8173a2839')}
{'student_id': 'stu19', 'student_uuid': UUID('24533e9a-2468-4e26-b494-7c4c7ce839d7')}


In [24]:
# Look up a string key instead of an integer.
students['flerg']

{'student_id': 'stu21',
 'student_uuid': UUID('e87b3607-3069-4c74-ac25-2d7d56de61c4')}