# Refactored `prodstatus` basics

Needed imports:

In [1]:
from tempfile import TemporaryDirectory
from unittest import mock
from os import environ
from pathlib import Path
import pprint
import dataclasses

In [2]:
from jira import JIRA
import pandas as pd
import numpy as np
import yaml

In [3]:
from lsst.ctrl.bps import BpsConfig
from lsst.prodstatus.Workflow import Workflow
from lsst.prodstatus.Step import Step
from lsst.prodstatus.Campaign import Campaign

Make a temporary test directory that will get automatically cleaned up:

In [4]:
test_dir_itself = TemporaryDirectory()
test_dir = test_dir_itself.name
test_dir

'/tmp/tmpbao5anzs'

Make an object that takes the same calls as an instance of the jira client, but instead of actually doing anything just keeps track of how it's called.

In [5]:
with mock.patch('jira.JIRA', spec=True) as MockJIRA:
    jira = MockJIRA()

In [6]:
jira

<jira.client.JIRA at 0x7fed610affa0>

## Workflows

### A minimal workflow

In [7]:
test_workflow_name = 'Alice'
bps_config_path = Path(
    environ["PRODSTATUS_DIR"], "tests", "data", "bps_config_base.yaml"
)
bps_config = BpsConfig(bps_config_path)

These are enough to make the required elements of a `Workflow` instance:

In [8]:
workflow = Workflow(bps_config, test_workflow_name)
print(workflow)

Workflow
name: Alice
issue name: None
step: None
band: all
BPS config dataQuery: instrument='LSSTCam-imSim' and skymap='DC2'
exposures: None



You can save it as files in a directory:

In [9]:
test_dir_itself = TemporaryDirectory()
test_dir = test_dir_itself.name
test_dir

'/tmp/tmpa0t1_lgr'

In [10]:
workflow.to_files(test_dir)

In [11]:
!find {test_dir}

/tmp/tmpa0t1_lgr
/tmp/tmpa0t1_lgr/Alice
/tmp/tmpa0t1_lgr/Alice/workflow.yaml
/tmp/tmpa0t1_lgr/Alice/bps_config.yaml


In [12]:
!cat {test_dir}/{test_workflow_name}/workflow.yaml

band: all
name: Alice


### Read it back again

In [13]:
reread_workflow = Workflow.from_files(test_dir, 'Alice')
print(reread_workflow)

Workflow
name: Alice
issue name: None
step: None
band: all
BPS config dataQuery: instrument='LSSTCam-imSim' and skymap='DC2'
exposures: None



### Split by band

In [14]:
band_workflows = workflow.split_by_band()

In [15]:
# Use a dedicated loop variable so the `workflow` object created earlier in
# the notebook is not silently replaced by the last per-band workflow.
for band_workflow in band_workflows:
    print(band_workflow)

Workflow
name: Alice_u
issue name: None
step: None
band: u
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'u')
exposures: None

Workflow
name: Alice_g
issue name: None
step: None
band: g
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'g')
exposures: None

Workflow
name: Alice_r
issue name: None
step: None
band: r
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'r')
exposures: None

Workflow
name: Alice_i
issue name: None
step: None
band: i
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'i')
exposures: None

Workflow
name: Alice_z
issue name: None
step: None
band: z
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'z')
exposures: None

Workflow
name: Alice_y
issue name: None
step: None
band: y
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'y')
exposures: None



### Splitting into groups of exposures

Let's make a table of exposures:

In [16]:
def make_random_exposure_table(num_exposures, bands=tuple('ugrizy'), random_number_seed=42, max_exp_id=10000):
    """Build a reproducible table of fake exposures for demonstrations.

    Parameters
    ----------
    num_exposures : `int`
        Number of exposures (rows) to generate.
    bands : sequence of `str`, optional
        Bands to draw from (with replacement) for each exposure.
    random_number_seed : `int`, optional
        Seed passed to `numpy.random.default_rng`.
    max_exp_id : `int`, optional
        Exposure ids are drawn without replacement from
        ``range(max_exp_id)``, so this must be at least ``num_exposures``.

    Returns
    -------
    exposures : `pandas.DataFrame`
        A table with columns ``band`` and ``exp_id``, sorted by ``exp_id``
        and indexed on ``exp_id`` (which is also kept as a column).

    Raises
    ------
    ValueError
        Raised by numpy if ``num_exposures`` exceeds ``max_exp_id``.
    """
    rng = np.random.default_rng(random_number_seed)
    # Draw bands first, then ids: the order of the draws determines the
    # result for a given seed, so it must not be changed.
    exposure_bands = rng.choice(np.array(bands), num_exposures)
    exp_ids = rng.choice(max_exp_id, num_exposures, replace=False)
    exposures = (
        pd.DataFrame({'band': exposure_bands, 'exp_id': exp_ids})
        .sort_values('exp_id')
        .set_index('exp_id', drop=False)
    )
    return exposures

In [17]:
exposures = make_random_exposure_table(10, bands=('g', 'r'))
exposures

Unnamed: 0_level_0,band,exp_id
exp_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1280,g,1280
4503,r,4503
5130,g,5130
5260,g,5260
7171,r,7171
7352,r,7352
7606,g,7606
7857,g,7857
8396,g,8396
9748,r,9748


Now, create a "big" instance of `Workflow` that does all the exposures:

In [18]:
big_workflow = Workflow(bps_config, 'Bob', exposures=exposures)

Now let's split this workflow up into several workflows, each covering a group of about 4 exposures:

In [19]:
# Split the full-exposure workflow into groups of roughly 4 exposures each.
exp_workflows = big_workflow.split_by_exposure(4)
# A dedicated loop variable keeps the notebook-level `workflow` intact.
for exp_workflow in exp_workflows:
    print(exp_workflow)

Workflow
name: Bob_1
issue name: None
step: None
band: all
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (exposure >= 1280) and (exposure <= 5260)
number of exposures: 4
min exposure id: 1280
max exposure id: 5260
exposure counts by band: {'g': 3, 'r': 1}

Workflow
name: Bob_2
issue name: None
step: None
band: all
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (exposure >= 7171) and (exposure <= 7606)
number of exposures: 3
min exposure id: 7171
max exposure id: 7606
exposure counts by band: {'r': 2, 'g': 1}

Workflow
name: Bob_3
issue name: None
step: None
band: all
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (exposure >= 7857) and (exposure <= 9748)
number of exposures: 3
min exposure id: 7857
max exposure id: 9748
exposure counts by band: {'g': 2, 'r': 1}



## Step

Step creation takes a dictionary of parameters for `Workflow.split_by_exposure`:

In [20]:
exposure_groups = {
    'group_size': 4,
    'skip_groups': 0,
    'num_groups': None
}

Note that `skip_groups` defaults to 0, so it could be left off. A `num_groups` of `None` indicates "as many as it takes", and the default is `None`, so this could be left off as well:

In [21]:
help(Workflow.split_by_exposure)

Help on function split_by_exposure in module lsst.prodstatus.Workflow:

split_by_exposure(self, group_size=None, skip_groups=0, num_groups=None)
    Split the workflow by exposure number.
    
    Parameters
    ----------
    group_size : `int` optional
        The approximate size of the group. The default is None, which
        causes the method to return a single workflow with all
        exposures.
    skip_groups : `int` optional
        The number of groups to skip. The default is 0 (no skipped groups).
    num_groups : `int` optional
        The maximum number for groups. The default is None,
        for all groups
    
    Returns
    -------
    workflows : `List[Workflow]`
        A list of workflows.



Now, let's make a `Step`:

In [22]:
# Build a step whose workflows are split both by band and into the exposure
# groups described by `exposure_groups`.
step = Step.generate_new(
    'step99',
    base_bps_config=bps_config,
    split_bands=True,
    exposure_groups=exposure_groups,
    exposures=exposures,
    workflow_base_name='s99wf',
)

In [23]:
print(step)

Step
name: step99
issue name: None
split bands: True
exposure groups: {'group_size': 4, 'skip_groups': 0, 'num_groups': None}
workflows:
 - s99wf_step99_g_1 (issue None) with dataQuery ((instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'g')) and (exposure >= 1280) and (exposure <= 5260)
 - s99wf_step99_g_2 (issue None) with dataQuery ((instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'g')) and (exposure >= 7606) and (exposure <= 8396)
 - s99wf_step99_r (issue None) with dataQuery (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'r')


In [24]:
test_dir_itself = TemporaryDirectory()
test_dir = test_dir_itself.name
test_dir

'/tmp/tmp7tvqk3nq'

In [25]:
step.to_files(test_dir)

In [26]:
!find {test_dir}

/tmp/tmp7tvqk3nq
/tmp/tmp7tvqk3nq/step99
/tmp/tmp7tvqk3nq/step99/workflows
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_g_1
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_g_1/workflow.yaml
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_g_1/explist.txt
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_g_1/bps_config.yaml
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_r
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_r/workflow.yaml
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_r/explist.txt
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_r/bps_config.yaml
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_g_2
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_g_2/workflow.yaml
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_g_2/explist.txt
/tmp/tmp7tvqk3nq/step99/workflows/s99wf_step99_g_2/bps_config.yaml
/tmp/tmp7tvqk3nq/step99/step.yaml


In [27]:
!cat {test_dir}/{step.name}/step.yaml

exposure_groups:
    group_size: 4
    num_groups: null
    skip_groups: 0
name: step99
split_bands: true
workflows:
-   issue: null
    name: s99wf_step99_g_1
-   issue: null
    name: s99wf_step99_g_2
-   issue: null
    name: s99wf_step99_r


## Creating a campaign

In [28]:
campaign_def_dir = TemporaryDirectory()
campaign_def_dir_path = Path(campaign_def_dir.name)
campaign_def_dir_path

PosixPath('/tmp/tmpumjd14hx')

In [29]:
bps_config_path = campaign_def_dir_path.joinpath('base_bps_config.yaml')
with bps_config_path.open('wt') as bps_config_io:
    bps_config.dump(bps_config_io)
bps_config_path

PosixPath('/tmp/tmpumjd14hx/base_bps_config.yaml')

In [30]:
!head {bps_config_path}

campaign: v23_0_0_rc5/PREOPS-938
computeSite: LSST
defaultPreCmdOpts: --long-log --log-level=VERBOSE --log-file payload-log.json
executionButler:
  queue: DOMA_LSST_GOOGLE_MERGE
iddsServer: https://aipanda015.cern.ch:443/idds
maxJobsPerTask: 70000
payload:
  butlerConfig: s3://butler-us-central1-panda-dev/dc2/butler-external.yaml
  dataQuery: instrument='LSSTCam-imSim' and skymap='DC2'


In [31]:
explist_path = campaign_def_dir_path.joinpath('explist.txt')
exposures.to_csv(explist_path, header=False, index=False, sep=" ")
explist_path

PosixPath('/tmp/tmpumjd14hx/explist.txt')

In [32]:
!head {explist_path}

g 1280
r 4503
g 5130
g 5260
r 7171
r 7352
g 7606
g 7857
g 8396
r 9748


In [33]:
campaign_config = {
    'exposures': explist_path.as_posix(),
    'name': 'test_campaign',
    'steps': {
        'step1': {'base_bps_config': bps_config_path.as_posix(), 'exposure_groups': {}, 'split_bands': True},
        'step2': {'base_bps_config': bps_config_path.as_posix(), 'exposure_groups': {'group_size': 4}, 'split_bands': True}
    }
}

In [34]:
campaign_def_path = campaign_def_dir_path.joinpath('campaign.yaml')
with campaign_def_path.open('wt') as campaign_def_io:
    yaml.dump(campaign_config, campaign_def_io, indent=4)
campaign_def_path

PosixPath('/tmp/tmpumjd14hx/campaign.yaml')

In [35]:
!cat {campaign_def_path}

exposures: /tmp/tmpumjd14hx/explist.txt
name: test_campaign
steps:
    step1:
        base_bps_config: /tmp/tmpumjd14hx/base_bps_config.yaml
        exposure_groups: {}
        split_bands: true
    step2:
        base_bps_config: /tmp/tmpumjd14hx/base_bps_config.yaml
        exposure_groups:
            group_size: 4
        split_bands: true


In [36]:
campaign = Campaign.create_from_yaml(campaign_def_path)

In [37]:
print(campaign)

Campaign
name: test_campaign
issue name: None
steps:
 - step1 (issue None) with 2 workflows
 - step2 (issue None) with 3 workflows


In [38]:
# Print each step followed by its workflows. Dedicated loop names avoid
# overwriting the notebook-level `step` and `workflow` objects.
for campaign_step in campaign.steps:
    print(campaign_step)
    print()
    for step_workflow in campaign_step.workflows:
        print(step_workflow)

Step
name: step1
issue name: None
split bands: True
exposure groups: {}
workflows:
 - test_campaign_step1_g (issue None) with dataQuery (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'g')
 - test_campaign_step1_r (issue None) with dataQuery (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'r')

Workflow
name: test_campaign_step1_g
issue name: None
step: step1
band: g
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'g')
number of exposures: 6
min exposure id: 1280
max exposure id: 8396
exposure counts by band: {'g': 6}

Workflow
name: test_campaign_step1_r
issue name: None
step: step1
band: r
BPS config dataQuery: (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'r')
number of exposures: 4
min exposure id: 4503
max exposure id: 9748
exposure counts by band: {'r': 4}

Step
name: step2
issue name: None
split bands: True
exposure groups: {'group_size': 4}
workflows:
 - test_campaign_step2_g_1 (issue None) with dataQuery ((ins

In [39]:
campaign_dir = TemporaryDirectory()
campaign_dir_path = Path(campaign_dir.name)
campaign_dir_path

PosixPath('/tmp/tmpdtgc5lgd')

In [40]:
campaign.to_files(campaign_dir_path)

In [41]:
!find {campaign_dir_path}

/tmp/tmpdtgc5lgd
/tmp/tmpdtgc5lgd/test_campaign
/tmp/tmpdtgc5lgd/test_campaign/campaign.yaml
/tmp/tmpdtgc5lgd/test_campaign/steps
/tmp/tmpdtgc5lgd/test_campaign/steps/step2
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_r
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_r/workflow.yaml
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_r/explist.txt
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_r/bps_config.yaml
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_g_1
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_g_1/workflow.yaml
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_g_1/explist.txt
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_g_1/bps_config.yaml
/tmp/tmpdtgc5lgd/test_campaign/steps/step2/workflows/test_campaign_step2_g_2
/tmp/

In [42]:
!cat {campaign_dir_path}/{campaign.name}/campaign.yaml

exposures: /tmp/tmpumjd14hx/explist.txt
name: test_campaign
steps:
    step1:
        base_bps_config: /tmp/tmpumjd14hx/base_bps_config.yaml
        exposure_groups: {}
        split_bands: true
    step2:
        base_bps_config: /tmp/tmpumjd14hx/base_bps_config.yaml
        exposure_groups:
            group_size: 4
        split_bands: true


## Modification of campaigns, steps, and workflows

The `steps` member of a `Campaign` and the `workflows` member of `Step` are just lists of the corresponding objects: the user can add or remove them as desired.

For example, we can add a workflow to one of the steps in the `Campaign` defined above:

In [43]:
# Create a standalone workflow and attach it to the second step (index 1).
# Only `extra_workflow` is bound here; the unrelated `workflow` name is left alone.
extra_workflow = Workflow(bps_config, 'Carol')
campaign.steps[1].workflows.append(extra_workflow)
print(campaign)

Campaign
name: test_campaign
issue name: None
steps:
 - step1 (issue None) with 2 workflows
 - step2 (issue None) with 4 workflows


In [44]:
print(campaign.steps[1])

Step
name: step2
issue name: None
split bands: True
exposure groups: {'group_size': 4}
workflows:
 - test_campaign_step2_g_1 (issue None) with dataQuery ((instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'g')) and (exposure >= 1280) and (exposure <= 5260)
 - test_campaign_step2_g_2 (issue None) with dataQuery ((instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'g')) and (exposure >= 7606) and (exposure <= 8396)
 - test_campaign_step2_r (issue None) with dataQuery (instrument='LSSTCam-imSim' and skymap='DC2') and (band == 'r')
 - Carol (issue None) with dataQuery instrument='LSSTCam-imSim' and skymap='DC2'


Similarly, we can build a whole new step and add it:

In [45]:
# Generate a fresh step from the same BPS config and exposures, then add it
# to the end of the campaign's list of steps.
step = Step.generate_new(
    'extra_step',
    base_bps_config=bps_config,
    split_bands=True,
    exposure_groups=exposure_groups,
    exposures=exposures,
    workflow_base_name='my_extra_wf',
)
campaign.steps.append(step)
print(campaign)

Campaign
name: test_campaign
issue name: None
steps:
 - step1 (issue None) with 2 workflows
 - step2 (issue None) with 4 workflows
 - extra_step (issue None) with 3 workflows


We can also get rid of steps and workflows using `remove`, `del`, and other Python operations that remove elements from a list:

In [46]:
del campaign.steps[1]
print(campaign)

Campaign
name: test_campaign
issue name: None
steps:
 - step1 (issue None) with 2 workflows
 - extra_step (issue None) with 3 workflows
