In [None]:
import logging

from pathlib import Path

from Pegasus.api import *

# Configure the root logger at DEBUG level so that every message logged at
# DEBUG severity or higher while this script runs is emitted.
logging.basicConfig(level=logging.DEBUG)

# Properties
# Settings are applied in the order listed; write() is called without
# arguments, so the API's default output location is used.
props = Properties()
for prop_key, prop_value in (
    ("pegasus.data.configuration", "nonsharedfs"),
    ("dagman.retry", "2"),
):
    props[prop_key] = prop_value
props.write()

# Site
# Two sites are described: "condorpool", which the workflow is planned
# against, and "stage", which is used as its staging site.
condorpool_site = (
    Site("condorpool", arch=Arch.X86_64, os_type=OS.LINUX)
    .add_pegasus_profile(style="condor")
    .add_condor_profile(universe="vanilla")
    .add_pegasus_profile(auxillary_local="true")
)

# The staging site exposes /tmp/ as its shared scratch directory through a
# file:// server that is registered for all operations.
staging_site = (
    Site("stage", arch=Arch.X86_64, os_type=OS.LINUX)
    .add_directories(
        Directory(Directory.SHARED_SCRATCH, "/tmp/").add_file_servers(
            FileServer("file:///tmp/", Operation.ALL)
        )
    )
    .add_pegasus_profile(auxillary_local="true")
)

# Register both sites and write the catalog out (default location).
sc = SiteCatalog()
sc.add_sites(condorpool_site, staging_site)
sc.write()

# Transformation
# The executable is the checkpoint_program.py that sits in the current
# working directory; it is registered on the "local" site and marked as
# stageable.
exe = Transformation(
    "checkpoint_program.py",
    site="local",
    pfn=str(Path(".").resolve() / "checkpoint_program.py"),
    is_stageable=True,
)

# Build the catalog, add the transformation and write it out in one chain;
# tc holds whatever write() returns.
tc = (
    TransformationCatalog()
    .add_transformations(exe)
    .write()
)

# Workflow
# A single job runs the transformation with the argument 180, declares
# saved_state.txt as its checkpoint file (staged out), sends stdout to
# output.txt, and carries the two Pegasus profiles checkpoint.time=1 and
# maxwalltime=2.
job = (
    Job(exe)
    .add_args(180)
    .add_checkpoint(File("saved_state.txt"), stage_out=True)
    .set_stdout("output.txt")
    .add_profiles(Namespace.PEGASUS, key="checkpoint.time", value=1)
    .add_profiles(Namespace.PEGASUS, key="maxwalltime", value=2)
)

'''
How the two profiles above map onto the pegasus-kickstart arguments:

.add_profiles(Namespace.PEGASUS, key="checkpoint.time", value=1)\
.add_profiles(Namespace.PEGASUS, key="maxwalltime", value=2)

checkpoint.time is given in minutes and is converted to seconds for kickstart
(hence -k 60). The KILL signal is sent at
(checkpoint.time + (maxwalltime - checkpoint.time) / 2) minutes, i.e.
(maxwalltime - checkpoint.time) / 2 = 0.5 minutes after checkpoint.time
(hence -K 30).

# Kickstart args generated for this job:
pegasus-kickstart  -n checkpoint_program.py -N ID0000001 -o output.txt -R condorpool  -s output.txt=output.txt -s saved_state.txt=saved_state.txt -L checkpoint-wf -T 2020-07-07T05:35:37+00:00 -k 60 -K 30 ./checkpoint_program_py
'''

# Wrap the job in a workflow named "checkpoint-wf".
wf = Workflow("checkpoint-wf").add_jobs(job)

# Plan and submit against "condorpool" (staging through "stage"), then wait
# for the run and report analysis and statistics. If a Pegasus client call
# fails, print the output attached to the error instead of a traceback.
try:
    (
        wf.plan(
            submit=True,
            sites=["condorpool"],
            staging_sites={"condorpool": "stage"},
        )
        .wait()
        .analyze()
        .statistics()
    )
except PegasusClientError as e:
    print(e.output)