In [1]:
import logging, os, sys
import numpy as np
import urllib.request
import shutil
import subprocess
import zipfile
import pickle
from pathlib import Path
import pandas as pd
from Pegasus.api import *

In [2]:
from util_workflow import add_input_wf_files
from util_workflow import create_file_objects, create_file_objects_postfix, create_file_objects_postfix_range

In [3]:
from util_workflow import download_data, unzip_flatten_data, return_corrupted_files, return_input_files
from util_workflow import split_data_filenames, add_input_tune_model,create_tar_and_pkl, create_pkl

In [4]:
# --- Pegasus API setup ---
from Pegasus.api import *

# Emit DEBUG-level log records from this notebook's Python process.
logging.basicConfig(level=logging.DEBUG)

# Workflow properties are gathered in one mapping, copied into a Properties
# object in declaration order, and then written out.
# NOTE(review): "dagman.retry" presumably controls how often a failed job is
# retried, and "-m 1" is presumably forwarded to pegasus-transfer -- confirm
# both against the Pegasus configuration reference.
pegasus_settings = {
    "dagman.retry": "2",
    "pegasus.transfer.arguments": "-m 1",
}
props = Properties()
for setting_name, setting_value in pegasus_settings.items():
    props[setting_name] = setting_value
props.write()

In [5]:
# --- Notebook configuration -------------------------------------------------

# Remote archive with the cats-vs-dogs images and related local names.
dataset_link = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip"
zip_data = "kagglecatsanddogs_3367a.zip"
directory_to_extract_to = "."

DOWNLOAD_DATA = False  # when truthy, download_data(dataset_link) is called below
DATASET_SIZE = 12      # forwarded to return_input_files() in a later cell
DATA_DIR = "dev_data/" # directory holding the workflow's input images
UTILS_DIR = "utils/"   # not referenced in the visible cells

# The remaining constants are not referenced in the visible cells.
arch_names = ["basicnet", "densenet121", "vgg16"]
CATS = "PetImages/Cat"
DOGS = "PetImages/Dog"
# Integer class label per image sub-directory (cats -> 0, dogs -> 1).
LABELS = {CATS: 0, DOGS: 1}
NUM_EPOCHS = 4
NUM_TRIALS = 3

# Create the data directory if it is missing. exist_ok=True makes the call
# idempotent without a separate existence check (no check-then-create race);
# it still raises FileExistsError if DATA_DIR exists but is not a directory.
os.makedirs(DATA_DIR, exist_ok=True)

if DOWNLOAD_DATA:
    download_data(dataset_link)

In [6]:
# Avoid corrupted files for now
corrupted_files = return_corrupted_files("corrupted_files.txt")
# Get names of image files that will serve as inputs to the workflow
input_file_names = return_input_files(corrupted_files, DATASET_SIZE, DATA_DIR, LABELS)

In [7]:
# Split the input names into train (70%), validation (10%) and test (20%) sets.
(
    train_filenames,
    val_filenames,
    test_filenames,
    files_split_dict,
) = split_data_filenames(input_file_names)

In [8]:
rc = ReplicaCatalog()

# The train, validation and test subsets are processed separately but in the
# same way, so every step below is applied to the three lists in that order.
filename_splits = (train_filenames, val_filenames, test_filenames)

# Inputs of preprocessing step 1: Pegasus File objects are created and added
# to the replica catalog.
input_preprocess1_train, input_preprocess1_val, input_preprocess1_test = [
    add_input_wf_files(split_names, DATA_DIR, rc)
    for split_names in filename_splits
]

# Outputs of preprocessing step 1: File objects carrying the step-1 postfix.
postfix_job1 = "_proc1.jpg"
output_preprocess1_train, output_preprocess1_val, output_preprocess1_test = [
    create_file_objects_postfix(split_names, postfix_job1)
    for split_names in filename_splits
]

# Outputs of preprocessing step 2: File objects built from a postfix and a
# range size.
# NOTE(review): presumably range_num files are produced per input name --
# confirm in util_workflow.create_file_objects_postfix_range.
postfix = "_proc2_"
range_num = 4
output_preprocess2_train, output_preprocess2_val, output_preprocess2_test = [
    create_file_objects_postfix_range(split_names, postfix, range_num)
    for split_names in filename_splits
]

In [10]:
# Write out the replica catalog that was populated with the workflow inputs.
rc.write()

In [11]:
# Create the transformation catalog and describe the two preprocessing
# executables that will be added to it.
tc = TransformationCatalog()

# Both scripts sit in bin/ below the directory the notebook runs in.
# (Path(".").parent is Path(".") itself, so resolving "." gives the same path.)
bin_dir = Path(".").resolve() / "bin"

# Data preprocessing part 1
preprocess_tc1 = Transformation(
    "preprocess1",
    site="local",
    pfn=str(bin_dir / "data_preprocessing1.py"),
    is_stageable=True,
)

# Data preprocessing part 2
preprocess_tc2 = Transformation(
    "preprocess2",
    site="local",
    pfn=str(bin_dir / "data_preprocessing2.py"),
    is_stageable=True,
)

In [12]:
# Register both preprocessing transformations, then write the catalog out.
# (tune_model_vgg16 and choose_best_model are not registered here.)
tc.add_transformations(
    preprocess_tc1,
    preprocess_tc2,
)
tc.write()

In [14]:
# With infer_dependencies=True, job dependencies are inferred from the input
# and output files that each job declares.
wf = Workflow("catVsdog-test-wf", infer_dependencies=True)

# ------------------------- preprocess 1 jobs ---------------------------------
# One job per subset: takes the registered input files and declares the
# step-1 output files.
job_preprocess1_train = (
    Job(preprocess_tc1)
    .add_inputs(*input_preprocess1_train)
    .add_outputs(*output_preprocess1_train)
)

job_preprocess1_val = (
    Job(preprocess_tc1)
    .add_inputs(*input_preprocess1_val)
    .add_outputs(*output_preprocess1_val)
)

job_preprocess1_test = (
    Job(preprocess_tc1)
    .add_inputs(*input_preprocess1_test)
    .add_outputs(*output_preprocess1_test)
)

# ------------------------- preprocess 2 jobs ---------------------------------
# One job per subset: takes the step-1 output files as inputs and declares
# the step-2 output files.
job_preprocess2_train = (
    Job(preprocess_tc2)
    .add_inputs(*output_preprocess1_train)
    .add_outputs(*output_preprocess2_train)
)

job_preprocess2_val = (
    Job(preprocess_tc2)
    .add_inputs(*output_preprocess1_val)
    .add_outputs(*output_preprocess2_val)
)

job_preprocess2_test = (
    Job(preprocess_tc2)
    .add_inputs(*output_preprocess1_test)
    .add_outputs(*output_preprocess2_test)
)



In [15]:
# Add the six preprocessing jobs to the workflow.
# (job_tune_model_vgg16 and job_choose_best_model are not added here.)
preprocessing_jobs = [
    job_preprocess1_train,
    job_preprocess1_val,
    job_preprocess1_test,
    job_preprocess2_train,
    job_preprocess2_val,
    job_preprocess2_test,
]
wf.add_jobs(*preprocessing_jobs)


<Pegasus.api.workflow.Workflow at 0x7fe2c07d8390>

In [16]:
# Plan and submit the workflow, wait for it, then run the analyzer and the
# statistics step. A Pegasus client error is reported by printing its
# captured output rather than letting the exception propagate.
try:
    (
        wf.plan(submit=True)
        .wait()
        .analyze()
        .statistics()
    )
except PegasusClientError as err:
    print(err.output)


################
# pegasus-plan #
################
2021.02.09 20:23:21.016 UTC:
2021.02.09 20:23:21.021 UTC:   -----------------------------------------------------------------------
2021.02.09 20:23:21.027 UTC:   File for submitting this DAG to HTCondor           : catVsdog-test-wf-0.dag.condor.sub
2021.02.09 20:23:21.032 UTC:   Log of DAGMan debugging messages                 : catVsdog-test-wf-0.dag.dagman.out
2021.02.09 20:23:21.037 UTC:   Log of HTCondor library output                     : catVsdog-test-wf-0.dag.lib.out
2021.02.09 20:23:21.042 UTC:   Log of HTCondor library error messages             : catVsdog-test-wf-0.dag.lib.err
2021.02.09 20:23:21.047 UTC:   Log of the life of condor_dagman itself          : catVsdog-test-wf-0.dag.dagman.log
2021.02.09 20:23:21.052 UTC:
2021.02.09 20:23:21.057 UTC:   -no_submit given, not submitting DAG to HTCondor.  You can do this with:
2021.02.09 20:23:21.068 UTC:   -----------------------------------------------------------------------


[[1;32m####################################[0m] 100.0% ..Success ([1;32mCompleted: 18[0m, [1;33mQueued: 0[0m, [1;36mRunning: 0[0m, [1;31mFailed: 0[0m)



####################
# pegasus-analyzer #
####################
Your database is compatible with Pegasus version: 5.1.0dev

************************************Summary*************************************

Submit Directory   : /home/scitech/shared-data/pegasus-catdog-wf-master/scitech/pegasus/catVsdog-test-wf/run0011
Total jobs         :     18 (100.00%)
# jobs succeeded   :     18 (100.00%)
# jobs failed      :      0 (0.00%)
# jobs held        :      0 (0.00%)
# jobs unsubmitted :      0 (0.00%)


######################
# pegasus-statistics #
######################
Your database is compatible with Pegasus version: 5.1.0dev

#
# Pegasus Workflow Management System - http://pegasus.isi.edu
#
# Workflow summary:
#   Summary of the workflow execution. It shows total
#   tasks/jobs/sub workflows run, how many succeeded/failed etc.
#   In case of hierarchical workflow the calculation shows the
#   statistics across all the sub workflows.It shows the following
#   statistics about tasks, job