In [1]:
!sudo pip3 install --upgrade pip keras==2.1.5 tensorflow==1.13.1 numpy pandas pillow sklearn optuna scikit-image  optkeras

Collecting pip
  Downloading pip-20.2.3-py2.py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 1.7 MB/s eta 0:00:01
[?25hCollecting keras==2.1.5
  Downloading Keras-2.1.5-py2.py3-none-any.whl (334 kB)
[K     |████████████████████████████████| 334 kB 9.0 MB/s eta 0:00:01
[?25hCollecting tensorflow==1.13.1
  Downloading tensorflow-1.13.1-cp36-cp36m-manylinux1_x86_64.whl (92.5 MB)
[K     |████████████████████████████████| 92.5 MB 20.0 MB/s eta 0:00:01   |▎                               | 808 kB 5.0 MB/s eta 0:00:19     |███▉                            | 11.2 MB 5.6 MB/s eta 0:00:15     |████████████▏                   | 35.2 MB 36.1 MB/s eta 0:00:02
[?25hCollecting numpy
  Downloading numpy-1.19.2-cp36-cp36m-manylinux2010_x86_64.whl (14.5 MB)
[K     |████████████████████████████████| 14.5 MB 617 kB/s eta 0:00:01
[?25hCollecting pandas
  Downloading pandas-1.1.2-cp36-cp36m-manylinux1_x86_64.whl (10.5 MB)
[K     |████████████████████████████████| 10.5 MB 26.

Collecting cycler>=0.10
  Downloading cycler-0.10.0-py2.py3-none-any.whl (6.5 kB)
Collecting pyperclip>=1.6
  Downloading pyperclip-1.8.0.tar.gz (16 kB)
Collecting colorama>=0.3.7
  Downloading colorama-0.4.3-py2.py3-none-any.whl (15 kB)
Using legacy 'setup.py install' for sklearn, since package 'wheel' is not installed.
Using legacy 'setup.py install' for termcolor, since package 'wheel' is not installed.
Using legacy 'setup.py install' for PrettyTable, since package 'wheel' is not installed.
Using legacy 'setup.py install' for pyperclip, since package 'wheel' is not installed.
Building wheels for collected packages: optuna
  Building wheel for optuna (PEP 517) ... [?25ldone
[?25h  Created wheel for optuna: filename=optuna-2.1.0-py3-none-any.whl size=321090 sha256=073d4452c1c7ae9e504091782416663cbcc087874840aab036ee78363fb0e3f6
  Stored in directory: /root/.cache/pip/wheels/23/13/e3/e2c767339ab685a3fae35e10741b5c4345369a901352cc8d5a
Successfully built optuna
Installing collected pac

In [11]:
#!/usr/bin/env python3
import os
import logging
from pathlib import Path
import requests 
from glob import glob
from zipfile import ZipFile
import pickle
import pandas as pd

# Ask the root logger to show DEBUG-level (and higher) messages.
# basicConfig() has no effect if the root logger already has handlers.
logging.basicConfig(level=logging.DEBUG)

#Import Pegasus API
from Pegasus.api import *

#Properties
# Collect the Pegasus property overrides in one place, then persist them
# with props.write().
# NOTE(review): "dagman.retry" presumably sets how often a failed job is
# retried and "-m 1" is forwarded to pegasus-transfer - confirm against
# the Pegasus properties reference.
props = Properties()
for prop_name, prop_value in (
    ("dagman.retry", "100"),
    ("pegasus.transfer.arguments", "-m 1"),
):
    props[prop_name] = prop_value
props.write()

#Replica Catalog
# Register every file the workflow reads with its physical location in the
# current working directory on the "local" site.
rc = ReplicaCatalog()
input_files = sorted(glob('*.jpg'))
in_files=[]

checkpoint_file = "checkpoint_file.hdf5"
if not os.path.isfile(checkpoint_file):
    # Create a placeholder so the checkpoint can be registered before any
    # training has run. The placeholder is the CSV dump of an empty DataFrame.
    # NOTE(review): despite the .hdf5 name this is CSV text - confirm that
    # VGG_model.py tolerates such a placeholder.
    pd.DataFrame(list()).to_csv(checkpoint_file)

# Resolve the working directory once rather than once per file.
work_dir = Path(".").resolve()

for image_name in input_files:
    # One File object per image, shared by the job input list and the catalog.
    image = File(image_name)
    in_files.append(image)
    rc.add_replica("local", image, str(work_dir / image_name))

# The PFN is passed as a str, the same way as for the image entries above.
rc.add_replica("local", checkpoint_file, str(work_dir / checkpoint_file))
rc.write()


#Transformation
# Directory on the "local" site that holds the task scripts.
SCRIPTS_DIR = "/home/scitech/shared-data/CatsAndDogs"


def _stageable_script(script_name):
    """Build the Transformation entry for one script in SCRIPTS_DIR.

    Args:
        script_name: File name of the script; used both as the
            transformation name and as the last component of its PFN.

    Returns:
        A Transformation on site "local" with is_stageable=True whose PFN
        is "<SCRIPTS_DIR>/<script_name>".
    """
    return Transformation(
        script_name,
        site="local",
        pfn="{}/{}".format(SCRIPTS_DIR, script_name),
        is_stageable=True,
    )


pre_process_resize = _stageable_script("preprocess1.py")
pre_process_augment = _stageable_script("Augmentation.py")
data_split = _stageable_script("Data_Split.py")
vgg_model = _stageable_script("VGG_model.py")

# Register all four transformations and write the catalog out.
tc = (
    TransformationCatalog()
    .add_transformations(
        pre_process_resize,
        pre_process_augment,
        data_split,
        vgg_model,
    )
    .write()
)

#Workflow
# Four jobs are declared below; with infer_dependencies=True their ordering
# is derived from the files each job lists as inputs and outputs.
wf = Workflow("Cats_and_Dogs", infer_dependencies=True)

# --- Job 1: preprocess1.py -------------------------------------------------
# Consumes every input image; declares one "resized_<name>" file per image
# plus two text files.
resized_images = File('resized_images.txt')
all_files = [File("resized_{}".format(f.lfn)) for f in in_files]
labels = File('labels.txt')

job_preprocess1 = (
    Job(pre_process_resize)
    .add_inputs(*in_files)
    .add_outputs(*all_files, resized_images, labels)
)

# --- Job 2: Augmentation.py ------------------------------------------------
aug_images_txt = File('augmentation.txt')
aug_labels_txt = File('aug_labels.txt')

# Number of "Aug_<stem>_<i><ext>" files declared per resized image.
# NOTE(review): presumably has to match what Augmentation.py writes - confirm.
AUGMENTATIONS_PER_IMAGE = 3

augmented_files = []
for resized in all_files:
    # Build the name from the logical file name directly (as done for
    # all_files above) instead of relying on str(File) and str.replace.
    stem, ext = os.path.splitext(resized.lfn)
    augmented_files.extend(
        File("Aug_{}_{}{}".format(stem, idx, ext))
        for idx in range(AUGMENTATIONS_PER_IMAGE)
    )

job_preprocess2 = (
    Job(pre_process_augment)
    .add_inputs(*all_files, labels)
    .add_outputs(aug_images_txt, aug_labels_txt, *augmented_files)
)

# --- Job 3: Data_Split.py --------------------------------------------------
training_data = File('training.pkl')
testing_data = File('testing.pkl')
val_data = File('validation.pkl')

job_data_split = (
    Job(data_split)
    .add_inputs(*augmented_files, labels)
    .add_outputs(training_data, testing_data, val_data)
)

# --- Job 4: VGG_model.py ---------------------------------------------------
model = File('model.h5')
csv_log = File("model_history_log.csv")

# The checkpoint file is attached with stage_out=True.
# NOTE(review): maxwalltime=1 looks deliberately short, presumably so the job
# is cut off and resumed from the checkpoint on retry - confirm the intent
# and the unit against the Pegasus profile documentation.
job_vgg_model = (
    Job(vgg_model)
    .add_checkpoint(File(checkpoint_file), stage_out=True)
    .add_inputs(*augmented_files, training_data, testing_data, val_data)
    .add_profiles(Namespace.PEGASUS, key="maxwalltime", value=1)
    .add_outputs(model, csv_log)
)

# Left as the cell's final expression so the notebook still displays the
# Workflow object returned by add_jobs.
wf.add_jobs(job_preprocess1, job_preprocess2, job_data_split, job_vgg_model)

<Pegasus.api.workflow.Workflow at 0x7f514c1bd1d0>

In [None]:
try:
    # Plan and submit the workflow, then chain wait(), analyze() and
    # statistics() on the result.
    (
        wf.plan(submit=True)
        .wait()
        .analyze()
        .statistics()
    )
except PegasusClientError as err:
    # Show the output captured from the failing Pegasus client call.
    print(err.output)


################
# pegasus-plan #
################
[main] WARN  schema.JsonMetaSchema  - Unknown keyword $defs - you should define your own Meta Schema. If the keyword is irrelevant for validation, just use a NonValidationKeyword
[main] WARN  schema.JsonMetaSchema  - Unknown keyword additionalItems - you should define your own Meta Schema. If the keyword is irrelevant for validation, just use a NonValidationKeyword
[main] WARN  schema.JsonMetaSchema  - Unknown keyword examples - you should define your own Meta Schema. If the keyword is irrelevant for validation, just use a NonValidationKeyword
2020.10.01 09:36:40.867 UTC:
2020.10.01 09:36:40.872 UTC:   -----------------------------------------------------------------------
2020.10.01 09:36:40.878 UTC:   File for submitting this DAG to HTCondor           : Cats_and_Dogs-0.dag.condor.sub
2020.10.01 09:36:40.883 UTC:   Log of DAGMan debugging messages                 : Cats_and_Dogs-0.dag.dagman.out
2020.10.01 09:36:40.889 UTC:   Log of 

[[1;32m######################################[0m------------]  76.0% ..Running ([1;32mCompleted: 19[0m, [1;33mQueued: 0[0m, [1;36mRunning: 1[0m, [1;31mFailed: 0[0m)