In [33]:
from azureml.core import Experiment, Environment, Workspace, Datastore, Dataset, Model, ScriptRunConfig
import os
import glob
# Get the current workspace from the local Azure ML config file.
# Later cells use this `ws` handle to look up the datastore, run, environment,
# compute target and experiment.
ws = Workspace.from_config()

In [18]:
%cd Satellite_ComputerVision
!git pull
%cd ..

Already up-to-date.
/mnt/batch/tasks/shared/LS_root/mounts/clusters/test-compute-instance/code/Users/mevans


In [34]:
# Registered datastore in this workspace that holds the image data.
datastore = Datastore.get(workspace=ws, datastore_name='solardatafilestore')

# (datastore, relative path) pairs, grouped by split.
cpk_train_path = (datastore, 'CPK_solar/training/')
nc_train_path = (datastore, 'NC_solar/training/')

cpk_eval_path = (datastore, 'CPK_solar/eval/')
nc_eval_path = (datastore, 'NC_solar/eval/')

test_path = (datastore, 'CPK_solar/predict/testpred6')

# Single-region file datasets.
cpk_train_dataset = Dataset.File.from_files(path=[cpk_train_path])
cpk_eval_dataset = Dataset.File.from_files(path=[cpk_eval_path])

nc_train_dataset = Dataset.File.from_files(path=[nc_train_path])
nc_eval_dataset = Dataset.File.from_files(path=[nc_eval_path])

# Combined datasets: when several paths are combined, the selected directories
# and their paths relative to the datastore are brought in.
# NOTE(review): the tree below is written with this notebook's datastore and
# folder names, derived from the paths above -- confirm against a real mount.
# mount folder
# |-solardatafilestore
# | |-CPK_solar
# | | |-training
# | | |-eval
# | |-NC_solar
# | | |-training
# | | |-eval
train_paths = [cpk_train_path, nc_train_path]
eval_paths = [cpk_eval_path, nc_eval_path]

train_dataset = Dataset.File.from_files(path=train_paths)
eval_dataset = Dataset.File.from_files(path=eval_paths)
test_dataset = Dataset.File.from_files(path=[test_path])

In [35]:
# Find the run corresponding to the model we want to register
run_id = 'solar-nc-cpk_1624989679_f59da7cf'

# Fetch that run from the workspace by its id.
# NOTE(review): the submit cell at the end of the notebook rebinds `run`, so
# re-running the registration cell after a submit would act on the new run.
run = ws.get_run(run_id)

In [36]:
# Name the model is registered under; also passed to the training script as --model_id.
model_name = 'solar'

In [5]:
# Register everything under the run's `outputs/` folder as a new version of
# `model_name`, carrying over the run's tags and linking the datasets used.
model = run.register_model(
    model_name=model_name,
    tags=run.tags,
    description='UNET model delineating ground mounted solar arrays in S2 imagery. Trained on multi-season data from Chesapeake Bay and NC',
    model_path='outputs/',
    # Use the SDK constant rather than a hand-typed string: the previous
    # 'Tensorflow' spelling does not match the system-supported framework name.
    model_framework=Model.Framework.TENSORFLOW,
    model_framework_version='2.0',
    datasets=[
        ('training', train_dataset),
        ('evaluation', eval_dataset),
        ('testing', test_dataset),
    ],
)
# Show the registered name, id and version, tab-separated.
print(model.name, model.id, model.version, sep='\t')

solar	solar:1	1


In [37]:
# use the azure folder as our script folder
# NOTE(review): `source` and `util_folder` are not referenced by any later cell
# shown here, and `script_folder` is a bare relative path while the pull cell
# ends with `%cd ..` -- confirm that `azure/` resolves from the kernel's working
# directory as intended.
source = 'Satellite_ComputerVision'
util_folder = 'utils'
script_folder = 'azure'
# Entry-point script inside `script_folder` that the training job runs.
script_file = 'train_solar.py'

In [44]:
# get our environment
# `Environment.list` returns a dict keyed by environment name.
envs = Environment.list(workspace=ws)
env_name = 'solar-training'

# Fail loudly on a missing or misspelled name. The previous `envs.get(...)`
# returned None silently, and that None was then passed on to ScriptRunConfig
# as `environment=None` instead of raising an error here.
if env_name not in envs:
    raise KeyError(
        "Environment {!r} was not found in this workspace".format(env_name)
    )
env = envs[env_name]

In [39]:
# define the compute target
# Looks up the existing compute target named 'mevans1' in the workspace;
# the subscript lookup raises if no target with that name is attached.
compute_target = ws.compute_targets['mevans1']

In [40]:
# Experiment in this workspace under which the training run is submitted.
experiment_name = 'solar-nc-cpk'
exp = Experiment(workspace = ws, name = experiment_name)

In [46]:
# Command-line arguments for the training script, as a flat [flag, value, ...] list.

# Data inputs: each dataset is passed in mount mode.
args = [
    '--train_data', train_dataset.as_mount(),
    '--eval_data', eval_dataset.as_mount(),
    '--test_data', test_dataset.as_mount(),
]

# Scalar options, kept as (flag, value) pairs so each setting reads as one unit.
script_options = (
    ('--model_id', model_name),
    ('--weight', 0.8),
    ('-lr', 0.0005),
    ('--epochs', 100),
    ('--batch', 16),
    ('--size', 7755),
    ('--kernel_size', 256),
    ('--response', 'landcover'),
)

# Flatten the pairs onto the end of the argument list, preserving their order.
args += [token for pair in script_options for token in pair]

In [45]:
# Bundle the script, its arguments, the compute target and the environment
# into a single run configuration for submission.
src = ScriptRunConfig(
    source_directory=script_folder,  # folder whose contents are sent with the job
    script=script_file,              # entry point inside that folder
    arguments=args,
    compute_target=compute_target,
    environment=env,
)

In [47]:
# Tags attached to the submitted run.
run_tags = {
    'splits': 'None',
    'model': 'Unet',
    'dataset': 'CPK+NC',
    'normalization': 'pixel',
    'epochs': '100-200',
}

# Run the training job; the bare `run` on the last line displays the run in the notebook.
run = exp.submit(config=src, tags=run_tags)
run