In [2]:
import azureml.core
from azureml.core import Experiment, Workspace, Dataset, Datastore, ScriptRunConfig
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
import os
import shutil
import glob
from os.path import join
import tensorflow as tf
from sys import path
import numpy as np

# Report which version of the Azure ML core SDK is installed.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.34.0


In [None]:
# Read the workspace settings from the config.json file in the current folder.
ws = Workspace.from_config()

# Print tab-separated metadata about the workspace.
workspace_info = (ws.name, ws.location, ws.resource_group)
print('\t'.join(str(field) for field in workspace_info))

# Show the registered datastores as the cell output.
ws.datastores

In [44]:
# Fetch the registered datastore that contains the image data for this workspace.
datastore = Datastore.get(ws, 'autoencoderblob')

# Each dataset location is a (datastore, relative folder) pair.
train_path, eval_path = (datastore, 'train/'), (datastore, 'eval/')

# File datasets over the training and evaluation folders.
train_dataset = Dataset.File.from_files(path=[train_path])
eval_dataset = Dataset.File.from_files(path=[eval_path])


In [5]:
# Handle to the experiment under which the training run is submitted.
experiment_name = 'sentinel-autoencoder'
exp = Experiment(ws, experiment_name)

In [6]:
# Look up the training environment by name among the workspace's environments.
ENV_NAME = 'acd-training'
envs = Environment.list(workspace = ws)
env = envs.get(ENV_NAME)

# `.get` returns None for an unknown name; fail here with a clear message rather
# than handing None to the run configuration later on.
if env is None:
    raise KeyError(
        "Environment '{}' was not found in workspace '{}'".format(ENV_NAME, ws.name))

Before submitting, we need to copy every file required for training (the training script and the modules it imports) into a local directory; that directory is submitted along with the training job.

In [45]:

# Folder layout: the repo's azure folder is used as the script folder, and
# `script_file` is the training entry script.
source = 'Satellite_ComputerVision'
util_folder = 'utils'
script_file = 'train_autoencoder.py'
script_folder = f'{source}/azure'

# One-time setup steps, kept for reference only (not executed in this cell):
#
#   copy the training script from the github repo to the local folder
#     shutil.copy(src = os.path.join(source, script_folder, script_file),
#                 dst = os.path.join(script_folder, script_file))
#
#   copy the entire 'utils' folder from the github repo to local
#     shutil.copytree(src = '/home/azureuser/cloudfiles/code/Users/mevans/Satellite_ComputerVision/utils',
#                     dst = '/home/azureuser/cloudfiles/code/Users/mevans/azure/utils')
#
#   copy single updated files
#     shutil.copy(src = join(source, util_folder, 'prediction_tools.py'),
#                 dst = join(script_folder, util_folder, 'prediction_tools.py'))

In [46]:
# Select the compute target, by name, that the training job will run on.
compute_target = ws.compute_targets['place-dsvm']

### Training Data Summary
Here we do a rudimentary read of our training data to calculate summary metrics (dataset size, bias, and positive-sample weighting) to be passed to our training script.

In [8]:
%cd Satellite_ComputerVision
!git pull
%cd ..

/mnt/batch/tasks/shared/LS_root/mounts/clusters/change-detection-cpu/code/Users/mevans/Satellite_ComputerVision
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (1/1), done.[K
remote: Total 4 (delta 3), reused 4 (delta 3), pack-reused 0[K
Unpacking objects: 100% (4/4), done.
From https://github.com/mjevans26/Satellite_ComputerVision
   961bd34..ecf71a5  master     -> origin/master
Updating 961bd34..ecf71a5
error: Your local changes to the following files would be overwritten by merge:
	utils/model_tools.py
Please, commit your changes or stash them before you can merge.
Aborting
/mnt/batch/tasks/shared/LS_root/mounts/clusters/change-detection-cpu/code/Users/mevans


In [9]:
# Attach our utilities folder to the module search path so its modules can be
# imported. The membership check keeps the cell idempotent: re-running it does
# not append a duplicate entry.
UTILS_DIR = '/home/azureuser/cloudfiles/code/Users/mevans/Satellite_ComputerVision/utils'
if UTILS_DIR not in path:
    path.append(UTILS_DIR)

In [10]:
from model_tools import get_model, make_confusion_matrix
from processing import get_training_dataset

In [11]:
# Global constants describing the training features

# Input band names, plus the response band, make up the full feature list.
BANDS = ['B2', 'B3', 'B4', 'B8', 'B2_1', 'B3_1', 'B4_1', 'B8_1']
RESPONSE = 'change'
FEATURES = [*BANDS, RESPONSE]

# Size and shape of the square patches expected by the model.
KERNEL_SIZE = 256
KERNEL_SHAPE = [KERNEL_SIZE] * 2

# One fixed-length float32 feature spec per feature name, keyed by that name.
FEATURES_DICT = {
    name: tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32)
    for name in FEATURES
}
COLUMNS = list(FEATURES_DICT.values())

In [1]:
# Create a mount context for the training dataset and open access to it.
test_mount = train_dataset.mount()
test_mount.start()

# Location of the mounted data.
dataset_mount_folder = test_mount.mount_point
print(dataset_mount_folder)

# Recursively gather the path of every file below the mount point.
test_files = [
    join(folder, filename)
    for folder, _, filenames in os.walk(dataset_mount_folder)
    for filename in filenames
]

# alternative, non-recursive listing:
# test_files = glob.glob(join(dataset_mount_folder, '*.'))

# Read in the training data so we can calculate its size.
training = get_training_dataset(
    files=test_files, ftDict=FEATURES_DICT,
    features=BANDS, response=RESPONSE,
    buff=1, batch=1,
    repeat=False, splits=None)

NameError: name 'train_dataset' is not defined

In [None]:
# Peek at the first element of the training data as a sanity check.
# The built-in next() works with any iterator, whereas a `.next()` method is a
# Python 2 convention that iterators are not required to provide.
iterator = iter(training)
print(next(iterator))

In [14]:
# Calculate some summary statistics used in model training.
m = get_model(
    depth = len(BANDS),
    optim = tf.keras.optimizers.Adam(learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999),
    loss = 'mse',
    mets = [tf.keras.metrics.categorical_accuracy],
    bias = None)

try:
    train_con_mat = make_confusion_matrix(training, m)
finally:
    # release the dataset mount even if the pass over the data fails
    test_mount.stop()

# Per-class totals from the confusion matrix.
# NOTE(review): assumes rows index the true class (0 = negative, 1 = positive) - confirm
# against make_confusion_matrix.
classums = train_con_mat.sum(axis = 1)
if classums[0] == 0 or classums[1] == 0:
    # both ratios below would be inf/-inf (or divide by zero) with an empty class
    raise ValueError('training data must contain both classes; class totals: {}'.format(classums))

BIAS = np.log(classums[1]/classums[0])
WEIGHT = classums[0]/classums[1]
# number of patches = total pixel count / pixels per patch
TRAIN_SIZE = train_con_mat.sum()//(KERNEL_SIZE*KERNEL_SIZE)



In [15]:
# Report the computed training-data statistics, one per line.
for label, value in (('size', TRAIN_SIZE), ('weight', WEIGHT), ('bias', BIAS)):
    print(label, value)

size 1041
weight 21.17979152165524
bias -3.0530474969546195


### Submit the Training Job

In [47]:
# Build the script run configuration for the training job.

# Command-line arguments for our training script, as flag/value pairs.
args = [
    '--train_data',  train_dataset.as_mount(),
    '--eval_data',   eval_dataset.as_mount(),
    '-lr',           0.001,
    '--epochs',      100,
    '--batch',       16,
    # NOTE(review): hard-coded size (an earlier value was 985) - confirm it matches the dataset
    '--size',        7659,
    '--kernel_size', 256,
]

# TODO: figure out how to pass lists as command line arguments

src = ScriptRunConfig(
    source_directory=script_folder,
    script=script_file,
    arguments=args,
    compute_target=compute_target,
    environment=env,
)



In [37]:
# Display the name of the experiment object as the cell output.
exp.name

'sentinel-autoencoder'

In [48]:
# Submit the training job to the experiment with descriptive tags, then display the run.
run_tags = {
    'model': 'Unet',
    'dataset': 'S2_autoencoder',
    'normalization': 'pixel',
    'epochs': '0-100',
    'bands': '8',
}
run = exp.submit(config=src, tags=run_tags)
run

Experiment,Id,Type,Status,Details Page,Docs Page
sentinel-autoencoder,sentinel-autoencoder_1637710868_08f06874,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [49]:
# Show the run-details widget for the submitted run.
from azureml.widgets import RunDetails

run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

KeyError: 'log_files'