In [1]:
import azureml.core
from azureml.core import Experiment, Workspace, Dataset, Datastore, ScriptRunConfig
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
import os
import shutil
import glob
import math
from os.path import join
import tensorflow as tf
from sys import path
import numpy as np

# Report the installed Azure ML core SDK version alongside the notebook output.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.18.0


In [2]:
# Build the workspace handle from the config.json file in the current folder.
ws = Workspace.from_config()

# Show workspace name, location and resource group on one tab-separated line.
workspace_summary = (ws.name, ws.location, ws.resource_group)
print(*workspace_summary, sep='\t')

# Display the datastores registered in this workspace (rendered as the cell output).
ws.datastores

solar-compviz	eastus2	PLACE


{'solardatafilestore': {
   "name": "solardatafilestore",
   "container_name": "gee-data",
   "account_name": "solarcompviz6161005396",
   "protocol": "https",
   "endpoint": "core.windows.net"
 },
 'workspaceblobstore': {
   "name": "workspaceblobstore",
   "container_name": "azureml-blobstore-20200d5f-4836-4a79-bf1c-7bd46e0d0c35",
   "account_name": "solarcompviz6161005396",
   "protocol": "https",
   "endpoint": "core.windows.net"
 },
 'workspacefilestore': {
   "name": "workspacefilestore",
   "container_name": "azureml-filestore-20200d5f-4836-4a79-bf1c-7bd46e0d0c35",
   "account_name": "solarcompviz6161005396",
   "protocol": "https",
   "endpoint": "core.windows.net"
 }}

In [3]:
# Look up the registered datastore that holds the image data for this workspace.
datastore = Datastore.get(workspace=ws, datastore_name='solardatafilestore')

# (datastore, relative path) pairs for the training and evaluation folders of each region.
cpk_train_path = (datastore, 'CPK_solar/training/')
cpk_eval_path = (datastore, 'CPK_solar/eval/')
nc_train_path = (datastore, 'NC_solar/training/')
nc_eval_path = (datastore, 'NC_solar/eval/')

# Folder used as test data for the training run.
test_path = (datastore, 'CPK_solar/predict/testpred6')

# Per-region file datasets.
cpk_train_dataset = Dataset.File.from_files(path=[cpk_train_path])
cpk_eval_dataset = Dataset.File.from_files(path=[cpk_eval_path])
nc_train_dataset = Dataset.File.from_files(path=[nc_train_path])
nc_eval_dataset = Dataset.File.from_files(path=[nc_eval_path])

# Combined datasets: when several paths are combined, the selected directories and
# their paths relative to the datastore are brought in. Presumably the mounted
# layout looks like the tree below -- TODO confirm against the printed mount point.
# mount folder
# |-solardatafilestore
# | |-CPK_solar
# | | |-training
# | | |-eval
# | |-NC_solar
# | | |-training
# | | |-eval
train_dataset = Dataset.File.from_files(path=[cpk_train_path, nc_train_path])
eval_dataset = Dataset.File.from_files(path=[cpk_eval_path, nc_eval_path])
test_dataset = Dataset.File.from_files(path=[test_path])

In [4]:
# Experiment under which the training runs submitted from this notebook are grouped.
experiment_name = 'solar-nc-cpk'
exp = Experiment(workspace=ws, name=experiment_name)

In [12]:
# Define the compute target the training job is submitted to.
# Indexing (rather than .get) raises KeyError right here if 'mevans1' is not
# attached to the workspace. To see the available targets, evaluate
# `ws.compute_targets` as the last expression of its own cell -- a bare
# expression in the middle of a cell is never displayed.
compute_target = ws.compute_targets['mevans1']

In [6]:
# Fetch the environments known to the workspace and select the one used for training.
envs = Environment.list(workspace=ws)
# Index instead of .get(): a missing 'solar-training' environment now raises
# KeyError here, rather than leaving env as None and passing that None on to
# ScriptRunConfig(environment=env).
env = envs['solar-training']

In [7]:
%cd Satellite_ComputerVision
!git pull
%cd ..

/mnt/batch/tasks/shared/LS_root/mounts/clusters/test-compute-instance/code/Users/mevans/Satellite_ComputerVision
remote: Enumerating objects: 53, done.[K
remote: Counting objects: 100% (53/53), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 43 (delta 35), reused 33 (delta 25), pack-reused 0[K
Unpacking objects: 100% (43/43), done.
From https://github.com/mjevans26/Satellite_ComputerVision
   2cff298..6bc9cbb  master     -> origin/master
Updating 2cff298..6bc9cbb
Fast-forward
 .spyproject/workspace.ini |   2 [32m+[m[31m-[m
 azure/retrain_wetland.py  | 232 [32m++++++++++++++++++++++++++++++++++++++++++++++[m
 azure/train_acd.py        |   4 [32m+[m[31m-[m
 azure/train_solar.py      |   6 [32m+[m[31m-[m
 azure/train_wetland.py    |  50 [32m+++++[m[31m-----[m
 utils/model_tools.py      |  18 [32m++[m[31m--[m
 utils/prediction_tools.py |  52 [32m++++++[m[31m-----[m
 utils/processing.py       |  32 [32m++++[m[31m---[m
 8 files chang

In [9]:
# Make the repository's utilities folder importable by adding it to sys.path.
utils_dir = '/home/azureuser/cloudfiles/code/Users/mevans/Satellite_ComputerVision/utils'
path.append(utils_dir)

In [10]:
from model_tools import get_model, make_confusion_matrix
from processing import get_training_dataset

In [15]:
# Global variables shared by the data-parsing and training cells.

# Input band names and the name of the response (label) feature.
BANDS = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12']
RESPONSE = 'landcover'
FEATURES = [*BANDS, RESPONSE]

# Size and shape of the square patches expected by the model.
KERNEL_SIZE = 256
KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]

# One fixed-length float32 feature spec per feature, keyed by feature name.
COLUMNS = [
    tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32)
    for _ in FEATURES
]
FEATURES_DICT = {name: spec for name, spec in zip(FEATURES, COLUMNS)}

In [10]:
# Create a mount context for the combined training dataset and open access to it.
# The mount stays open until test_mount.stop() is called in a later cell.
test_mount = train_dataset.mount()
test_mount.start()

# Local folder under which the mounted files appear.
dataset_mount_folder = test_mount.mount_point
print(dataset_mount_folder)

# Collect the full path of every file below the mount point, at any depth.
test_files = [
    join(folder, filename)
    for folder, _subfolders, filenames in os.walk(dataset_mount_folder)
    for filename in filenames
]

# Read in the training data so its size can be calculated.
# NOTE(review): buff=1 / batch=1 / repeat=False presumably yield each patch exactly
# once, unshuffled -- confirm against processing.get_training_dataset.
training = get_training_dataset(
    files=test_files,
    ftDict=FEATURES_DICT,
    features=BANDS,
    response=RESPONSE,
    buff=1,
    batch=1,
    repeat=False)

/tmp/tmptskm6c4k
CRITICAL - MSI: Failed to retrieve a token from 'http://127.0.1.1:46808/MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01' with an error of 'HTTPConnectionPool(host='127.0.1.1', port=46808): Max retries exceeded with url: /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fa6d4744c50>: Failed to establish a new connection: [Errno 111] Connection refused',))'.


In [None]:
# Peek at the first element of the training data as a sanity check.
# The builtin next() works on any Python 3 iterator; the `.next()` method is the
# Python 2 protocol and is not guaranteed to exist on the iterator object.
iterator = iter(training)
print(next(iterator))

In [12]:
# Calculate summary statistics used in model training.
# A model is built only so make_confusion_matrix can be run over the training data.
m = get_model(
    depth=len(BANDS),
    optim=tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
    loss='mse',
    mets=[tf.keras.metrics.categorical_accuracy],
    bias=None)

try:
    # This is the step that reads the mounted files.
    train_con_mat = make_confusion_matrix(training, m)
finally:
    # Always release the mount, even if reading the data fails; otherwise the
    # mount started in the earlier cell would be left open.
    test_mount.stop()

# Totals per row of the confusion matrix.
# NOTE(review): assumes rows are the true classes and entries are pixel counts --
# confirm against model_tools.make_confusion_matrix.
classums = train_con_mat.sum(axis=1)
# Log ratio of the class-1 total to the class-0 total.
BIAS = np.log(classums[1]/classums[0])
# Ratio of the class-0 total to the class-1 total.
WEIGHT = classums[0]/classums[1]
# Total count divided by the number of pixels in one KERNEL_SIZE x KERNEL_SIZE patch.
TRAIN_SIZE = train_con_mat.sum()//(KERNEL_SIZE*KERNEL_SIZE)

CRITICAL - MSI: Failed to retrieve a token from 'http://127.0.1.1:46808/MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01' with an error of 'HTTPConnectionPool(host='127.0.1.1', port=46808): Max retries exceeded with url: /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fa6d475bda0>: Failed to establish a new connection: [Errno 111] Connection refused',))'.
7755


In [13]:
# Show the statistics computed from the training data, one per line.
for label, value in (('size', TRAIN_SIZE), ('weight', WEIGHT), ('bias', BIAS)):
    print(label, value)

size 7755
weight 78.48739612112549
bias -4.362938052930838


In [11]:
# Names of the cloned repository, its script and utilities folders, and the
# training script; the 'azure' folder is used as the script folder.
source = 'Satellite_ComputerVision'
script_folder, util_folder = 'azure', 'utils'
script_file = 'train_solar.py'

In [None]:
# copy the script folder and the 'utils' folder from the cloned github repo to the local working directory
# shutil.copytree(src = f'/home/azureuser/cloudfiles/code/Users/mevans/Satellite_ComputerVision/{script_folder}',
# dst = f'/home/azureuser/cloudfiles/code/Users/mevans/{script_folder}')

# shutil.copytree(src = f'/home/azureuser/cloudfiles/code/Users/mevans/Satellite_ComputerVision/{util_folder}',
# dst = f'/home/azureuser/cloudfiles/code/Users/mevans/{script_folder}/{util_folder}')

In [16]:
# create script run config
# WEIGHT = int(math.ceil(WEIGHT/5)*5)
# Define the command line arguments to our training script.
# Several values are fixed by hand rather than taken from the statistics cell:
# the provenance of each is noted inline.
args = [
    '--train_data', train_dataset.as_mount(),
    '--eval_data', eval_dataset.as_mount(),
    '--test_data', test_dataset.as_mount(),
    '--weight', 80,  # computed WEIGHT (~78.49) rounded up to the next multiple of 5
    '--bias', -4,  # BIAS (computed value ~-4.36)
    '-lr', 0.001,
    '--epochs', 100,
    '--batch', 16,
    '--size', 7755,  # TRAIN_SIZE as printed by the statistics cell
    # Use the shared constant so the patch size cannot drift from FEATURES_DICT.
    '--kernel_size', KERNEL_SIZE,
    '--response', RESPONSE]
    # '--bands', 'B2' 'B3' 'B4' 'B8' 'B2_1' 'B3_1' 'B4_1' 'B8_1',
    # '--splits', None]

# TODO: figure out how to pass lists as command line arguments.
# NOTE(review): the commented '--bands' line above would not pass a list --
# adjacent string literals are concatenated into one string. `arguments` is a flat
# list, so each value needs its own element (e.g. '--bands', *BANDS); this
# presumably also requires train_solar.py to declare the option with nargs='+' --
# confirm against the script's argument parser.

src = ScriptRunConfig(source_directory=script_folder,
                      script=script_file,
                      arguments=args,
                      compute_target=compute_target,
                      environment=env)

In [19]:
# Submit the configured training job; the tags are attached to the submitted run.
run_tags = {'splits':'None', 'model':'Unet', 'dataset':'CPK+NC', 'normalization':'pixel', 'epochs':'0-100'}
run = exp.submit(config=src, tags=run_tags)
# Display the run summary as the cell output.
run

Experiment,Id,Type,Status,Details Page,Docs Page
solar-nc-cpk,solar-nc-cpk_1624989679_f59da7cf,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [36]:
# Show the run-details widget for the submitted training run.
from azureml.widgets import RunDetails

run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…