## Import packages
Import the Python packages you need in this session.

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import azureml.core
from azureml.core import Workspace

# Show the version string exposed by the imported azureml.core package.
print("Azure ML SDK version: ", azureml.core.VERSION)

## Connect to workspace

In [None]:
# Obtain the workspace handle through Workspace.from_config(); later cells
# refer to it as `ws`.
ws = Workspace.from_config()

# Echo the identifying fields on one line, separated by tabs.
workspace_summary = (ws.name, ws.location, ws.resource_group)
print(*workspace_summary, sep='\t')

## Create experiment

Create an experiment to track the runs in your workspace. A workspace can have multiple experiments.

In [None]:
from azureml.core import Experiment

# Name given to the experiment created in the workspace `ws`.
experiment_name = 'sklearn-mnist'

# `exp` is the experiment handle used by the rest of the notebook.
exp = Experiment(name=experiment_name, workspace=ws)

## Create or Attach existing compute resource

By using Azure ML Compute, data scientists can train machine learning models on a cluster of Azure virtual machines.

In [1]:
import os

# The cluster type is taken from the AML_COMPUTE_CLUSTER_TYPE environment
# variable and falls back to "CPU" when that variable is not set.
cluster_type = os.getenv("AML_COMPUTE_CLUSTER_TYPE", "CPU")

# Requires `ws` from the "Connect to workspace" cell to be defined first.
compute_target = ws.get_default_compute_target(cluster_type)

NameError: name 'ws' is not defined

## Download MNIST data

In [None]:
# Imported in this cell so it runs on its own: no other cell imports
# urllib.request, and without it every urlretrieve call raises NameError.
import os
import urllib.request

# Local directory (under the current working directory) that receives the
# downloaded archives; created if it does not exist yet.
data_folder = os.path.join(os.getcwd(), 'data')
os.makedirs(data_folder, exist_ok=True)

# Remote archive name -> shorter local file name inside data_folder.
mnist_base_url = 'http://yann.lecun.com/exdb/mnist/'
mnist_files = {
    'train-images-idx3-ubyte.gz': 'train-images.gz',
    'train-labels-idx1-ubyte.gz': 'train-labels.gz',
    't10k-images-idx3-ubyte.gz': 'test-images.gz',
    't10k-labels-idx1-ubyte.gz': 'test-labels.gz',
}

# Fetch each archive in turn and store it under its local name.
for remote_name, local_name in mnist_files.items():
    urllib.request.urlretrieve(mnist_base_url + remote_name,
                               filename=os.path.join(data_folder, local_name))

In [None]:
# Utils
import gzip
import numpy as np
import struct

# Load compressed MNIST gz files and return numpy array
def load_data(filename, label=False):
    """Read a gzip-compressed MNIST-style file into a uint8 numpy array.

    The file starts with a 4-byte magic number (read and discarded), followed
    by a big-endian 32-bit item count. Image files (``label=False``) then carry
    two more big-endian 32-bit integers, the row and column counts, before the
    raw bytes. Label files (``label=True``) go straight to the raw bytes.

    Args:
        filename: Path of the ``.gz`` file to read.
        label: ``True`` to parse a label file, ``False`` for an image file.

    Returns:
        A ``uint8`` array of shape ``(n_items, n_rows * n_cols)`` for images,
        or ``(n_items, 1)`` for labels.
    """
    with gzip.open(filename) as stream:
        # Magic number: consumed only to advance the stream; never checked.
        struct.unpack('I', stream.read(4))
        (count,) = struct.unpack('>I', stream.read(4))

        if label:
            # One byte per item.
            width = 1
        else:
            (n_rows,) = struct.unpack('>I', stream.read(4))
            (n_cols,) = struct.unpack('>I', stream.read(4))
            # Each image is flattened into a single row of pixels.
            width = n_rows * n_cols

        payload = stream.read(count * width)

    return np.frombuffer(payload, dtype=np.uint8).reshape(count, width)


# one-hot encode a 1-D array
def one_hot_encode(array, num_of_classes):
    """Return the one-hot encoding of integer class labels.

    Args:
        array: Integer class indices; any shape is accepted because the input
            is flattened to 1-D before encoding.
        num_of_classes: Number of classes, i.e. the width of each encoded row.

    Returns:
        A float array of shape ``(array.size, num_of_classes)`` whose row ``i``
        is all zeros except for a 1 at column ``array[i]``.
    """
    # Row k of the identity matrix is the one-hot vector for class k, so
    # indexing it with the flattened labels selects one row per label.
    return np.eye(num_of_classes)[np.asarray(array).reshape(-1)]

## Upload data to the cloud

In [None]:
# Fetch the workspace's default datastore and print its type, storage account
# name and container name.
ds = ws.get_default_datastore()
print(ds.datastore_type, ds.account_name, ds.container_name)

# Upload the contents of data_folder to the 'mnist' path of the datastore.
# The keyword must be spelled `show_progress`; the misspelled `show_progess`
# is not a parameter of upload() and makes the call fail with a TypeError.
ds.upload(src_dir=data_folder, target_path='mnist', overwrite=True,
          show_progress=True)

## Train on remote cluster

For this task, submit the job to the remote training cluster you set up earlier. To submit a job you:

- Create a directory
- Create a training script
- Create an estimator object
- Submit the job

### Create a directory

In [None]:
import os

# Directory named "sklearn-mnist" under the current working directory.
script_folder = os.path.join(os.getcwd(), "sklearn-mnist")

# exist_ok=True lets this cell be re-run without failing when the directory
# is already there.
os.makedirs(script_folder, exist_ok=True)

### Create a training script

To submit the job to the cluster, first create a training script. Run the fol