## Azure ML Setup

In [None]:
from azureml.core import Workspace, Dataset, Datastore

In [None]:
# Connect to the Azure ML workspace; `ws` is reused by the later local
# (non-%%synapse) cells to look up datasets and compute targets.
ws = Workspace.from_config()

## Sample and explore data

In [None]:
# Pull a small local slice of the dataset into pandas for a quick look.
ds = ws.datasets['noaa-isd-tabular']
# Assuming this is a TabularDataset: it has no count-based sample(); take(1000)
# returns the first 1000 records. For a random subset use
# take_sample(probability, seed) instead.
df = ds.take(1000).to_pandas_dataframe()

df.describe()

## Start Spark Session

In [None]:
%synapse start --compute-target sparky --environment 'AzureML-Synapse' # compute target name

In [None]:
%synapse restart --environment 'MyCustomEnvironment' # restart with new spark/python Environment changes

## Work with data

In [None]:
%%synapse # default to PySpark

# get workspace
from azureml.core import Workspace

# automagic
ws = Workspace.from_config()
ws

In [None]:
%%synapse 

# Look up the 'noaa-isd-tabular' dataset by name through the workspace handle
# `ws`, then display it as the cell output.
ds = ws.datasets['noaa-isd-tabular']
ds

In [None]:
%%synapse

# Convert the dataset to a Spark DataFrame and print a preview of its rows.
df = ds.to_spark_dataframe()
df.show()

In [None]:
%%synapse

# basic EDA
df.summary().show() # Spark counterpart of pandas df.describe(); summary() returns a DataFrame, so print it with show()

## start data prep code
# These imports are needed inside the Spark session itself: so far it has only
# imported Workspace.
from azureml.core import Dataset
from pyspark.sql import functions as F
from pyspark.sql.types import NumericType


def _mean_by_datetime(frame):
    """Average every numeric column of ``frame`` per distinct 'datetime' value.

    Each average is aliased back to its source column name. A bare
    ``groupBy(...).mean()`` would name the outputs 'avg(<column>)', which
    breaks later references such as ``X['temperature']`` and puts parentheses
    into the column names of the saved data.
    """
    numeric_columns = [
        field.name
        for field in frame.schema.fields
        if isinstance(field.dataType, NumericType) and field.name != 'datetime'
    ]
    return frame.groupBy('datetime').agg(*[F.avg(name).alias(name) for name in numeric_columns])


# Keep 'datetime' next to the label so both frames can be grouped on it;
# df['target'] on its own is a Column, which has no fillna/groupBy.
X = df.drop('target')
Y = df.select('datetime', 'target')

# NOTE(review): fillna('0') passes a string, which Spark applies to string
# columns only -- confirm whether a numeric 0 was intended.
X = _mean_by_datetime(X.fillna('0'))
# The filter runs after aggregation (as in the original chain), so it has to
# reference the aggregated frame rather than the raw `df`.
X = X.filter(X['temperature'] < 50)
# float('nan') replaces the undefined name NaN.
Y = _mean_by_datetime(Y.fillna(float('nan')))
## end data prep code

# save state in cloud as Dataset
# register_spark_dataframe writes the frame to the given datastore location and
# registers it under the given name in a single call.
datastore = ws.get_default_datastore()
dsX = Dataset.Tabular.register_spark_dataframe(X, (datastore, 'noaa-isd-X'), 'noaa-isd-X')
dsY = Dataset.Tabular.register_spark_dataframe(Y, (datastore, 'noaa-isd-Y'), 'noaa-isd-Y')

In [None]:
%%synapse 

# write to delta table in temp storage for use in Synapse Spark/Dask contexts
df.write.format('delta').save(f'/tmp/noaa-isd') 

# save as a delta table in ADLS gen2 for use in almost any Azure Compute context
# NOTE(review): container_name and account_name are not defined in any cell
# shown in this notebook -- they must already exist in the Spark session.
df.write.format('delta').save(f'abfs://{container_name}@{account_name}.dfs.core.windows.net/datasets/noaa-isd/')

# save as a dataset 
df.to_azureml_dataset('noaa-isd') # intentionally incorrect API 

# make a sql-readable table
# The LOCATION is the same temp-storage path written by the first save above.
# NOTE(review): `spark` is not created in this notebook -- presumably provided
# by the Spark session; confirm.
spark.sql("CREATE TABLE noaaisd USING DELTA LOCATION '/tmp/noaa-isd'")

In [None]:
%%synapse sql

-- use SQL syntax to query/visualize data
-- (Spark SQL comments start with `--`; a leading `#` is not a comment marker)
SELECT * FROM noaaisd

In [None]:
# Imports for everything this cell uses; os, Estimator and RunDetails were
# previously referenced without being imported anywhere in the notebook.
import os

from azureml.core import Experiment
from azureml.train.estimator import Estimator
from azureml.widgets import RunDetails

# Make sure the script folder exists.
# NOTE(review): the estimator below uses source_directory='.', not
# script_folder, and the folder name ('pytorch-dnn') does not match the
# experiment name or entry script ('keras-*') -- confirm which is intended.
script_folder = './pytorch-dnn'
os.makedirs(script_folder, exist_ok=True)

exp = Experiment(workspace=ws, name='keras-mnist')

# Command-line arguments for the training script; the two datasets are the
# ones registered under these names by the data prep cell.
script_params = {
    '--epochs': 1000,
    '--final_layer': 'sigmoid',
    '--X_dataset': ws.datasets['noaa-isd-X'],
    '--Y_dataset': ws.datasets['noaa-isd-Y']
}

est = Estimator(source_directory = '.', 
                script_params    = script_params, 
                compute_target   = ws.compute_targets['gpu-cluster'],
                entry_script     = 'keras_train.py', 
                use_docker       = False
                )

# Submit the run and show the run-details widget for it.
run = exp.submit(est)
RunDetails(run).show()

## Stop Spark Session

In [None]:
# Stop the Spark session once the work above is finished.
%synapse stop