# View Azure Machine Learning resources in the workspace

In [None]:
from azureml.core import Workspace

# Connect to the workspace described by the local configuration file
ws = Workspace.from_config()

from azureml.core import ComputeTarget

# Read the compute-target mapping once and iterate over it, instead of
# re-reading ws.compute_targets for every entry.
print("Compute Resources:")
for compute in ws.compute_targets.values():
    print("\t", compute.name, ':', compute.type)

# Get Predictions from an Automated ML Endpoint

In [3]:
import json
import requests


endpoint = 'ENDPOINT'  # Replace with your endpoint
key = 'PRIMARY_KEY'  # Replace with your key

# Feature values for one patient (a list with one dict per row to score)
x = [
    {
        "PatientID": 1,
        "Pregnancies": 5,
        "PlasmaGlucose": 181.0,
        "DiastolicBloodPressure": 90.6,
        "TricepsThickness": 34.0,
        "SerumInsulin": 23.0,
        "BMI": 43.51,
        "DiabetesPedigree": 1.21,
        "Age": 21.0,
    }
]

# Serialize the rows under a top-level "data" key
input_json = json.dumps(dict(data=x))

# Content type and bearer-token authentication for the request
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer {}".format(key),
}

In [None]:
# POST the UTF-8 encoded JSON payload to the scoring endpoint
response = requests.post(endpoint, data=input_json.encode(), headers=headers)

# Surface HTTP errors (wrong key, wrong URL, ...) directly instead of failing
# later with a confusing JSON decoding error.
response.raise_for_status()

# response.json() decodes the body once; when that yields a JSON-encoded string,
# decode it a second time, otherwise use the decoded object as it is.
payload = response.json()
y = json.loads(payload) if isinstance(payload, str) else payload

# Get the first prediction in the results
prediction = y["result"][0]
print("Prediction:", prediction)

if prediction == 1:
    print('Diabetic')
else:
    print("Not Diabetic")
    

# Get Predictions from a Designer Pipeline

In [None]:
import urllib.request

# urllib only accepts a bytes body for a POST request, so encode the JSON text first
body = input_json.encode('utf-8')
req = urllib.request.Request(endpoint, body, headers)

# Read the whole body inside the context manager so the connection is closed afterwards
with urllib.request.urlopen(req) as response:
    json_result = json.loads(response.read())

# Take the first row of the "WebServiceOutput0" results and print its fields
output = json_result["Results"]["WebServiceOutput0"][0]
print('Patient: {}\nPrediction: {}\nProbability: {:.2f}'
      .format(output["PatientID"], output["DiabetesPrediction"], output["Probability"]))

# Retrieve experiment details using the SDK

In [None]:
# Display the run-details widget for the current run
from azureml.widgets import RunDetails

run_widget = RunDetails(run)
run_widget.show()


- The **Details** tab contains the general properties of the experiment run.
- The **Metrics** tab enables you to select logged metrics and view them as tables or charts.
- The **Images** tab enables you to select and view any images or plots that were logged in the experiment (in this case, the Label Distribution plot).
- The **Child Runs** tab lists any child runs (in this experiment there are none).
- The **Outputs + Logs** tab shows the output or log files generated by the experiment.
- The **Snapshot** tab contains all files in the folder where the experiment code was run (in this case, everything in the same folder as this notebook).
- The **Explanations** tab is used to show model explanations generated by the experiment (in this case, there are none).
- The **Fairness** tab is used to visualize predictive performance disparities that help you evaluate the fairness of machine learning models (in this case, there are none).


In [None]:
# Print every metric returned by run.get_metrics()
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, ":", metric_value)

# Print every name returned by run.get_file_names(); a plain loop is used so the
# cell does not echo a list of None values as its output.
files = run.get_file_names()
for file in files:
    print(file)

In [None]:
import os

download_folder = 'downloaded-files'

# Download files in the "outputs" folder
run.download_files(prefix='outputs', output_directory=download_folder)

# List the path of every file found under the download folder
for root, directories, filenames in os.walk(download_folder):
    for filename in filenames:
        print(os.path.join(root, filename))

In [None]:
# Call run.get_details_with_logs(); as the final expression of the cell,
# its return value is rendered as the cell output.
run.get_details_with_logs()

### Download the log files to view them in a text editor

In [None]:

import os

# One variable for the destination, so the download and the listing use the same folder
log_folder = 'downloaded-logs'
run.get_all_logs(destination=log_folder)

# Verify the files have been downloaded
for root, directories, filenames in os.walk(log_folder):
    for filename in filenames:
        print(os.path.join(root, filename))
        

### View experiment run history

In [None]:
# For each run of the 'diabetes-experiment' experiment, print its ID and its metrics
diabetes_experiment = ws.experiments['diabetes-experiment']
for logged_run in diabetes_experiment.get_runs():
    print('Run ID:', logged_run.id)
    metrics = logged_run.get_metrics()
    for metric_name, metric_value in metrics.items():
        print('-', metric_name, metric_value)
        

# Use MLflow
MLflow is an open source platform for managing machine learning processes. It's commonly (but not exclusively) used in Databricks environments to coordinate experiments and track metrics. In Azure Machine Learning experiments, you can use MLflow to track metrics as an alternative to the native log functionality.

In [None]:
%%writefile $folder_name/mlflow_diabetes.py
from azureml.core import Run
import mlflow

# start the MLflow experiment
with mlflow.start_run():
     mlflow.log_metric('observations', row_count)

In [None]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.widgets import RunDetails


# Python environment that the MLflow experiment will use
mlflow_env = Environment("mlflow-env")

# Declare the conda and pip packages to install, then attach them to the environment
conda_requirements = ['pandas', 'pip']
pip_requirements = ['mlflow', 'azureml-mlflow']
packages = CondaDependencies.create(
    conda_packages=conda_requirements,
    pip_packages=pip_requirements,
)
mlflow_env.python.conda_dependencies = packages

In [None]:

# Script run configuration: source folder, script file and environment
script_mlflow = ScriptRunConfig(
    source_directory=experiment_folder,
    script='mlflow_diabetes.py',
    environment=mlflow_env,
)

# Submit the configuration as a run of the 'diabetes-mlflow-script' experiment
experiment = Experiment(ws, 'diabetes-mlflow-script')
run = experiment.submit(config=script_mlflow)

# Show the run widget, then call wait_for_completion() as the cell's final expression
run_widget = RunDetails(run)
run_widget.show()
run.wait_for_completion()

In [None]:
# Print each metric returned by run.get_metrics(); a plain loop is used so the
# cell does not echo a list of None values as its output.
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)
        

# Configure automated machine learning

In [None]:
import azureml.train.automl.utilities as automl_utils
from azureml.train.automl import AutoMLConfig
# Print the primary metrics returned for the 'classification' task
for metric in automl_utils.get_primary_metrics('classification'):
    print(metric)

# Keyword settings for the automated ML configuration
automl_settings = dict(
    name='Automated ML Experiment',
    task='classification',
    compute_target=training_cluster,
    training_data=train_ds,
    validation_data=test_ds,
    label_column_name='Diabetic',
    iterations=4,
    primary_metric='AUC_weighted',
    max_concurrent_iterations=2,
    featurization='auto',
)
automl_config = AutoMLConfig(**automl_settings)

print("Ready for Auto ML run.")

# Submit the configuration as a run of the 'diabetes_automl' experiment
automl_experiment = Experiment(ws, 'diabetes_automl')
automl_run = automl_experiment.submit(automl_config)

### Retrieve the best model

In [None]:
# Unpack the output of the automated ML run into the best run and its fitted model
best_run, fitted_model = automl_run.get_output()
print(best_run)
print(fitted_model)

# Print each metric of the best run; a plain loop is used so the cell does not
# echo a list of None values as its output.
best_run_metrics = best_run.get_metrics()
for metric_name, metric_value in best_run_metrics.items():
    print(metric_name, metric_value)

#  Differential Privacy
Differential privacy is a technique that is designed to preserve the privacy of individual data points by adding "noise" to the data. 
SmartNoise aims to provide building blocks for using differential privacy in data analysis and machine learning projects.



The histograms of the raw data and of the differentially private data are similar enough to ensure that reports based on the differentially private data provide the same insights as reports from the raw data.



In [None]:
import opendp.smartnoise.core as sn

cols = list(diabetes.columns)
age_range = [0.0, 120.0]
samples = len(diabetes)

# Histogram bin edges in 10-year bands from 0 to 120.
# NOTE(review): chosen to span age_range - adjust if different bins are wanted.
ages = list(range(0, 130, 10))

with sn.Analysis() as analysis:
    data = sn.Dataset(path=data_path, column_names=cols)  # load data

    # Differentially private mean of Age
    age_dt = sn.to_float(data['Age'])  # convert Age to float
    age_mean = sn.dp_mean(data=age_dt,
                          privacy_usage={'epsilon': .50},
                          data_lower=age_range[0],
                          data_upper=age_range[1],
                          data_rows=samples)

    # Differentially private histogram of Age over the bin edges in `ages`
    age_histogram = sn.dp_histogram(
        sn.to_int(data['Age'], lower=0, upper=120),
        edges=ages, upper=10000, null_value=-1,
        privacy_usage={'epsilon': 0.5})

    # Differentially private covariance of Age and DiastolicBloodPressure,
    # read from the same dataset object that was loaded above
    age_bp_cov_scalar = sn.dp_covariance(
        left=sn.to_float(data['Age']),
        right=sn.to_float(data['DiastolicBloodPressure']),
        privacy_usage={'epsilon': 1.0},
        left_lower=0., left_upper=120., left_rows=10000,
        right_lower=0., right_upper=150., right_rows=10000)

analysis.release()

# Print the differentially private estimate of mean age and the actual mean age
print(age_mean.value, diabetes.Age.mean())

###  SQL queries

In [None]:
from opendp.smartnoise.metadata import CollectionMetadata
from opendp.smartnoise.sql import PandasReader, PrivateReader

meta = CollectionMetadata.from_file('metadata/diabetes.yml')

# Reader over the raw DataFrame, which the private reader wraps.
# NOTE(review): arguments are passed by keyword so the call does not depend on
# positional order - confirm the parameter names against the installed SmartNoise version.
reader = PandasReader(df=diabetes, metadata=meta)

# Larger epsilon = less privacy; smaller epsilon = more privacy
private_reader = PrivateReader(meta, reader, 5.0)

query = 'SELECT Diabetic, AVG(Age) AS AvgAge FROM diabetes.diabetes GROUP BY Diabetic'
result_dp = private_reader.execute_typed(query)
print(result_dp)


# Get a list of datasets from the workspace object

In [None]:
from azureml.core import Dataset

# Look up each dataset name found in ws.datasets and print its name and version
for dataset_name in list(ws.datasets.keys()):
    dataset = Dataset.get_by_name(ws, dataset_name)
    print("\t", dataset.name, 'version', dataset.version)

# Create a compute cluster

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "your-compute-cluster"

try:
    # Look up a compute target with this name in the workspace
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # The lookup failed, so try to create the cluster
    try:
        provisioning = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_DS11_V2',
            max_nodes=2,
        )
        training_cluster = ComputeTarget.create(ws, cluster_name, provisioning)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as err:
        # Best effort: report the failure instead of raising
        print(err)
else:
    print('Found existing cluster, use it.')


# Run an experiment on remote compute

Now you're ready to re-run the experiment you ran previously, but this time on the compute cluster you created.

In [None]:
# Script arguments: the diabetes dataset passed as the named input 'training_data'
script_arguments = ['--input-data', diabetes_ds.as_named_input('training_data')]

# Script run configuration that targets the compute cluster by name
script_config = ScriptRunConfig(
    source_directory=experiment_folder,
    script='diabetes_training.py',
    arguments=script_arguments,
    environment=registered_env,
    compute_target=cluster_name,
)

In [None]:
# Fetch the cluster status and print its allocation state and current node count
cluster_state = training_cluster.get_status()
state_fields = (
    cluster_state.allocation_state,
    cluster_state.current_node_count,
)
print(*state_fields)

You can use the Azure Machine Learning extension for Azure DevOps to combine Azure ML pipelines with Azure DevOps pipelines (yes, it is confusing that they have the same name!) and integrate model retraining into a continuous integration/continuous deployment (CI/CD) process. 

# Deploy the model as a web service

In [None]:
import os

# Get the model that we want to deploy. By default, if we specify a model name,
# the latest version will be returned.
model = ws.models['diabetes_model']

# Path of the scoring script inside the experiment folder
script_file = os.path.join(experiment_folder, "score_diabetes.py")



The web service where we deploy the model will need some Python code to load the input data, get the model from the workspace, and generate and return predictions. We'll save this code in an entry script (often called a scoring script) that will be deployed to the web service:


# Tune Hyperparameters

### Run a hyperparameter tuning experiment
Azure Machine Learning includes a hyperparameter tuning capability through hyperdrive experiments. These experiments launch multiple child runs, each with a different hyperparameter combination.

In [None]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.train.hyperdrive import GridParameterSampling, HyperDriveConfig, PrimaryMetricGoal, choice
from azureml.widgets import RunDetails
from azureml.core import Model

# Python environment for the hyperparameter tuning experiment
sklearn_env = Environment("sklearn-env")

# Required pip packages: scikit-learn, Azure ML defaults, and Azure ML dataprep
pip_requirements = ['scikit-learn', 'azureml-defaults', 'azureml-dataprep[pandas]']
packages = CondaDependencies.create(pip_packages=pip_requirements)
sklearn_env.python.conda_dependencies = packages

# Get the training dataset
diabetes_ds = ws.datasets.get("diabetes dataset")

# Non-hyperparameter script arguments - in this case, the training dataset
script_arguments = ['--input-data', diabetes_ds.as_named_input('training_data')]

# Script run configuration shared by every hyperdrive child run
script_config = ScriptRunConfig(
    source_directory=experiment_folder,
    script='diabetes_training.py',
    arguments=script_arguments,
    environment=sklearn_env,
    compute_target=training_cluster,
)

# Grid of hyperparameter values: 3 learning rates x 2 estimator counts = 6 combinations,
# which hyperdrive adds to the script as arguments
param_grid = {
    '--learning_rate': choice(0.01, 0.1, 1.0),
    '--n_estimators': choice(10, 100),
}
params = GridParameterSampling(param_grid)

# Hyperdrive settings
hyperdrive = HyperDriveConfig(
    run_config=script_config,
    hyperparameter_sampling=params,
    policy=None,  # no early stopping policy
    primary_metric_name='AUC',  # find the highest AUC metric
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=6,  # restrict the experiment to 6 iterations
    max_concurrent_runs=2,  # run up to 2 iterations in parallel
)

# Submit the hyperdrive configuration as a run of the experiment
experiment = Experiment(ws, 'diabates_training_hyperdrive')
run = experiment.submit(config=hyperdrive)

# Show the status in the notebook as the experiment runs, then wait for completion
run_widget = RunDetails(run)
run_widget.show()
run.wait_for_completion()


# Take the best child run of the hyperdrive run submitted in this cell, together with
# its metrics, so the registered model and the 'AUC' / 'Accuracy' properties come from
# this tuning experiment.
best_run = run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

# Register model
best_run.register_model(model_path='outputs/diabetes_model.pkl', model_name='diabetes_model',
                        tags={'Training context':'Hyperdrive'},
                        properties={'AUC': best_run_metrics['AUC'], 'Accuracy': best_run_metrics['Accuracy']})