In [7]:
import azureml.core
from azureml.core import Workspace

# Connect to the workspace described by the saved config file
ws = Workspace.from_config()
sdk_version, workspace_name = azureml.core.VERSION, ws.name
print('Ready to use Azure ML {} to work with {}'.format(sdk_version, workspace_name))

Ready to use Azure ML 1.21.0 to work with ml-p15g2


In [8]:
from azureml.core import Dataset
print("Datasets:")
# Iterating the datasets mapping yields the dataset names (its keys);
# each one is then looked up by name so its version can be reported.
for dataset_name in ws.datasets:
    dataset = Dataset.get_by_name(ws, dataset_name)
    print("\t", dataset.name, 'version', dataset.version)

Datasets:
	 features version 1


In [9]:
import os
# Create a folder for the pipeline step files
experiment_folder = 'analysis_pipeline'
try:
    # exist_ok=True already makes re-running this cell harmless, so any
    # exception raised here is a genuine failure (e.g. permissions, or a
    # regular file occupying the path).
    os.makedirs(experiment_folder, exist_ok=True)
    print(experiment_folder)
except OSError as err:
    # Report the problem instead of silently swallowing it: the %%writefile
    # cell that follows writes into this folder and needs it to exist.
    print('Could not create folder {!r}: {}'.format(experiment_folder, err))

analysis_pipeline


In [34]:
%%writefile $experiment_folder/analysis.py
# Import libraries
from azureml.core import Run
import argparse
import pandas as pd
import numpy as np
import joblib
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Feature columns whose per-row mean is compared against the threshold.
FEATURE_COLUMNS = ['formant2', 'formant1', 'f0max', 'f0min', 'f0final']

# Name of the column that receives the 0/1 group assignment.
GROUP_COLUMN = 'Maturity Group Method 1'

# Tick labels for group 0 and group 1 on every violin plot.
GROUP_LABELS = ['Maturity Group Zero', 'Maturity Group One']

# (data column, y-axis label) for each panel, in row-major order of the 2x2 grid.
VIOLIN_PANELS = [
    ('peak2peak(audio)', 'peak2peak'),
    ('rms(audio)', 'rms'),
    ('f0final', 'f0final'),
    ('formant2', 'formant2'),
]


def build_thresholds():
    """Return the per-feature threshold table (rows: features, columns: groups).

    Only the 'Maturity Group 1' column is used by this script; the
    'Maturity Group 2' values are kept in the table but are currently unused.
    """
    values = {
        'Maturity Group 1': [84.74, 23.53, 15.41, 11.11, 11.91],
        'Maturity Group 2': [96.83, 26.14, 15.06, 10.9, 11.30],
    }
    return pd.DataFrame(
        values,
        index=['Formant 2', 'Formant 1', 'Max Freq', 'Min Freq', 'Finish Freq'],
        columns=['Maturity Group 1', 'Maturity Group 2'],
    )


def assign_maturity_group(df, cutoff):
    """Return a copy of ``df`` with the 0/1 group column inserted at position 3.

    A row is assigned 1 when the mean of its FEATURE_COLUMNS is greater than
    or equal to ``cutoff`` and 0 otherwise.  A mean exactly equal to the
    cutoff is therefore assigned to group 1 instead of being left undefined.

    The assignment is positional: the result has a fresh 0..n-1 index and the
    group values are inserted as a plain array, so a non-default index on
    ``df`` cannot misalign them.  ``df`` itself is not modified.

    Raises:
        ValueError: if the feature mean of any row is NaN (the mean skips
            missing values, so this means every feature of the row is missing).
    """
    row_mean = df[FEATURE_COLUMNS].mean(axis=1)
    if row_mean.isna().any():
        raise ValueError(
            'Cannot assign a maturity group: {} row(s) have no usable feature values'.format(
                int(row_mean.isna().sum())
            )
        )
    group = (row_mean >= cutoff).to_numpy().astype('int32')

    labelled = df.reset_index(drop=True)
    labelled.insert(3, GROUP_COLUMN, group)
    return labelled


def plot_group_violins(df):
    """Draw a 2x2 grid of violin plots comparing group 0 and group 1.

    One panel is drawn per entry of VIOLIN_PANELS.  Returns the figure.
    """
    fig, axes = plt.subplots(2, 2)
    fig.set_figheight(10)
    fig.set_figwidth(15)
    fig.subplots_adjust(wspace=0.15, hspace=0.5)

    tick_positions = range(1, len(GROUP_LABELS) + 1)
    for ax, (column, ylabel) in zip(axes.flat, VIOLIN_PANELS):
        samples = [df.loc[df[GROUP_COLUMN] == group, column].values for group in (0, 1)]
        # Only pass non-empty samples to violinplot (an empty sample has no
        # distribution to draw); each sample keeps its own x position so the
        # tick labels stay correct when one group has no rows.
        populated = [(pos, s) for pos, s in zip(tick_positions, samples) if len(s) > 0]
        if populated:
            ax.violinplot(
                dataset=[s for _, s in populated],
                positions=[pos for pos, _ in populated],
            )
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(GROUP_LABELS, rotation=0)
        ax.set_ylabel(ylabel)
        ax.grid()
    return fig


def main():
    """Load the input dataset, label each row, log the plots and save a CSV."""
    # Get parameters
    parser = argparse.ArgumentParser()
    # --input_data is accepted on the command line but its value is not read
    # below; the data is obtained through the run's named input 'raw_data'.
    parser.add_argument("--input_data", type=str, dest='raw_dataset_id', help='raw dataset')
    parser.add_argument('--analysis_data', type=str, dest='analysis_data', default='analysis_data', help='Folder for results')
    args = parser.parse_args()
    save_folder = args.analysis_data

    # Get the experiment run context
    run = Run.get_context()
    run.log('test', 1)

    # load the data (passed as an input dataset)
    print("Loading Data...")
    df = run.input_datasets['raw_data'].to_pandas_dataframe()
    # Log a preview of the data as a table metric (run.log is meant for single
    # values).  Cells are converted to strings so any column type can be logged.
    run.log_table('df', df.head().astype(str).to_dict('list'))

    # The cutoff is the mean of the 'Maturity Group 1' per-feature thresholds.
    thresholds_mean = build_thresholds().mean(axis=0)
    df = assign_maturity_group(df, thresholds_mean['Maturity Group 1'])

    fig = plot_group_violins(df)
    run.log_image(name="Plot", plot=fig)
    plt.close(fig)  # release the figure once it has been logged

    # Save the prepped data
    print("Saving Data...")
    os.makedirs(save_folder, exist_ok=True)
    save_path = os.path.join(save_folder, 'data.csv')
    df.to_csv(save_path, index=False, header=True, encoding='utf-8')


if __name__ == "__main__":
    main()


Overwriting analysis_pipeline/analysis.py


In [35]:
from azureml.core.compute import ComputeTarget, AmlCompute


from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# Look up the compute target named 'newtest' in the workspace
pipeline_cluster = ComputeTarget(workspace=ws, name='newtest')

# Package dependencies for the step, grouped by installer
conda_pkgs = ['scikit-learn', 'ipykernel', 'matplotlib', 'pandas', 'pip']
pip_pkgs = ['azureml-defaults', 'azureml-dataprep[pandas]', 'pyarrow', 'seaborn', 'numpy']
packages = CondaDependencies.create(conda_packages=conda_pkgs, pip_packages=pip_pkgs)

# Python environment for the experiment
env = Environment("threshold-pipeline-env")
env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
env.docker.enabled = True  # Use a docker container
env.python.conda_dependencies = packages

# Register the environment, then fetch the registered copy back by name
env.register(workspace=ws)
registered_env = Environment.get(ws, 'threshold-pipeline-env')

# Run configuration for the pipeline: registered environment on the compute target
pipeline_run_config = RunConfiguration()
pipeline_run_config.target = pipeline_cluster
pipeline_run_config.environment = registered_env

print("Run configuration created.")





Run configuration created.


In [39]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep

# Get the registered dataset that is fed to the analysis step
dataset = ws.datasets.get("features")
if dataset is None:
    # .get() yields None when no dataset of that name is registered; fail here
    # with a clear message rather than with an AttributeError further down.
    raise ValueError("Dataset 'features' is not registered in workspace {}".format(ws.name))


# Create a PipelineData (temporary Data Reference) for the analysis output folder
prepped_data_folder = PipelineData("analysis_data", datastore=ws.get_default_datastore())


# The single pipeline step: run analysis.py with the dataset as the named
# input 'raw_data' and the PipelineData folder as its output location.
# (The variable keeps the name train_step because the pipeline cell uses it.)
train_step = PythonScriptStep(name = "Threshold analysis",
                                source_directory = experiment_folder,
                                script_name = "analysis.py",
                                arguments = ['--input_data', dataset.as_named_input('raw_data'),
                                             '--analysis_data', prepped_data_folder],
                                outputs=[prepped_data_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)


print("Pipeline steps defined")

Pipeline steps defined


In [40]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Assemble the pipeline from its single step
pipeline_steps = [train_step]
pipeline = Pipeline(steps=pipeline_steps, workspace=ws)
print("Pipeline is built.")

Pipeline is built.


In [41]:
# Submit the pipeline as a run of the experiment, regenerating all step outputs
experiment = Experiment(name='featuresthreshold-pipeline', workspace=ws)
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")

# Show the run-details widget, then block until the pipeline run has finished
run_widget = RunDetails(pipeline_run)
run_widget.show()
pipeline_run.wait_for_completion(show_output=True)

Created step Threshold analysis [36eea9dd][9b8b8602-0db2-4dff-87b5-170d524e0140], (This step will run and generate new outputs)
Submitted PipelineRun c7d75651-b29a-404c-84b0-9ebf9ca1ff90
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/featuresthreshold-pipeline/runs/c7d75651-b29a-404c-84b0-9ebf9ca1ff90?wsid=/subscriptions/93090376-bea9-43cd-b404-3ba1bff96478/resourcegroups/p15g2-rg/workspaces/ml-p15g2
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: c7d75651-b29a-404c-84b0-9ebf9ca1ff90
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/featuresthreshold-pipeline/runs/c7d75651-b29a-404c-84b0-9ebf9ca1ff90?wsid=/subscriptions/93090376-bea9-43cd-b404-3ba1bff96478/resourcegroups/p15g2-rg/workspaces/ml-p15g2
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 1f79e365-cab1-42d8-9e16-ee46dd36df92
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/featuresthreshold-pipeline/runs/1f79e365-cab1-42d8-9e16-ee46dd36df92?wsid=/subscriptions/93090376-bea9-43cd-b404-3ba1bff96478/resourcegroups/p15g2-rg/workspaces/ml-p15g2
StepRun( Threshold analysis ) Status: NotStarted
StepRun( Threshold analysis ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_f99311a0c08ed19ab47c7ebea5fe4d1a5e1ebcd2c9bb1f469bd8465023551ab5_d.txt
2021-01-27T00:11:04Z Starting output-watcher...
2021-01-27T00:11:04Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
2021-01


Streaming azureml-logs/70_driver_log.txt
2021/01/27 00:11:15 Attempt 1 of http call to http://10.0.0.4:16384/sendlogstoartifacts/info
2021/01/27 00:11:15 Attempt 1 of http call to http://10.0.0.4:16384/sendlogstoartifacts/status
[2021-01-27T00:11:17.210003] Entering context manager injector.
[context_manager_injector.py] Command line Options: Namespace(inject=['ProjectPythonPath:context_managers.ProjectPythonPath', 'RunHistory:context_managers.RunHistory', 'TrackUserError:context_managers.TrackUserError'], invocation=['analysis.py', '--input_data', '08982c06-b572-47d4-92b0-c310b13f7565', '--analysis_data', '/mnt/batch/tasks/shared/LS_root/jobs/ml-p15g2/azureml/1f79e365-cab1-42d8-9e16-ee46dd36df92/mounts/workspaceblobstore/azureml/1f79e365-cab1-42d8-9e16-ee46dd36df92/analysis_data'])
Script type = None
Starting the daemon thread to refresh tokens in background for process with pid = 89
Entering Run History Context Manager.
[2021-01-27T00:11:19.793704] Current directory: /mnt/batch/task



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': 'c7d75651-b29a-404c-84b0-9ebf9ca1ff90', 'status': 'Completed', 'startTimeUtc': '2021-01-27T00:10:37.444709Z', 'endTimeUtc': '2021-01-27T00:11:56.130575Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://sapg15g2.blob.core.windows.net/azureml/ExperimentRun/dcid.c7d75651-b29a-404c-84b0-9ebf9ca1ff90/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=rGHKOvOsu%2FDpgS9SwmApB5RbkCACnS5i88gfNMCavhA%3D&st=2021-01-27T00%3A00%3A58Z&se=2021-01-27T08%3A10%3A58Z&sp=r', 'logs/azureml/stderrlogs.txt': 'https://sapg15g2.blob.core.windows.net/azureml/ExperimentRun/dcid.c7d75651-b29a-404c-84b0-9ebf9ca1ff90/logs/azureml/stderrlogs.txt?sv=2019-02-02&sr=b&sig=h9IvczTPBnZ4Yvd91NmvmL0N9j1hOR1CvKnf%2B3DO2ho%3D&st=2021-01-27T00%3A00%3A58Z&se=2021-01-27T08%3A10%3A5

'Finished'