 ### Run a sample experiment using AzureML SDK - Part 1

In [2]:
# Import required classes from Azureml
from azureml.core import Workspace, Datastore, Dataset, Experiment


# Load the workspace from the configuration at ./config, then look up the
# named datastore, the registered dataset and the workspace default datastore
ws = Workspace.from_config(path="./config")
az_store = Datastore.get(workspace=ws, datastore_name="azure_sdk_blob01")
az_dataset = Dataset.get_by_name(workspace=ws, name="Loan Applications Using SDK")
az_default_store = ws.get_default_datastore()

In [3]:
# Create the experiment "Loan-SDK-Exp01" in the workspace, or access it if it
# already exists
experiment = Experiment(ws, "Loan-SDK-Exp01")

In [4]:
# Start a run of the experiment with start_logging and keep the returned
# run object in new_run

new_run = experiment.start_logging()

In [5]:
# Mark the run started with start_logging as complete

new_run.complete()

### Run a sample experiment using AzureML SDK - Part 2

Steps :

- Run the experiment using the start_logging method
- Do your stuff here - all the experiments code/pipeline
- Log metrics and complete the experiment run

In [6]:
# Start a second run of the same experiment with start_logging; metrics are
# logged against this run object
new_run_2 = experiment.start_logging()

In [None]:
# Do your stuff here: load the registered dataset into a pandas DataFrame
df = az_dataset.to_pandas_dataframe()

In [9]:
# Number of rows (observations) in the DataFrame
total_observations = df.shape[0]
total_observations

614

In [8]:
# Per-column count of null/missing values
null_df = df.isna().sum()
null_df

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     2
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [11]:
# Log the metrics on the run, then complete the experiment run

# Dataset size
new_run_2.log("Total Observations", total_observations)

# Missing-value count of every column, logged under the column's name
for column_name in df.columns:
    missing_count = null_df[column_name]
    new_run_2.log(column_name, missing_count)

new_run_2.complete()

### Run a script in Azureml environment 

This code submits the script specified in `ScriptRunConfig` as an experiment run. It creates an Azure ML environment on the local machine, including Docker for Azure ML.

#### Submit_job.py

In [14]:
# Import the Azure Ml Classes
from azureml.core import Workspace, Experiment, ScriptRunConfig

# Load the workspace from the configuration at ./config
ws = Workspace.from_config(path="./config")

# Experiment under which the script run is recorded
new_experiment = Experiment(ws, "Loan_Script")

# Run configuration pointing at the script in the current directory
script_config = ScriptRunConfig(source_directory=".", script="180 - Script To Run.py")

# Submit the script as a run of the experiment
new_run = new_experiment.submit(script_config)

# Wait for the submitted script run to complete
new_run.wait_for_completion()

{'runId': 'Loan_Script_1678813244_9752d51b',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2023-03-14T17:00:47.070861Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '30bfd7c9-40c2-45e9-8f3b-5fcdd2e47827',
  'azureml.git.branch': 'main',
  'mlflow.source.git.branch': 'main',
  'azureml.git.commit': '238e7b1bd25bca96f53d45421031b57b1aff4242',
  'mlflow.source.git.commit': '238e7b1bd25bca96f53d45421031b57b1aff4242',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [{'dataset': {'id': 'ab1adce6-6ad9-49f5-b00d-d3e0e5392b64'}, 'consumptionDetails': {'type': 'Reference'}}],
 'outputDatasets': [],
 'runDefinition': {'script': '180 - Script To Run.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunD

#### Script_to_run - 180 - Script To Run.py

In [None]:
# Script submitted through ScriptRunConfig: loads the loan dataset, writes a
# reduced copy to the outputs folder and logs row/missing-value metrics to
# the run it is executed under.

# Import required classes from Azureml
import os

from azureml.core import Workspace, Datastore, Dataset, Experiment
from azureml.core import Run


# Access the Workspace, Datastore and Datasets
ws = Workspace.from_config("./config")
az_store = Datastore.get(ws, "azure_sdk_blob01")
az_dataset = Dataset.get_by_name(ws, "Loan Applications Using SDK")
az_default_store = ws.get_default_datastore()

# Get the run context - the run created when the experiment is launched
# by the submit script
new_run = Run.get_context()


# Do your stuff here
df = az_dataset.to_pandas_dataframe()

# Count the observations
total_observations = len(df)

# Get the null/missing values (one count per column)
null_df = df.isnull().sum()



# Create a new dataframe with a subset of the features and
# write it to the outputs folder.
# to_csv does not create missing parent directories, so make sure the
# outputs folder exists before writing.
os.makedirs("./outputs", exist_ok=True)
new_df = df[["Gender", "Married", "Education", "Loan_Status"]]
# NOTE(review): the file name has no ".csv" extension - possibly meant to be
# "Loan_trunc.csv"; left unchanged so existing consumers are not broken.
new_df.to_csv("./outputs/Loan_trunc_csv", index=False)



# ---------------------------------------------------
# Log metrics and complete the experiment run
# ---------------------------------------------------

# Log the metrics to the workspace
new_run.log("Total Observations", total_observations)

# Log the missing data values, one metric per column name
for column_name in df.columns:
    new_run.log(column_name, null_df[column_name])

new_run.complete()