## Get Workspace

In [1]:
from azureml.core import Workspace

# Obtain the Azure ML workspace handle used by every later cell (as `ws`).
# Presumably `from_config()` reads a local workspace config file and may prompt
# for interactive login — confirm before running unattended.
ws = Workspace.from_config()
# Bare last expression so the notebook renders the workspace.
# NOTE(review): the rendered output includes the subscription ID and resource
# group; consider clearing this cell's output before sharing the notebook.
ws

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Workspace.create(name='bing-eastus', subscription_id='6560575d-fa06-4e7d-95fb-f962e74efd7a', resource_group='cody-bing')

## Prepare Data

In [4]:
from azureml.core import ScriptRunConfig, Experiment, Environment

# --- Data-prep job parameters -------------------------------------------
npartitions = 2048
filename = "azureml/a59853eb-a1ae-4444-8aed-6712f77f6aba/LGBMTrainTSV"
# NOTE(review): `output` is not forwarded to prep.py in `arguments` below; in
# this notebook it is only consumed by the training cell as its --input.
# Confirm that prep.py actually writes to this path.
output = f"cody-bing/out/90gb/{npartitions}/train.parquet"

# --- Azure ML compute ----------------------------------------------------
compute_target = "cpu-m128ms"

# --- Build the run configuration ----------------------------------------
# Command line handed to src/prep.py, as alternating flag/value tokens.
arguments = ["--input", filename, "--npartitions", npartitions]
env = Environment.from_conda_specification(
    name="lightgbm-cpu-tutorial",
    file_path="environment.yml",
)
src = ScriptRunConfig(
    source_directory="src",
    script="prep.py",
    arguments=arguments,
    compute_target=compute_target,
    environment=env,
)

# --- Submit --------------------------------------------------------------
# NOTE(review): the run object comes back while the job is still in progress
# (the saved output shows status "Preparing"), so downstream cells should not
# assume the prepared data exists yet — consider waiting for completion.
experiment = Experiment(workspace=ws, name="default")
run = experiment.submit(src)
run

Experiment,Id,Type,Status,Details Page,Docs Page
default,default_1610498744_226ba0d6,azureml.scriptrun,Preparing,Link to Azure Machine Learning studio,Link to Documentation


## Train LightGBM

In [None]:
from azureml.core import ScriptRunConfig, Experiment, Environment
from azureml.core.runconfig import MpiConfiguration

# --- Training hyperparameters forwarded to src/train.py ------------------
boosting = "gbdt"
iterations = 3
lr = 0.2
leaves = 31

# --- Azure ML compute and cluster sizing --------------------------------
compute_target = "cpu-d32sv3"

# NOTE(review): the reason for the "+ 2" is not stated in this notebook —
# presumably extra non-worker nodes; confirm against train.py.
nodes = 100 + 2
cpus = 32

# --- Build the run configuration ----------------------------------------
# Flag -> value mapping; insertion order is the order on the command line.
cli_options = {
    "--input": output,  # path defined in the "Prepare Data" cell
    "--boosting": boosting,
    "--num_iterations": iterations,
    "--learning_rate": lr,
    "--num_leaves": leaves,
    "--nodes": nodes,
    "--cpus": cpus,
}
# Flatten to ["--flag", value, "--flag", value, ...].
arguments = [token for option in cli_options.items() for token in option]

env = Environment.from_conda_specification(
    name="lightgbm-cpu-tutorial",
    file_path="environment.yml",
)
# MPI job spanning `nodes` nodes; the same count is also passed to the script.
mpi_config = MpiConfiguration(node_count=nodes)
src = ScriptRunConfig(
    source_directory="src",
    script="train.py",
    arguments=arguments,
    compute_target=compute_target,
    environment=env,
    distributed_job_config=mpi_config,
)

# --- Submit --------------------------------------------------------------
experiment = Experiment(workspace=ws, name="default")
run = experiment.submit(src)
run