In [None]:
from azure.ai.ml import MLClient#, Input, command
from azure.identity import DefaultAzureCredential
import sys
sys.path.append("../..")
from utils import azure_ml_configs

# Workspace coordinates are read from the project's azure_ml_configs module.
workspace_id = azure_ml_configs.workspace_id
subscription_id = azure_ml_configs.subscription_id
resource_group = azure_ml_configs.resource_group
workspace_name = azure_ml_configs.workspace_name

# Build the credential first, then bind an MLClient to the target workspace.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

# Fetch version 1 of the registered data asset; its path is printed here and
# read again later when the training command is assembled.
data_asset = ml_client.data.get(
    name="clinicalNote_AcuteReadmission_DedupCont",
    version=1,
)
print(f"Data asset URI: {data_asset.path}")

In [None]:
import shlex

from azureml.core import Workspace, Experiment, ScriptRunConfig, Environment, Datastore
from azureml.core.script_run_config import ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies
import azureml._restclient.snapshots_client
from azureml.fsspec import AzureMachineLearningFileSystem
import mlflow

In [None]:
# Load the workspace from the local Azure ML config file.
ws = Workspace.from_config()

# Experiment that the submitted fine-tuning runs are recorded under.
experiment_name = 'roberta-finetune-jobs-tests'
experiment = Experiment(workspace=ws, name=experiment_name)

# Override the SDK's maximum snapshot size (in bytes); presumably needed so the
# snapshot of source_directory='.' is not rejected for being too large.
# NOTE(review): 10073741824 is not a round size (10 GiB would be
# 10 * 1024**3 = 10737418240) -- confirm the intended limit.
snapshot_size = 10073741824
azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = snapshot_size

# Curated GPU environment the job runs in.
curated_env = Environment.get(workspace=ws, name="AzureML-ACPT-pytorch-1.11-py38-cuda11.3-gpu")

# The command is a single shell string (it relies on `&&`), so the data asset
# path is shell-quoted rather than wrapped in hand-written single quotes; a
# path containing a quote or other shell metacharacter can then no longer
# break or alter the command.
data_arg = shlex.quote(str(data_asset.path))

# Adjacent string literals are concatenated by Python; every fragment ends with
# a space so the arguments stay separated. This avoids backslash continuations
# inside one string literal, which break on any trailing whitespace.
#
# Optional finetune.py flags that are currently not passed:
#   --nrows 10000
#   --max_seq_splits 5        (has to change now that concatenated input is used)
#   --shuffle
#   --resume_from_checkpoint 'epoch_9'
#   --discharge_notes_only
# Alternative smoke-test command:
#   pip install -r mlm_requirements.txt && python3 test.py
command = (
    "pip install -r finetune_requirements.txt && "
    "accelerate launch --multi_gpu --mixed_precision 'fp16' --num_processes 4 "
    "finetune.py "
    "--pretrained_model_path './' "
    "--model_name 'roberta-base-danish' "
    f"--data {data_arg} "
    "--text_column_name 'DedupCont' "
    "--checkpointing_steps 5000 "
    "--add_special_tokens_notes "
    "--checkpoint_dir './outputs' "
    "--lr 0.000003 "
    "--num_epochs 13 "
    "--batch_size 64 "
    "--random_seed 22 "
    "--scale_loss "
    "--with_tracking "
    "--report_to 'mlflow' "
)

# Show the exact command that will be executed on the compute target.
print(command)

# Run configuration: uploads the current directory and runs `command` in the
# curated environment on the chosen compute target.
# NOTE(review): --num_processes 4 presumably matches the GPU count of
# 'Terne4A100' -- adjust it if the compute target changes (e.g. 'Terne8c128').
config = ScriptRunConfig(
    source_directory='.',
    command=command,
    compute_target='Terne4A100',
    environment=curated_env,
)

In [None]:
# Send the configured job to Azure ML under the experiment created earlier.
Run = experiment.submit(config)

# Print the portal link (ml.azure.com) for this run.
portal_url = Run.get_portal_url()
print(portal_url)

# Wait for the run to complete, with output shown in the notebook.
Run.wait_for_completion(show_output=True)