### Train 2D CNN (multiclass)

Reference: https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-pytorch?view=azureml-api-2

In [13]:
# Import necessary libraries
from dotenv import load_dotenv
import os
import subprocess
import platform

# Read settings from the .env file
load_dotenv()

# Execution target switch: the run goes to the cloud only when RUN_IN_CLOUD
# equals "true" (case-insensitive); when the variable is unset it defaults to cloud.
RUN_IN_CLOUD = "true" == os.environ.get("RUN_IN_CLOUD", "True").lower()

# Settings shared by the cells below
network, dataset = "cnn2d_multiclass", "ccccii"
# Sent to the training script as --k / --i by the cloud job command;
# presumably fold count and fold index for cross-validation — TODO confirm.
k, i = 5, 0
num_epochs, learning_rate, batch_size = 20, 0.0005, 16


In [14]:
if RUN_IN_CLOUD:
    # Azure ML specific imports (only executed when running in the cloud)
    from azure.ai.ml import MLClient
    from azure.identity import DefaultAzureCredential
    from azure.ai.ml.entities import AmlCompute
    from azure.ai.ml import command

    # Fail fast, naming the missing variables, instead of handing None to
    # MLClient and getting an opaque error from the first service call.
    required_settings = (
        "AZURE_SUBSCRIPTION_ID",
        "AZURE_RESOURCE_GROUP",
        "AZURE_WORKSPACE_NAME",
    )
    missing_settings = [name for name in required_settings if not os.getenv(name)]
    if missing_settings:
        raise RuntimeError(
            "Missing required environment variable(s) for Azure ML: "
            + ", ".join(missing_settings)
        )

    # Configure the Azure workspace and authentication
    subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
    resource_group_name = os.getenv("AZURE_RESOURCE_GROUP")
    workspace_name = os.getenv("AZURE_WORKSPACE_NAME")

    credential = DefaultAzureCredential()
    ml_client = MLClient(
        credential=credential,
        subscription_id=subscription_id,
        resource_group_name=resource_group_name,
        workspace_name=workspace_name,
    )

    # Create or get the GPU cluster
    gpu_compute_target = "gpucluteruk"
    try:
        gpu_cluster = ml_client.compute.get(gpu_compute_target)
        print(f"You already have a cluster named {gpu_compute_target}, we'll reuse it as is.")
    except Exception:
        # NOTE(review): any failure of the lookup above (not only "cluster does
        # not exist") ends up here and triggers a create attempt; consider
        # narrowing this to the SDK's not-found exception.
        print("Creating a new GPU compute target...")
        gpu_cluster = AmlCompute(
            name=gpu_compute_target,
            type="amlcompute",
            size="Standard_ND96amsr_A100_v4",
            # NOTE(review): min_instances=1 presumably keeps one node allocated
            # even when no job is running — confirm this is intended.
            min_instances=1,
            max_instances=4,
            idle_time_before_scale_down=180,
            tier="Dedicated",
        )
        gpu_cluster = ml_client.begin_create_or_update(gpu_cluster).result()
    # Printed for both the reused and the newly created cluster
    print(f"AMLCompute with name {gpu_cluster.name} is created, the compute size is {gpu_cluster.size}")


You already have a cluster named gpucluteruk, we'll reuse it as is.
AMLCompute with name gpucluteruk is created, the compute size is Standard_ND96amsr_A100_v4


In [15]:
if RUN_IN_CLOUD:
    # Azure ML environment and job setup
    custom_env_name = "custom-acpt-pytorch-113-cuda117:9"

    # Storage settings forwarded to the job as environment variables.
    # NOTE(review): AZURE_STORAGE_KEY is passed as a plain job environment
    # variable and is presumably visible in the job definition — confirm this
    # is acceptable or switch to a workspace-managed secret.
    env_vars = {
        'AZURE_STORAGE_ACCOUNT': os.getenv("AZURE_STORAGE_ACCOUNT"),
        'AZURE_STORAGE_KEY': os.getenv("AZURE_STORAGE_KEY"),
        'BLOB_CONTAINER': os.getenv("BLOB_CONTAINER")
    }

    # An unset variable would be sent as None; stop before submission and
    # name what is missing instead.
    missing_env_vars = [name for name, value in env_vars.items() if value is None]
    if missing_env_vars:
        raise RuntimeError(
            "Missing required environment variable(s) for the training job: "
            + ", ".join(missing_env_vars)
        )

    # Job inputs; referenced in the command below via ${{inputs.<name>}} and
    # also attached to the job as tags. 'network' is only used as a tag.
    inputs = {
        'network': network,
        'dataset': dataset,
        'k': k,
        'i': i,
        'num_epochs': num_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size
    }

    # Plain (non f-) strings: the ${{...}} placeholders must reach Azure ML verbatim.
    training_command = (
        "python -m scripts.train.train_cnn2d_multiclass --run_cloud"
        " --dataset ${{inputs.dataset}}"
        " --k ${{inputs.k}}"
        " --i ${{inputs.i}}"
        " --num_epochs ${{inputs.num_epochs}}"
        " --batch_size ${{inputs.batch_size}}"
        " --learning_rate ${{inputs.learning_rate}}"
    )

    job = command(
        inputs=inputs,
        compute=gpu_compute_target,
        environment=custom_env_name,
        code="../",  # location of source code
        command=training_command,
        environment_variables=env_vars,
        experiment_name="train_cnn_2d_multiclass",
        display_name="train-cnn-2d-multiclass",
        tags={key: str(value) for key, value in inputs.items()}
    )

    ml_client.jobs.create_or_update(job)
else:
    # Local execution setup
    import sys
    from pathlib import Path
    import subprocess

    # Set the working directory to the root of your project
    project_root = Path("../")  # Adjust this if your scripts directory is elsewhere
    sys.path.append(str(project_root.resolve()))

    print("Running the training script locally...")
    # sys.executable runs the script with the same interpreter as this kernel,
    # rather than whichever "python" happens to be first on PATH.
    # --k / --i are passed so the local run uses the same shared settings as
    # the cloud command.
    python_command = [
        sys.executable, "-m", "scripts.train.train_cnn2d_multiclass",
        "--dataset", dataset,
        "--k", str(k),
        "--i", str(i),
        "--num_epochs", str(num_epochs),
        "--batch_size", str(batch_size),
        "--learning_rate", str(learning_rate),
    ]

    # check=True raises CalledProcessError if the training script exits non-zero
    subprocess.run(python_command, cwd=project_root, check=True)



Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading pulmo-sense (43.49 MBs)