In [1]:

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Authenticate with azure-identity's DefaultAzureCredential.
credential = DefaultAzureCredential()

# Coordinates of the Azure ML workspace that every later cell talks to.
workspace_coordinates = {
    "subscription_id": "8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5",
    "resource_group_name": "sparc2023-workspace-xudyu-rg",
    "workspace_name": "sparc2023-ws-xudyu",
}

# Workspace client used below to register the environment and submit jobs.
ml_client = MLClient(credential=credential, **workspace_coordinates)




In [2]:
import os

from azure.ai.ml.entities import Environment

# All paths are relative to the directory the notebook is run from.
project_dir = "."
dependencies_dir = os.path.join(project_dir, "dependencies/")
conda_spec_path = os.path.join(dependencies_dir, "conda.yml")

# Environment definition: a curated ACPT PyTorch image plus the project's
# conda specification.
# TODO (carried over from the original cell): set OS var
pipeline_job_env = Environment(
    image="mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:15",
    conda_file=conda_spec_path,
    description="env for 6DIMCOCO experiments",
    name="conda-6DIM",
)

# Register (or update) the environment in the workspace.
env = ml_client.environments.create_or_update(pipeline_job_env)

registration_message = f"Environment with name {pipeline_job_env.name} is registered to workspace, the environment version is {pipeline_job_env.version}"
print(registration_message)

Environment with name conda-6DIM is registered to workspace, the environment version is 18


In [32]:
from azure.ai.ml import Input, Output, command
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.entities import Data

# Notes on the launch.py argument parser (previously kept as a bare string):
#   more info at https://williamfalcon.github.io/test-tube/hyperparameter_optimization/HyperOptArgumentParser/
#   self.add_argument("--dir",default="/nobackup/projects/bdlan05/smander3/data",type=str)
#   self.add_argument("--log_path",default="/nobackup/projects/bdlan05/smander3/logs/",type=str)

# Folder on the workspace blob store, mounted read-write and handed to
# launch.py through --dir.
data_folder_uri = "azureml://subscriptions/8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5/resourcegroups/sparc2023-workspace-xudyu-rg/workspaces/sparc2023-ws-xudyu/datastores/workspaceblobstore/paths/data/"
outputs = {
    "input_data": Output(
        type=AssetTypes.URI_FOLDER,
        path=data_folder_uri,
        mode=InputOutputModes.RW_MOUNT,
    )
}

registered_model_name = "Contrastive trained DETR Model"

# Pieces of the job definition, named so the command() call stays short.
log_dir = os.path.join(project_dir, "./logs")
training_command = "python launch.py --num_trials 0 --dir ${{outputs.input_data}} --log_path " + log_dir
environment_ref = pipeline_job_env.name + ":" + pipeline_job_env.version

# NOTE(review): "node_count" and "instance_count" are passed inside the
# distribution dict here; instance_count is normally a command() argument.
# Confirm the SDK honours these keys.
pytorch_distribution = {
    "type": "PyTorch",
    "process_count_per_instance": 1,
    "node_count": 1,
    "instance_count": 1,
}

# Single training run on the "smander" compute target.
job = command(
    code=project_dir,  # location of source code
    command=training_command,
    environment=environment_ref,
    compute="smander",
    experiment_name="6DIMCOCO",
    display_name="ContrastiveTraining-6D-StephenM",
    distribution=pytorch_distribution,
    outputs=outputs,
)

In [26]:
# Submit `job` to the workspace. The call is left as the cell's last bare
# expression so the returned job is rendered as the cell output.
ml_client.create_or_update(job)

Uploading 6DIMCOCO (82.11 MBs): 100%|██████████| 82105638/82105638 [00:01<00:00, 51406600.79it/s]




Experiment,Name,Type,Status,Details Page
6DIMCOCO,gentle_rainbow_mzyjynvpsg,command,Starting,Link to Azure Machine Learning studio


In [33]:
# Hyperparameter sweep: trial a range of values for the launch.py options below.
from azure.ai.ml import MLClient
from azure.ai.ml import command, Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy
from azure.identity import DefaultAzureCredential

# Search space for the sweep. Each key is both the job input name and
# (assumption - TODO confirm against launch.py) the CLI flag of the same name.
# log_path is not part of the search space; it is passed explicitly below.
sweep_search_space = {
    "prune": Choice([1, 0]),
    "projection": Choice(["None", "inv", "iinv"]),
    "normlogits": Choice([1, 0]),
    "exactlabels": Choice([1, 0]),
    "meanloss": Choice([1, 0]),
    "maskLosses": Choice([0, 1, 2]),
    "logitsversion": Choice([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]),
    "precision": Choice([32, 16]),
    "codeversion": Choice([6]),
    "transformer_layers": Choice([3, 4, 5, 6, 8, 12]),
    "transformer_heads": Choice([16]),
    "embed_dim": Choice([64, 128, 512]),
    "transformer_width": Choice([64, 128, 512]),
    "devices": Choice([4]),
    "accelerator": Choice(["gpu"]),
    # Choice is documented as taking a list, so materialise the range.
    "batch_size": Choice(list(range(2, 8))),
}

# Forward every sampled hyperparameter to the script. Previously the inputs
# were declared but never referenced in the command, so launch.py never
# received the sampled values.
hyperparameter_flags = " ".join(
    "--" + name + " ${{inputs." + name + "}}" for name in sweep_search_space
)
sweep_trial_command = (
    "python launch.py --num_trials 0"
    " --dir ${{outputs.input_data}}"
    " --log_path ${{outputs.input_data}}"
    " " + hyperparameter_flags
)

# Create your command
command_job_for_sweep = command(
    code=project_dir,  # location of source code
    command=sweep_trial_command,
    environment=pipeline_job_env.name+"@latest",
    compute="cpu-cluster",
    experiment_name="6DIMCOCOSWEEP",
    display_name="6DIMSweepStephenM",
    distribution={
        "type": "PyTorch",
        "process_count_per_instance": 1,
    },
    outputs={
        "input_data": Output(type=AssetTypes.URI_FOLDER,
                        path="azureml://subscriptions/8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5/resourcegroups/sparc2023-workspace-xudyu-rg/workspaces/sparc2023-ws-xudyu/datastores/workspaceblobstore/paths/data/",
                        mode=InputOutputModes.RW_MOUNT # this RW mount is important, otherwise the sweep will fail
                        ),
    },
    inputs=sweep_search_space,
)

# Call sweep() on your command job to sweep over your parameter expressions
sweep_job = command_job_for_sweep.sweep(
    compute="smander",
    sampling_algorithm="random",
    primary_metric="train_loss",  # should really set this to something at the validation stage
    goal="Minimize",
)
# Define the limits for this sweep
sweep_job.set_limits(max_total_trials=500, max_concurrent_trials=20, timeout=14400)

# Set early stopping on this one
sweep_job.early_termination = MedianStoppingPolicy(delay_evaluation=5, evaluation_interval=2)

# Specify your experiment details
sweep_job.display_name = "CLIP-SWEEP"
sweep_job.experiment_name = "StephenM-CLIP-HighDimSweep"
sweep_job.description = "Run a hyperparameter sweep 6D repo"

# submit the sweep
returned_sweep_job = ml_client.create_or_update(sweep_job)

# get a URL for the status of the job (displayed as the cell output)
returned_sweep_job.services["Studio"].endpoint

# Download best trial model output
#ml_client.jobs.download(returned_sweep_job.name, output_name="model")



Uploading 6DIMCOCO (82.11 MBs): 100%|██████████| 82109759/82109759 [00:01<00:00, 60795914.27it/s]




'https://ml.azure.com/runs/icy_leather_0fm5jmkg0p?wsid=/subscriptions/8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5/resourcegroups/sparc2023-workspace-xudyu-rg/workspaces/sparc2023-ws-xudyu&tid=c681f89a-795a-4473-bc07-d86cb09d4312'

In [15]:
from azure.ai.ml import Input
from azure.ai.ml import command

registered_model_name = "Find Masks from CLIP Model"

# Values substituted into the ClipToMask.py command line.
mask_cache_dir = "./data"
mask_batch_size = 2
mask_command = "python ClipToMask.py --Cache_dir {} --batch_size {}".format(mask_cache_dir, mask_batch_size)

# Source folder uploaded with the job.
mask_code_dir = os.path.join(project_dir)

# Command job that runs ClipToMask.py on the low-priority V100 compute target.
job = command(
    code=mask_code_dir,  # location of source code
    command=mask_command,
    environment=pipeline_job_env.name + "@latest",
    compute="sparc-v100-low-priority",
    experiment_name="MaskPredictionwithCLIPVisGenome",
    display_name="MASKFINDER-STeveM",
    distribution={"type": "PyTorch", "process_count_per_instance": 1},
)

# Submit; the returned job is the cell's displayed result.
ml_client.create_or_update(job)

Experiment,Name,Type,Status,Details Page
MaskPredictionwithCLIPVisGenome,musing_cartoon_z3pl62g4ys,command,Starting,Link to Azure Machine Learning studio


In [18]:
# Hyperparameter sweep: trial a range of values for the launch.py options below.
from azure.ai.ml import MLClient
from azure.ai.ml import command, Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy
from azure.identity import DefaultAzureCredential

# Job inputs: the search space plus the fixed log_path literal. Each key is
# both the input name and (assumption - TODO confirm against launch.py) the
# CLI flag of the same name.
sweep_inputs = {
    "prune": Choice([1, 0]),
    "projection": Choice(["None", "inv", "iinv"]),
    "normlogits": Choice([1, 0]),
    "exactlabels": Choice([1, 0]),
    "meanloss": Choice([1, 0]),
    "maskLosses": Choice([0, 1, 2]),
    "logitsversion": Choice([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]),
    "precision": Choice([32, 16]),
    "codeversion": Choice([6]),
    "transformer_layers": Choice([3, 4, 5, 6, 8, 12]),
    "transformer_heads": Choice([16]),
    "embed_dim": Choice([64, 128, 512]),
    "transformer_width": Choice([64, 128, 512]),
    "devices": Choice([1]),
    "accelerator": Choice(["gpu"]),
    "log_path": os.path.join(project_dir, "./logs"),
    # Choice is documented as taking a list, so materialise the range.
    "batch_size": Choice(list(range(2, 8))),
}

# Forward every declared input to the script. Previously the inputs were
# declared but never referenced in the command, so launch.py never received
# the sampled values.
input_flags = " ".join(
    "--" + name + " ${{inputs." + name + "}}" for name in sweep_inputs
)

# The command used to reference ${{inputs.input_data}}, which was never
# declared. The data folder is now declared as a read-write mounted output
# and referenced as ${{outputs.input_data}}.
sweep_trial_command = (
    "python launch.py --num_trials 0"
    " --dir ${{outputs.input_data}}"
    " " + input_flags
)

# Create your command
command_job_for_sweep = command(
    code=project_dir,  # location of source code
    command=sweep_trial_command,
    environment=pipeline_job_env.name+"@latest",
    compute="cpu-cluster",
    experiment_name="6DIMCOCOSWEEP2",
    display_name="6DIMSweepStephenMd1",
    distribution={
        "type": "PyTorch",
        "process_count_per_instance": 1,
    },
    outputs={
        "input_data": Output(type=AssetTypes.URI_FOLDER,
                        path="azureml://subscriptions/8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5/resourcegroups/sparc2023-workspace-xudyu-rg/workspaces/sparc2023-ws-xudyu/datastores/workspaceblobstore/paths/data/",
                        mode=InputOutputModes.RW_MOUNT
                        ),
    },
    inputs=sweep_inputs,
)

# Call sweep() on your command job to sweep over your parameter expressions
sweep_job = command_job_for_sweep.sweep(
    compute="sparc-v100-low-priority",
    sampling_algorithm="random",
    primary_metric="train_loss",  # should really set this to something at the validation stage
    goal="Minimize",
)
# Define the limits for this sweep
sweep_job.set_limits(max_total_trials=500, max_concurrent_trials=20, timeout=14400)

# Set early stopping on this one
sweep_job.early_termination = MedianStoppingPolicy(delay_evaluation=5, evaluation_interval=2)

# Specify your experiment details
sweep_job.display_name = "CLIP-SWEEP2"
sweep_job.experiment_name = "StephenM-CLIP-HighDimSweep"
sweep_job.description = "Run a hyperparameter sweep 6D repo"

# submit the sweep
returned_sweep_job = ml_client.create_or_update(sweep_job)

# get a URL for the status of the job (displayed as the cell output)
returned_sweep_job.services["Studio"].endpoint

# Download best trial model output
#ml_client.jobs.download(returned_sweep_job.name, output_name="model")



'https://ml.azure.com/runs/affable_nose_dlgm42hj07?wsid=/subscriptions/8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5/resourcegroups/sparc2023-workspace-xudyu-rg/workspaces/sparc2023-ws-xudyu&tid=c681f89a-795a-4473-bc07-d86cb09d4312'