In [29]:

from azure.identity import DefaultAzureCredential

from azure.ai.ml import MLClient

# Credential object handed to the workspace client below.
credential = DefaultAzureCredential()

# Workspace handle; later cells use it to register the environment and to
# submit command/sweep jobs.
ml_client = MLClient(
    credential,
    workspace_name="sparc2023-ws-xudyu",
    resource_group_name="sparc2023-workspace-xudyu-rg",
    subscription_id="8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5",
)




In [30]:
from azure.ai.ml.entities import Environment
import os
# Relative locations reused by the job definitions in later cells.
project_dir = "."
dependencies_dir = os.path.join(project_dir, "dependencies/")

# Environment definition: a base image plus the conda spec stored under
# dependencies_dir.
pipeline_job_env = Environment(
    image="mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:15",
    conda_file=os.path.join(dependencies_dir, "conda.yml"),
    name="conda-6DIM",
    description="env for 6DIMCOCO experiments",
    # set OS var
)

# Register (or update) the environment in the workspace.
env = ml_client.environments.create_or_update(pipeline_job_env)

# NOTE(review): the message reads name/version from the local
# pipeline_job_env object rather than from the returned `env`.
print(
    f"Environment with name {pipeline_job_env.name} is registered to workspace, the environment version is {pipeline_job_env.version}"
)

Environment with name conda-6DIM is registered to workspace, the environment version is 15


In [31]:
from azure.ai.ml import command
from azure.ai.ml import Input

'''        #more info at https://williamfalcon.github.io/test-tube/hyperparameter_optimization/HyperOptArgumentParser/
        self.add_argument("--dir",default="/nobackup/projects/bdlan05/smander3/data",type=str)
        self.add_argument("--log_path",default="/nobackup/projects/bdlan05/smander3/logs/",type=str)
        self.opt_list("--learning_rate", default=0.00001, type=float, options=[1e-3,1e-5, 1e-4,], tunable=True)
        self.opt_list("--batch_size", default=10, type=int, options=[6,8,10,12], tunable=True)
        self.opt_list("--JSE", default=0, type=int, options=[0], tunable=True)
        self.opt_list("--prune",default=False,type=bool,options=[True,False])
        self.opt_list("--projection",default="None",type=str,options=["None","inv","iinv"])
        self.opt_list("--normlogits",default=True,type=bool,options=[True,False])
        self.opt_list("--exactlabels",default=0,type=int,options=[1,0])
        self.opt_list("--meanloss",default=False,type=bool,options=[True,False])
        self.opt_list("--maskLosses",default=0,type=int,options=[0,1,2]) #1 and 2 often result in nan in labels?

        self.opt_list("--logitsversion",default=4,type=int,options=[0,1,2,3,4,5,6,7,8]) #1 and 2 often result in nan in labels?
        self.opt_list("--precision", default=32, options=[16], tunable=False)
        self.opt_list("--codeversion", default=6, type=int, options=[6], tunable=False)
        self.opt_list("--transformer_layers", default=8, type=int, options=[3,4,5,6], tunable=True)
        self.opt_list("--transformer_heads", default=16, type=int, options=[16], tunable=True)
        self.opt_list("--embed_dim", default=512, type=int, options=[128,512], tunable=True)
        self.opt_list("--transformer_width", default=512, type=int, options=[128,512], tunable=True)
        self.opt_list("--devices", default=1, type=int, options=[1], tunable=False)
        self.opt_list("--accelerator", default='gpu', type=str, options=['gpu'], tunable=False)
        self.opt_list("--num_trials", default=0, type=int, tunable=False)
        #self.opt_range('--neurons', default=50, type=int, tunable=True, low=1 '''
registered_model_name = "Contrastive trained DETR Model"

# Command job that runs launch.py from the project directory on a single
# node with one PyTorch process.
job = command(
    code=project_dir,  # location of source code
    # --dir / --log_path point at the data and logs folders under project_dir.
    command="python launch.py --num_trials 0 --dir {} --log_path {} ".format(
        os.path.join(project_dir, "./data"),
        os.path.join(project_dir, "./logs"),
    ),  # --data ${{inputs.datadir}}
    # Pin the exact environment version registered earlier in the notebook.
    environment=pipeline_job_env.name + ":" + pipeline_job_env.version,
    compute="sparc-v100-low-priority-hv",
    experiment_name="6DIMCOCO",
    display_name="ContrastiveTraining-6D-StephenM",
    # The node count is a job-level setting: it was previously written as
    # "node_count"/"instance_count" keys inside the distribution dict, which
    # are not PyTorch distribution settings.
    instance_count=1,
    distribution={
        "type": "PyTorch",
        "process_count_per_instance": 1,
    },
)

In [32]:
# Submit the `job` built in the previous cell. Being the last expression of
# the cell, the returned job object is rendered as the cell output.
ml_client.create_or_update(job)

[32mUploading 6DIMCOCO (82.03 MBs): 100%|██████████| 82027144/82027144 [00:01<00:00, 60587125.33it/s]
[39m



Experiment,Name,Type,Status,Details Page
6DIMCOCO,lime_battery_d7h4b4fjgc,command,Starting,Link to Azure Machine Learning studio


In [33]:
# Let's define some sweeps: we really want to trial a range of values for each of the following inputs.
from azure.ai.ml import MLClient
from azure.ai.ml import command, Input
from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy
from azure.identity import DefaultAzureCredential

# Create your command
# Every input declared below must be referenced in the command line through
# ${{inputs.<name>}}; otherwise each trial launches the script with the same
# arguments and the sweep explores nothing.
command_job_for_sweep = command(
    code=project_dir,  # location of source code
    # NOTE(review): the flag names are assumed to match the input names --
    # confirm against main.py's argument parser.
    command=(
        "python main.py"
        " --coco_path ${{inputs.coco_path}}"
        " --batch_size ${{inputs.batch_size}}"
    ),
    environment=pipeline_job_env.name + "@latest",
    compute="cpu-cluster",
    experiment_name="Testrelationshipdetection",
    display_name="SweepTest-StephenM",
    distribution={
        "type": "PyTorch",
        "process_count_per_instance": 1,
    },
    inputs={
        # TODO: should really be putting our data here as a path too
        "coco_path": "./data",
        # Choice takes a list of candidate values; range(2, 3) is just [2].
        "batch_size": Choice(values=list(range(2, 3))),
    },
)

# Call sweep() on your command job to sweep over your parameter expressions.
# A separate compute name is passed here for the sweep itself.
sweep_job = command_job_for_sweep.sweep(
    compute="sparc-v100-low-priority",
    sampling_algorithm="random",
    primary_metric="train_loss",  # should really set this to something at the validation stage
    goal="Minimize",
)

# Define the limits for this sweep
sweep_job.set_limits(max_total_trials=50, max_concurrent_trials=8, timeout=14400)

# Set early stopping on this one
sweep_job.early_termination = MedianStoppingPolicy(delay_evaluation=5, evaluation_interval=2)

# Specify your experiment details
sweep_job.display_name = "CLIP-relationdetection"
sweep_job.experiment_name = "StephenM-CLIP-relationdetection"
sweep_job.description = "Run a hyperparameter sweep job for assessing how good MASK finding is from CLIP embeddings"

# submit the sweep
returned_sweep_job = ml_client.create_or_update(sweep_job)

# get a URL for the status of the job
returned_sweep_job.services["Studio"].endpoint

# Download best trial model output
#ml_client.jobs.download(returned_sweep_job.name, output_name="model")



'https://ml.azure.com/runs/calm_oil_kqckd746s0?wsid=/subscriptions/8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5/resourcegroups/sparc2023-workspace-xudyu-rg/workspaces/sparc2023-ws-xudyu&tid=c681f89a-795a-4473-bc07-d86cb09d4312'

In [34]:
from azure.ai.ml import command
from azure.ai.ml import Input

registered_model_name = "Find Masks from CLIP Model"

# Single ClipToMask.py run with a fixed cache directory and batch size.
job = command(
    experiment_name="MaskPredictionwithCLIPVisGenome",
    display_name="MASKFINDER-STeveM",
    compute="sparc-v100-low-priority",
    environment=pipeline_job_env.name + "@latest",
    code=os.path.join(project_dir),  # location of source code
    command="python ClipToMask.py --Cache_dir {} --batch_size {}".format(
        "./data",
        2,
    ),  # --data ${{inputs.datadir}}
    distribution={
        "process_count_per_instance": 1,
        "type": "PyTorch",
    },
)

# Submit the job; the returned job object is the cell's displayed output.
ml_client.create_or_update(job)

Experiment,Name,Type,Status,Details Page
MaskPredictionwithCLIPVisGenome,ashy_grass_k0d99zw7lc,command,Starting,Link to Azure Machine Learning studio


In [35]:
from azure.ai.ml import MLClient
from azure.ai.ml import command, Input
from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy
from azure.identity import DefaultAzureCredential

# Create your command
# Every input declared below must be referenced in the command line through
# ${{inputs.<name>}}; otherwise each trial launches the script with the same
# arguments and the sweep explores nothing.
command_job_for_sweep = command(
    code=project_dir,  # location of source code
    # --batch_size is the flag the single-run ClipToMask.py command already
    # uses. NOTE(review): --version and --layers are assumed to match the
    # input names -- confirm against ClipToMask.py's argument parser.
    command=(
        "python ClipToMask.py"
        " --version ${{inputs.version}}"
        " --batch_size ${{inputs.batch_size}}"
        " --layers ${{inputs.layers}}"
    ),
    environment=pipeline_job_env.name + "@latest",
    compute="cpu-cluster",
    experiment_name="Test CLIP Learning of MASKS",
    display_name="SweepTest-StephenM",
    distribution={
        "type": "PyTorch",
        "process_count_per_instance": 1,
    },
    inputs={
        # TODO: should really be putting our data here as a path too
        "version": Choice(values=[1, 2]),
        # Choice takes a list of candidate values; range(1, 2) is just [1].
        "batch_size": Choice(values=list(range(1, 2))),
        "layers": Choice(values=[2, 3, 4]),
    },
)

# Call sweep() on your command job to sweep over your parameter expressions.
# A separate compute name is passed here for the sweep itself.
sweep_job = command_job_for_sweep.sweep(
    compute="sparc-v100-low-priority-hv",
    sampling_algorithm="random",
    primary_metric="train_loss",  # should really set this to something at the validation stage
    goal="Minimize",
)

# Define the limits for this sweep
sweep_job.set_limits(max_total_trials=50, max_concurrent_trials=8, timeout=14400)

# Set early stopping on this one
sweep_job.early_termination = MedianStoppingPolicy(delay_evaluation=5, evaluation_interval=2)

# Specify your experiment details
sweep_job.display_name = "CLIP-MaskLearning-sweep"
sweep_job.experiment_name = "StephenM-CLIP-Mask-Finding"
sweep_job.description = "Run a hyperparameter sweep job for assessing how good MASK finding is from CLIP embeddings"

# submit the sweep
returned_sweep_job = ml_client.create_or_update(sweep_job)

# get a URL for the status of the job
returned_sweep_job.services["Studio"].endpoint

# Download best trial model output
#ml_client.jobs.download(returned_sweep_job.name, output_name="model")


'https://ml.azure.com/runs/mighty_tray_jcng8jy17z?wsid=/subscriptions/8db6e466-5fa0-4e7f-b009-c5e20e1a7fe5/resourcegroups/sparc2023-workspace-xudyu-rg/workspaces/sparc2023-ws-xudyu&tid=c681f89a-795a-4473-bc07-d86cb09d4312'