In [1]:
import azureml
from azureml.core.model import Model, InferenceConfig 
from azureml.core import Workspace, Datastore, Experiment, Run, Environment, ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute, AksCompute
from azureml.train.dnn import PyTorch
from azureml.widgets import RunDetails
from azureml.core.webservice import Webservice, AksWebservice, AciWebservice
from azureml.core.dataset import Dataset
from azureml.core.resource_configuration import ResourceConfiguration
from azureml.core.conda_dependencies import CondaDependencies

print(f"Azure ML version: {azureml.core.VERSION}")

Azure ML version: 1.44.0


In [2]:
ws = Workspace.from_config()
print(f"Workspace name:    {ws.name}")

Workspace name:    keypoints_ws


In [3]:
compute_target = ComputeTarget(ws,"KeypointsCompute")
print(f"Compute Target name:    {compute_target.name}")

Compute Target name:    KeypointsCompute


In [4]:
ds = Datastore.get_default(ws)
print(f"Datastore name:   {ds.name}")

Datastore name:   workspaceblobstore


In [5]:
exp = Experiment(ws,"kp_exp") 
print(f"Experiment name:    {exp.name}")

Experiment name:    kp_exp


In [6]:
ds.upload(src_dir="./data/",target_path="kp_fixed_data",overwrite=True,show_progress=True)

"Datastore.upload" is deprecated after version 1.0.69. Please use "Dataset.File.upload_directory" to upload your files             from a local directory and create FileDataset in single method call. See Dataset API change notice at https://aka.ms/dataset-deprecation.


Uploading an estimated of 643 files
Uploading ./data\img_test_ann.json
Uploaded ./data\img_test_ann.json, 1 files out of an estimated total of 643
Uploading ./data\img_test_ann_o4.json
Uploaded ./data\img_test_ann_o4.json, 2 files out of an estimated total of 643
Uploading ./data\test_images\img_test_12.jpg
Uploaded ./data\test_images\img_test_12.jpg, 3 files out of an estimated total of 643
Uploading ./data\test_images\img_test_13.jpg
Uploaded ./data\test_images\img_test_13.jpg, 4 files out of an estimated total of 643
Uploading ./data\test_images\img_test_16.jpg
Uploaded ./data\test_images\img_test_16.jpg, 5 files out of an estimated total of 643
Uploading ./data\test_images\img_test_11.jpg
Uploaded ./data\test_images\img_test_11.jpg, 6 files out of an estimated total of 643
Uploading ./data\test_images\img_test_14.jpg
Uploaded ./data\test_images\img_test_14.jpg, 7 files out of an estimated total of 643
Uploading ./data\test_images\img_test_15.jpg
Uploaded ./data\test_images\img_test

$AZUREML_DATAREFERENCE_897ddb5022164d0f82672242fd096ca8

In [8]:
datastore_path = [(ds,"kp_fixed_data/**")]
print(datastore_path)
kp_dataset = Dataset.File.from_files(path=datastore_path)

[({
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-168b84d2-2ce4-428a-ba88-c46f2c71ac4e",
  "account_name": "keypointsws1231877371",
  "protocol": "https",
  "endpoint": "core.windows.net"
}, 'kp_fixed_data/**')]


In [9]:
# New version of dataset to submit and use
kp_dataset.register(workspace=ws,name="KeypointsData_v1",description="Dataset with grasping keypoints",create_new_version=True)

{
  "source": [
    "('workspaceblobstore', 'kp_fixed_data/**')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "2efc8d09-1f71-47db-9756-a97040954860",
    "name": "KeypointsData_v1",
    "version": 2,
    "description": "Dataset with grasping keypoints",
    "workspace": "Workspace.create(name='keypoints_ws', subscription_id='934da754-f5e0-4843-b86c-af997cb810d8', resource_group='kp_resource')"
  }
}

In [None]:
# Connecting to previous version of dataset
# kp_dataset = Dataset.get_by_name(workspace=ws,name="KeypointsData_v1")

In [10]:
%%writefile "scripts/training_script.py"

import os
import numpy as np
import torch
import torchvision
from PIL import Image
import pycocotools
from torchvision import transforms
from torchvision.models.detection.rpn import AnchorGenerator
from utils import collate_fn
import transforms, utils, engine, train
from engine import train_one_epoch, evaluate
import os
import argparse
from azureml.core import Run
run = Run.get_context()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

print(f"Device type:    {device}")

parser = argparse.ArgumentParser()
parser.add_argument("--data-folder",type=str,dest="data_folder",help="path to folder with data",default="")
parser.add_argument("--num-epochs",type=int,dest="num_epochs",help="number of epochs to train model",default=30)

args = parser.parse_args()
data_folder_path = args.data_folder 
num_epochs = args.num_epochs


class ObjectsDataset(torch.utils.data.Dataset):
    def __init__(self,ds_root,ann_file,transform=None):
        self.ds_root=ds_root
        self.ann_file=ann_file
        self.raw_coco_ds=torchvision.datasets.CocoDetection(self.ds_root,self.ann_file)
        self.transform=transform
    
    def __getitem__(self,idx):
        image=self.raw_coco_ds[idx][0]
        boxes,labels,image_id,area,iscrowd,keypoints=[],[],[],[],[],[]
        image_id.append(self.raw_coco_ds[idx][1][0]['image_id'])
        for item_id, item in enumerate(self.raw_coco_ds[idx][1]):
            bbox_xywh=item['bbox']
            boxes.append([bbox_xywh[0], bbox_xywh[1],bbox_xywh[0] + bbox_xywh[2],bbox_xywh[1] + bbox_xywh[3]])
            labels.append(item['category_id'])
            area.append(item['area'])
            iscrowd.append(item['iscrowd'])
            raw_keypoints=self.raw_coco_ds[idx][1][item_id]['keypoints']
            kps=[]
            for i in range(0,len(raw_keypoints),3):
                kp=[]
                kp.append(raw_keypoints[i])
                kp.append(raw_keypoints[i+1])
                if raw_keypoints[i+2] > 1:
                    kp.append(1)
                else:
                    kp.append(raw_keypoints[i+2])
                kps.append(kp)
            keypoints.append(kps)   
        image=torchvision.transforms.functional.to_tensor(image)
        target={}
        target['boxes'] = torch.as_tensor(boxes,dtype=torch.float32)
        target['labels'] = torch.as_tensor(labels,dtype=torch.int64)
        target['image_id'] = torch.as_tensor(image_id, dtype=torch.int64)
        target['area'] = torch.as_tensor(area,dtype=torch.float32)
        target['iscrowd'] = torch.as_tensor(iscrowd,dtype=torch.int64)
        target['keypoints'] = torch.as_tensor(keypoints,dtype=torch.float32)

        return image, target
        
    def __len__(self):
        return len(self.raw_coco_ds)

def get_keypoints_model():
    anchor_generator = AnchorGenerator(sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.25, 0.5, 0.75, 1.0, 2.0, 3.0, 4.0))
    model=torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=False,pretrained_backbone=True,num_keypoints=1,num_classes=7,rpn_anchor_generator=anchor_generator)
    return model

def main():

    dataset_train_root=os.path.join(data_folder_path,"train_images")
    ann_train_file=os.path.join(data_folder_path,"img_train_ann.json")
    dataset_test_root=os.path.join(data_folder_path,"test_images")
    ann_test_file=os.path.join(data_folder_path,"img_test_ann.json")
    dataset_train = ObjectsDataset(dataset_train_root,ann_train_file)
    dataset_test = ObjectsDataset(dataset_test_root,ann_test_file)

    dataset_train_loader=torch.utils.data.DataLoader(dataset_train,batch_size=6,shuffle=True,num_workers=8,collate_fn=collate_fn)
    dataset_test_loader=torch.utils.data.DataLoader(dataset_test,batch_size=2,shuffle=False,num_workers=8,collate_fn=collate_fn)

    model=get_keypoints_model()
    model.to(device)
    params=[p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.3)
    # num_epochs = num_epochs + 1

    for epoch in range(1,num_epochs + 1):
        train_one_epoch(model,optimizer,dataset_train_loader,device,epoch,print_freq=10)
        lr_scheduler.step()
        evaluate(model,dataset_test_loader,device)    
        if epoch % 5 == 0 and epoch >=10:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'lr_scheduler_state_dict': lr_scheduler.state_dict()
            },os.path.join('./outputs',f'checkpoint_epoch{epoch}.pth'))

    torch.save(model,'./outputs/kp_model.pth')


if __name__ == "__main__":
    main()

Overwriting scripts/training_script.py


In [None]:
# conda = CondaDependencies()
# conda.add_channel('pytorch')

# conda.add_conda_package('python=3.8')
# conda.add_conda_package('pytorch')
# conda.add_conda_package('torch')
# conda.add_conda_package('torchvision')

# conda.add_pip_package('pycocotools')

# env = Environment(name="kp_pytorch")
# env.register(ws)

In [11]:
args = ["--data-folder",kp_dataset.as_named_input("keypoints_ds").as_mount(),"--num-epochs",40]
print(args)

scripts_dir = "./scripts/"

# default_env = Environment.get(ws,'AzureML-pytorch-1.9-ubuntu18.04-py37-cuda11-gpu')
env = Environment.get(ws,"kp_env")

config = ScriptRunConfig(source_directory=scripts_dir,
                            script="training_script.py",
                            compute_target=compute_target,
                            environment=env,
                            arguments=args)

['--data-folder', <azureml.data.dataset_consumption_config.DatasetConsumptionConfig object at 0x000001398AC73BE0>, '--num-epochs', 40]


In [12]:
exp_run = exp.submit(config)

In [13]:
RunDetails(exp_run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [14]:
model = exp_run.register_model(model_name="kp_model",
                            model_path="outputs",
                            model_framework="PyTorch",
                            model_framework_version="1.11",
                            description="Keypoint model for object grasping")
print(f"Model name: {model.name}    model version:  {model.version}")

Model name: kp_model    model version:  2


In [15]:
# model.download(exit_ok=True)
model.download()

'outputs'