In [5]:
# %pip install -r yolov5/requirements.txt --user 
# %pip install scikit-learn scikit-image azureml-core --user

In [6]:
from util import labeledImage, normalize_coordinates, convert_to_yolo_format
from sklearn.model_selection import train_test_split
import os, shutil, yaml


In [7]:
path_to_Mse544_cv = 'C:/Users/final/Documents/CHEM_543_SQ25/CHEM543_Week7'

source_images_dir = f'{path_to_Mse544_cv}/molecules/'
source_labels_dir = f'{path_to_Mse544_cv}/molecules/labels/'

labeled_images = []
tag = 'molecule' 

for file in os.listdir(source_images_dir):
    # find all jpeg file and it's ImageJ label
    if file.endswith(".jpeg"):
        image_path = os.path.join(source_images_dir, file)
        label_path = os.path.join(source_labels_dir, file.split('.')[0] + '.txt')
        labeled_images.append(labeledImage(image_path))
        labeled_images[-1].add_labels_from_file(tag, label_path)


In [8]:
train_and_val_set, test_set = train_test_split(labeled_images, test_size=0.1)
train_set, val_set = train_test_split(train_and_val_set, test_size=(2/9))

len(train_set), len(val_set), len(test_set)

(42, 12, 7)

In [9]:
# Create the molecule_images directory if it doesn't exist
output_dir = os.path.join(os.getcwd(),'molecule_images')
if not os.path.exists(output_dir): os.mkdir(output_dir)

train_dir = os.path.join(output_dir, 'train') 
val_dir   = os.path.join(output_dir, 'val') 
test_dir  = os.path.join(output_dir, 'test') 

# Create the sub-directories
for d in [train_dir, val_dir, test_dir]:
    if not os.path.exists(d): os.mkdir(d)
    
    images_sub_dir = os.path.join(d, 'images')
    labels_sub_dir = os.path.join(d, 'labels')
    
    for sub_dir in [images_sub_dir, labels_sub_dir]:
        if not os.path.exists(sub_dir): os.mkdir(sub_dir)


In [10]:
# make unified yolo tags 
tags = [tag]

# zip the dataset
dataset = [(train_dir, train_set),(val_dir, val_set),(test_dir, test_set)]

for d, s in dataset:
    images_sub_dir = os.path.join(d, 'images')
    labels_sub_dir = os.path.join(d, 'labels')

    # copy over the images
    for img in s:
        shutil.copyfile(img.path, os.path.join(images_sub_dir, img.name))
    
    # covert ImageJ labels to yolo format and save it to labels_sub_dir
    convert_to_yolo_format(s, labels_sub_dir, tags)


successfully generated labels for image  ma-2014-00250j_0002.jpeg
successfully generated labels for image  ma-2013-00355w_0009.jpeg
successfully generated labels for image  cm-2015-032569_0005.jpeg
successfully generated labels for image  mz-2012-00065t_0005.jpeg
successfully generated labels for image  ma-2011-02540j_0003.jpeg
successfully generated labels for image  cm-2010-00417z_0001.jpeg
successfully generated labels for image  ja-2016-05418c_0001.jpeg
successfully generated labels for image  ja-2009-09111p_0004.jpeg
successfully generated labels for image  cm-2017-00595k_0001.jpeg
successfully generated labels for image  ma-2014-01736m_0007.jpeg
successfully generated labels for image  ja-2009-057986_0003.jpeg
successfully generated labels for image  cm-2015-032569_0010.jpeg
successfully generated labels for image  cm-2015-01520c_0001.jpeg
successfully generated labels for image  cm-2015-030982_0001.jpeg
successfully generated labels for image  ja-2011-10687r_0014.jpeg
successful

In [11]:
# generate yolo yaml file
yolo_yaml = os.path.join(output_dir, 'molecule_detection_yolov5.yaml')

with open(yolo_yaml, 'w') as yamlout:
    yaml.dump(
        {'train': train_dir,
         'val': val_dir,
         'nc': len(tags),
         'names': tags},
        yamlout,
        default_flow_style=None,
        sort_keys=False
    )


In [12]:
# %run yolov5/train.py --img 640 --batch 16 --epochs 1 --data ./molecule_images/molecule_detection_yolov5.yaml --weights yolov5s.pt

In [13]:
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

In [14]:
yolov5_env = Environment(name="yolov5_env")

# Start from a base docker environments defined by Microsoft
yolov5_env.docker.base_image  = "docker.io/hstirrat/yolov5-env:fixed"

conda_dep = CondaDependencies()
# Indicate which version of python needs to be installed
conda_dep.add_conda_package('python=3.9')

# install all the yolov5 requirement at the image build time
with open('./yolov5/requirements.txt', 'r') as f:
    line = f.readline()
    
    while line != '':    
        # If the line is a comment or empty, skip it    
        if line.startswith('#') or len(line.split()) == 0:
            line = f.readline()
            continue
        # Otherwise add the corresponding package name as a dependency
        conda_dep.add_pip_package(line.split()[0])
        # Then move on to the next line in the requirements.txt file
        line = f.readline()

yolov5_env.python.conda_dependencies=conda_dep


In [15]:
yolov5_env.get_image_details

<bound method Environment.get_image_details of {
    "assetId": null,
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "docker.io/hstirrat/yolov5-env:fixed",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": "2g"
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "yolov5_env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "cha

In [16]:
subscription_id = '6b86c32f-3e1d-4be7-bbd4-dd16443808b2'
resource_group  = 'rg-amlclass-notjello'
workspace_name  = 'azureml-notjello'
ws = Workspace(subscription_id, resource_group, workspace_name)

experiment = Experiment(workspace=ws, name='molecule_detection_yolo_training')


In [17]:
# Overall configuration for the script to be run on the compute cluster
config = ScriptRunConfig(source_directory='./deploy_yolo_training/',   ## folder in which the script is located
                         script='training_on_aml.py',       ## script name
                         compute_target='GPU-notjello',
                         environment=yolov5_env)   

In [29]:
%cd C:/Users/final/Documents/CHEM_543_SQ25/CHEM543_Week8
os.getcwd()

C:\Users\final\Documents\CHEM_543_SQ25\CHEM543_Week8


'C:\\Users\\final\\Documents\\CHEM_543_SQ25\\CHEM543_Week8'

In [31]:
run = experiment.submit(config)
aml_url = run.get_portal_url()
print(aml_url)


https://ml.azure.com/runs/molecule_detection_yolo_training_1748046012_e75fb9d9?wsid=/subscriptions/6b86c32f-3e1d-4be7-bbd4-dd16443808b2/resourcegroups/rg-amlclass-notjello/workspaces/azureml-notjello&tid=f6b6dd5b-f02f-441a-99a0-162ac5060bd2
