In [315]:
# !git clone https://github.com/WongKinYiu/yolov7.git /home/jovyan/yolov7

# mkdir weights
# curl -L https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt > weights/yolov7_training.pt  # -L: GitHub release URLs redirect

In [449]:
import sys
import os

import kfp
from kfp import compiler, dsl, components
from kfp.components import InputPath, OutputPath
from typing import NamedTuple

In [450]:
# BASE_DIR = "/workspace"
# WEIGHTS = f"{BASE_DIR}/weights/yolov7_training.pt"

# print(f"BASE_DIR: {os.getcwd()}")
# print(f"WEIGHTS: {WEIGHTS}")

In [451]:
# !python -m ipykernel install --user --name yolov7 --display-name yolov7
# Install dependencies with the interpreter that runs this kernel (sys.executable).
# First the packages listed in requirements.txt, then an upgrade of the kfp SDK
# into the user site-packages (--user).
!{sys.executable} -m pip install -r requirements.txt --quiet
!{sys.executable} -m pip install kfp --upgrade --user --quiet

In [459]:
def prepare_labelstudio_data_for_yolo(
    model_name: str,
    project: str,
    labels: list,
    namespace: str,
    domain: str,
    config_template_url: str,
    transfer_weights_url: str, 
    hyp_file_url: str,
    train_frac: float,
    validate_frac: float) -> NamedTuple('YOLOArgs',
                                        [('data_file', str),
                                         ('config_file', str),
                                         ('weights_file', str),
                                         ('hyp_file', str),
                                         ('names_file', str)]):
    '''
    Prepares Labelstudio Data for YOLO training.

    Reads every annotation result stored under
    ``label-studio/projects/<project>/results/`` in the GCS bucket
    ``<namespace>.<domain>``, downloads the referenced images, converts the
    rectangle annotations to YOLO text format and splits them (in listing
    order, no shuffling) into train / val / test folders below ``/workspace``.
    It then writes the data, names, config and hyper-parameter files and
    optionally fetches weights for transfer learning.

    All URLs are resolved against the ``<namespace>.<domain>`` bucket: only
    the *path* component of a URL is used as the object name.

    Example weights_url: https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt

    Args:
        model_name: Not used by this step.
        project: Project identifier used in the results prefix.
        labels: Class names; a label's position in the list is its class index.
        namespace: First part of the bucket name.
        domain: Second part of the bucket name.
        config_template_url: Location of the mustache config template, rendered with ``num_classes``.
        transfer_weights_url: Location of pretrained weights; if empty, no weights are fetched.
        hyp_file_url: Location of the hyper-parameter file.
        train_frac: Fraction of samples used for training.
        validate_frac: Fraction of samples used for validation; the rest is the test set.

    Returns:
        A ``YOLOArgs`` tuple of local paths (data file, config file, weights
        file, hyp file, names file). All entries are empty strings when no
        results exist for the project; the weights entry is an empty string
        when ``transfer_weights_url`` is empty.
    '''

    print('Prepares Labelstudio Data for training of YOLOv7')
    import chevron
    from google.cloud import storage
    import json
    import os
    from pathlib import Path
    from urllib.parse import urlparse
    from collections import namedtuple

    # Field names mirror the outputs declared in the return annotation.
    # Built once so the early-exit and the normal return cannot drift apart.
    YOLOArgs = namedtuple('YOLOArgs', ['data_file', 'config_file', 'weights_file', 'hyp_file', 'names_file'])

    def create_directory(directory_name, basedir=None):
        # Create the directory (and parents) if needed and return its POSIX path.
        if basedir is None:
            pth = Path(directory_name)
        else:
            pth = Path(os.path.join(basedir, directory_name))
        pth.mkdir(parents=True, exist_ok=True)
        return pth.as_posix()

    # Directory structure
    basedir = create_directory("/workspace")              # BASE
    configdir = create_directory('config', basedir)       # BASE/config
    datadir = create_directory('dataset', basedir)        # BASE/dataset
    imagesdir = create_directory('images', datadir)       # BASE/dataset/images
    trainimagesdir = create_directory('train', imagesdir) # BASE/dataset/images/train
    valimagesdir = create_directory('val', imagesdir)     # BASE/dataset/images/val
    testimagesdir = create_directory('test', imagesdir)   # BASE/dataset/images/test
    labelsdir = create_directory('labels', datadir)       # BASE/dataset/labels
    trainlabelsdir = create_directory('train', labelsdir) # BASE/dataset/labels/train
    vallabelsdir = create_directory('val', labelsdir)     # BASE/dataset/labels/val
    testlabelsdir = create_directory('test', labelsdir)   # BASE/dataset/labels/test
    modeldir = create_directory('model', basedir)         # BASE/model

    bucket_name = f"{namespace}.{domain}"
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    my_prefix = f"label-studio/projects/{project}/results/" # the name of the subfolder
    blobs = list(bucket.list_blobs(prefix = my_prefix, delimiter = '/'))

    if len(blobs) == 0:
        print("NO RESULTS FOUND FOR {}".format(project))
        return YOLOArgs("", "", "", "", "")

    test_frac = 1.0 - train_frac - validate_frac
    # shuffle(blobs)
    # need to pre-define specific train/val/test indices (avoid risk of leakage - especially with video data)
    # This needs to be discussed further. Sampling is challenging.
    # shuffle will work for random images that have no temporal relationships provided we ALWAYS keep the same
    # test set as a hold out, i.e. don't resample every time we retrain.
    # NOTE(review): a placeholder object named exactly `my_prefix` is counted in `m`
    # even though it is skipped below; left as-is so existing splits do not move.
    m = len(blobs)
    train_end = int(train_frac * m)
    validate_end = int(validate_frac * m) + train_end

    print(f'Number of images: {m}')
    print(f'Train set size: {int(train_frac * m)}')
    print(f'Validate set size: {int(validate_frac * m)}')
    print(f'Test set size: {int(test_frac * m)}')

    def save_to_yolo_fmt(file_name, annotation, local_image_folder, local_label_folder):
        # <object-class> - integer number of object from 0 to (classes-1)
        # <x> <y> <width> <height> - float values relative to width and height of image, it can be equal from (0.0 to 1.0]
        # for example: <x> = <absolute_x> / <image_width> or <height> = <absolute_height> / <image_height>
        # attention: <x> <y> - are center of rectangle (are not top-left corner)

        # file_name is the name of the label studio file to which an annotation is stored (usually a numerical ID)
        # annotation is the parsed json content of a label studio annotation file
        # local_image/label_folder tells us where to save annotations and images for training

        image_data_url = annotation['task']['data']['image']
        try:
            _, ext = os.path.splitext(image_data_url)
            parsed_url = urlparse(image_data_url)
            # NOTE(review): only the URL path is used; the image is always read from `bucket`.
            blob = bucket.blob(parsed_url.path[1:])
            image_destination = os.path.join(local_image_folder, file_name + ext)
            print(image_destination)
            blob.download_to_filename(image_destination)
        except Exception as err_msg:
            # Best effort: an image that cannot be fetched is skipped, no label file is written.
            print(f"ERROR: {err_msg}\n - Could not download {image_data_url}")
            return

        with open(os.path.join(local_label_folder, file_name + '.txt'), 'w') as f:
            for r in annotation['result']:
                scale = 100.0

                # Labelstudio has different format depending on original size in result:
                if ('original_width' not in r or r['original_width'] == 1) or ('original_height' not in r or r['original_height'] == 1):
                    scale = 10000.0

                w = r['value']['width']/scale
                h = r['value']['height']/scale
                x = (r['value']['x']+(r['value']['width']/2))/scale
                y = (r['value']['y']+(r['value']['height']/2))/scale

                for l in r['value']['rectanglelabels']:
                    if l in labels:
                        idx = labels.index(l)
                        # There are a lot of annotations of very small objects, sort them out
                        if (w > 0.005) and (h > 0.005):
                            f.write(f'{idx} {x} {y} {w} {h}\n')
                            print(f'Adding:   {idx} {x} {y} {w} {h}')
                        else:
                            print(f'Skipping: {idx} {x} {y} {w} {h}')

    print(trainimagesdir)
    print(valimagesdir)
    print(testimagesdir)

    print(trainlabelsdir)
    print(vallabelsdir)
    print(testlabelsdir)

    for i, blob in enumerate(blobs):
        if blob.name == my_prefix: # ignoring the subfolder itself
            continue
        # Position in the listing decides the split: [0, train_end) -> train,
        # [train_end, validate_end) -> val, everything after -> test.
        if i < train_end:
            split, image_dir, label_dir = 'train', trainimagesdir, trainlabelsdir
        elif i < validate_end:
            split, image_dir, label_dir = 'val', valimagesdir, vallabelsdir
        else:
            split, image_dir, label_dir = 'test', testimagesdir, testlabelsdir
        file_name = blob.name.replace(my_prefix, "")
        print(f'adding {i}, {file_name} to {split}')
        annotation = json.loads(blob.download_as_string().decode())
        save_to_yolo_fmt(file_name, annotation, image_dir, label_dir)

    # DATA
    print("Writing DATA file")
    data_file = os.path.join(configdir, 'data.yaml')
    print(f"- DST: {data_file}")

    num_classes = len(labels)

    train_data_file = os.path.join(datadir, "train.txt")
    val_data_file = os.path.join(datadir, "val.txt")
    test_data_file = os.path.join(datadir, "test.txt")

    with open(data_file, 'w') as out:
        out.write(f'train: {train_data_file}\n')
        out.write(f'val: {val_data_file}\n')
        out.write(f'test: {test_data_file}\n')
        out.write(f'nc: {num_classes}\n')
        out.write(f'names: [{", ".join(labels)}]')

    # NAMES (one class name per line)
    # `names_file` was previously never assigned, which raised NameError here.
    # NOTE(review): file name and location are an assumption (the otherwise
    # unused model directory) - confirm against the consumer of this file.
    names_file = os.path.join(modeldir, 'object.names')
    with open(names_file, 'w') as out:
        for l in labels:
            out.write(l + '\n')

    # /workspace/dataset/{train,val,test}.txt - one image path per line
    for list_file, image_dir in ((train_data_file, trainimagesdir),
                                 (val_data_file, valimagesdir),
                                 (test_data_file, testimagesdir)):
        with open(list_file, 'w') as out:
            for f in os.listdir(image_dir):
                out.write(f'{os.path.join(image_dir, f)}\n')

    # CONFIG (from GCS)
    print("Writing YOLOv7 CONFIG file")
    print(f"- SRC: {config_template_url}")
    custom_config_file = os.path.join(configdir, "config.yaml")
    print(f"- DST: {custom_config_file}")
    parsed_url = urlparse(config_template_url)
    # NOTE(review): the host part of the URL is ignored; the object is read from `bucket`.
    blob = bucket.blob(parsed_url.path[1:])
    with open(custom_config_file, 'w') as fout:
        parsed_config = chevron.render(blob.download_as_string().decode(), 
                                       {"num_classes": num_classes})
        # to configure anything else, create a copy, modify, and reference.
        # full parameterization would likely be more confusing than helpful
        fout.write(parsed_config)
        print(parsed_config)

    # HYP FILE (from GCS)
    print("Copying HYP file")
    print(f"- SRC: {hyp_file_url}")
    hyp_file = os.path.join(configdir, "hyp.yaml")
    print(f"- DST: {hyp_file}")
    parsed_url = urlparse(hyp_file_url) 
    blob = bucket.blob(parsed_url.path[1:])
    blob.download_to_filename(hyp_file)

    # PRETRAINED WEIGHTS (from GCS)
    transfer_weights_file = ""
    if transfer_weights_url:
        print("Copying weights for transfer learning")
        print(f"- SRC: {transfer_weights_url}")
        transfer_weights_file = os.path.join(configdir, 'transfer_weights.pt')
        print(f"- DST: {transfer_weights_file}")
        parsed_url = urlparse(transfer_weights_url) 
        blob = bucket.blob(parsed_url.path[1:])
        blob.download_to_filename(transfer_weights_file)

    return YOLOArgs(data_file, custom_config_file, transfer_weights_file, hyp_file, names_file)

# Turn the function above into a pipeline component; the component spec is
# also written to a YAML file next to the notebook.
prepare_labelstudio_data_for_yolo_op = components.create_component_from_func(
    prepare_labelstudio_data_for_yolo,
    output_component_file='prepare_labelstudio_data_for_yolo.yaml',
    packages_to_install=['google-cloud-storage', 'numpy', 'chevron'],
)

In [460]:
def save_yolo_data(
    model_name: str,
    namespace: str,
    domain: str):
    """Upload the compressed workspace archive to Google Cloud Storage.

    The file ``/workspace/<model_name>.tar.gz`` is uploaded to the bucket
    ``<namespace>.<domain>`` as ``yolo_data/<model_name>.tar.gz``.

    Args:
        model_name: Base name of the archive, locally and in the bucket.
        namespace: First part of the bucket name.
        domain: Second part of the bucket name.
    """
    from google.cloud import storage
    import os

    archive_name = f'{model_name}.tar.gz'
    local_archive = os.path.join('/workspace', archive_name)
    remote_archive = os.path.join('yolo_data', archive_name)

    # Resolve the bucket, create the target blob handle, then push the file.
    target_bucket = storage.Client().bucket(f"{namespace}.{domain}")
    target_blob = target_bucket.blob(remote_archive)
    target_blob.upload_from_filename(local_archive)

    print("File {} uploaded to {}.".format(local_archive, remote_archive))

# Register the archive-upload function as a pipeline component and write its
# spec to a YAML file.
save_yolo_data_op = components.create_component_from_func(
    save_yolo_data,
    output_component_file='save_yolo_data.yaml',
    packages_to_install=['google-cloud-storage', 'six'],
)

In [479]:
def save_yolo_model(model_name: str,
                    namespace: str,
                    domain: str) -> NamedTuple('YOLOModel',[('outputs', dict)]):
    """Upload the training results in ``/workspace/model`` to Google Cloud Storage.

    Every regular file directly inside ``/workspace/model`` is uploaded to the
    bucket ``<namespace>.<domain>`` under ``models/<model_name>/``.
    Sub-directories are skipped, except that ``weights/best.pt`` and
    ``weights/last.pt`` are uploaded as ``best_weights.pt`` and
    ``last_weights.pt``.

    Args:
        model_name: Name of the destination folder below ``models/``.
        namespace: First part of the bucket name.
        domain: Second part of the bucket name.

    Returns:
        A ``YOLOModel`` tuple whose ``outputs`` dict maps each uploaded local
        path to its destination object name.
    """
    from google.cloud import storage
    import os
    from collections import namedtuple

    # One client / bucket handle is shared by all uploads instead of building
    # a new client for every single file.
    bucket = storage.Client().bucket(f"{namespace}.{domain}")
    destination_prefix = os.path.join('models', model_name)

    outputs = {}

    def upload_blob(source_file_name, destination_blob_name):
        """Uploads a file to the bucket and records it in `outputs`."""
        bucket.blob(destination_blob_name).upload_from_filename(source_file_name)
        print("File {} uploaded to {}.".format(source_file_name, destination_blob_name))
        outputs[source_file_name] = destination_blob_name

    # 'yolov7-tiny.weights', 'yolov7-tiny.cfg', 'object.names', 'confusion_matrix.png',
    # 'F1_curve.png', 'P_curve.png', 'PR_curve.png', 'R_curve.png', 'results.png', 
    # 'hyp.yaml', 'opt.yaml', ...
    model_training_results = os.path.join("/workspace", "model")
    for training_output_file in os.listdir(model_training_results):
        src = os.path.join(model_training_results, training_output_file)
        if os.path.isfile(src): # skip folders to avoid uploading all weights
            upload_blob(src, os.path.join(destination_prefix, training_output_file))

    # Upload only the best and the last weights, under explicit names.
    for weights_name, uploaded_name in (("best.pt", "best_weights.pt"),
                                        ("last.pt", "last_weights.pt")):
        src = os.path.join(model_training_results, "weights", weights_name)
        upload_blob(src, os.path.join(destination_prefix, uploaded_name))

    output = namedtuple('YOLOModel', ['outputs'])
    return output(outputs)

# Register the model-upload function as a pipeline component and write its
# spec to a YAML file.
save_yolo_model_op = components.create_component_from_func(
    save_yolo_model,
    output_component_file='save_yolo_model.yaml',
    packages_to_install=['google-cloud-storage', 'six'],
)

In [486]:
import os
import kubernetes as k8s
from kubernetes.client.models import V1EnvVar, V1ContainerPort

# Display name and description of the pipeline; the name is also used later
# to look up an already uploaded pipeline.
pipeline_name = 'Train YOLOv7 with Labelstudio and 1 T4 GPU'
pipeline_description = (
    'A pipeline to train YOLOv7 on a custom data set from Teknoir Labelstudio. '
    'Transfer learn from "transfer_weights."'
)

@dsl.pipeline(name = pipeline_name, description = pipeline_description)
def train_yolov7(model_name, labelstudio_project, labels, namespace, domain,
                 config_template_url='gs://teknoir-ai.teknoir.cloud/yolov7/cfg/yolov7-tiny-config-template.yaml',
                 transfer_weights_url='gs://teknoir-ai.teknoir.cloud/yolov7/weights/yolov7_training.pt',
                 hyp_file_url='gs://teknoir-ai.teknoir.cloud/yolov7/hyp/hyp.scratch.tiny.yaml',
                 train_frac=0.7, # fraction of samples to use for training (test_frac = 1 - train_frac - validate_frac)
                 validate_frac=0.2, # fraction of samples to use for validation (test_frac = 1 - train_frac - validate_frac)
                 batch_size=32,
                 img_size_train=640,
                 img_size_test=640,
                 epochs=300,
                 disk_space="10000Mi"):

    """Train YOLOv7 with Labelstudio

    Steps, all sharing one volume mounted at /workspace:
      1. create the workspace volume (named after ``model_name``),
      2. prepare the Label Studio data and the YOLO config files,
      3. compress the workspace and upload the archive,
      4. train on one GPU, writing results to /workspace/model,
      5. list the workspace and upload the trained model files.
    """

    mount_folder = "/workspace"
    gpu_instance_type = "nvidia-tesla-t4" # https://cloud.google.com/compute/docs/gpus

    # A working directory between steps
    wkdirop = dsl.VolumeOp(
        name='Create training workspace',
        resource_name=model_name,
        size=disk_space,
        modes=dsl.VOLUME_MODE_RWO
    )

    prepare = prepare_labelstudio_data_for_yolo_op(
        model_name=model_name,
        project=labelstudio_project,
        labels=labels,
        namespace=namespace,
        domain=domain,
        config_template_url=config_template_url,
        transfer_weights_url=transfer_weights_url,
        hyp_file_url=hyp_file_url,
        train_frac=train_frac,
        validate_frac=validate_frac
    ).add_pvolumes({mount_folder: wkdirop.volume})

    # NOTE(review): `compress` and `train` both only wait for `prepare`, so they
    # may run at the same time on the same volume - confirm this is intended.
    compress = dsl.ContainerOp(
        name="Compress YOLO data",
        image="alpine",
        command=["sh", "-c"],
        arguments=[f'apk add tree && tree {mount_folder} && cd {mount_folder} && touch {model_name}.tar.gz && tar -czvf {model_name}.tar.gz --exclude={model_name}.tar.gz .'],
        pvolumes={mount_folder: wkdirop.volume}
    ).after(prepare)

    save_data = save_yolo_data_op(
        model_name=model_name,
        namespace=namespace,
        domain=domain).after(compress).add_pvolumes({mount_folder: wkdirop.volume})

    # Shell command run inside the training image. The run directory is fixed
    # to /workspace/model (--project/--name) because the upload step reads the
    # results from there. A second, conflicting `--name {model_name}` flag was
    # removed: the later `--name=model` took precedence anyway.
    train_command = (
        "echo 'WEIGHTS: {weights_file}' && "
        "echo 'DATA: {data_file}' && "
        "echo 'CONFIG: {config_file}' && "
        "echo 'HYP: {hyp_file}' && "
        "python3 train.py --workers 0 --device 0 --batch-size {batch_size} "
        "--data {data_file} "
        "--img {img_size_train} {img_size_test} "
        "--cfg {config_file} "
        "--weights '{weights_file}' "
        "--hyp {hyp_file} "
        "--epochs {epochs} "
        "--project=/workspace --name=model --exist-ok"
    ).format(
        batch_size=batch_size,
        data_file=prepare.outputs["data_file"],
        img_size_train=img_size_train,
        img_size_test=img_size_test,
        config_file=prepare.outputs["config_file"],
        weights_file=prepare.outputs["weights_file"],
        hyp_file=prepare.outputs["hyp_file"],
        epochs=epochs
    )

    train = dsl.ContainerOp(
        name='Train YOLOv7',
        image='gcr.io/teknoir/yolov7-training:main-amd64', # nvidia pytorch base image is VERY large
        command=['sh', '-c'],
        arguments=[train_command],
        pvolumes={mount_folder: wkdirop.volume},
        # pass in k8s container kwargs
        container_kwargs={'env': [V1EnvVar('MODEL_NAME', model_name)]}
    ).after(prepare) \
        .add_port(V1ContainerPort(container_port=8099)) \
        .add_port(V1ContainerPort(container_port=8079)) \
        .set_gpu_limit(1) \
        .add_node_selector_constraint('cloud.google.com/gke-accelerator', 
                                      gpu_instance_type)

    # @component
    # def html_visualization(html_artifact: Output[HTML]):
    #    html_content = '<!DOCTYPE html><html><body><h1>Hello world</h1></body></html>'
    #    with open(html_artifact.path, 'w') as f:
    #        f.write(html_content)

    # Export to ONNX
    # python export.py --weights runs/train/{model_name}/weights/best.pt --grid \
    #     --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 \
    #     --img-size 640 640 --max-wh 640
    # TensorRT export also discussed: https://github.com/WongKinYiu/yolov7

    # Print the workspace tree after training (for inspection in the step log).
    show_after = dsl.ContainerOp(
        name="Show workspace after training",
        image="alpine",
        command=["sh", "-c"],
        arguments=[f'apk add tree && tree {mount_folder}'],
        pvolumes={mount_folder: wkdirop.volume}
    ).after(train)

    save_model = save_yolo_model_op(
        model_name=model_name,
        namespace=namespace,
        domain=domain
    ).add_pvolumes({mount_folder: wkdirop.volume}).after(train)

In [487]:
# The compiled package is used both for the first upload of the pipeline and
# for uploading later versions, hence two names for the same file.
pipeline_file = 'train_yolov7.yaml'
pipeline_version_file = pipeline_file

workflow = kfp.compiler.Compiler().compile(
    pipeline_func=train_yolov7,
    package_path=pipeline_file,
)

In [488]:
import uuid
import json

client = kfp.Client(namespace='teknoir')

# Look for an existing pipeline with exactly this name.
# Named `pipeline_filter` (not `filter`) so the builtin `filter` stays usable
# in the rest of the kernel session.
pipeline_filter = json.dumps({'predicates': [{'key': 'name',
                                              'op': 1,  # 1 == EQUALS in the KFP filter API
                                              'string_value': pipeline_name}]})
pipelines = client.pipelines.list_pipelines(filter=pipeline_filter)

if not pipelines.pipelines:
    # First upload: create the pipeline itself.
    pipeline = client.pipeline_uploads.upload_pipeline(pipeline_file, 
                                                       name=pipeline_name, 
                                                       description=pipeline_description)
else:
    # Pipeline already exists: add a new version with a short random suffix
    # so the version name does not clash with earlier uploads.
    pipeline_version_name = pipeline_name + f' - {str(uuid.uuid4())[:6]}'
    pipeline_version = client.pipeline_uploads.upload_pipeline_version(pipeline_version_file,
                                                                       name=pipeline_version_name, 
                                                                       pipelineid=pipelines.pipelines[0].id)

ERROR:root:Failed to read a token from file '/var/run/secrets/kubeflow/pipelines/token' ([Errno 2] No such file or directory: '/var/run/secrets/kubeflow/pipelines/token').
