In [None]:
# from data_preparation import make_raw_data_and_eval_dataset
# make_raw_data_and_eval_dataset()

In [None]:
from clearml.automation.controller import PipelineDecorator
from clearml import TaskTypes


@PipelineDecorator.component(
    return_values=["the_dataset"],
    cache=False,
    task_type=TaskTypes.data_processing,
    packages=[
        "clearml",
        "tensorboard_logger",
        "timm",
        "fastai",
        "torch==1.11.0",
        "torchvision==0.12.0",
        "protobuf==3.19.*",
        "tensorboard",
        "google-cloud-storage>=1.13.2",
    ],
    repo="git@github.com:shpigi/clearml_evaluation.git",
    repo_branch="main",
)
def make_new_dataset_component(
    project,
    i_dataset: int,
    num_samples_per_chunk: int = 500,
):
    """Pipeline step that delegates to ``pipeline_functions.make_new_dataset``.

    Args:
        project: Forwarded unchanged as the first positional argument.
        i_dataset: Index of the dataset to build, forwarded unchanged.
        num_samples_per_chunk: Forwarded as the keyword argument of the
            same name.

    Returns:
        Whatever ``pipeline_functions.make_new_dataset`` returns; the
        decorator exposes it under the name ``the_dataset``.
    """
    # Imports stay inside the body, as in the other component wrappers
    # (presumably because the step runs outside this notebook - TODO confirm).
    import sys

    # Must precede the project import so the package below can be resolved.
    sys.path.insert(0, "/src/clearml_evaluation/")
    from image_classifier_training import pipeline_functions

    the_dataset = pipeline_functions.make_new_dataset(
        project,
        i_dataset,
        num_samples_per_chunk=num_samples_per_chunk,
    )
    return the_dataset


@PipelineDecorator.component(
    return_values=["run_model_path", "run_tb_path"],
    cache=False,
    task_type=TaskTypes.training,
    packages=[
        "clearml",
        "tensorboard_logger",
        "timm",
        "fastai",
        "torch==1.11.0",
        "torchvision==0.12.0",
        "protobuf==3.19.*",
        "tensorboard",
        "google-cloud-storage>=1.13.2",
    ],
    repo="git@github.com:shpigi/clearml_evaluation.git",
    repo_branch="main",
)
def train_image_classifier_component(
    clearml_dataset,
    backbone_name,
    image_resize: int,
    batch_size: int,
    run_model_uri,
    run_tb_uri,
    local_data_path,
    num_epochs: int,
):
    """Pipeline step that delegates to ``pipeline_functions.train_image_classifier``.

    All eight arguments are forwarded positionally, in declaration order,
    without modification.

    Returns:
        A ``(run_model_path, run_tb_path)`` pair, exactly as produced by
        ``pipeline_functions.train_image_classifier``.
    """
    # Imports stay inside the body, as in the other component wrappers
    # (presumably because the step runs outside this notebook - TODO confirm).
    import sys

    # Must precede the project import so the package below can be resolved.
    sys.path.insert(0, "/src/clearml_evaluation/")
    from image_classifier_training import pipeline_functions

    training_args = (
        clearml_dataset,
        backbone_name,
        image_resize,
        batch_size,
        run_model_uri,
        run_tb_uri,
        local_data_path,
        num_epochs,
    )
    # Unpack into exactly two values so an unexpected result shape fails here.
    model_path, tb_path = pipeline_functions.train_image_classifier(*training_args)
    return model_path, tb_path


@PipelineDecorator.component(
    return_values=["run_eval_path"],
    cache=False,
    task_type=TaskTypes.testing,
    packages=[
        "clearml",
        "tensorboard_logger",
        "timm",
        "fastai",
        "torch==1.11.0",
        "torchvision==0.12.0",
        "protobuf==3.19.*",
        "tensorboard",
        "google-cloud-storage>=1.13.2",
    ],
    repo="git@github.com:shpigi/clearml_evaluation.git",
    repo_branch="main",
)
def eval_model_component(
    run_learner_path,
    run_id,
    dataset_name,
    dataset_project,
    run_eval_uri,
    image_resize: int,
    batch_size: int,
    local_data_path,
):
    """Pipeline step that delegates to ``pipeline_functions.eval_model``.

    All eight arguments are forwarded positionally, in declaration order,
    without modification.

    Returns:
        Whatever ``pipeline_functions.eval_model`` returns; the decorator
        exposes it under the name ``run_eval_path``.
    """
    # Imports stay inside the body, as in the other component wrappers
    # (presumably because the step runs outside this notebook - TODO confirm).
    import sys

    # Must precede the project import so the package below can be resolved.
    sys.path.insert(0, "/src/clearml_evaluation/")
    from image_classifier_training import pipeline_functions

    evaluation_args = (
        run_learner_path,
        run_id,
        dataset_name,
        dataset_project,
        run_eval_uri,
        image_resize,
        batch_size,
        local_data_path,
    )
    run_eval_path = pipeline_functions.eval_model(*evaluation_args)
    return run_eval_path

from typing import Tuple, List


@PipelineDecorator.pipeline(
    name="fastai_image_classification_pipeline",
    project="lavi-testing",
    version="0.2",
    multi_instance_support=True,
)
def fastai_image_classification_pipeline(
    run_id: str,
    i_datasets: Tuple[int],
    backbone_names: List[str],
    image_resizes: List[int],
    batch_sizes: List[int],
    num_train_epochs: int,
):
    """Create datasets, then train and evaluate one model per training config.

    For every index in ``i_datasets`` a dataset is built with
    ``make_new_dataset_component``. For each training configuration a model
    is then trained with ``train_image_classifier_component`` and evaluated
    with ``eval_model_component``.

    Args:
        run_id: Prefix for the per-dataset sub-run id (``f"{run_id}_{i}"``).
        i_datasets: Dataset indices to iterate over.
        backbone_names: Backbone name of each training configuration.
        image_resizes: Image size of each training configuration.
        batch_sizes: Training batch size of each configuration; evaluation
            uses ``int(batch_size * 1.5)``.
        num_train_epochs: Number of epochs passed to every training step.

    Raises:
        ValueError: If ``backbone_names``, ``image_resizes`` and
            ``batch_sizes`` do not all have the same length.
    """
    from clearml import Task

    # The three lists are read in lockstep: entry k of each list forms one
    # training configuration. zip() would silently drop trailing entries on
    # a length mismatch, so reject that before any step is started.
    if not (len(backbone_names) == len(image_resizes) == len(batch_sizes)):
        raise ValueError(
            "backbone_names, image_resizes and batch_sizes must have the same "
            f"length, got {len(backbone_names)}, {len(image_resizes)} and "
            f"{len(batch_sizes)}"
        )
    # Loop-invariant, so build it once instead of once per dataset.
    train_configs = list(zip(backbone_names, image_resizes, batch_sizes))

    class TaskURIs:
        """Output locations (tboard / models / evaluations) of one sub-run."""

        def __init__(self, project, pipeline_name, run_id):
            path_pref = f"{project}/{pipeline_name}"
            self.tboard = f"{path_pref}/tboard/{run_id}"
            self.models = f"{path_pref}/models/{run_id}"
            self.evaluations = f"{path_pref}/evaluations/{run_id}"

    project_name = "lavi-testing"
    pipeline_name = "fastai_image_classification"

    pipeline_task = Task.current_task()
    print("pipeline task=", pipeline_task)
    # TODO: recording a per-sub-run configuration on the pipeline task
    # (pipeline_task.connect_configuration) is currently disabled.

    for i_dataset in i_datasets:
        sub_run_id = run_id + f"_{i_dataset}"
        print("sub_run_id:", sub_run_id)

        # NOTE(review): these URIs (and sub_run_id) are per dataset, so every
        # training configuration below receives the same output locations.
        # If the steps write to them directly, configurations may overwrite
        # each other - confirm against pipeline_functions.
        run_uris = TaskURIs(
            project=project_name, pipeline_name=pipeline_name, run_id=sub_run_id
        )

        print("make dataset")
        training_dataset = make_new_dataset_component(
            project=project_name, i_dataset=i_dataset, num_samples_per_chunk=500
        )

        for backbone_name, image_resize, batch_size in train_configs:
            print("train model")
            # Only the model path is consumed below; the tensorboard path
            # is unpacked but intentionally unused.
            run_model_path, _ = train_image_classifier_component(
                clearml_dataset=training_dataset,
                backbone_name=backbone_name,
                image_resize=image_resize,
                batch_size=batch_size,
                run_model_uri=run_uris.models,
                run_tb_uri=run_uris.tboard,
                local_data_path="/data",
                num_epochs=num_train_epochs,
            )

            print("evaluate model")
            eval_model_component(
                run_learner_path=run_model_path,
                run_id=sub_run_id,
                dataset_name="pets_evaluation",
                dataset_project="lavi-testing",
                run_eval_uri=run_uris.evaluations,
                image_resize=image_resize,
                # Evaluation runs with a 1.5x larger batch than training.
                batch_size=int(batch_size * 1.5),
                local_data_path="/data",
            )

    print("pipeline complete")

In [None]:
%pdb on
from datetime import datetime, timezone

# Run id: UTC timestamp with millisecond resolution ([:-3] trims the six
# microsecond digits of %f down to three). datetime.now(timezone.utc)
# replaces the deprecated, timezone-naive datetime.utcnow(); the formatted
# string is the same.
run_id = f"run_{datetime.now(timezone.utc).strftime('%Y_%m_%dT%H_%M_%S.%f')[:-3]}"
PipelineDecorator.set_default_execution_queue("default")
# Alternative execution mode, left disabled:
#PipelineDecorator.run_locally()

# Full set of dataset indices; reassigned to (0, 1) later in this cell,
# before the pipeline is called.
i_datasets = (0, 1, 2, 3)
# One dict per training configuration (backbone, input size, batch size).
train_params = [
    {"backbone_name": "resnet34", "image_resize": 128, "batch_size": 16},
    {"backbone_name": "efficientnetv2_rw_s", "image_resize": 128, "batch_size": 16},
    {"backbone_name": "resnet34", "image_resize": 128, "batch_size": 32},
    {"backbone_name": "efficientnetv2_rw_s", "image_resize": 128, "batch_size": 32},
    {"backbone_name": "resnet34", "image_resize": 224, "batch_size": 16},
    {"backbone_name": "efficientnetv2_rw_s", "image_resize": 288, "batch_size": 16},
]


def ld2dl(LD):
    """Convert a list of dicts into a dict of lists.

    The keys are taken from the first dict; every dict in ``LD`` must
    provide all of them (a missing key raises ``KeyError``).

    Args:
        LD: Sequence of dicts sharing the same keys.

    Returns:
        Dict mapping each key of ``LD[0]`` to the list of that key's values,
        in the order the dicts appear. An empty ``LD`` gives ``{}``.
    """
    if not LD:
        # No first dict to take the keys from.
        return {}
    keys = LD[0]
    return {k: [dic[k] for dic in LD] for k in keys}


# Pivot the list of per-configuration dicts into one list per parameter
# name, which is the shape the pipeline's list arguments take.
train_params_dl = ld2dl(train_params)

# Dataset indices used for this run (replaces the value assigned earlier
# in this cell).
i_datasets = (0, 1)

# Start the pipeline: entry k of each list below forms training
# configuration k, applied to every dataset index.
fastai_image_classification_pipeline(
    run_id=run_id,
    i_datasets=i_datasets,
    backbone_names=train_params_dl["backbone_name"],
    image_resizes=train_params_dl["image_resize"],
    batch_sizes=train_params_dl["batch_size"],
    num_train_epochs=2,
)

In [None]:
train_params_dl

In [None]:
# from image_classifier_training.pipeline_functions import *
# def fastai_image_classification(
#     run_id: str,
#     i_datasets: Tuple[int],
#     backbone_name: str,
#     image_resize: int,
#     batch_size: int,
#     num_train_epochs: int = 5,
# ):
#     from clearml import Task
#     import json

#     class TaskURIs:
#         def __init__(self, project, pipeline_name, run_id):
#             path_pref = f"{project}/{pipeline_name}"
#             self.tboard = f"{path_pref}/tboard/{run_id}"
#             self.models = f"{path_pref}/models/{run_id}"
#             self.evaluations = f"{path_pref}/evaluations/{run_id}"

#     project_name = "lavi-testing"
#     pipeline_name = "fastai_image_classification"

#     pipeline_task = Task.current_task()
#     print("pipeline task=", pipeline_task)
#     #     config = {"run_id": run_id}
#     #     config["backbone_name"] = backbone_name
#     #     config["i_datasets"] = i_datasets

#     #     config["per_sub_run_configs"] = []

#     #     if pipeline_task:
#     #         config = pipeline_task.connect_configuration(config, name="config")
#     for i_dataset in i_datasets:
#         sub_run_id = run_id + f"_{i_dataset}"
#         print("sub_run_id:", sub_run_id)
#         #         sub_run_configs = {"sub_run_id": sub_run_id}

#         run_uris = TaskURIs(
#             project=project_name, pipeline_name=pipeline_name, run_id=sub_run_id
#         )

#         #         sub_run_configs["uris"] = json.loads(json.dumps(vars(run_uris), default=str))

#         print("make dataset")
#         training_dataset = make_new_dataset(
#             project=project_name, i_dataset=i_dataset, num_samples_per_chunk=500
#         )
#         #         sub_run_configs["uris"]["training_dataset"] = {
#         #             "id": training_dataset.id,
#         #             "name": training_dataset.name,
#         #         }

#         print("train model")
#         run_model_path, run_tb_path = train_image_classifier(
#             clearml_dataset=training_dataset,
#             backbone_name=backbone_name,
#             image_resize=image_resize,
#             batch_size=batch_size,
#             run_model_uri=run_uris.models,
#             run_tb_uri=run_uris.tboard,
#             local_data_path="/data",
#             num_epochs=num_train_epochs,
#         )
#         #         sub_run_configs["uris"]["run_model_path"] = str(run_model_path)

#         print("evaluate model")
#         run_eval_path = eval_model(
#             run_learner_path=run_model_path,
#             run_id=sub_run_id,
#             dataset_name="pets_evaluation",
#             dataset_project="lavi-testing",
#             run_eval_uri=run_uris.evaluations,
#             image_resize=image_resize,
#             local_data_path="/data",
#         )
#     #         sub_run_configs["uris"]["run_eval_path"] = str(run_eval_path)
#     #         # clearml_task.close()
#     #         config["per_sub_run_configs"].append(sub_run_configs)

#     print("pipeline complete")

In [None]:
from image_classifier_training.pipeline_functions import *
# def fastai_image_classification(
#     run_id: str,
#     i_datasets: Tuple[int],
#     backbone_name: str,
#     image_resize: int,
#     batch_size: int,
#     num_train_epochs: int = 5,
# ):
#     from clearml import Task
#     import json

#     class TaskURIs:
#         def __init__(self, project, pipeline_name, run_id):
#             path_pref = f"{project}/{pipeline_name}"
#             self.tboard = f"{path_pref}/tboard/{run_id}"
#             self.models = f"{path_pref}/models/{run_id}"
#             self.evaluations = f"{path_pref}/evaluations/{run_id}"

#     project_name = "lavi-testing"
#     pipeline_name = "fastai_image_classification"

#     pipeline_task = Task.current_task()
#     print("pipeline task=", pipeline_task)
#     #     config = {"run_id": run_id}
#     #     config["backbone_name"] = backbone_name
#     #     config["i_datasets"] = i_datasets

#     #     config["per_sub_run_configs"] = []

#     #     if pipeline_task:
#     #         config = pipeline_task.connect_configuration(config, name="config")
#     for i_dataset in i_datasets:
#         sub_run_id = run_id + f"_{i_dataset}"
#         print("sub_run_id:", sub_run_id)
#         #         sub_run_configs = {"sub_run_id": sub_run_id}

#         run_uris = TaskURIs(
#             project=project_name, pipeline_name=pipeline_name, run_id=sub_run_id
#         )

#         #         sub_run_configs["uris"] = json.loads(json.dumps(vars(run_uris), default=str))

#         print("make dataset")
#         training_dataset = make_new_dataset(
#             project=project_name, i_dataset=i_dataset, num_samples_per_chunk=500
#         )
#         #         sub_run_configs["uris"]["training_dataset"] = {
#         #             "id": training_dataset.id,
#         #             "name": training_dataset.name,
#         #         }

#         print("train model")
#         run_model_path, run_tb_path = train_image_classifier(
#             clearml_dataset=training_dataset,
#             backbone_name=backbone_name,
#             image_resize=image_resize,
#             batch_size=batch_size,
#             run_model_uri=run_uris.models,
#             run_tb_uri=run_uris.tboard,
#             local_data_path="/data",
#             num_epochs=num_train_epochs,
#         )
#         #         sub_run_configs["uris"]["run_model_path"] = str(run_model_path)

#         print("evaluate model")
#         run_eval_path = eval_model(
#             run_learner_path=run_model_path,
#             run_id=sub_run_id,
#             dataset_name="pets_evaluation",
#             dataset_project="lavi-testing",
#             run_eval_uri=run_uris.evaluations,
#             image_resize=image_resize,
#             local_data_path="/data",
#         )
#     #         sub_run_configs["uris"]["run_eval_path"] = str(run_eval_path)
#     #         # clearml_task.close()
#     #         config["per_sub_run_configs"].append(sub_run_configs)

#     print("pipeline complete")

# from datetime import datetime

# run_id = f"run_{datetime.utcnow().strftime('%Y_%m_%dT%H_%M_%S.%f')[:-3]}"
# # PipelineDecorator.set_default_execution_queue("default")
# PipelineDecorator.run_locally()

# # i_datasets = (0,1,2,3)
# # backbones = ["resvent"]
# # train_params = [{
# #     "backbone_name": "resnet34"
# #     "image_resize": 224

# # }]
# i_datasets = (0, 1)

# fastai_image_classification(
#     run_id=run_id,
#     i_datasets=i_datasets,
#     backbone_name="efficientnetv2_rw_s",
#     image_resize=288,
#     batch_size=16,
#     num_train_epochs=5,
# )

In [None]:
# import timm

In [None]:
# timm.list_models(pretrained=True)

In [None]:
import timm
import torch

# Sanity check: output shape of the backbone for a single 224x224 RGB input.
model = timm.create_model("efficientnetv2_rw_s")
# Inference mode: this is only a shape probe, so avoid updating
# normalisation statistics and skip building an autograd graph.
model.eval()
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    out = model(x)
out.shape

In [None]:
?PipelineDecorator.run_locally