In [None]:
# Run mode switch.
#   True  -> the "Train single model" cell runs (one config, 1 epoch) and the
#            "Train all models" cell is skipped.
#   False -> the "Train all models" cell runs and the single-model cell is skipped.
SMOKE_TEST = True

# Upper bound on how many entries of CONFIG_FOLDER_TO_CKPT the full run trains.
# None trains every entry. Not consulted when SMOKE_TEST is True.
NUM_MODELS = None

# NOTE(review): not read by any cell visible in this notebook - confirm where
# (or whether) this flag is consumed.
SKIP_TRAINED = True

# Text Detection Training


In [2]:
import warnings

# Ignore all UserWarnings emitted from any submodule of torch
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    module=r"torch.*"
)
# Ignore all UserWarnings emitted from any submodule of torch
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    module=r"mmcv.*"
)

In [None]:
# Relative path to the folder holding one sub-folder of configs per model; the
# training cells resolve it after changing into the project directory.
ROOT_CONFIG_FOLDER = 'configs/textdet'
# Maps each config sub-folder name under ROOT_CONFIG_FOLDER to the checkpoint
# URL that the training cells assign to cfg['load_from'] for configs in that
# folder. The "Train all models" cell iterates this dict in insertion order.
CONFIG_FOLDER_TO_CKPT = {
    "dbnetpp_custom": "https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015_20221101_124139-4ecb39ac.pth",
    "dbnet_custom": "https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth",
    "psenet_custom": "https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015/psenet_resnet50-oclip_fpnf_600e_icdar2015_20221101_131357-2bdca389.pth",
    "panet_custom": "https://download.openmmlab.com/mmocr/textdet/panet/panet_resnet18_fpem-ffm_600e_icdar2015/panet_resnet18_fpem-ffm_600e_icdar2015_20220826_144817-be2acdb4.pth",
    "textsnake_custom": "https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth",
    "fcenet_custom": "https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015/fcenet_resnet50-oclip_fpn_1500e_icdar2015_20221101_150145-5a6fc412.pth"
}

In [None]:
#@title Train single model

from pathlib import Path
from mmengine.runner import Runner
import time
from mmengine import Config
from dotenv import load_dotenv

if SMOKE_TEST:
    load_dotenv() # NOTE: make sure to reload notebook when changing .env to use new env variables
    %cd ~/bonting-identification

    # model_config = 'dbnetpp_custom/dbnetpp_cegdr.py'
    # model_config = 'dbnet_custom/dbnet_cegdr.py'
    # model_config = 'psenet_custom/psenet_cegdr.py'
    model_config = 'panet_custom/panet_cegdr.py'
    # model_config = 'textsnake_custom/textsnake_cegdr.py'
    # model_config = 'fcenet_custom/fcenet_cegdr.py'

    cfg = Config.fromfile(Path(ROOT_CONFIG_FOLDER) / model_config)
    cfg['load_from'] = CONFIG_FOLDER_TO_CKPT[Path(model_config).parent.name]
    cfg.visualizer.name = f'{time.localtime()}'

    # Optionally, smoke test on 1 epoch
    cfg.train_cfg['max_epochs'] = 1

    runner = Runner.from_cfg(cfg)
    result = runner.train()

In [None]:
# !rm -rf work_dirs/*


In [None]:
#@title Train all models

import os
from mmengine.runner import Runner
import time
from mmengine import Config
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv

if not SMOKE_TEST:
    load_dotenv() # NOTE: make sure to reload notebook when changing .env to use new env variables
    %cd ~/bonting-identification

    results = []
    model_configs = []
    ckpts = []

    # Determine how many models to train
    if NUM_MODELS is None:
        # Use all models when NUM_MODELS is None
        models_to_train = len(CONFIG_FOLDER_TO_CKPT)
    else:
        # Use specified number of models
        models_to_train = min(NUM_MODELS, len(CONFIG_FOLDER_TO_CKPT))
    
    for config_folder, model_url in list(CONFIG_FOLDER_TO_CKPT.items())[:models_to_train]:
        # Construct full path to config folder
        config_folder_path = Path(ROOT_CONFIG_FOLDER) / config_folder

        for model_config in [
            p for p in config_folder_path.glob('*.py')
            if not p.name.startswith('_base')
        ]:
            cfg = Config.fromfile(model_config)
            cfg['load_from'] = model_url
            cfg.visualizer.name = f'{time.localtime()}'

            # cfg.train_cfg['max_epochs'] = 1

            runner = Runner.from_cfg(cfg)
            result = runner.train()

            results.append(result)
            model_configs.append(Path(model_config).name.rstrip('.py'))
            ckpts.append(Path(model_url).parts[-2])


In [None]:
# results_df = pd.DataFrame(results)
# results_df.insert(0, 'model_config', model_configs)
# results_df.insert(1, 'ckpt', ckpts)
# results_df = results_df.set_index(['model_config', 'ckpt'])
# results_df.sort_values('cegdr/hmean', ascending=False, inplace=True)
# results_df


In [None]:
# save_path = Path('reports/eval/cegdr/textdet/mmocr_finetuned_det_results.csv')
# save_path.parent.mkdir(parents=True, exist_ok=True)
# print(f'Saving results to:\n{save_path}')
# results_df.to_csv(save_path, index=True, header=True)
