# MMPose fine tune test
Running this on some of our data before spending a ton of time setting up a new pipeline

First, we'll make sure torch, torchvision, mmpose, etc. are installed correctly and that CUDA is available.

In [1]:
# --- PyTorch stack ---
import torch
import torchvision

cuda_ready = torch.cuda.is_available()
print('torch version:', torch.__version__, '; CUDA available: ', cuda_ready)
print('torchvision version:', torchvision.__version__)

# --- MMPose itself ---
import mmpose

print('mmpose version:', mmpose.__version__)

# --- mmcv compiled ops: report what they were built against ---
from mmcv.ops import get_compiler_version, get_compiling_cuda_version

mmcv_cuda_version = get_compiling_cuda_version()
mmcv_compiler = get_compiler_version()
print('cuda version:', mmcv_cuda_version)
print('compiler information:', mmcv_compiler)

torch version: 2.1.0 ; CUDA available:  True
torchvision version: 0.16.0
mmpose version: 1.2.0
cuda version: 11.8
compiler information: GCC 9.3


Now we're going to fine-tune a few of the pre-trained animal pose models. 

## Create new dataset type

It looks like we first have to create a new type of dataset to account for the MARS pose configurations

This just defines the loader, which inherits from the `BaseCocoStyleDataset` class.
From what I can tell, it loads the keypoint and skeleton definitions from the Python file that I edited, then loads the JSON annotation file when the loader is called.

The actual loader is a mix of their demo and the loader from MARS.

In [2]:
import json
import os
from typing import List, Callable, Optional, Sequence, Tuple, Union # why can't we just use list? I don't know...

import numpy as np
import yaml

from mmpose.registry import DATASETS
from mmpose.datasets.datasets.base import BaseCocoStyleDataset

# Register the class in the DATASETS registry so that configs can refer to it
# by name (type='MARS_Datasets').
@DATASETS.register_module()
class MARS_Datasets(BaseCocoStyleDataset):
    """Pose dataset that reads MARS keypoint annotations from a manifest file.

    The manifest is read as JSON-lines: every non-blank line is one JSON
    document describing one image. The fields used are ``source-ref`` (image
    path), an optional ``bbox`` (x, y, w, h) and
    ``annotatedResult.annotationsFromAllWorkers``, where each worker entry
    carries ``workerId`` and a string-encoded ``annotationData.content``.
    """

    # Keypoint / skeleton definitions are read from this python file.
    # NOTE(review): absolute, machine-specific path -- the dataset cannot be
    # built on another machine without editing it.
    METAINFO: dict = dict(from_file='/home/kevin/git/Work_Tools/three_dee_reconstruction/MARS_annotation_dict.py')

    def _load_annotations(self) -> Tuple[List[dict], Optional[List[dict]]]:
        """Load data from annotations in MARS format.

        Every worker's annotation of an image becomes its own instance record,
        so an image labelled by several workers yields several records that
        share the same ``img_id``, ``img_path`` and ``bbox``.

        Returns:
            tuple: ``(instance_list, None)``. ``instance_list`` holds one dict
            per worker annotation with the keys ``id``, ``img_id``,
            ``img_path``, ``bbox`` (1, 4), ``bbox_score`` (1,), ``keypoints``
            (1, K, 2) and ``keypoints_visible`` (1, K). The second slot is the
            image list, which this loader does not build -- presumably only
            needed for bottom-up mode; verify against the base class.

        Raises:
            FileNotFoundError: if ``self.ann_file`` does not exist.
        """

        # fail loudly when the manifest is missing
        if not os.path.exists(self.ann_file):
            raise FileNotFoundError(f'Annotation file not found: {self.ann_file}')

        # each line is a separate json structure; blank lines are skipped
        with open(self.ann_file) as anno_file:
            anns = [json.loads(line) for line in anno_file if line.strip()]

        data_list = []
        img_id = 0  # simple incrementer -- one value per annotated image

        # iterate through each of the images
        for ann in anns:
            if 'annotatedResult' not in ann:  # image was never annotated
                continue

            worker_results = ann['annotatedResult']['annotationsFromAllWorkers']

            # The annotation payload is stored as a string and decoded once per worker.
            # NOTE(review): eval() executes arbitrary code found in the manifest.
            # If the payload is plain JSON, json.loads would be the safe choice --
            # confirm the format before switching.
            worker_annots = [
                eval(worker_result['annotationData']['content'])['annotatedResult']
                for worker_result in worker_results
            ]
            if not worker_annots:  # nothing to load for this image
                continue

            # image size as recorded with the first worker's result
            img_props = worker_annots[0]['inputImageProperties']
            img_h = img_props['height']
            img_w = img_props['width']

            # bounding box -- not in the metadata for these results
            if 'bbox' in ann:
                box_x, box_y, box_w, box_h = ann['bbox']
                x1 = np.clip(box_x, 0, img_w - 1)
                y1 = np.clip(box_y, 0, img_h - 1)
                x2 = np.clip(box_x + box_w, 0, img_w - 1)
                y2 = np.clip(box_y + box_h, 0, img_h - 1)
                bbox = np.array([x1, y1, x2, y2], dtype=np.float32).reshape(1, 4)
            else:  # fall back to the full image
                bbox = np.array([0, 0, img_w, img_h], dtype=np.float32).reshape(1, 4)

            # image info: 'source-ref' is a full path, only the file name is kept
            img_name = os.path.split(ann['source-ref'])[-1]
            img_path = os.path.join(self.data_prefix['img'], img_name)

            # one record per worker annotation
            for worker_result, annot in zip(worker_results, worker_annots):
                # (K, 2) array of x/y positions, in the order the worker listed them
                keypoints = np.array(
                    [[item['x'], item['y']] for item in annot['keypoints']],
                    dtype=np.float32).reshape(-1, 2)

                # NOTE(review): 'id' is the worker id, so it is not unique across
                # records -- confirm nothing downstream needs unique instance ids.
                data_list.append({
                    'id': worker_result['workerId'],
                    'img_id': img_id,
                    'img_path': img_path,
                    'bbox': bbox,  # just the image size unless the manifest has a bbox
                    'bbox_score': np.ones(1, dtype=np.float32),  # 100% certain
                    'keypoints': np.expand_dims(keypoints, axis=0),
                    'keypoints_visible': np.ones([1, keypoints.shape[0]])  # every labelled point counts as visible
                })

            img_id += 1

        return data_list, None
        

## Create the config file

MMPose builds its models from config files rather than code, as far as I can tell. This means that we set up the model, training, etc. by editing the config.

In [3]:
from mmengine import Config

# Start from the HRNet-w48 top-down heatmap config trained on AP-10K.
cfg = Config.fromfile(
    '/home/kevin/git/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py'
)

# ---- basic run settings ----
cfg.data_root = '/home/kevin/Downloads/OpenField_Data/'
cfg.work_dir = 'work_dirs/hrnet_w48_ap10k-256x256'
cfg.randomness = dict(seed=0)

# run validation after every epoch
cfg.train_cfg.val_interval = 1

# number of training epochs
cfg.train_cfg.max_epochs = 40

# The demo also overrides the train/val/test dataloader batch_size (16);
# the defaults from the base config are kept here for now.

# ---- datasets: same dataset class and image folder, one manifest per split ----
cfg.dataset_type = 'MARS_Datasets'

for split, manifest in (
        ('train', 'output_open_body_train.manifest'),
        ('test', 'output_open_body_test.manifest'),
        ('val', 'output_open_body_val.manifest'),
):
    split_dataset = cfg[f'{split}_dataloader'].dataset
    split_dataset.type = cfg.dataset_type
    split_dataset.ann_file = manifest
    split_dataset.data_root = cfg.data_root
    split_dataset.data_prefix = dict(img='images/')

# ---- evaluation: PCK for both validation and test ----
cfg.val_evaluator = dict(type='PCKAccuracy')
cfg.test_evaluator = cfg.val_evaluator

# keep the best checkpoint by PCK, and at most 3 checkpoints on disk
cfg.default_hooks.checkpoint.save_best = 'PCK'
cfg.default_hooks.checkpoint.max_keep_ckpts = 3

# change the output layer so that we have the right number of keypoints
cfg.model.head.out_channels = 10 # 10 keypoints for the body

# the checkpoint in the demo doesn't align with what's in their model zoo, so we'll use the model zoo version
cfg.model.backbone.init_cfg.checkpoint = 'https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth'
# The model-zoo file is a checkpoint of the whole pose model, so its backbone
# weights are stored under keys starting with 'backbone.'. Without this prefix
# none of the keys match the backbone module and the (non-strict) load leaves
# the backbone without the pretrained weights.
# NOTE(review): check the training log for missing/unexpected-key warnings to confirm.
cfg.model.backbone.init_cfg.prefix = 'backbone.'

print(cfg.pretty_text)

auto_scale_lr = dict(base_batch_size=512)
backend_args = dict(backend='local')
codec = dict(
    heatmap_size=(
        64,
        64,
    ),
    input_size=(
        256,
        256,
    ),
    sigma=2,
    type='MSRAHeatmap')
custom_hooks = [
    dict(type='SyncBuffersHook'),
]
data_mode = 'topdown'
data_root = '/home/kevin/Downloads/OpenField_Data/'
dataset_type = 'MARS_Datasets'
default_hooks = dict(
    badcase=dict(
        badcase_thr=5,
        enable=False,
        metric_type='loss',
        out_dir='badcase',
        type='BadCaseAnalysisHook'),
    checkpoint=dict(
        interval=10,
        max_keep_ckpts=3,
        rule='greater',
        save_best='PCK',
        type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(enable=False, type='PoseVisualizationHook'))
default_scope = 'mmpose'
en

# Train and Evaluation

Now to see if this is as easy as it seems to be...

In [4]:
from mmengine.config import DictAction
from mmengine.runner import Runner

# Make sure the model config has a 'data_preprocessor' entry: when it is missing,
# fall back to the top-level 'preprocess_cfg' (or an empty dict if that is absent too).
cfg.model.setdefault('data_preprocessor', cfg.get('preprocess_cfg', {}))

# build the "runner" from the config edited above
runner = Runner.from_cfg(cfg)

# train it
# (presumably returns the trained model -- confirm against the mmengine Runner docs)
model_trained = runner.train()

# test it with the test dataloader / evaluator set in the config
# (presumably returns the evaluation metrics -- confirm)
results = runner.test()

11/01 11:55:13 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 0
    GPU 0,1: Quadro RTX 6000
    CUDA_HOME: None
    GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0
    PyTorch: 2.1.0
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX512
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,co

# Inferences and visualizations

modified from the mmpose example code

In [8]:
def visualize_img(img_path, detector, pose_estimator, visualizer,
                  show_interval, out_file):
    """Run detection + top-down pose estimation on one image and draw the result.

    Args:
        img_path: path of the image to process.
        detector: detection model used to propose bounding boxes.
        pose_estimator: pose model run on the kept boxes.
        visualizer: object whose ``add_datasample`` draws the prediction.
        show_interval: forwarded to the visualizer as ``wait_time``.
        out_file: forwarded to the visualizer as ``out_file``.
    """

    # --- stage 1: bounding boxes from the detector ---
    det_scope = detector.cfg.get('default_scope', 'mmdet')
    if det_scope is not None:
        init_default_scope(det_scope)
    detections = inference_detector(detector, img_path).pred_instances.cpu().numpy()

    # keep class-0 detections scoring above 0.3, as rows of (x1, y1, x2, y2, score)
    keep_mask = np.logical_and(detections.labels == 0, detections.scores > 0.3)
    scored_boxes = np.concatenate(
        (detections.bboxes, detections.scores[:, None]), axis=1)[keep_mask]

    # suppress overlapping boxes, then drop the score column
    kept_rows = nms(scored_boxes, 0.3)
    bboxes = scored_boxes[kept_rows][:, :4]

    # --- stage 2: keypoints inside every kept box ---
    merged_samples = merge_data_samples(
        inference_topdown(pose_estimator, img_path, bboxes))

    # --- stage 3: draw keypoints and heatmaps (no ground truth, no boxes) ---
    rgb_image = mmcv.imread(img_path, channel_order='rgb')
    visualizer.add_datasample(
        'result',
        rgb_image,
        data_sample=merged_samples,
        draw_gt=False,
        draw_heatmap=True,
        draw_bbox=False,
        show=False,
        wait_time=show_interval,
        out_file=out_file,
        kpt_thr=0.3)

TypeError: TopdownPoseEstimator.predict() missing 2 required positional arguments: 'inputs' and 'data_samples'

In [None]:
from mmpose.apis.inferencers import MMPoseInferencer

# set up input args: outputs go into sub-folders of the training work dir
# (os.path.join, as in the dataset loader -- a bare `path` is never imported in this notebook)
input_dir = cfg.work_dir
vis_out_dir = os.path.join(input_dir, 'visualizations')
pred_out_dir = os.path.join(input_dir, 'predictions')

# initialize an "inferencer"
# NOTE(review): no model is given here. The inferencer presumably needs the
# fine-tuned config and checkpoint (e.g. via its pose2d / pose2d_weights
# arguments) -- TODO confirm and fill in before running.
inferencer = MMPoseInferencer()