In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data as data_utils
from torchvision import datasets, models, transforms

from sklearn.model_selection import KFold, train_test_split

import albumentations as A
import albumentations_experimental as AE
from albumentations.pytorch import ToTensorV2
import cv2
from tqdm import tqdm

# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random, sys
import pandas as pd
import time
import copy

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.engine import DefaultTrainer

from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Connect your script to Neptune
# import neptune
# import neptune_config

from detectron2.structures import BoxMode

** fvcore version of PathManager will be deprecated soon. **
** Please migrate to the version in iopath repo. **
https://github.com/facebookresearch/iopath 



In [2]:
class Trainer(DefaultTrainer):
    """
    We use the "DefaultTrainer" which contains a number pre-defined logic for
    standard training workflow. They may not work for you, especially if you
    are working on a new research project. In that case you can use the cleaner
    "SimpleTrainer", or write your own training loop.
    """

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """
        Create evaluator(s) for a given dataset.
        This uses the special metadata "evaluator_type" associated with each builtin dataset.
        For your own dataset, you can simply create an evaluator manually in your
        script and do not have to worry about the hacky if-else logic here.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluator_list = []
        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        if evaluator_type in ["coco", "coco_panoptic_seg"]:
            evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder))
        if len(evaluator_list) == 0:
            raise NotImplementedError(
                "no Evaluator for the dataset {} with the type {}".format(
                    dataset_name, evaluator_type
                )
            )
        if len(evaluator_list) == 1:
            return evaluator_list[0]
        return DatasetEvaluators(evaluator_list)

In [4]:
def train_val_split(imgs, keypoints, random_state=42):
    d = dict()
    for file in imgs:
        key = ''.join(file.split('-')[:-1])
        if key not in d.keys():
            d[key] = [file]
        else:
            d[key].append(file)
            
    np.random.seed(random_state)
    trains = []
    validations = []
    for key, value in d.items():
        r = np.random.randint(len(value), size=2)
        for i in range(len(value)):
            if i in r:
                validations.append(np.where(imgs == value[i])[0][0])
            else:
                trains.append(np.where(imgs == value[i])[0][0])
    return (
        imgs[trains], imgs[validations],
        keypoints[trains], keypoints[validations]
    )


In [5]:
def train_val_split2(augmented, train):
    train_imgs = train.iloc[:, 0].to_numpy()
    train_keypoints = train.iloc[:, 1:].to_numpy()
    aug_imgs = augmented.iloc[:, 0].to_numpy()
    aug_keypoints = augmented.iloc[:, 1:].to_numpy()
    return aug_imgs, train_imgs, aug_keypoints, train_keypoints

In [6]:
def get_data_dicts(data_dir, imgs, keypoints, phase):
#     train_dir = os.path.join(data_dir, "augmented" if phase=="train" else "train_imgs")
    train_dir = os.path.join(data_dir, "train_imgs")
    dataset_dicts = []

    for idx, item in tqdm(enumerate(zip(imgs, keypoints))):
        img, keypoint = item[0], item[1]

        record = {}
        filepath = os.path.join(train_dir, img)
        record["height"], record["width"] = cv2.imread(filepath).shape[:2]
        record["file_name"] = filepath
        record["image_id"] = idx

        keypoints_v = []
        for i, keypoint_ in enumerate(keypoint):
            keypoints_v.append(keypoint_) # if coco set, should be added 0.5
            if i % 2 == 1:
                keypoints_v.append(2)

        x = keypoint[0::2]
        y = keypoint[1::2]
        x_min, x_max = min(x), max(x)
        y_min, y_max = min(y), max(y)

        obj = {
            "bbox": [x_min, y_min, x_max, y_max],
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 0,
            "keypoints": keypoints_v
        }

        record["annotations"] = [obj]
        dataset_dicts.append(record)
    return dataset_dicts

In [7]:
data_dir = "../data/"
# aug_df = pd.read_csv(os.path.join(data_dir, "augmented.csv"))
train_df = pd.read_csv(os.path.join(data_dir, "train_df_modified.csv"))

keypoint_names = train_df.columns.to_list()[1:]
keypoint_flip_map = []
for i in range(0, len(keypoint_names) // 2, 2):
    keypoint_flip_map.append((keypoint_names[i], keypoint_names[i+1]))

columns = train_df.columns[1:].to_list()[::2]
keypoint_names = [
    label.replace("_x", '').replace("_y", '') for label in columns
]

In [8]:
imgs = train_df.iloc[:, 0].to_numpy()
keypoints = train_df.iloc[:, 1:].to_numpy()
imgs_train, imgs_val, keypoints_train, keypoints_val = \
    train_val_split(imgs, keypoints, random_state=42)

imgs_d = {
    "train": imgs_train,
    "val": imgs_val
}
keypoints_d = {
    "train": keypoints_train,
    "val": keypoints_val
}

for d in ["train", "val"]:
    DatasetCatalog.register(
        "keypoints_" + d,
        lambda d=d: get_data_dicts(
            data_dir, imgs_d[d], keypoints_d[d], phase=d
        )
    )
    MetadataCatalog.get("keypoints_" + d).set(
        thing_classes=["human"]
    )
    MetadataCatalog.get("keypoints_" + d).set(
        keypoint_names=keypoint_names
    )
    MetadataCatalog.get("keypoints_" + d).set(
        keypoint_flip_map=keypoint_flip_map
    )
    MetadataCatalog.get("keypoints_" + d).set(
        evaluator_type="coco"
    )

motions_metadata = MetadataCatalog.get("keypoints_train")
print(motions_metadata)

Metadata(evaluator_type='coco', keypoint_flip_map=[('nose_x', 'nose_y'), ('left_eye_x', 'left_eye_y'), ('right_eye_x', 'right_eye_y'), ('left_ear_x', 'left_ear_y'), ('right_ear_x', 'right_ear_y'), ('left_shoulder_x', 'left_shoulder_y'), ('right_shoulder_x', 'right_shoulder_y'), ('left_elbow_x', 'left_elbow_y'), ('right_elbow_x', 'right_elbow_y'), ('left_wrist_x', 'left_wrist_y'), ('right_wrist_x', 'right_wrist_y'), ('left_hip_x', 'left_hip_y')], keypoint_names=['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle', 'right_ankle', 'neck', 'left_palm', 'right_palm', 'spine2(back)', 'spine1(waist)', 'left_instep', 'right_instep'], name='keypoints_train', thing_classes=['human'])


In [9]:
# keypoint_rcnn_R_50_FPN_3x.yaml
# keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("keypoints_train",)
cfg.DATASETS.TEST = ("keypoints_val",)
cfg.DATALOADER.NUM_WORKERS = 0
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
# cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.BASE_LR = 0.001  # pick a good LR
cfg.SOLVER.MAX_ITER = 5000    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []         # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256   # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 24
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

cfg.TEST.KEYPOINT_OKS_SIGMAS = kpt_oks_sigmas=np.ones((24, 1), dtype=float).tolist()
cfg.TEST.EVAL_PERIOD = 500

In [10]:
# evaluator = COCOEvaluator("keypoints_val", ("bbox", "keypoints"), False, output_dir="./output/", kpt_oks_sigmas=np.ones((24, 1), dtype=float).tolist())

In [11]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
# trainer.test(model=trainer.model, cfg=cfg, evaluators=evaluator)

# trainer.test(cfg, trainer.model)

[32m[03/24 19:00:43 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

3660it [01:14, 49.28it/s]

[32m[03/24 19:01:57 d2.data.build]: [0mRemoved 0 images with no usable annotations. 3660 images left.





[32m[03/24 19:01:57 d2.data.build]: [0mRemoved 0 images with fewer than 1 keypoints.
[32m[03/24 19:01:58 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   human    | 3660         |
|            |              |[0m
[32m[03/24 19:01:58 d2.data.common]: [0mSerializing 3660 elements to byte tensors and concatenating them all ...
[32m[03/24 19:01:58 d2.data.common]: [0mSerialized dataset takes 4.75 MiB
[32m[03/24 19:01:58 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[03/24 19:01:58 d2.data.build]: [0mUsing training sampler TrainingSampler


Skip loading parameter 'roi_heads.keypoint_head.score_lowres.weight' to the model due to incompatible shapes: (512, 17, 4, 4) in the checkpoint but (512, 24, 4, 4) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.keypoint_head.score_lowres.bias' to the model due to incompatible shapes: (17,) in the checkpoint but (24,) in the model! You might want to double check if this is expected.


[32m[03/24 19:01:59 d2.engine.train_loop]: [0mStarting training from iteration 0


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:766.)
  return x.nonzero().unbind(1)


[32m[03/24 19:02:32 d2.utils.events]: [0m eta: 2:15:49  iter: 19  total_loss: 8.357  loss_cls: 0.079  loss_box_reg: 0.094  loss_keypoint: 8.186  loss_rpn_cls: 0.000  loss_rpn_loc: 0.004  time: 1.6294  data_time: 0.1867  lr: 0.000020  max_mem: 6125M
[32m[03/24 19:03:04 d2.utils.events]: [0m eta: 2:15:44  iter: 39  total_loss: 8.377  loss_cls: 0.080  loss_box_reg: 0.102  loss_keypoint: 8.196  loss_rpn_cls: 0.001  loss_rpn_loc: 0.005  time: 1.6219  data_time: 0.1884  lr: 0.000040  max_mem: 6230M
[32m[03/24 19:03:37 d2.utils.events]: [0m eta: 2:16:07  iter: 59  total_loss: 8.258  loss_cls: 0.074  loss_box_reg: 0.089  loss_keypoint: 8.065  loss_rpn_cls: 0.000  loss_rpn_loc: 0.005  time: 1.6305  data_time: 0.1874  lr: 0.000060  max_mem: 6230M
[32m[03/24 19:04:09 d2.utils.events]: [0m eta: 2:15:44  iter: 79  total_loss: 8.138  loss_cls: 0.055  loss_box_reg: 0.076  loss_keypoint: 8.014  loss_rpn_cls: 0.000  loss_rpn_loc: 0.004  time: 1.6247  data_time: 0.1879  lr: 0.000080  max_mem: 62

512it [00:10, 49.35it/s]

[32m[03/24 19:15:48 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   human    | 512          |
|            |              |[0m





[32m[03/24 19:15:48 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...
[32m[03/24 19:15:48 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 19:15:48 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 19:15:48 d2.evaluation.coco_evaluation]: [0m'keypoints_val' is not registered by `register_coco_instances`. Therefore trying to convert it to COCO format ...
[32m[03/24 19:15:48 d2.evaluation.evaluator]: [0mStart inference on 512 images
[32m[03/24 19:15:52 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2415 s / img. ETA=0:02:41
[32m[03/24 19:15:57 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2437 s / img. ETA=0:02:43
[32m[03/24 19:16:02 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2442 s / img. ETA=0:02:40
[32m[03/24 19:16:08 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2443 

512it [00:10, 47.15it/s]

[32m[03/24 19:32:46 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...





[32m[03/24 19:32:46 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 19:32:46 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 19:32:46 d2.evaluation.evaluator]: [0mStart inference on 512 images
[32m[03/24 19:32:50 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2415 s / img. ETA=0:02:41
[32m[03/24 19:32:55 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2434 s / img. ETA=0:02:43
[32m[03/24 19:33:00 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2433 s / img. ETA=0:02:39
[32m[03/24 19:33:05 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2437 s / img. ETA=0:02:34
[32m[03/24 19:33:10 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2436 s / img. ETA=0:02:29
[32m[03/24 19:33:15 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2434 s / img. ETA=0:02:24
[32m[03/24 19:33:20 d2.evaluation.evaluator]: [0mInference

314it [00:06, 52.27it/s]

[32m[03/24 19:49:56 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2431 s / img. ETA=0:02:34
[32m[03/24 19:50:01 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2437 s / img. ETA=0:02:29
[32m[03/24 19:50:06 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2434 s / img. ETA=0:02:24
[32m[03/24 19:50:11 d2.evaluation.evaluator]: [0mInference done 101/512. 0.2433 s / img. ETA=0:02:19
[32m[03/24 19:50:16 d2.evaluation.evaluator]: [0mInference done 116/512. 0.2435 s / img. ETA=0:02:14
[32m[03/24 19:50:21 d2.evaluation.evaluator]: [0mInference done 131/512. 0.2439 s / img. ETA=0:02:09
[32m[03/24 19:50:26 d2.evaluation.evaluator]: [0mInference done 146/512. 0.2439 s / img. ETA=0:02:04
[32m[03/24 19:50:31 d2.evaluation.evaluator]: [0mInference done 161/512. 0.2438 s / img. ETA=0:01:58
[32m[03/24 19:50:36 d2.evaluation.evaluator]: [0mInference done 176/512. 0.2442 s / img. ETA=0:01:54
[32m[03/24 19:50:42 d2.evaluation.evaluator]: [0mInference done 191/512. 0

512it [00:10, 48.61it/s]

[32m[03/24 20:06:29 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...
[32m[03/24 20:06:29 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 20:06:29 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 20:06:29 d2.evaluation.evaluator]: [0mStart inference on 512 images





[32m[03/24 20:06:33 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2415 s / img. ETA=0:02:41
[32m[03/24 20:06:38 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2419 s / img. ETA=0:02:42
[32m[03/24 20:06:43 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2428 s / img. ETA=0:02:38
[32m[03/24 20:06:48 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2432 s / img. ETA=0:02:34
[32m[03/24 20:06:53 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2432 s / img. ETA=0:02:29
[32m[03/24 20:06:58 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2431 s / img. ETA=0:02:24
[32m[03/24 20:07:03 d2.evaluation.evaluator]: [0mInference done 101/512. 0.2431 s / img. ETA=0:02:19
[32m[03/24 20:07:09 d2.evaluation.evaluator]: [0mInference done 116/512. 0.2431 s / img. ETA=0:02:13
[32m[03/24 20:07:14 d2.evaluation.evaluator]: [0mInference done 131/512. 0.2431 s / img. ETA=0:02:08
[32m[03/24 20:07:19 d2.evaluation.evaluator]: [0mInference done 146/512. 0.24

512it [00:10, 49.13it/s]

[32m[03/24 20:23:23 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...
[32m[03/24 20:23:23 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 20:23:23 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 20:23:23 d2.evaluation.evaluator]: [0mStart inference on 512 images





[32m[03/24 20:23:27 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2422 s / img. ETA=0:02:41
[32m[03/24 20:23:32 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2428 s / img. ETA=0:02:43
[32m[03/24 20:23:37 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2431 s / img. ETA=0:02:39
[32m[03/24 20:23:42 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2429 s / img. ETA=0:02:34
[32m[03/24 20:23:47 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2435 s / img. ETA=0:02:29
[32m[03/24 20:23:52 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2438 s / img. ETA=0:02:24
[32m[03/24 20:23:57 d2.evaluation.evaluator]: [0mInference done 101/512. 0.2439 s / img. ETA=0:02:19
[32m[03/24 20:24:03 d2.evaluation.evaluator]: [0mInference done 116/512. 0.2445 s / img. ETA=0:02:14
[32m[03/24 20:24:08 d2.evaluation.evaluator]: [0mInference done 131/512. 0.2443 s / img. ETA=0:02:09
[32m[03/24 20:24:13 d2.evaluation.evaluator]: [0mInference done 146/512. 0.24

512it [00:10, 49.35it/s]

[32m[03/24 20:40:13 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...
[32m[03/24 20:40:13 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 20:40:13 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 20:40:13 d2.evaluation.evaluator]: [0mStart inference on 512 images





[32m[03/24 20:40:16 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2431 s / img. ETA=0:02:41
[32m[03/24 20:40:21 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2421 s / img. ETA=0:02:42
[32m[03/24 20:40:27 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2420 s / img. ETA=0:02:38
[32m[03/24 20:40:32 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2426 s / img. ETA=0:02:33
[32m[03/24 20:40:37 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2427 s / img. ETA=0:02:29
[32m[03/24 20:40:42 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2440 s / img. ETA=0:02:24
[32m[03/24 20:40:47 d2.evaluation.evaluator]: [0mInference done 101/512. 0.2443 s / img. ETA=0:02:19
[32m[03/24 20:40:52 d2.evaluation.evaluator]: [0mInference done 116/512. 0.2444 s / img. ETA=0:02:14
[32m[03/24 20:40:57 d2.evaluation.evaluator]: [0mInference done 131/512. 0.2443 s / img. ETA=0:02:09
[32m[03/24 20:41:02 d2.evaluation.evaluator]: [0mInference done 146/512. 0.24

512it [00:10, 49.20it/s]

[32m[03/24 20:57:07 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...





[32m[03/24 20:57:07 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 20:57:07 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 20:57:07 d2.evaluation.evaluator]: [0mStart inference on 512 images
[32m[03/24 20:57:11 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2420 s / img. ETA=0:02:42
[32m[03/24 20:57:16 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2426 s / img. ETA=0:02:43
[32m[03/24 20:57:21 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2427 s / img. ETA=0:02:39
[32m[03/24 20:57:26 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2430 s / img. ETA=0:02:34
[32m[03/24 20:57:31 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2429 s / img. ETA=0:02:29
[32m[03/24 20:57:37 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2428 s / img. ETA=0:02:24
[32m[03/24 20:57:42 d2.evaluation.evaluator]: [0mInference

512it [00:10, 49.43it/s]

[32m[03/24 21:13:55 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...
[32m[03/24 21:13:55 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 21:13:55 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 21:13:55 d2.evaluation.evaluator]: [0mStart inference on 512 images





[32m[03/24 21:13:59 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2426 s / img. ETA=0:02:41
[32m[03/24 21:14:04 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2419 s / img. ETA=0:02:42
[32m[03/24 21:14:09 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2428 s / img. ETA=0:02:39
[32m[03/24 21:14:15 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2426 s / img. ETA=0:02:34
[32m[03/24 21:14:20 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2431 s / img. ETA=0:02:29
[32m[03/24 21:14:25 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2434 s / img. ETA=0:02:24
[32m[03/24 21:14:30 d2.evaluation.evaluator]: [0mInference done 101/512. 0.2437 s / img. ETA=0:02:19
[32m[03/24 21:14:35 d2.evaluation.evaluator]: [0mInference done 116/512. 0.2439 s / img. ETA=0:02:14
[32m[03/24 21:14:40 d2.evaluation.evaluator]: [0mInference done 131/512. 0.2439 s / img. ETA=0:02:09
[32m[03/24 21:14:45 d2.evaluation.evaluator]: [0mInference done 146/512. 0.24

512it [00:10, 49.52it/s]

[32m[03/24 21:30:45 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...





[32m[03/24 21:30:45 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 21:30:45 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 21:30:45 d2.evaluation.evaluator]: [0mStart inference on 512 images
[32m[03/24 21:30:48 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2416 s / img. ETA=0:02:41
[32m[03/24 21:30:53 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2422 s / img. ETA=0:02:42
[32m[03/24 21:30:59 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2422 s / img. ETA=0:02:38
[32m[03/24 21:31:04 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2422 s / img. ETA=0:02:34
[32m[03/24 21:31:09 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2430 s / img. ETA=0:02:29
[32m[03/24 21:31:14 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2436 s / img. ETA=0:02:24
[32m[03/24 21:31:19 d2.evaluation.evaluator]: [0mInference

512it [00:10, 46.81it/s]

[32m[03/24 21:47:41 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...





[32m[03/24 21:47:41 d2.data.common]: [0mSerialized dataset takes 0.66 MiB
[32m[03/24 21:47:41 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[03/24 21:47:41 d2.evaluation.evaluator]: [0mStart inference on 512 images
[32m[03/24 21:47:45 d2.evaluation.evaluator]: [0mInference done 11/512. 0.2480 s / img. ETA=0:02:46
[32m[03/24 21:47:50 d2.evaluation.evaluator]: [0mInference done 26/512. 0.2463 s / img. ETA=0:02:47
[32m[03/24 21:47:55 d2.evaluation.evaluator]: [0mInference done 41/512. 0.2459 s / img. ETA=0:02:42
[32m[03/24 21:48:00 d2.evaluation.evaluator]: [0mInference done 56/512. 0.2460 s / img. ETA=0:02:38
[32m[03/24 21:48:06 d2.evaluation.evaluator]: [0mInference done 71/512. 0.2460 s / img. ETA=0:02:33
[32m[03/24 21:48:11 d2.evaluation.evaluator]: [0mInference done 86/512. 0.2460 s / img. ETA=0:02:27
[32m[03/24 21:48:16 d2.evaluation.evaluator]: [0mInference

In [12]:
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

preds = []
files = []
test_dir = os.path.join(data_dir, "test_imgs")
test_list = os.listdir(test_dir)
test_list.sort()
except_list = []
for file in tqdm(test_list):
    filepath = os.path.join(test_dir, file)
    # print(filepath)
    im = cv2.imread(filepath)
    outputs = predictor(im)
    outputs = outputs["instances"].to("cpu").get("pred_keypoints").numpy()
    files.append(file)
    pred = []
    try:
        for out in outputs[0]:
            pred.extend([float(e) for e in out[:2]])
    except:
        except_list.append(filepath)
        print(filepath)
    preds.append(pred)

100%|██████████████████████████████████████████████████████████████████████████████| 1600/1600 [08:11<00:00,  3.25it/s]


In [13]:
df_sub = pd.read_csv(f"../data/sample_submission.csv")
df = pd.DataFrame(columns=df_sub.columns)
df["image"] = files
df.iloc[:, 1:] = preds

df.to_csv(f"submissions.csv", index=False)

In [14]:
print(1)

1
