In [1]:
from ignite.contrib.handlers.neptune_logger import (
    GradsScalarHandler,
    NeptuneLogger,
    NeptuneSaver,
    WeightsScalarHandler,
    global_step_from_engine,
)
from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer, Engine
from ignite.handlers import Checkpoint
from ignite.metrics import Accuracy, Loss
from ignite.utils import setup_logger

In [2]:
import cv2
import time
from helper import F2Metric
from tqdm import tqdm

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

In [3]:
BASE_DIR = "/home/featurize/data/train_images"

In [4]:
df = pd.read_csv("/home/featurize/work/patric/train-validation-split/train-0.1.csv")
df['annotations'] = df['annotations'].apply(eval)
df['image_path'] = "video_" + df['video_id'].astype(str) + "/" + df['video_frame'].astype(str) + ".jpg"
df.head()

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,n_annotations,has_annotations,image_path,subsequence_id,is_train
0,0,40258,0,0,0-0,[],0,False,video_0/0.jpg,1,True
1,0,40258,1,1,0-1,[],0,False,video_0/1.jpg,1,True
2,0,40258,2,2,0-2,[],0,False,video_0/2.jpg,1,True
3,0,40258,3,3,0-3,[],0,False,video_0/3.jpg,1,True
4,0,40258,4,4,0-4,[],0,False,video_0/4.jpg,1,True


In [5]:
df_train, df_val = df[df['is_train']], df[~df['is_train']]

In [6]:
len(df_train), len(df_val)

(21576, 1925)

In [7]:
df_train = df_train[df_train.annotations.str.len() > 0 ].reset_index(drop=True)
df_val = df_val[df_val.annotations.str.len() > 0 ].reset_index(drop=True)

In [8]:
len(df_train), len(df_val)

(4396, 523)

In [9]:
class ReefDataset:

    def __init__(self, df, transforms=None):
        self.df = df
        self.transforms = transforms

    def can_augment(self, boxes):
        """Check if bounding boxes are OK to augment

        For example: image_id 1-490 has a bounding box that is partially outside of the image
        It breaks albumentation
        Here we check the margins are within the image to make sure the augmentation can be applied
        """
        
        box_outside_image = ((boxes[:, 0] < 0).any() or (boxes[:, 1] < 0).any() 
                             or (boxes[:, 2] > 1280).any() or (boxes[:, 3] > 720).any())
        return not box_outside_image

    def get_boxes(self, row):
        """Returns the bboxes for a given row as a 3D matrix with format [x_min, y_min, x_max, y_max]"""

        boxes = pd.DataFrame(row['annotations'], columns=['x', 'y', 'width', 'height']).astype(float).values

        # Change from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        return boxes

    def get_image(self, row):
        """Gets the image for a given row"""

        image = cv2.imread(f'{BASE_DIR}/{row["image_path"]}', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def __getitem__(self, i):

        row = self.df.iloc[i]
        image = self.get_image(row)
        boxes = self.get_boxes(row)
        
        n_boxes = boxes.shape[0]
        
        # Calculate the area
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        target = {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            'area': torch.as_tensor(area, dtype=torch.float32),
            
            'image_id': torch.tensor([i]),
            
            # There is only one class
            'labels': torch.ones((n_boxes,), dtype=torch.int64),
            
            # Suppose all instances are not crowd
            'iscrowd': torch.zeros((n_boxes,), dtype=torch.int64)
        }

        if self.transforms and self.can_augment(boxes):
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': target['labels']
            }
            sample = self.transforms(**sample)
            image = sample['image']

            if n_boxes > 0:
                target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
        else:
            image = ToTensorV2(p=1.0)(image=image)['image']

        return image, target

    def __len__(self):
        return len(self.df)

In [10]:
def get_train_transform():
    return A.Compose([
        A.Flip(0.5),
        # A.Normalize(),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


def get_valid_transform():
    return A.Compose([
        # A.Normalize(),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [11]:
# Define datasets
ds_train = ReefDataset(df_train, get_train_transform())
ds_val = ReefDataset(df_val, get_valid_transform())

In [12]:
batch_size = 8
lr = 0.0003
max_epochs = 20

In [13]:
def collate_fn(batch):
    return tuple(zip(*batch))

dl_train = DataLoader(ds_train, batch_size=8, shuffle=False, num_workers=4, collate_fn=collate_fn)
dl_val = DataLoader(ds_val, batch_size=8, shuffle=False, num_workers=4, collate_fn=collate_fn)

### 模型 & 优化器

In [14]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
model.cuda()
optimizer = torch.optim.Adam(lr=0.0003, params=model.parameters())
scaler = torch.cuda.amp.GradScaler()
print('cool')

cool


In [15]:
epochs = 12
lr = 0.003

In [16]:
DEVICE = "cuda"

In [17]:
npt_logger = NeptuneLogger(
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIxMmYwNGVmYS03ODgxLTQ1MzEtOWQ2NS0yMTQwMzBmZGQ0YzcifQ==",
    project_name="chenglu.she/patric",
    name="baseline",
    params={
        "batch_size": batch_size,
        "max_epochs": max_epochs,
        "lr": lr,
        "optimizer": optimizer.__class__.__name__,
        "model": model.__class__.__name__
    },
)

https://app.neptune.ai/chenglu.she/patric/e/PAT-21


In [18]:
def update_model(engine, batch):
    model.train()
    images, targets = batch
    images = list(image.to(DEVICE) for image in images)
    targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

    # Predict
    with torch.cuda.amp.autocast(enabled=True):
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

    optimizer.zero_grad()
    scaler.scale(losses).backward()
    scaler.step(optimizer)
    scaler.update()

    loss_items = {k: v.item() for k, v in loss_dict.items()}
    loss_items['sum'] = losses.item()

    # loss_classifier, loss_box_reg, loss_objectness, loss_rpn_box_reg, sum
    return loss_items


@torch.no_grad()
def evaluate_loss(engine, batch):
    model.train()
    images, targets = batch
    images = list(image.to(DEVICE) for image in images)
    targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

    loss_dict = model(images, targets)
    losses = sum(loss for loss in loss_dict.values())
    loss_dict['sum'] = losses

    # loss_classifier, loss_box_reg, loss_objectness, loss_rpn_box_reg, sum
    return loss_dict


@torch.no_grad()
def evaluate_metric(engine, batch):
    model.eval()
    images, targets = batch
    images = list(image.to(DEVICE) for image in images)
    targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

    output_dict = {
        "model_outputs": model(images, targets), # [{'boxes': torch.Tensor(N, 4), 'labels': torch.Tensor(N), 'scores': torch.Tensor(N)}, {....}, {....}]
        "images": images,
        "targets": targets
    }

    return output_dict

In [19]:
trainer = Engine(update_model)
loss_evaluator = Engine(evaluate_loss)
evaluator = Engine(evaluate_metric)

In [20]:
pbar = tqdm(initial=0, leave=False, total=len(dl_train), desc=f"ITERATION - loss: {0:.2f}")

@trainer.on(Events.ITERATION_COMPLETED(every=1))
def log_training_loss(engine):
    pbar.desc = f"ITERATION - loss: {engine.state.output['sum']:.2f}"
    pbar.update(1)

@trainer.on(Events.EPOCH_COMPLETED(every=1))
def log_training_loss(engine):
    pbar.refresh()

ITERATION - loss: 0.00:   0%|          | 0/550 [00:00<?, ?it/s]

In [21]:
def create_loss_metric(name):
    return Loss(
        loss_fn=lambda y, _: y,
        output_transform=lambda o: (o[name], o[name]),
        batch_size=lambda *_: 1,
    )

metrics = {
    "loss_classifier": create_loss_metric('loss_classifier'),
    "loss_box_reg": create_loss_metric('loss_box_reg'),
    "loss_objectness": create_loss_metric('loss_objectness'),
    "loss_rpn_box_reg": create_loss_metric('loss_rpn_box_reg'),
    "sum": create_loss_metric('sum'),
}

for name, metric in metrics.items():
    metric.attach(loss_evaluator, name)

score = F2Metric()
score.attach(evaluator, 'score')

@trainer.on(Events.EPOCH_COMPLETED)
def _evaluate_loss(engine):
    loss_evaluator.run(dl_val)
    evaluator.run(dl_val)

In [22]:
npt_logger.attach_output_handler(
    trainer,
    event_name=Events.ITERATION_COMPLETED(every=12),
    tag="training",
    output_transform=lambda loss: {"batchloss": loss["sum"]},
)

npt_logger.attach_output_handler(
    loss_evaluator,
    event_name=Events.EPOCH_COMPLETED,
    tag="val",
    metric_names=["loss_classifier", "loss_box_reg", "loss_objectness", "loss_rpn_box_reg", "sum"],
    global_step_transform=global_step_from_engine(trainer),
)

npt_logger.attach_output_handler(
    evaluator,
    event_name=Events.EPOCH_COMPLETED,
    tag="val",
    metric_names=["score"],
    global_step_transform=global_step_from_engine(trainer),
)

<ignite.engine.events.RemovableEventHandle at 0x7f89e9370610>

In [23]:
trainer.logger = setup_logger("Trainer")

In [24]:
trainer.run(dl_train, max_epochs=max_epochs)

2021-12-08 13:40:07,765 Trainer INFO: Engine run starting with max_epochs=20.
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
ITERATION - loss: 0.18: 100%|██████████| 550/550 [05:07<00:00,  2.24it/s]2021-12-08 13:46:36,195 Trainer INFO: Epoch[1] Complete. Time taken: 00:06:28
ITERATION - loss: 0.16: 1100it [11:38,  2.19it/s]                        2021-12-08 13:53:03,346 Trainer INFO: Epoch[2] Complete. Time taken: 00:06:27
ITERATION - loss: 0.22: 1650it [18:06,  2.24it/s]2021-12-08 13:59:25,657 Trainer INFO: Epoch[3] Complete. Time taken: 00:06:22
ITERATION - loss: 0.19: 2200it [24:29,  2.22it/s]2021-12-08 14:05:48,684 Trainer INFO: Epoch[4] Complete. Time taken: 00:06:23
ITERATION - loss: 0.23: 2750it [30:52,  2.23it/s]2021-12-08 14:12:12,471 Trainer INFO: Epoch[5] Complete. Time taken: 00:06:24
ITERATION - loss: 0.23: 3300it [37:13,  2.28it/s]2021-12-08 14:18:33,956 Trainer INFO: Epoch[6] Complete. Time taken: 00:06:21
ITERATION - loss: 0.21: 3850it [43:38,  2

State:
	iteration: 11000
	epoch: 20
	epoch_length: 550
	max_epochs: 20
	max_iters: <class 'NoneType'>
	output: <class 'dict'>
	batch: <class 'tuple'>
	metrics: <class 'dict'>
	dataloader: <class 'torch.utils.data.dataloader.DataLoader'>
	seed: <class 'NoneType'>
	times: <class 'dict'>