In [None]:
!git clone https://github.com/pankratozzi/Yolov7-training
!pip install -qq pytorch_accelerated func_to_script pycocotools
    
import sys
sys.path.append("/kaggle/working/Yolov7-training/")

In [None]:
import warnings
warnings.filterwarnings("ignore")
import os

import pandas as pd
import numpy as np

from tqdm import trange
from tqdm.contrib.concurrent import process_map

import random
from functools import partial
from pathlib import Path
from glob import glob

from func_to_script import script
from PIL import Image
import matplotlib.pyplot as plt

import torch
from pytorch_accelerated.callbacks import (
    TrainerCallback,
    EarlyStoppingCallback,
    ModelEmaCallback,
    ProgressBarCallback,
    SaveBestModelCallback,
    get_default_callbacks,
)

from pytorch_accelerated.utils import local_process_zero_only
from pytorch_accelerated.schedulers import CosineLrScheduler
from torch.utils.data import Dataset

from yolov7 import create_yolov7_model
from yolov7.dataset import (
    Yolov7Dataset,
    create_base_transforms,
    create_yolov7_transforms,
    yolov7_collate_fn,
)
from yolov7.evaluation import CalculateMeanAveragePrecisionCallback
from yolov7.loss_factory import create_yolov7_loss
from yolov7.mosaic import MosaicMixupDataset, create_post_mosaic_transform
from yolov7.trainer import Yolov7Trainer, filter_eval_predictions
from yolov7.utils import SaveBatchesCallback, Yolov7ModelEma

from yolov7.plotting import show_image

In [None]:
def seed_everything(seed=42):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and every CUDA device), exports ``PYTHONHASHSEED`` and puts
    cuDNN into deterministic mode.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers all visible GPUs; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick convolution algorithms by timing them,
    # which can differ between runs, so it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False


seed_everything()

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device.upper()} device.")

# Dataset location.
root = "/kaggle/input/wider-data/WIDER/"

# Model / training settings shared by the cells below.
pretrained = True  # True to get COCO weights
num_classes = 1
image_size = 640
batch_size = 2
num_epochs = 16

In [None]:
# Build the detector with the configured class count and weight initialisation,
# then move it to the selected device.
model = create_yolov7_model(
    architecture="yolov7",
    num_classes=num_classes,
    pretrained=pretrained,
)
model = model.to(device)

In [None]:
def read_annots(file, max_faces=20):
    """Parse a WIDER-style ground-truth listing into per-image records.

    The file is read as a sequence of records: a line containing ``.jpg``
    (the image path), a line holding the declared number of boxes, then one
    line per box whose first four whitespace-separated integers are
    ``x y w h``.

    Args:
        file: path of the annotation text file.
        max_faces: images declaring more boxes than this are skipped.

    Returns:
        A list of dicts with keys ``path``, ``box_num`` (the declared count),
        ``boxes`` (``[xmin, ymin, xmax, ymax]`` lists) and ``label`` (one ``0``
        per kept box). Images for which no box could be parsed are omitted.

    Raises:
        ValueError: if a count or coordinate field is not an integer.
    """
    def get_coords(line):
        # split() without an argument tolerates tabs, repeated spaces and the
        # trailing space these files carry; four fields are enough for a box.
        fields = line.split()
        if len(fields) < 4:
            return None
        return [int(value) for value in fields[:4]]

    # Read everything up front so the file is closed before parsing starts.
    with open(file, "r") as f:
        lines = f.readlines()

    data = []
    for i, line in enumerate(lines):
        if ".jpg" not in line:
            continue
        if i + 1 >= len(lines):
            # Truncated file: a path line with no count line after it.
            break

        annot = {
            "path": line.strip(),
            "box_num": int(lines[i + 1]),
            "boxes": [],
            "label": [],
        }
        if annot["box_num"] > max_faces:
            continue

        # Slicing (instead of indexing) stays in range on a truncated file.
        for box_line in lines[i + 2 : i + 2 + annot["box_num"]]:
            box = get_coords(box_line)
            if box is None:
                continue
            x, y, w, h = box
            # xmin, ymin, xmax, ymax
            annot["boxes"].append([x, y, x + w, y + h])
            annot["label"].append(0)  # 1

        if annot["boxes"]:
            data.append(annot)
    return data

In [None]:
# Parse both ground-truth listings. The paths are built from the configured
# `root` so the dataset location lives in one place and does not depend on the
# notebook's current working directory.
train = read_annots(root + "wider_face_train_bbx_gt.txt")
valid = read_annots(root + "wider_face_val_bbx_gt.txt")

In [None]:
# One row per training box: explode the per-image box lists, then split each
# box into its four corner columns.
train_columns = list(train[0].keys())
x_train = pd.DataFrame.from_records(train, columns=train_columns)
x_train = x_train.explode("boxes").reset_index(drop=True)

train_points = pd.DataFrame(
    x_train["boxes"].values.tolist(),
    columns=["xmin", "ymin", "xmax", "ymax"],
    index=x_train.index,
)
x_train = pd.concat([x_train, train_points], axis=1)

# Keep only boxes with non-zero width and non-zero height.
train_has_area = (x_train.xmin != x_train.xmax) & (x_train.ymin != x_train.ymax)
x_train = x_train[train_has_area].assign(class_id=0)

# Consecutive integer id per image, in order of first appearance.
image_ids = {path: position for position, path in enumerate(x_train["path"].unique())}
x_train = x_train.assign(
    image_id=x_train["path"].map(image_ids),
    w=x_train["xmax"] - x_train["xmin"],
    h=x_train["ymax"] - x_train["ymin"],
)

x_train.head()

In [None]:
# One row per validation box. The column list is taken from the validation
# records themselves rather than from `train`, so this cell does not silently
# depend on the training records.
x_valid = pd.DataFrame.from_records(valid, columns=list(valid[0].keys())).explode("boxes").reset_index(drop=True)
valid_points = pd.DataFrame(x_valid["boxes"].values.tolist(), columns=["xmin", "ymin", "xmax", "ymax"], index=x_valid.index)
x_valid = pd.concat([x_valid, valid_points], axis=1)

# Drop boxes with zero width or zero height; the explicit copy makes the
# column assignments below write to an independent frame.
x_valid = x_valid[~((x_valid.xmin == x_valid.xmax) | (x_valid.ymin == x_valid.ymax))].copy()

x_valid["class_id"] = 0
# Consecutive integer id per image, in order of first appearance.
image_ids = {p: i for i, p in enumerate(x_valid["path"].unique())}
x_valid["image_id"] = x_valid["path"].map(image_ids)

x_valid["w"] = x_valid["xmax"] - x_valid["xmin"]
x_valid["h"] = x_valid["ymax"] - x_valid["ymin"]

x_valid.head()

In [None]:
# Test images, located through the configured `root`. glob() returns files in
# filesystem order, so sort them: the random index drawn when previewing
# predictions then maps to the same image on every run.
test_paths = sorted(glob(root + "WIDER_test/*/*.jpg"))

In [None]:
# Anchor utilities from the cloned yolov7 package.
# NOTE(review): calculate_resized_gt_wh is listed twice below, and
# calculate_best_anchor_ratio, kmeans and LOSS_ANCHOR_MULTIPLE_THRESHOLD are not
# referenced anywhere in the visible notebook.
from yolov7.anchors import (calculate_resized_gt_wh, 
                            calculate_resized_gt_wh, 
                            calculate_best_possible_recall,
                            calculate_best_anchor_ratio, 
                            estimate_anchors
                           )
from scipy.cluster.vq import kmeans
LOSS_ANCHOR_MULTIPLE_THRESHOLD = 4.0


def find_image_sizes(path):
    """Return ``(path, (width, height))`` for one training image.

    Args:
        path: image path relative to the ``WIDER_train/`` folder under ``root``.

    Returns:
        A tuple of the unchanged ``path`` and the image's ``(width, height)``
        in pixels.
    """
    # PIL exposes the dimensions via `.size` without converting the pixels to
    # a NumPy array, and the context manager closes the file handle promptly.
    with Image.open(root + "WIDER_train/" + path) as image:
        return path, image.size

# Measure every distinct training image once, in parallel. An explicit
# chunksize keeps inter-process overhead low for a long list of small tasks.
image_sizes = process_map(find_image_sizes, list(x_train.path.unique()), chunksize=64)

image_sizes_df = pd.DataFrame(
    [(path, width, height) for path, (width, height) in image_sizes],
    columns=["path", "image_w", "image_h"],
)
# Drop size columns left by a previous run of this cell so the merge does not
# create image_w_x / image_w_y duplicates when re-executed.
x_train = x_train.drop(columns=["image_w", "image_h"], errors="ignore").merge(
    image_sizes_df, on="path", how="left"
)

# use one bbox from image to calculate anchors, otherwise dims in gt_wh and image_sizes don't match
# Both group-bys are keyed on "path", so row i of each array refers to the same image.
# NOTE(review): after the merge above every box row already carries image_w/image_h,
# so x_train[["w", "h"]] with x_train[["image_w", "image_h"]] would presumably let all
# boxes contribute - confirm against calculate_resized_gt_wh before switching.
raw_gt_wh = x_train.groupby("path").agg({"w": "last", "h": "last"}).values
image_sizes = image_sizes_df.groupby("path").agg({"image_w": "last", "image_h": "last"}).values

gt_wh = calculate_resized_gt_wh(raw_gt_wh, image_sizes, target_image_size=image_size)

current_anchors = model.detection_head.anchor_grid.clone().cpu().view(-1, 2)

recall = calculate_best_possible_recall(current_anchors, gt_wh)
print(f"Best possible recall with given images: {recall.item()}")

if recall < 0.98:

    proposed_anchors = estimate_anchors(9, gt_wh)
    recall = calculate_best_possible_recall(proposed_anchors, gt_wh)
    print(f"Best possible new recall with given images: {recall.item()}")

    # Order the anchors from small to large by area while keeping each (w, h)
    # pair intact. Sorting with axis=0 would sort widths and heights
    # independently, producing anchors that differ from the ones whose recall
    # was just reported.
    anchors_array = np.asarray(proposed_anchors)
    area_order = np.argsort(anchors_array.prod(axis=1))
    model.update_anchors(anchors_array[area_order])
else:
    print("Default anchors are suitable for this task.")

In [None]:
class FaceDataset(Dataset):
    """Image-level dataset over a one-row-per-box annotation frame.

    Args:
        data: DataFrame with at least the columns ``image_id``, ``path``,
            ``xmin``, ``ymin``, ``xmax``, ``ymax`` and ``class_id``.
        train: selects the ``WIDER_train/`` (True) or ``WIDER_val/`` (False)
            sub-folder under the module-level ``root``.
        transforms: optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with the keys ``image``, ``bboxes`` and ``labels``.

    The image-id order and the rows of each image are resolved once at
    construction, so ``data`` must not be modified afterwards.
    """

    def __init__(self, data, train=True, transforms=None):
        self.annotations_df = data
        self.transforms = transforms
        self.sub_path = "WIDER_train/" if train else "WIDER_val/"
        # Precompute the lookups instead of calling unique() and scanning the
        # whole frame with a boolean mask on every item access.
        self._image_ids = data["image_id"].unique()
        # image_id -> positional row indices, in original row order.
        self._rows_by_image = data.groupby("image_id", sort=False).indices

    def __len__(self):
        """Number of distinct images."""
        return len(self._image_ids)

    def __getitem__(self, ix):
        """Return ``(image, xyxy_bboxes, class_ids, image_id, image_hw)``.

        ``image_hw`` is the (height, width) of the image as loaded from disk,
        i.e. before ``transforms`` is applied.
        """
        image_id = self._image_ids[ix]
        image_info = self.annotations_df.iloc[self._rows_by_image[image_id]]
        path = image_info["path"].iloc[0]

        # Close the file handle as soon as the pixels have been copied out.
        with Image.open(root + self.sub_path + path) as image_file:
            image = np.array(image_file.convert("RGB"))
        image_hw = image.shape[:2]

        xyxy_bboxes = image_info[["xmin", "ymin", "xmax", "ymax"]].values
        class_ids = image_info["class_id"].values

        if self.transforms is not None:
            transformed = self.transforms(
                image=image, bboxes=xyxy_bboxes, labels=class_ids
            )
            image = transformed["image"]
            xyxy_bboxes = np.array(transformed["bboxes"])
            class_ids = np.array(transformed["labels"])

        return image, xyxy_bboxes, class_ids, image_id, image_hw

In [None]:
# Image-level datasets: the training set gets the base transforms, the
# validation set is left without transforms at this level.
train_ds = FaceDataset(x_train, transforms=create_base_transforms(image_size))
valid_ds = FaceDataset(x_valid, train=False)

# Wrap the training set with mosaic / mixup augmentation.
post_mosaic_transforms = create_post_mosaic_transform(
    output_height=image_size, output_width=image_size
)
mds = MosaicMixupDataset(
    train_ds,
    apply_mixup_probability=0.15,
    post_mosaic_transforms=post_mosaic_transforms,
)
# disable mosaic if finetuning
if pretrained:
    mds.disable()

# Final model-facing datasets for training and evaluation.
yolo_input_size = (image_size, image_size)
train_yds = Yolov7Dataset(
    mds,
    create_yolov7_transforms(training=True, image_size=yolo_input_size),
)
eval_yds = Yolov7Dataset(
    valid_ds,
    create_yolov7_transforms(training=False, image_size=yolo_input_size),
)

In [None]:
# Parameter groups exposed by the model; only "other_params" goes into the
# optimizer here, "conv_weights" is added in a later cell with its own
# weight decay.
param_groups = model.get_parameter_groups()

loss_func = create_yolov7_loss(model, image_size=image_size)

sgd_settings = dict(lr=0.01, momentum=0.9, nesterov=True)  # 0.937
optimizer = torch.optim.SGD(param_groups["other_params"], **sgd_settings)

# mAP callback built from the validation ground-truth boxes.
map_target_columns = ["image_id", "xmin", "ymin", "xmax", "ymax", "class_id"]
calculate_map_callback = CalculateMeanAveragePrecisionCallback.create_from_targets_df(
    targets_df=x_valid[map_target_columns],
    image_ids=set(x_valid.image_id.unique()),
    iou_threshold=0.2,
)

In [None]:
class PlotImageCallback(TrainerCallback):
    """Draw the model's detections on a randomly chosen image after each eval epoch.

    Uses the module-level ``model``, ``device`` and ``image_size``.

    Args:
        image_paths: sequence of image file paths to sample from.
    """

    def __init__(self, image_paths):
        self.image_paths = image_paths

    def plot_image(self):
        """Run inference on one random image and display the predicted boxes.

        The model's training/eval mode is restored to whatever it was before
        the call. Does nothing when there are no image paths.
        """
        if len(self.image_paths) == 0:
            # np.random.randint(0) would raise; there is nothing to draw.
            return

        idx = np.random.randint(len(self.image_paths))
        path = self.image_paths[idx]
        # Note: resizing to a square does not preserve the aspect ratio.
        image = Image.open(path).convert("RGB").resize((image_size, image_size))
        image = np.array(image)
        # HWC uint8 -> 1xCxHxW float in [0, 1]
        image_tensor = (
            torch.as_tensor(image / 255., dtype=torch.float32)
            .permute(2, 0, 1)
            .unsqueeze(0)
            .to(device)
        )

        # Remember the current mode so that previewing never flips a model
        # that was in eval mode (e.g. after training) back into train mode.
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                out = model(image_tensor)
                output = model.postprocess(out, conf_thres=0.001, max_detections=100, multiple_labels_per_box=True)  # postprocess 3 fpn outputs
                output = filter_eval_predictions(output, confidence_threshold=0.2, nms_threshold=0.65)  # nms
        finally:
            model.train(was_training)

        output = output[0].cpu().detach().numpy()
        # First four columns are used as xyxy box coordinates.
        boxes = output[:, :4].tolist()

        show_image(image, bboxes=boxes,
                   class_labels=["face"]*len(boxes),
                   bbox_format="xyxy")

    def on_eval_epoch_end(self, trainer, **kwargs):
        """Trainer hook: plot one preview at the end of every evaluation epoch."""
        self.plot_image()

In [None]:
# Prediction filter applied during evaluation.
eval_prediction_filter = partial(
    filter_eval_predictions, confidence_threshold=0.01, nms_threshold=0.3
)

# Callback order is kept exactly as configured: preview plot, mAP, EMA
# (with its own progress bar + mAP), checkpointing, batch dumps, early stopping,
# then the library defaults without the progress bar.
trainer_callbacks = [
    PlotImageCallback(test_paths),
    calculate_map_callback,
    ModelEmaCallback(
        decay=0.9999,
        model_ema=Yolov7ModelEma,
        callbacks=[ProgressBarCallback, calculate_map_callback],
    ),
    SaveBestModelCallback(watch_metric="map", greater_is_better=True),
    SaveBatchesCallback("./batches", num_images_per_batch=2),
    EarlyStoppingCallback(
        early_stopping_patience=3,
        watch_metric="map",
        greater_is_better=True,
        early_stopping_threshold=0.001,
    ),
    *get_default_callbacks(progress_bar=False),  # True
]

trainer = Yolov7Trainer(
    model=model,
    optimizer=optimizer,
    loss_func=loss_func,
    filter_eval_predictions_fn=eval_prediction_filter,
    callbacks=trainer_callbacks,
)

In [None]:
# Derive the gradient-accumulation step count that approximates the nominal
# batch size, and scale the weight decay by the ratio actually reached.
nominal_batch_size = 64
base_weight_decay = 0.0005

# batch size across all processes
total_batch_size = batch_size * trainer._accelerator.num_processes

num_accumulate_steps = max(round(nominal_batch_size / total_batch_size), 1)
scaled_weight_decay = (
    base_weight_decay * total_batch_size * num_accumulate_steps / nominal_batch_size
)

# The convolution weights join the optimizer as a separate group carrying the
# scaled weight decay.
conv_weight_group = {
    "params": param_groups["conv_weights"],
    "weight_decay": scaled_weight_decay,
}
optimizer.add_param_group(conv_weight_group)

In [None]:
# Cosine learning-rate schedule with warm-up and cool-down epochs.
cosine_scheduler_fn = CosineLrScheduler.create_scheduler_fn(
    num_warmup_epochs=2,  # 5
    num_cooldown_epochs=2,  # 5
    k_decay=2,
)

# Launch training with gradient accumulation standing in for a larger batch.
trainer.train(
    num_epochs=num_epochs,
    train_dataset=train_yds,
    eval_dataset=eval_yds,
    per_device_batch_size=batch_size,
    create_scheduler_fn=cosine_scheduler_fn,
    collate_fn=yolov7_collate_fn,
    gradient_accumulation_steps=num_accumulate_steps,
)

In [None]:
# Show predictions of the trained model on ten randomly drawn test images.
num_previews = 10
plotter = PlotImageCallback(test_paths)

for _preview in range(num_previews):
    plotter.plot_image()

# [SEE RESULTS: parameter tuning and more epochs are needed](https://www.kaggle.com/code/pankratozzi/pytorch-yolov7-widerface-accelerated)