# Praca Inżynierska
## Jakub Karczewski

In [1]:
%load_ext tensorboard
%tensorboard --logdir tensor_board --host localhost --port 9000

Reusing TensorBoard on port 9000 (pid 14794), started 1:19:32 ago. (Use '!kill 14794' to kill it.)

In [2]:
import os
import albumentations as A
import cv2
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torchvision

from albumentations.pytorch.transforms import ToTensorV2
from functools import reduce
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from utilities.metrics import mean_average_precision
from utilities.inference import infer_image

DIR_CLASSIC_INPUT = 'data/classic'
DF_CLASSIC_TRAIN = pd.read_csv('csv_dataframes/input_classic.csv')

DIR_FACES_INPUT = 'data/faces_converted'
DF_FACES = pd.read_csv('csv_dataframes/input_faces.csv')

DIR_CLASSIC_INPUT_ALL = 'data/all_shelves'
DIR_OUTPUT = 'data/inference'
DEVICE_VALIDATION = 'cuda'

pl.seed_everything(42)

42

## 1. Przygotowanie zbioru danych

In [3]:
class PharmacyDataset(Dataset):

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        unique_labels = dataframe['label'].unique()
        unique_labels_dataframe = pd.DataFrame(data={
            'label': unique_labels,
            'label_id': [i+1 for i in range(unique_labels.shape[0])]
        })

        dataframe = dataframe.merge(unique_labels_dataframe, on='label')

        self.image_names = dataframe['image'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
        self.unique_labels_dataframe = unique_labels_dataframe

    def __getitem__(self, index: int):
        image_name = self.image_names[index]
        records = self.df[self.df['image'] == image_name]
        image = cv2.imread(f'{self.image_dir}/{image_name}', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        boxes = []
        for row in records.iterrows():
            row = row[1]
            boxes.append([row['xmin'], row['ymin'], row['xmax'], row['ymax']])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            'boxes': boxes,
            'labels': torch.as_tensor(records['label_id'].values, dtype=torch.int64),
            'image_id': torch.as_tensor([index], dtype=torch.int64),
            'area': torch.as_tensor(area, dtype=torch.float32),
            'iscrowd': torch.zeros((records.shape[0],), dtype=torch.uint8)
        }

        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': target['labels']
            }
            sample = self.transforms(**sample)
            image = sample['image']

            target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)

        return image, target, image_name

    def __len__(self) -> int:
        return self.image_names.shape[0]

## 2. Przygotowanie modelu

In [4]:
def get_train_transform():
    return A.Compose([
        A.RandomBrightnessContrast(p=0.3),
        A.ShiftScaleRotate(p=0.3),
        ToTensorV2(p=1.0)
    ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

def get_metrics_transform():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

def collate_fn(batch):
        return tuple(zip(*batch))

def perform_metrics_calculations(outputs: torch.Tensor, targets: torch.Tensor):
    l_targets = []
    l_preds = []

    for pred, target in zip(outputs, targets):
        # sometimes target['labels'] is longer than target['boxes']
        target['labels'] = target['labels'][:len(target['boxes'])]

        t_targets = torch.cat([
            torch.full((len(target['boxes']), 1), target['image_id'].item()).to('cuda'),
            torch.unsqueeze(target['labels'], 0).t(),
            target['boxes']
        ], dim=1)
        l_targets.append(t_targets)


        t_pred = torch.cat([
            torch.full((len(pred['boxes']), 1), target['image_id'].item()).to('cuda'),
            torch.unsqueeze(pred['labels'], 0).t(),
            torch.unsqueeze(pred['scores'], 0).t(),
            pred['boxes']
        ], dim=1)
        l_preds.append(t_pred)

    t_target = reduce(lambda a, b: torch.cat([a, b], dim=0), l_targets)
    t_pred = reduce(lambda a, b: torch.cat([a, b], dim=0), l_preds)

    return {
        'target': t_target,
        'pred': t_pred
    }

class FasterRcnn(pl.LightningModule):
    def __init__(
            self,
            train_dataset: torch.utils.data.dataset.Subset,
            valid_dataset: torch.utils.data.dataset.Subset,
            test_dataset: torch.utils.data.dataset.Subset,
            num_classes: int,
            batch_size: int,
            optimizer = torch.optim.AdamW,
            optimizer_keys = None,
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau,
            scheduler_keys = None,
            # lr is only used when found, otherwise optimizer_keys lr is used
            lr = None
    ):
        super(FasterRcnn, self).__init__()
        self.batch_size = batch_size
        self.train_dataset = train_dataset
        self.valid_dataset = valid_dataset
        self.test_dataset = test_dataset
        self.optimizer = optimizer
        self.optimizer_keys = optimizer_keys
        self.scheduler = scheduler
        self.scheduler_keys = scheduler_keys
        self.lr = lr

        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=True,
            pretrained_backbone=True,
            trainable_backbone_layers=3
        )

        # get number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features

        # replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


        self.model = model

    def change_roi_heads(self, new_num_classes):
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, new_num_classes)

    def forward(self, x):
        return self.model(x)

    def train_dataloader(self) -> DataLoader:
       train_loader = DataLoader(self.train_dataset,
                                 batch_size=self.batch_size,
                                 num_workers=6,
                                 shuffle=True,
                                 collate_fn=collate_fn)
       return train_loader

    def val_dataloader(self) -> DataLoader:
        valid_loader = DataLoader(self.valid_dataset,
                                  batch_size=self.batch_size,
                                  num_workers=6,
                                  shuffle=False,
                                  collate_fn=collate_fn)
        return valid_loader

    def test_dataloader(self) -> DataLoader:
        test_loader = DataLoader(self.test_dataset,
                                  batch_size=self.batch_size,
                                  num_workers=6,
                                  shuffle=False,
                                  collate_fn=collate_fn)
        return test_loader

    def configure_optimizers(self):
        params = [p for p in self.model.parameters() if p.requires_grad]

        if self.lr is not None:
            self.optimizer_keys['lr'] = self.lr

        optimizer = self.optimizer(params=params, **self.optimizer_keys)
        return ({
            'optimizer': optimizer,
            'lr_scheduler': self.scheduler(optimizer=optimizer, **self.scheduler_keys),
            'interval': 'epoch',
            'monitor': 'val_mAP'
        })

    def training_step(self, batch, batch_idx):
        images, targets, img_name = batch
        targets = [{k: v for k, v in t.items()} for t in targets]
        # separate losses
        loss_dict = self.model(images, targets)
        # total loss
        losses = sum(loss for loss in loss_dict.values())

        # loss_dict contains training metrics:
        # - loss_objectness: błąd informujący o rozróżnianiu obiektu od tła
        # - loss_rpn_box_reg: błąd pozycjonowania proposal box'u przez RPN w wyniku regresji
        # - loss_box_reg: błąd pozycjonowania anochor box'u w proposal box'ach
        # - loss_classifier: błąd zaklasyfikowania obiektu do danej klasy
        self.log_dict(loss_dict, prog_bar=True, logger=True)
        self.log('main_loss', losses, prog_bar=False, logger=True)

        return {'loss': losses}

    def validation_step(self, batch, batch_idx):
        images, targets, img_name = batch
        targets = [{k: v for k, v in t.items()} for t in targets]

        outputs = self.model(images, targets)

        return perform_metrics_calculations(outputs, targets)

    def validation_epoch_end(self, val_step_outputs):
        t_target = reduce(lambda a, b: torch.cat([a, b], dim=0), [x['target'] for x in val_step_outputs])
        t_pred = reduce(lambda a, b: torch.cat([a, b], dim=0), [x['pred'] for x in val_step_outputs])

        val_mean_average_precision = mean_average_precision(
            t_pred, t_target, iou_threshold=0.5, ap_calculation='COCO'
        )
        self.log('val_mAP', val_mean_average_precision.item(), prog_bar=True, logger=True)

    def test_step(self, batch, batch_idx):
        images, targets, img_name = batch
        targets = [{k: v for k, v in t.items()} for t in targets]

        outputs = self.model(images, targets)

        return perform_metrics_calculations(outputs, targets)

    def test_epoch_end(self, test_step_outputs):
        t_target = reduce(lambda a, b: torch.cat([a, b], dim=0), [x['target'] for x in test_step_outputs])
        t_pred = reduce(lambda a, b: torch.cat([a, b], dim=0), [x['pred'] for x in test_step_outputs])

        val_mean_average_precision = mean_average_precision(
            t_pred, t_target, iou_threshold=0.5, ap_calculation='COCO'
        )
        self.log('test_mAP', val_mean_average_precision.item(), prog_bar=True, logger=True)

## 3. Trening modelu (opcjonalnie)
Poniższy kod dokonuje:
- podziału danych na zbiory: treningowy, walidacyjny i testowy,
- treningu modelu dla zadanych konfiguracji

Metryki z procesu treningu są widoczne w TensorBoard.

Zapisane modelu są dostępne w folderze `saved_models` (ze względu na rozmiar modeli nie są one zapisywane na git).

In [5]:
dataset_train = PharmacyDataset(DF_CLASSIC_TRAIN, DIR_CLASSIC_INPUT, transforms=get_train_transform())
dataset_metrics = PharmacyDataset(DF_CLASSIC_TRAIN, DIR_CLASSIC_INPUT, transforms=get_metrics_transform())
num_classes = len(DF_CLASSIC_TRAIN['label'].unique()) + 1

train_cases = int(len(dataset_train) * 0.7)
val_cases = int(len(dataset_train) * 0.15)
test_cases = len(dataset_train) - train_cases - val_cases
lengths = [train_cases, val_cases, test_cases]
train_dataset, _, _ = random_split(dataset_train, lengths=lengths)
_, valid_dataset, test_dataset = random_split(dataset_metrics, lengths=lengths)


In [6]:
configs = [
    {
        'optimizer': torch.optim.AdamW,
        'optimizer_keys': {'lr': 0.0001, 'weight_decay': 0.1},
        'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau,
        'scheduler_keys': {'factor': 0.1, 'mode': 'max', 'patience': 10}
    },
]

for config in configs:
    run_category_name = config['optimizer'].__name__
    run_version_name = '-'.join([f'{k}={v}' for k, v in config['optimizer_keys'].items()])

    model = FasterRcnn(
        train_dataset=train_dataset,
        valid_dataset=valid_dataset,
        test_dataset=test_dataset,
        num_classes=num_classes,
        batch_size=1,
        optimizer=config['optimizer'],
        optimizer_keys=config['optimizer_keys'],
        scheduler=config['scheduler'],
        scheduler_keys=config['scheduler_keys'],
    )

    tb_logger = pl.loggers.TensorBoardLogger(
        save_dir="./tensor_board",
        name=run_category_name,
        version=run_version_name
    )

    early_stopping = pl.callbacks.EarlyStopping(
        patience=15,
        mode='max',
        monitor='val_mAP',
        min_delta=0.0,
        verbose=True
    )

    model_checkpoint = pl.callbacks.ModelCheckpoint(
        dirpath='saved_models/',
        filename=f'{run_category_name}+{run_version_name}',
        mode='max',
        monitor='val_mAP',
        save_top_k=1
    )

    trainer = pl.Trainer(
        gpus=1,
        logger=tb_logger,
        accumulate_grad_batches=1,
        accelerator='dp',
        gradient_clip_val=0.5,
        max_epochs=150,
        auto_lr_find=True,
        deterministic=True,
        callbacks=[early_stopping, model_checkpoint]
    )

    trainer.fit(model)
    trainer.test()

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | FasterRCNN | 41.5 M
-------------------------------------
41.2 M    Trainable params
222 K     Non-trainable params
41.5 M    Total params
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | FasterRCNN | 41.5 M
-------------------------------------
41.2 M    Trainable params
222 K     Non-trainable params
41.5 M    Total params


Validation sanity check: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Testing: |          | 0/? [00:00<?, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_mAP': 0.7586678862571716}
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_mAP': 0.7863180041313171}
--------------------------------------------------------------------------------


Validation sanity check: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

Testing: |          | 0/? [00:00<?, ?it/s]

## 4. Załadowanie wytrenowanego modelu (opcjonalnie)
Poniższy kod dokonuje załadowania modelu na bazie zapisanych wag. Należy podstawić za zmienną `model_path` ścieżkę do wag, które chcemy załadować

In [5]:
# model with best performance
model_path = "saved_models/AdamW+lr=0.0001-weight_decay=0.1.ckpt"
load_pretrained = True

if load_pretrained:
    # model with .ckpt extension
    model = FasterRcnn.load_from_checkpoint(
        model_path,
        train_dataset=None,
        valid_dataset=None,
        test_dataset=None,
        num_classes=len(DF_CLASSIC_TRAIN['label'].unique()) + 1,
        batch_size=1,
        optimizer=None,
        optimizer_keys=None,
        scheduler=None,
        scheduler_keys=None
    )
model.freeze()
model.eval()
model.to('cuda')

print('Model ready for inference')

Model ready for inference


## 5. Test modelu (opcjonalnie)

Wszystkie obrazy zawarte w poniżej PharamcyDataset są poddawane działaniu modelu detekcji obiektów.

Obrazy z zaznaczonymi bounding box'ami są zapisywane w folderze `data/inference`.
W repozytorium zapisane są wszystkie otrzymane zdjęcia półek.

In [6]:
class ImageDataSet(Dataset):

    def __init__(self, root):
        self.root = root
        self.image_names = os.listdir(self.root)
        
    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, index):
        image_name = self.image_names[index]
        image = cv2.imread(f'{self.root}/{image_name}', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
                
        return (ToTensorV2()(image=image)['image'], image_name)

dataset = ImageDataSet(
    root=DIR_CLASSIC_INPUT_ALL,
)

data_loader = DataLoader(
    dataset,
    batch_size=1,
    num_workers=6,
    shuffle=True,
    collate_fn=collate_fn
)

unique_labels = DF_CLASSIC_TRAIN['label'].unique()
unique_labels_dataframe = pd.DataFrame(data={
    'label': unique_labels,
    'label_id': [i+1 for i in range(unique_labels.shape[0])]
})

In [7]:
l = []
for (images, names) in iter(data_loader):
    images = list(image.to('cuda') for image in images)  
    
    for (image, image_name) in zip(images, names):
        # image -> tensor of shape(3, 1200, 1600)
        infer_image(
            model=model,
            image=image,
            show_image=False,
            labels_dict=unique_labels_dataframe,
            score_threshold=0.5,
            save_dir=DIR_OUTPUT,
            save_name=image_name
        )