# Praca Inżynierska
## Jakub Karczewski

In [1]:
%load_ext tensorboard
%tensorboard --logdir ./tensor_board --host localhost --port 9000

import cv2
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import pytorch_lightning as pl
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

from metrics import object_detection_mean_average_precision
from inference import display_inferred_image

# Directory that holds the annotation CSV and the source images.
DIR_INPUT = '../data_classic'
# Directory handed to display_inferred_image as the save location.
DIR_OUTPUT = '../images_inferred'

Launching TensorBoard...

## 1. Przygotowanie danych

In [2]:
# Annotation table exported from the labelling tool.
annotations_csv = f'{DIR_INPUT}/Apteka-export.csv'
train_df = pd.read_csv(annotations_csv)

In [3]:
class PharmacyDataset(Dataset):
    """Object-detection dataset backed by an annotation table and an image folder.

    Each item is one image together with every bounding box annotated on it.

    Args:
        dataframe: Annotation table with one row per bounding box. The columns
            read here are ``image``, ``label``, ``xmin``, ``ymin``, ``xmax``
            and ``ymax``.
        image_dir: Directory containing the files named in the ``image`` column.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with the same three keys (e.g. an albumentations ``Compose``).
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        # Map every distinct label to an integer id starting at 1
        # (torchvision detection models reserve id 0 for the background).
        unique_labels = dataframe['label'].unique()
        unique_labels_dataframe = pd.DataFrame(data={
            'label': unique_labels,
            'label_id': list(range(1, len(unique_labels) + 1)),
        })

        dataframe = dataframe.merge(unique_labels_dataframe, on='label')

        self.image_names = dataframe['image'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
        self.unique_labels_dataframe = unique_labels_dataframe

    def __getitem__(self, index: int):
        """Return ``(image, target, image_name)`` for the ``index``-th unique image.

        ``target`` is a dict with the keys ``boxes`` (N x 4, xmin/ymin/xmax/ymax),
        ``labels``, ``image_id``, ``area`` and ``iscrowd``. All per-box entries
        are built after the transforms ran, so they always describe the same
        set of boxes.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_name = self.image_names[index]
        records = self.df[self.df['image'] == image_name]

        image_path = f'{self.image_dir}/{image_name}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports a failed read by returning None, not by raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        box_columns = ['xmin', 'ymin', 'xmax', 'ymax']
        boxes = torch.as_tensor(
            records[box_columns].to_numpy(dtype=np.float32, copy=True)
        ).reshape(-1, 4)
        labels = torch.as_tensor(records['label_id'].to_numpy(), dtype=torch.int64)

        if self.transforms:
            sample = self.transforms(
                image=image,
                bboxes=boxes.tolist(),
                labels=labels.tolist(),
            )
            image = sample['image']
            # A transform may drop boxes (e.g. ones moved outside the frame), so
            # boxes AND labels are re-read from its output; reshape keeps the
            # (0, 4) shape when no box survives.
            boxes = torch.as_tensor(sample['bboxes'], dtype=torch.float32).reshape(-1, 4)
            labels = torch.as_tensor(sample['labels'], dtype=torch.int64)

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.as_tensor([index], dtype=torch.int64),
            'area': area,
            'iscrowd': torch.zeros((boxes.shape[0],), dtype=torch.uint8),
        }

        return image, target, image_name

    def __len__(self) -> int:
        """Return the number of distinct images in the annotation table."""
        return len(self.image_names)

## 2. Przygotowanie modelu

In [4]:
def get_train_transform():
    """Build the training augmentation pipeline.

    Applies a random flip (p=0.5), brightness/contrast jitter (p=0.4) and
    shift-scale-rotate (p=0.3), then converts the image to a tensor. Boxes
    are declared in ``pascal_voc`` format with their classes in ``labels``.
    """
    box_settings = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    pipeline = [
        A.Flip(p=0.5),
        A.RandomBrightnessContrast(p=0.4),
        A.ShiftScaleRotate(p=0.3),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(pipeline, bbox_params=box_settings)

def get_validation_transform():
    """Build the augmentation-free pipeline: tensor conversion only.

    Boxes are declared in ``pascal_voc`` format with their classes in ``labels``.
    """
    box_settings = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    pipeline = [ToTensorV2(p=1.0)]
    return A.Compose(pipeline, bbox_params=box_settings)

def collate_fn(batch):
    """Transpose a list of per-sample tuples into a tuple of per-field tuples.

    The fields are grouped as-is rather than stacked into one tensor.
    """
    fields = zip(*batch)
    return tuple(fields)

class FasterRcnn(pl.LightningModule):
    """Lightning module wrapping torchvision's Faster R-CNN (ResNet-50 FPN).

    The pre-trained box predictor is replaced by a fresh head sized for the
    labels found in ``df``.

    Args:
        df: Annotation table; its ``label`` column determines the class count.
        learning_rate: Initial learning rate handed to the optimizer.
        batch_size: Batch size used by the training and validation loaders.
    """

    def __init__(self, df: pd.DataFrame, learning_rate: float, batch_size: int):
        super().__init__()
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        num_classes = len(df['label'].unique()) + 1  # object classes + background

        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

        # Number of input features expected by the classification head.
        in_features = model.roi_heads.box_predictor.cls_score.in_features

        # Swap the pre-trained head for one sized to this dataset.
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        self.model = model

    def forward(self, x):
        """Delegate to the wrapped detection model."""
        return self.model(x)

    def prepare_data(self):
        """Create the 85/15 train/validation split with a fixed seed.

        Both subsets index the same images as a seeded ``random_split`` would,
        but the validation subset reads them through the augmentation-free
        transform so the monitored validation loss is not randomly perturbed.
        """
        train_source = PharmacyDataset(train_df, DIR_INPUT, transforms=get_train_transform())
        valid_source = PharmacyDataset(train_df, DIR_INPUT, transforms=get_validation_transform())

        train_cases = int(len(train_source) * 0.85)
        val_cases = len(train_source) - train_cases
        train_split, valid_split = random_split(
            train_source,
            lengths=[train_cases, val_cases],
            generator=torch.Generator().manual_seed(42),
        )

        self.train_dataset = train_split
        # Reuse the held-out indices on the dataset without augmentation.
        self.valid_dataset = torch.utils.data.Subset(valid_source, valid_split.indices)

    def train_dataloader(self) -> DataLoader:
        """Return the shuffled loader over the training subset."""
        return DataLoader(self.train_dataset,
                          batch_size=self.batch_size,
                          num_workers=6,
                          shuffle=True,
                          collate_fn=collate_fn)

    def val_dataloader(self) -> DataLoader:
        """Return the in-order loader over the validation subset."""
        return DataLoader(self.valid_dataset,
                          batch_size=self.batch_size,
                          num_workers=6,
                          shuffle=False,
                          collate_fn=collate_fn)

    def configure_optimizers(self):
        """Set up AdamW with a plateau scheduler driven by ``val_main_loss``."""
        params = [p for p in self.model.parameters() if p.requires_grad]

        optimizer = torch.optim.AdamW(params, lr=self.learning_rate, weight_decay=1e-2)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, mode='min', patience=10)

        # The scheduler options belong inside the 'lr_scheduler' dict; keys
        # placed next to it are not part of the scheduler configuration.
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'interval': 'epoch',
                'monitor': 'val_main_loss',
            },
        }

    def training_step(self, batch, batch_idx):
        """Compute and log the detection losses for one training batch."""
        images, targets, _ = batch
        targets = [dict(t) for t in targets]
        # Individual loss terms returned by the model in train mode.
        loss_dict = self.model(images, targets)
        # Total loss used for the backward pass.
        losses = sum(loss_dict.values())

        # loss_dict contains the training metrics:
        # - loss_objectness: error in telling objects apart from background
        # - loss_rpn_box_reg: error of the RPN's proposal-box position regression
        # - loss_box_reg: error in positioning the anchor box within the proposal boxes
        # - loss_classifier: error in assigning the object to a class
        self.log_dict(loss_dict, prog_bar=True, logger=True)
        self.log('main_loss', losses, prog_bar=False, logger=True)

        return {'loss': losses}

    def training_epoch_end(self, training_step_outputs):
        """Placeholder epoch hook; intentionally does nothing yet."""
        # TODO: implement mAP here

    def validation_step(self, batch, batch_idx):
        """Compute and log the ``val_``-prefixed detection losses for one batch."""
        images, targets, _ = batch
        targets = [dict(t) for t in targets]

        # torchvision detection models only return the loss dict in train mode,
        # so switch temporarily and restore the previous mode afterwards.
        was_training = self.model.training
        self.model.train()
        try:
            loss_dict = self.model(images, targets)
        finally:
            self.model.train(was_training)

        loss_dict = {f"val_{k}": v for k, v in loss_dict.items()}
        # Total validation loss; monitored by the scheduler and callbacks.
        losses = sum(loss_dict.values())
        self.log_dict(loss_dict, prog_bar=True, logger=True)
        self.log('val_main_loss', losses, prog_bar=True, logger=True)

        return None

## 3. (Opcjonalnie) Trening modelu

In [5]:
model = FasterRcnn(train_df, learning_rate=0.007, batch_size=1)

# TensorBoard run written under ./tensor_board/test/lr=0.auto
tb_logger = pl.loggers.TensorBoardLogger(
    save_dir="./tensor_board",
    name='test',
    version='lr=0.auto',
)

# Stop once val_main_loss has not improved for 10 validation runs.
early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_main_loss',
    mode='min',
    patience=10,
    min_delta=0.0,
    verbose=True,
)

# Keep only the checkpoint with the lowest val_main_loss.
model_checkpoint = pl.callbacks.ModelCheckpoint(
    monitor='val_main_loss',
    mode='min',
    save_top_k=1,
    dirpath='saved_models/',
    filename='test_auto',
)

trainer = pl.Trainer(
    gpus=1,
    accelerator='dp',
    max_epochs=800,
    accumulate_grad_batches=1,
    gradient_clip_val=0.5,
    auto_lr_find=True,
    logger=tb_logger,
    callbacks=[early_stopping, model_checkpoint],
)
# Run the learning-rate finder enabled by auto_lr_find.
trainer.tune(model)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | FasterRCNN | 41 M  
LR finder stopped early due to diverging loss.
Learning rate set to 4.786300923226385e-05


HBox(children=(HTML(value='Finding best initial lr'), FloatProgress(value=0.0), HTML(value='')))




In [6]:
# Train the model with the trainer (logger and callbacks) configured in the previous cell.
trainer.fit(model)


  | Name  | Type       | Params
-------------------------------------
0 | model | FasterRCNN | 41 M  


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

1

## 4. (Opcjonalnie) Załadowanie wytrenowanego modelu

In [7]:
load_pretrained = True

# When requested, replace the in-memory model with one restored from a .ckpt file.
if load_pretrained:
    model = FasterRcnn.load_from_checkpoint(
        'example_model.ckpt',
        df=train_df,
        learning_rate=0.007,
        batch_size=1,
    )

# Put the model into inference configuration on the GPU.
model.freeze()
model.eval()
model.to('cuda')

print('Model ready for inference')

Model ready for inference


## 5. Test modelu

In [8]:
# Inference-time dataset: images are only converted to tensors, never augmented.
dataset = PharmacyDataset(
    dataframe=train_df,
    image_dir=DIR_INPUT,
    transforms=get_validation_transform(),
)
data_loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=6, collate_fn=collate_fn)

In [10]:
# Run the model on every image and save the rendered predictions to DIR_OUTPUT.
for images, _targets, image_names in data_loader:
    # Only the images are consumed below, so the ground-truth targets stay on the CPU.
    images = [image.to('cuda') for image in images]

    for image, image_name in zip(images, image_names):
        display_inferred_image(
            model=model,
            image=image,
            show_image=False,
            labels_dict=dataset.unique_labels_dataframe,
            score_threshold=0.5,
            save_dir=DIR_OUTPUT,
            save_name=image_name
        )