# # Jacob Morin - COS 432

Original Pseudo Labeling Notebook: https://www.kaggle.com/nvnnghia/fasterrcnn-pseudo-labeling


My changes to this file add the augmentations from my previous training notebook. Some augmentations were removed because I wanted to submit this notebook, and internet access would have been required to update albumentations.

In [None]:
#!pip freeze
#! pip install --upgrade albumentations

In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import re
import math
import random

from PIL import Image

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import ast
import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt


# Root of the competition data and its train/test image folders.
DIR_INPUT = '/kaggle/input/global-wheat-detection'
DIR_TRAIN = f'{DIR_INPUT}/train'
DIR_TEST = f'{DIR_INPUT}/test'

# Folder holding the checkpoint that is loaded into the model before pseudo-labeling.
DIR_WEIGHTS = '/kaggle/input/rcnn12152'

# Full path of that checkpoint file.
WEIGHTS_FILE = f'{DIR_WEIGHTS}/fasterrcnn_resnet50_fpn-12152.pth'

In [None]:
# The sample submission supplies the 'image_id' values the test dataset iterates over.
test_df = pd.read_csv(f'{DIR_INPUT}/sample_submission.csv')
test_df

In [None]:
class WheatTestDataset(Dataset):
    """Inference-time dataset that yields ``(image, image_id)`` pairs.

    Args:
        dataframe: frame with an ``image_id`` column; its unique values
            define the items of the dataset.
        image_dir: directory containing ``<image_id>.jpg`` files.
        transforms: optional callable invoked as ``transforms(image=...)``
            whose result exposes the transformed image under ``'image'``.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return the RGB image scaled to [0, 1] and its id.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.image_ids[index]
        image_path = f'{self.image_dir}/{image_id}.jpg'

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')

        # OpenCV loads BGR uint8; convert to RGB floats in [0, 1].
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        if self.transforms:
            image = self.transforms(image=image)['image']

        return image, image_id

    def __len__(self) -> int:
        """Number of unique image ids."""
        return self.image_ids.shape[0]

In [None]:
# Albumentations
def get_train_transform():
    """Build the Albumentations pipeline applied to training samples.

    The pipeline flips, slightly rotates and randomly crops/resizes the
    image (each with probability 0.5) and finally converts it to a tensor.
    Bounding boxes are expected in ``pascal_voc`` format with their class
    ids passed under the ``labels`` key.

    Returns:
        An ``A.Compose`` instance.
    """
    return A.Compose([
        # `p` must be passed by keyword: the first positional parameter of
        # a transform is `always_apply`, so `A.Flip(0.5)` flipped every sample.
        A.Flip(p=0.5),
        A.ShiftScaleRotate(
            shift_limit=0.0,
            scale_limit=0.0,
            rotate_limit=10,
            interpolation=1,
            border_mode=cv2.BORDER_CONSTANT,
            value=None,
            mask_value=None,
            always_apply=False,
            p=.5,
        ),
        A.RandomSizedCrop(
            min_max_height=[768, 1024],
            height=1024,
            width=1024,
            w2h_ratio=1.0,
            interpolation=1,
            always_apply=False,
            p=.5,
        ),
        # Disabled: A.ColorJitter(brightness=0.3, contrast=0.2, saturation=0.1, hue=0.1, p=.5)
        # Disabled: A.HueSaturationValue(hue_shift_limit=1, sat_shift_limit=1, val_shift_limit=1, p=.5)
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Return the validation pipeline: tensor conversion only, boxes in pascal_voc format."""
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps, bbox_params=box_settings)


In [None]:
# Albumentations
def get_test_transform():
    """Return the test-time pipeline: tensor conversion only, no bounding-box handling."""
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps)


In [None]:
# Build a Faster R-CNN (ResNet-50 FPN) without downloading any pre-trained
# weights (both flags are False); the trained checkpoint is loaded from
# WEIGHTS_FILE in a later cell.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 2  # 1 class (wheat) + background

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the box predictor head so it outputs `num_classes` classes
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the trained weights. map_location lets a checkpoint saved from a GPU
# run be restored on a CPU-only machine as well.
model.load_state_dict(torch.load(WEIGHTS_FILE, map_location=device))
model.eval()

# The result is bound to a name; presumably this keeps the notebook from
# echoing the full model repr as the cell output.
x = model.to(device)

In [None]:
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples."""
    fields = zip(*batch)
    return tuple(fields)

# Test images are looked up in DIR_TEST by the ids found in test_df and only
# converted to tensors (no augmentation).
test_dataset = WheatTestDataset(test_df, DIR_TEST, get_test_transform())

# shuffle=False and drop_last=False: every test image is visited, in dataset
# order. collate_fn keeps images as a tuple rather than stacking them.
test_data_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    drop_last=False,
    collate_fn=collate_fn
)

In [None]:
def format_prediction_string(boxes, scores):
    """Serialise detections as space-separated ``score b0 b1 b2 b3`` groups.

    Scores are printed with four decimals; box values are printed as-is.
    Pairs are formed with ``zip``, so the shorter input bounds the output.
    """
    return " ".join(
        f"{score:.4f} {box[0]} {box[1]} {box[2]} {box[3]}"
        for score, box in zip(scores, boxes)
    )

# # **Detect wheat heads and create pseudo labels for the test dataset**

In [None]:
# Detections scoring below this value are discarded.
detection_threshold = 0.5
# Filled by the final-inference cell further down.
results = []

# One dict per pseudo-labelled box, later turned into a DataFrame.
testdf_psuedo = []

# Inference only: disabling autograd avoids building a graph for every batch.
with torch.no_grad():
    for images, image_ids in test_data_loader:

        images = [image.to(device) for image in images]
        outputs = model(images)

        for image_id, output in zip(image_ids, outputs):

            boxes = output['boxes'].detach().cpu().numpy()
            scores = output['scores'].detach().cpu().numpy()

            # Compute the confidence mask once and reuse it.
            keep = scores >= detection_threshold
            boxes = boxes[keep].astype(np.int32)

            # Convert (x_min, y_min, x_max, y_max) to (x, y, w, h).
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

            for box in boxes:
                result = {
                    # The 'pred' prefix marks the row as a pseudo label; the
                    # loaders strip it and read the image from DIR_TEST.
                    'image_id': 'pred'+image_id,
                    'width': 1024,
                    'height': 1024,
                    'source': 'pred',
                    'x': box[0],
                    'y': box[1],
                    'w': box[2],
                    'h': box[3]
                }
                testdf_psuedo.append(result)
            


In [None]:
# One row per pseudo-labelled box; the x/y/w/h columns match the ones train_df
# is given in the next section, so the two frames can be concatenated.
test_df_pseudo = pd.DataFrame(testdf_psuedo, columns=['image_id', 'width', 'height', 'source', 'x', 'y', 'w', 'h'])
test_df_pseudo.head()

# # **Retrain model with pseudo labels**

In [None]:
# Load the training annotations and add placeholder box columns; they are
# overwritten below with the values parsed from the 'bbox' column.
train_df = pd.read_csv(f'{DIR_INPUT}/train.csv')
train_df['x'] = -1
train_df['y'] = -1
train_df['w'] = -1
train_df['h'] = -1

def expand_bbox(x):
    """Extract the numeric tokens of a bbox string.

    Returns a NumPy array holding the matched numbers as strings, or the
    list ``[-1, -1, -1, -1]`` when the text contains no number at all.
    """
    numbers = re.findall("([0-9]+[.]?[0-9]*)", x)
    if not numbers:
        return [-1, -1, -1, -1]
    return np.array(numbers)

# Split the textual 'bbox' column into four columns (still strings here).
train_df[['x', 'y', 'w', 'h']] = np.stack(train_df['bbox'].apply(expand_bbox))
train_df.drop(columns=['bbox'], inplace=True)

# Use the builtin float: the `np.float` alias was deprecated in NumPy 1.20
# and removed in 1.24, where it raises AttributeError.
train_df['x'] = train_df['x'].astype(float)
train_df['y'] = train_df['y'].astype(float)
train_df['w'] = train_df['w'].astype(float)
train_df['h'] = train_df['h'].astype(float)

train_df.head()

In [None]:
image_ids = train_df['image_id'].unique()
# The last 665 unique image ids form the validation subset.
valid_ids = image_ids[-665:]
# The hold-out slice is commented out, so the training ids include the
# validation ids as well. NOTE(review): train_ids is not used in the visible cells.
train_ids = image_ids #[:-665]

valid_df = train_df[train_df['image_id'].isin(valid_ids)]
# Filtering train_df is disabled: training keeps every annotated image.
#train_df = train_df[train_df['image_id'].isin(train_ids)]

In [None]:
# Append the pseudo-labelled test boxes to the training annotations.
# The original row indices are kept (no ignore_index), so index labels may repeat.
frames = [train_df, test_df_pseudo]

train_df = pd.concat(frames)
train_df.tail()

# # **Wheat dataset for Training**

In [None]:
class WheatDataset(Dataset):
    """Training dataset that picks one of three sample loaders at random.

    Each item comes from the module-level ``load_mosaic``, ``load_mixup`` or
    ``load_image`` function, selected by a uniform random draw.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
        # One dataset item per distinct image id.
        self.image_ids = dataframe['image_id'].unique()

    def __getitem__(self, index: int):
        """Return ``(image, target, image_id)`` produced by the chosen loader."""
        draw = random.random()

        if draw < 0.33:
            # Four-image mosaic.
            loader = load_mosaic
        elif 0.33 < draw < 0.66:
            # Two-image mixup.
            loader = load_mixup
        else:
            # Plain single image.
            loader = load_image

        image, target, image_id = loader(self, index)
        return image, target, image_id

    def __len__(self) -> int:
        """Number of distinct image ids."""
        return len(self.image_ids)

In [None]:
def load_image(self, index):
    """Load one image and its boxes, optionally augmented by ``self.transforms``.

    This is a free function that receives the dataset instance as ``self``.

    Args:
        self: dataset exposing ``image_ids``, ``df``, ``image_dir`` and
            ``transforms``.
        index: position of the sample in ``self.image_ids``.

    Returns:
        ``(image, target, image_id)``. ``target`` holds ``boxes`` (N x 4
        float32 tensor, x_min/y_min/x_max/y_max), ``labels``, ``image_id``,
        ``area`` and ``iscrowd``, all sized to the boxes actually returned.
        ``image_id`` has any ``'pred'`` prefix removed.

    Raises:
        FileNotFoundError: if the image file cannot be read.

    Note:
        When transforms are set, the sample is re-augmented until at least
        one box survives; a sample that has no boxes to begin with would
        therefore never terminate.
    """
    min_side = 15      # boxes with a side shorter than this (px) are dropped
    max_aspect = 4.5   # boxes more elongated than this ratio are dropped

    image_id = self.image_ids[index]
    records = self.df[self.df['image_id'] == image_id]

    # Pseudo-labelled rows carry a 'pred' prefix; their pixels live in DIR_TEST.
    if image_id.startswith('pred'):
        image_id = image_id[len('pred'):]
        image_path = f'{DIR_TEST}/{image_id}.jpg'
    else:
        image_path = f'{self.image_dir}/{image_id}.jpg'

    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0

    # Work on a private float copy and convert (x, y, w, h) to corner format.
    final_boxes = records[['x', 'y', 'w', 'h']].values.astype(np.float64)
    final_boxes[:, 2] += final_boxes[:, 0]
    final_boxes[:, 3] += final_boxes[:, 1]

    if self.transforms:
        sample = {
            'image': image,
            'bboxes': final_boxes,
            'labels': [1] * len(final_boxes),
        }
        augmented = self.transforms(**sample)

        # A random crop may leave no box at all; draw a new augmentation then.
        while len(augmented['bboxes']) == 0:
            augmented = self.transforms(**sample)

        image = augmented['image']
        candidates = [tuple(box)[:4] for box in augmented['bboxes']]

        # Drop tiny or very elongated boxes (typically slivers cut at the
        # image border). A new list is built rather than removing from the
        # list being iterated, which used to skip elements.
        kept = []
        for x_min, y_min, x_max, y_max in candidates:
            box_w = x_max - x_min
            box_h = y_max - y_min
            if box_w < min_side or box_h < min_side:
                continue
            aspect = box_w / box_h
            if aspect > max_aspect or aspect < 1 / max_aspect:
                continue
            kept.append((x_min, y_min, x_max, y_max))

        # Always keep at least one box so the target is never empty.
        final_boxes = kept if kept else candidates[-1:]

    boxes = torch.as_tensor(
        np.asarray(final_boxes, dtype=np.float32).reshape(-1, 4)
    )
    num_boxes = boxes.shape[0]

    # Every target field is derived from the boxes that are actually returned.
    target = {}
    target['boxes'] = boxes
    # there is only one class
    target['labels'] = torch.ones((num_boxes,), dtype=torch.int64)
    target['image_id'] = torch.tensor([index])
    target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    # suppose all instances are not crowd
    target['iscrowd'] = torch.zeros((num_boxes,), dtype=torch.int64)

    return image, target, image_id
    

In [None]:
def load_mixup(self, index):
    """Blend the indexed image with a random one and merge their boxes.

    This is a free function that receives the dataset instance as ``self``.
    The two images are averaged pixel-wise (they must share a shape) and
    the boxes of both are kept.

    Args:
        self: dataset exposing ``image_ids``, ``df``, ``image_dir`` and
            ``transforms``.
        index: position of the primary sample in ``self.image_ids``.

    Returns:
        ``(image, target, image_id)``. ``target`` holds ``boxes`` (N x 4
        float32 tensor, x_min/y_min/x_max/y_max), ``labels``, ``image_id``,
        ``area`` and ``iscrowd``, all sized to the boxes actually returned.
        ``image_id`` is the id of the indexed image (``'pred'`` prefix
        removed), not of the random partner.

    Raises:
        FileNotFoundError: if either image file cannot be read.

    Note:
        When transforms are set, the sample is re-augmented until at least
        one box survives.
    """
    min_side = 15      # boxes with a side shorter than this (px) are dropped
    max_aspect = 4.5   # boxes more elongated than this ratio are dropped

    # The first image is the requested one, the second is drawn at random.
    partner_index = random.randint(0, len(self.image_ids) - 1)
    raw_ids = (self.image_ids[index], self.image_ids[partner_index])

    images = []
    box_groups = []
    clean_ids = []
    for raw_id in raw_ids:
        records = self.df[self.df['image_id'] == raw_id]

        # Private float copy, converted from (x, y, w, h) to corner format.
        boxes = records[['x', 'y', 'w', 'h']].values.astype(np.float64)
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]
        box_groups.append(boxes)

        # Pseudo-labelled rows carry a 'pred' prefix; their pixels live in DIR_TEST.
        if raw_id.startswith('pred'):
            clean_id = raw_id[len('pred'):]
            image_path = f'{DIR_TEST}/{clean_id}.jpg'
        else:
            clean_id = raw_id
            image_path = f'{self.image_dir}/{clean_id}.jpg'
        clean_ids.append(clean_id)

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        images.append(image)

    final_boxes = np.vstack(box_groups)

    # combine the two images
    mix_img = np.add(images[0], images[1]) / 2

    if self.transforms:
        sample = {
            'image': mix_img,
            'bboxes': final_boxes,
            'labels': [1] * len(final_boxes),
        }
        augmented = self.transforms(**sample)

        # A random crop may leave no box at all; draw a new augmentation then.
        while len(augmented['bboxes']) == 0:
            augmented = self.transforms(**sample)

        mix_img = augmented['image']
        candidates = [tuple(box)[:4] for box in augmented['bboxes']]

        # Drop tiny or very elongated boxes (typically slivers cut at the
        # image border). A new list is built rather than removing from the
        # list being iterated, which used to skip elements.
        kept = []
        for x_min, y_min, x_max, y_max in candidates:
            box_w = x_max - x_min
            box_h = y_max - y_min
            if box_w < min_side or box_h < min_side:
                continue
            aspect = box_w / box_h
            if aspect > max_aspect or aspect < 1 / max_aspect:
                continue
            kept.append((x_min, y_min, x_max, y_max))

        # Always keep at least one box so the target is never empty.
        final_boxes = kept if kept else candidates[-1:]

    boxes = torch.as_tensor(
        np.asarray(final_boxes, dtype=np.float32).reshape(-1, 4)
    )
    num_boxes = boxes.shape[0]

    # Every target field is derived from the boxes that are actually returned.
    target = {}
    target['boxes'] = boxes
    # there is only one class
    target['labels'] = torch.ones((num_boxes,), dtype=torch.int64)
    target['image_id'] = torch.tensor([index])
    target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    # suppose all instances are not crowd
    target['iscrowd'] = torch.zeros((num_boxes,), dtype=torch.int64)

    return mix_img, target, clean_ids[0]

In [None]:
def load_mosaic(self, index):
    """Build a 2x2 mosaic from the indexed image plus three random ones.

    This is a free function that receives the dataset instance as ``self``.
    The four images are tiled into a grid via ``make_grid``, their boxes
    are shifted to grid coordinates, and ``crop_mosaic`` then cuts a random
    window out of the grid so the tile seam moves around.

    Args:
        self: dataset exposing ``image_ids``, ``df``, ``image_dir`` and
            ``transforms``.
        index: position of the primary sample in ``self.image_ids``.

    Returns:
        ``(image, target, image_id)``. ``target`` holds ``boxes`` (N x 4
        float32 tensor, x_min/y_min/x_max/y_max), ``labels``, ``image_id``,
        ``area`` and ``iscrowd``, all sized to the boxes actually returned.
        ``image_id`` is the id of the indexed image (``'pred'`` prefix
        removed), not of the last random tile.

    Raises:
        FileNotFoundError: if any image file cannot be read.

    Note:
        Cropping and augmentation are repeated together until at least one
        box survives. Previously an empty crop was re-fed to the transforms
        forever, because the crop itself was never redrawn.
    """
    tile = 1024        # assumed side length of each source image, in pixels
    min_side = 15      # boxes with a side shorter than this (px) are dropped
    max_aspect = 4.5   # boxes more elongated than this ratio are dropped

    # (x, y) offset of each tile inside the grid: TL, TR, BL, BR.
    offsets = ((0, 0), (tile, 0), (0, tile), (tile, tile))

    # The first tile is the requested image, the other three are random.
    raw_ids = [self.image_ids[index]]
    raw_ids += [
        self.image_ids[random.randint(0, len(self.image_ids) - 1)]
        for _ in range(3)
    ]

    images = []
    box_groups = []
    clean_ids = []
    for raw_id, (pad_w, pad_h) in zip(raw_ids, offsets):
        records = self.df[self.df['image_id'] == raw_id]

        # Private float copy: shift into grid coordinates, then convert
        # (x, y, w, h) to corner format.
        boxes = records[['x', 'y', 'w', 'h']].values.astype(np.float64)
        boxes[:, 0] += pad_w
        boxes[:, 1] += pad_h
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]
        box_groups.append(boxes)

        # Pseudo-labelled rows carry a 'pred' prefix; their pixels live in DIR_TEST.
        if raw_id.startswith('pred'):
            clean_id = raw_id[len('pred'):]
            image_path = f'{DIR_TEST}/{clean_id}.jpg'
        else:
            clean_id = raw_id
            image_path = f'{self.image_dir}/{clean_id}.jpg'
        clean_ids.append(clean_id)

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        images.append(image)

    allboxes = np.vstack(box_groups)
    grid_sample = {
        'image': make_grid(images),
        'bboxes': allboxes,
        'labels': [1] * len(allboxes),
    }

    crop = crop_mosaic()
    while True:
        # Redraw the crop window on every attempt.
        sample = crop(**grid_sample)
        if len(sample['bboxes']) == 0:
            continue
        if self.transforms:
            sample = self.transforms(**sample)
            if len(sample['bboxes']) == 0:
                continue
        break

    grid_img = sample['image']
    candidates = [tuple(box)[:4] for box in sample['bboxes']]

    # Drop tiny or very elongated boxes (typically slivers cut at the crop
    # border). A new list is built rather than removing from the list being
    # iterated, which used to skip elements.
    kept = []
    for x_min, y_min, x_max, y_max in candidates:
        box_w = x_max - x_min
        box_h = y_max - y_min
        if box_w < min_side or box_h < min_side:
            continue
        aspect = box_w / box_h
        if aspect > max_aspect or aspect < 1 / max_aspect:
            continue
        kept.append((x_min, y_min, x_max, y_max))

    # Always keep at least one box so the target is never empty.
    final_boxes = kept if kept else candidates[-1:]

    boxes = torch.as_tensor(
        np.asarray(final_boxes, dtype=np.float32).reshape(-1, 4)
    )
    num_boxes = boxes.shape[0]

    # Every target field is derived from the boxes that are actually returned.
    target = {}
    target['boxes'] = boxes
    # there is only one class
    target['labels'] = torch.ones((num_boxes,), dtype=torch.int64)
    target['image_id'] = torch.tensor([index])
    target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    # suppose all instances are not crowd
    target['iscrowd'] = torch.zeros((num_boxes,), dtype=torch.int64)

    return grid_img, target, clean_ids[0]

In [None]:
def crop_mosaic():
    """Return the always-applied random crop used on the mosaic grid.

    A square window with a side between 900 and 1100 pixels is cut out and
    resized to 1024x1024; boxes use pascal_voc format with ``labels``.
    """
    random_window = A.RandomSizedCrop(min_max_height=[900,1100], height=1024, width=1024, w2h_ratio=1.0, interpolation=1, always_apply=True, p=1.0)
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose([random_window], bbox_params=box_settings)

In [None]:
def make_grid(imagelist):
    """Tile the first four images into a 2x2 grid.

    Images 0 and 1 form the top row, images 2 and 3 the bottom row.
    """
    rows = [np.hstack((imagelist[k], imagelist[k + 1])) for k in (0, 2)]
    return np.vstack(rows)

In [None]:
class Averager:
    """Running mean of the values passed to :meth:`send`."""

    def __init__(self):
        self.iterations = 0.0
        self.current_total = 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of the observations so far, or 0 when there are none."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget every observation."""
        self.iterations = 0.0
        self.current_total = 0.0
        
        
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field."""
    per_field = zip(*batch)
    return tuple(per_field)

# Both datasets are WheatDataset instances, so validation items also go through
# its random mosaic/mixup/plain selection; only the transform pipeline differs.
train_dataset = WheatDataset(train_df, DIR_TRAIN, get_train_transform())
valid_dataset = WheatDataset(valid_df, DIR_TRAIN, get_valid_transform())


# split the dataset in train and test set
# NOTE(review): `indices` is not used by any visible cell.
indices = torch.randperm(len(train_dataset)).tolist()

# Training batches: 6 samples, reshuffled each epoch, kept as tuples by collate_fn.
train_data_loader = DataLoader(
    train_dataset,
    batch_size=6,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

# NOTE(review): this loader is shuffled and is not consumed by any visible cell.
valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=3,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

# Same device selection as in the model-setup cell.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# **Show a sample**

In [None]:
# Pull one training batch and move its tensors to the device.
images, targets, image_ids = next(iter(train_data_loader))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

# Inspect the third sample of the batch (index 2).
boxes = targets[2]['boxes'].cpu().numpy().astype(np.int32)
# Reorder the image axes with permute(1,2,0) before converting it for plotting.
sample = images[2].permute(1,2,0).cpu().numpy()

fig, ax = plt.subplots(1, 1, figsize=(16, 8))

# Draw each box onto the image; boxes are (x_min, y_min, x_max, y_max).
for box in boxes:
    cv2.rectangle(sample,
                  (box[0], box[1]),
                  (box[2], box[3]),
                  (220, 0, 0), 3)
    
ax.set_axis_off()
ax.imshow(sample)

# # **Retrain Faster R-CNN**

In [None]:
# Switch to training mode and make sure the model lives on the target device.
model.train()
model.to(device)

# Only parameters that require gradients are handed to the optimizer.
params = [weight for weight in model.parameters() if weight.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, weight_decay=0.0001)
# The learning rate is halved after every epoch.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)

num_epochs = 5

loss_hist = Averager()
itr = 1

for epoch in range(num_epochs):
    # The running mean is tracked per epoch.
    loss_hist.reset()

    for images, targets, image_ids in train_data_loader:
        images = [image.to(device) for image in images]
        targets = [
            {key: tensor.to(device) for key, tensor in target.items()}
            for target in targets
        ]

        # In training mode the model returns a dict of loss terms.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        loss_value = losses.item()
        loss_hist.send(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if not itr % 50:
            print(f"Iteration #{itr} loss: {loss_value}")
        itr += 1

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    print(f"Epoch #{epoch} loss: {loss_hist.value}")
    


In [None]:
# Persist the retrained weights (state dict only) to the working directory.
torch.save(model.state_dict(), 'fasterrcnn_resnet50_fpn2nd.pth')

# **Final inference and submission**

In [None]:
model.eval()

# Start from an empty list so that re-running this cell cannot append
# duplicate rows to the submission.
results = []

# Inference only: disabling autograd avoids building a graph for every batch.
with torch.no_grad():
    for images, image_ids in test_data_loader:

        images = [image.to(device) for image in images]
        outputs = model(images)

        for image_id, output in zip(image_ids, outputs):

            boxes = output['boxes'].detach().cpu().numpy()
            scores = output['scores'].detach().cpu().numpy()

            # Compute the confidence mask once and apply it to both arrays.
            keep = scores >= detection_threshold
            boxes = boxes[keep].astype(np.int32)
            scores = scores[keep]

            # Convert (x_min, y_min, x_max, y_max) to (x, y, w, h).
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

            result = {
                'image_id': image_id,
                'PredictionString': format_prediction_string(boxes, scores)
            }

            results.append(result)


In [None]:
# Rebind test_df to the submission table: one row per test image with its prediction string.
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
test_df.head()

In [None]:
# Preview the second image (index 1) of the last batch processed by the inference loop.
sample = images[1].permute(1,2,0).cpu().numpy()
boxes = outputs[1]['boxes'].data.cpu().numpy()
scores = outputs[1]['scores'].data.cpu().numpy()

# Keep confident detections; the boxes stay in (x_min, y_min, x_max, y_max) form here.
boxes = boxes[scores >= detection_threshold].astype(np.int32)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 8))

# Draw every kept detection onto the preview image.
for box in boxes:
    cv2.rectangle(sample,
                  (box[0], box[1]),
                  (box[2], box[3]),
                  (220, 0, 0), 2)
    
ax.set_axis_off()
ax.imshow(sample)

In [None]:
# Write the submission file without the DataFrame index column.
test_df.to_csv('submission.csv', index=False)