In [None]:
import os

import numpy as np
import pandas as pd

# Wheat Images

In [None]:
from PIL import Image
from wheat_utils import thumb_grid
import matplotlib.pyplot as plt

In [None]:
# Directory containing the training images; it is listed with os.listdir and
# prefixed to image file names further down.
TRAIN_DIR = "../input/global-wheat-detection/train/"
# Path of the bounding-box annotation CSV that is read with pd.read_csv.
# NOTE: despite the `_DIR` suffix this is a file path, not a directory.
TRAIN_CSV_DIR = "../input/global-wheat-detection/train.csv"

In [None]:
img_filenames = os.listdir(TRAIN_DIR)

# Sample WITHOUT replacement: the default (replace=True) can pick the same
# file more than once, which would show duplicate thumbnails in the grid.
grid_ids = np.random.choice(img_filenames, 9, replace = False)
grid_imgs = []
for grid_id in grid_ids:
    # The context manager closes the file handle once the pixels are copied
    # into the array, instead of leaving it open until garbage collection.
    with Image.open(os.path.join(TRAIN_DIR, grid_id)) as grid_img:
        grid_imgs.append(np.array(grid_img))

plt.figure(figsize = (15, 15))
thumb_grid(grid_imgs, (3, 3), scale = 1)

# Bounding Boxes

In [None]:
from PIL import ImageDraw
import re

In [None]:
# Clean bounding boxes dataset

# Keep the raw frame under its own name so re-running this cell is idempotent.
bboxes_raw = pd.read_csv(TRAIN_CSV_DIR)

# The `bbox` column is text of the form "[x, y, w, h]". Strip the brackets,
# split on ", " and truncate each value to an integer (float -> int truncates
# toward zero, the same result as int(float(value))). Vectorised string
# methods replace the previous row-by-row `apply`.
bbox_coords = (
    bboxes_raw['bbox']
    .str.strip('][')
    .str.split(', ', expand = True)
    .astype(float)
    .astype(int)
)
bbox_coords.columns = ['x', 'y', 'w', 'h']

# `columns=` is required here: passing the axis positionally
# (`drop("bbox", 1)`) is rejected by pandas >= 2.0.
bboxes = pd.concat([bboxes_raw.drop(columns = "bbox"), bbox_coords], axis = 1)


In [None]:
# Re-uses the `grid_ids` sampled for the plain image grid, so both grids show
# the same nine images.

grid_imgs = []

for grid_id in grid_ids:

    # Drop only the file extension. The previous `re.sub(".jpg", "", ...)`
    # treated "." as a regex wildcard and was not anchored to the end of the
    # name, so it could also delete characters from the middle of an id.
    image_id = os.path.splitext(grid_id)[0]
    bbox = bboxes[bboxes.image_id == image_id]

    # Close the file handle as soon as the annotated pixels have been copied.
    with Image.open(os.path.join(TRAIN_DIR, grid_id)) as img:

        draw = ImageDraw.Draw(img)

        # Boxes are stored as (x, y, w, h); PIL wants (x0, y0, x1, y1).
        # NOTE(review): fill="white" paints solid rectangles over each box;
        # if the intent is to see the wheat heads, `outline=` is probably
        # what was meant — confirm before changing.
        for box in bbox.itertuples(index = False):
            draw.rectangle((box.x, box.y, box.x + box.w, box.y + box.h),
                           fill = "white")

        grid_imgs.append(np.array(img))

plt.figure(figsize = (15, 15))
thumb_grid(grid_imgs, (3, 3), scale = 1)

In [None]:
# Number of wheat heads per image

# `bboxes` has one row per annotated box, so the size of each `image_id`
# group is that image's head count. Images that never appear in `bboxes`
# are not represented in this histogram.
heads_per_image = bboxes.groupby('image_id').size()

plt.figure(figsize = (15, 5))
plt.hist(heads_per_image, bins = 100)
plt.xlabel("Wheat heads per image")
plt.ylabel("Number of images")
plt.title("Distribution of annotated wheat heads per image");

# Object Detection Using PyTorch

In [None]:
import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

In [None]:
# Strip only the file extension. `re.sub(".jpg", "", name)` used an unescaped,
# unanchored pattern that could also remove characters inside the id.
img_ids = [os.path.splitext(filename)[0] for filename in img_filenames]

# Fraction of images assigned to the training set.
train_val_split = 0.7
split = int(len(img_ids) * train_val_split)

# os.listdir returns names in arbitrary, filesystem-dependent order, so
# slicing `img_ids` directly gives a different split on different machines.
# Sorting and then shuffling with a fixed seed makes the split reproducible.
split_seed = 42
shuffled_ids = np.random.RandomState(split_seed).permutation(sorted(img_ids)).tolist()

train_ids = shuffled_ids[:split]
val_ids = shuffled_ids[split:]

# Only images that have at least one annotation row end up in these frames.
train_bboxes = bboxes[bboxes['image_id'].isin(train_ids)]
val_bboxes = bboxes[bboxes['image_id'].isin(val_ids)]

In [None]:
class WheatDataset(object):
    """Map-style detection dataset built from a bounding-box DataFrame.

    Parameters
    ----------
    root : str
        Directory containing the images, stored as ``<image_id>.jpg``.
    bboxes : pandas.DataFrame
        One row per box with columns ``image_id``, ``x``, ``y``, ``w``, ``h``.
    transforms : callable or None
        Called as ``transforms(image=..., bboxes=..., labels=...)`` and expected
        to return a mapping with ``'image'`` and ``'bboxes'`` entries.

    Each item is ``(img, target, img_id)`` where ``target`` holds ``boxes``
    (``[x_min, y_min, x_max, y_max]``, float32), ``labels``, ``image_id``,
    ``area`` and ``iscrowd``.
    """

    def __init__(self, root, bboxes, transforms = None):
        self.root = root
        self.transforms = transforms
        self.bboxes = bboxes
        # One dataset entry per distinct image that has at least one box row.
        self.imgs = bboxes['image_id'].unique()

    def __getitem__(self, idx):

        img_id = self.imgs[idx]
        bbox = self.bboxes[self.bboxes.image_id == img_id]

        img_path = os.path.join(self.root, img_id + ".jpg")
        # Close the file handle right after decoding, and scale to [0, 1] as
        # float32 (the dtype the image is cast to before being returned).
        with Image.open(img_path) as pil_img:
            img = np.asarray(pil_img.convert("RGB"), dtype=np.float32) / 255.0

        # Convert (x, y, w, h) rows to (x_min, y_min, x_max, y_max) in one
        # vectorised step; the result is (N, 4) even when N == 0.
        xywh = bbox[['x', 'y', 'w', 'h']].to_numpy(dtype=np.float32)
        xyxy = xywh.copy()
        xyxy[:, 2:] += xywh[:, :2]
        boxes = torch.as_tensor(xyxy, dtype=torch.float32)

        if self.transforms is not None:
            # Plain Python lists are passed so the transform does not have to
            # iterate over torch tensors.
            sample = self.transforms(
                image=img,
                bboxes=boxes.tolist(),
                labels=[1] * len(boxes),
            )
            img = sample['image']
            # reshape(-1, 4) keeps a well-formed (0, 4) array when the
            # transform returns no boxes; the previous zip/stack conversion
            # raised on an empty list.
            boxes = torch.as_tensor(
                np.asarray(sample['bboxes'], dtype=np.float32).reshape(-1, 4)
            )

        # Everything below is derived from the final boxes so the per-box
        # fields always have matching lengths.
        num_objs = boxes.shape[0]
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose no instance is a crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        # NOTE(review): with transforms=None the image stays in H x W x C
        # layout; any layout change is left to the supplied transform.
        img = torch.as_tensor(img, dtype=torch.float32)

        return img, target, img_id

    def __len__(self):
        return len(self.imgs)

In [None]:
# Augmentations

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

def get_train_transform():
    """Build the training augmentation pipeline.

    Applies a random flip with probability 0.5 and then converts the image to
    a tensor. Boxes are interpreted in ``pascal_voc`` format and are paired
    with the ``labels`` field.

    The probability is passed by keyword on purpose: in albumentations
    releases whose transforms take ``always_apply`` as their first positional
    parameter, ``A.Flip(0.5)`` sets ``always_apply=0.5`` (truthy), so the flip
    would be applied to every sample instead of half of them.
    """
    return A.Compose([
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Build the validation pipeline: tensor conversion only, no augmentation.

    Boxes are interpreted in ``pascal_voc`` format and paired with ``labels``.
    """
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps, bbox_params=box_settings)

## Load Faster R-CNN (ResNet-50 FPN backbone)

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Two output classes: background plus the single "wheat" class.
num_classes = 2

# Faster R-CNN with a ResNet-50 FPN backbone, requesting pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Swap the box-prediction head for a fresh one sized for `num_classes`,
# keeping the input width of the existing classification layer.
roi_heads = model.roi_heads
in_features = roi_heads.box_predictor.cls_score.in_features
roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
class Averager:
    """Running mean of the values passed to ``send`` since the last reset."""

    def __init__(self):
        # Start from the same empty state that reset() restores.
        self.reset()

    def reset(self):
        """Forget all values seen so far."""
        self.current_total, self.iterations = 0.0, 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the observations, or 0 when nothing has been sent yet."""
        return 1.0 * self.current_total / self.iterations if self.iterations else 0

In [None]:
from torch.utils.data import DataLoader, Dataset

def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    No stacking is performed; each field stays a tuple of per-sample items.
    """
    per_field = zip(*batch)
    return tuple(per_field)

train_dataset = WheatDataset(TRAIN_DIR, train_bboxes, get_train_transform())
valid_dataset = WheatDataset(TRAIN_DIR, val_bboxes, get_valid_transform())

# Shuffle the training set every epoch so batches are not always made of the
# same images in the same order. (The previously computed but never used
# `indices` permutation has been dropped in favour of the loader's shuffle.)
train_data_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

# Validation order is kept fixed.
valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)

# Train

In [None]:
num_epochs = 10

model.to(device)

# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# No learning-rate schedule is active; the training loop skips the scheduler
# step while this is None. A StepLR(optimizer, step_size=3, gamma=0.1)
# schedule is the disabled alternative.
lr_scheduler = None

In [None]:
loss_hist = Averager()
itr = 1

# Make the mode explicit: if this cell is re-run after the inference section
# has put `model` into eval mode, the forward pass below would not yield the
# loss dictionary this loop expects.
model.train()

for epoch in range(num_epochs):

    # The averager tracks the mean loss of the current epoch only.
    loss_hist.reset()

    for images, targets, image_ids in train_data_loader:

        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of loss terms.
        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        loss_hist.send(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if itr % 10 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    print(f"Epoch #{epoch} loss: {loss_hist.value}")

In [None]:
# Persist only the learned parameters (state dict), not the whole module.
torch.save(obj=model.state_dict(), f='santi_2.pth')

# Submission

In [None]:
# Directory containing the test images; used as the dataset root below.
TEST_DIR = "../input/global-wheat-detection/test/"
# Path of the sample submission CSV that is read with pd.read_csv.
# NOTE: despite the `_DIR` suffix this is a file path, not a directory.
TEST_CSV_DIR = "../input/global-wheat-detection/sample_submission.csv"
# Weights file to load for inference; same file name the training section
# passes to torch.save.
WEIGHTS = "santi_2.pth"

In [None]:
# The sample submission supplies the list of test image ids.
# NOTE: `test_df` is rebound to the prediction table later in the notebook.
test_df = pd.read_csv(filepath_or_buffer=TEST_CSV_DIR)

In [None]:
class WheatTestDataset(Dataset):
    """Map-style inference dataset yielding ``(image, image_id)`` pairs.

    Parameters
    ----------
    root : str
        Directory containing the images, stored as ``<image_id>.jpg``.
    bboxes : pandas.DataFrame
        Frame with an ``image_id`` column; only the distinct ids are used.
    transforms : callable or None
        Called as ``transforms(image=...)`` and expected to return a mapping
        with an ``'image'`` entry.
    """

    def __init__(self, root, bboxes, transforms=None):
        super().__init__()

        self.imgs = bboxes['image_id'].unique()
        self.bboxes = bboxes
        self.root = root
        self.transforms = transforms

    def __getitem__(self, idx):

        img_id = self.imgs[idx]
        # The per-item DataFrame filter that used to be here was never read;
        # it only added a full scan of `self.bboxes` for every image.

        img_path = os.path.join(self.root, img_id + ".jpg")
        # Close the file handle right after decoding, and scale to [0, 1] as
        # float32 (the dtype the image is cast to before being returned).
        with Image.open(img_path) as pil_img:
            img = np.asarray(pil_img.convert("RGB"), dtype=np.float32) / 255.0

        if self.transforms:
            img = self.transforms(image=img)['image']

        img = torch.as_tensor(img, dtype=torch.float32)

        return img, img_id

    def __len__(self):
        return self.imgs.shape[0]

In [None]:
# Albumentations
def get_test_transform():
    """Build the inference pipeline: tensor conversion only."""
    # (disabled) A.Resize(512, 512) is not part of the pipeline.
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps)

In [None]:
# Rebuild the architecture only: both `pretrained` flags are False, so no
# pretrained weights are requested here. The parameters come from WEIGHTS.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 2  # 1 class (wheat) + background

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the head with one sized for `num_classes`, so the state dict matches
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the trained weights. `map_location` remaps the stored tensors onto the
# current device, so weights saved from a GPU run also load on a CPU-only host.
model.load_state_dict(torch.load(WEIGHTS, map_location=device))
model.eval()

# Assignment form keeps the cell from echoing the full module repr.
model = model.to(device)

In [None]:
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    No stacking is performed; each field stays a tuple of per-sample items.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Inference dataset over the image ids listed in `test_df`.
test_dataset = WheatTestDataset(
    TEST_DIR,
    test_df,
    get_test_transform(),
)

# Fixed order and no dropped remainder, so every listed image is visited once.
test_data_loader = DataLoader(
    test_dataset,
    batch_size=4,
    num_workers=4,
    collate_fn=collate_fn,
    shuffle=False,
    drop_last=False,
)

In [None]:
def format_prediction_string(boxes, scores):
    """Serialise detections as one space-separated string.

    Each detection contributes ``"<score> <b0> <b1> <b2> <b3>"`` with the score
    printed to four decimals and the first four entries of its box; detections
    are joined by single spaces. ``boxes`` and ``scores`` are paired by
    position and the shorter of the two determines how many are written.
    """
    return " ".join(
        "{0:.4f} {1} {2} {3} {4}".format(score, box[0], box[1], box[2], box[3])
        for score, box in zip(scores, boxes)
    )

In [None]:
# Detections scoring below this value are discarded.
detection_threshold = 0.5
results = []

# Inference only: make the mode explicit and disable autograd so no graph is
# built or kept alive for the forward passes.
model.eval()
with torch.no_grad():

    for images, image_ids in test_data_loader:

        images = [image.to(device) for image in images]
        outputs = model(images)

        # One output dict per input image, in the same order as `image_ids`.
        for image_id, output in zip(image_ids, outputs):

            boxes = output['boxes'].cpu().numpy()
            scores = output['scores'].cpu().numpy()

            keep = scores >= detection_threshold
            boxes = boxes[keep].astype(np.int32)
            scores = scores[keep]

            # Convert (x_min, y_min, x_max, y_max) to (x_min, y_min, w, h).
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

            results.append({
                'image_id': image_id,
                'PredictionString': format_prediction_string(boxes, scores)
            })

In [None]:
# One row per test image, with columns in this fixed order.
submission_columns = ['image_id', 'PredictionString']
test_df = pd.DataFrame(results, columns=submission_columns)
test_df.head()

In [None]:
# Write the predictions without the DataFrame index column.
test_df.to_csv(path_or_buf='santi_submission_1.csv', index=False)