In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Standard library
import copy
import os
import re
from typing import Dict

import cv2

# Imports for image transforms
# Albumentations bounding box augmentation docs: https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# Torch imports
import torch
import torchvision
from torch import nn, optim
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt

In [None]:
# Install Ax for Bayesian Optimization of hyperparameters
!pip install ax-platform

In [None]:
# Ax imports
from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook_plotting import render, init_notebook_plotting
# from ax.utils.tutorials.cnn_utils import train, evaluate

init_notebook_plotting()

In [None]:
# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Read-only competition data
INPUT_DIR = '/kaggle/input/global-wheat-detection/'
# Kaggle only preserves files written under /kaggle/working/
OUTPUT_DIR = '/kaggle/working/'
# os.path.join avoids the doubled '/' that the trailing slash on INPUT_DIR would produce
TRAIN_DIR = os.path.join(INPUT_DIR, 'train')
TEST_DIR = os.path.join(INPUT_DIR, 'test')

In [None]:
train_df = pd.read_csv(f'{INPUT_DIR}/train.csv')
train_df.head()

In [None]:
train_df.shape

In [None]:
# Add one placeholder column (filled with -1) per bbox component; the real
# values are parsed out of train_df['bbox'] further down.
bbox_cols = ['x', 'y', 'w', 'h']
train_df = train_df.assign(**{col: -1 for col in bbox_cols})

In [None]:
train_df.head()

In [None]:
train_df['bbox'][1]

In [None]:
def extract_bbox_data(box):
    """
    Reformats a bbox entry for insertion into the x, y, w, h columns.

    Input: box (String) in form "[x, y, w, h]"
    Output: data (list of float) in form [x, y, w, h]. When the entry is not a
        string (e.g. NaN for an image without boxes) or holds no coordinates,
        the placeholder [-1, -1, -1, -1] is returned.
    Raises: ValueError if the entry holds some coordinates but not exactly 4.
    """
    placeholder = [-1.0, -1.0, -1.0, -1.0]
    if not isinstance(box, str):
        return placeholder

    # "".split(",") yields [''], so empty fields must be filtered out before
    # counting; otherwise the "no bbox" case is never detected.
    fields = [field.strip() for field in box.strip().strip("[]").split(",")]
    fields = [field for field in fields if field]
    if not fields:
        return placeholder
    if len(fields) != 4:
        raise ValueError(f"expected 4 bbox values, got {len(fields)}: {box!r}")
    return [float(field) for field in fields]

# Parse every bbox string and write the four components into bbox_cols
parsed_bboxes = train_df['bbox'].apply(extract_bbox_data)
train_df[bbox_cols] = np.stack(parsed_bboxes)

In [None]:
train_df.head()

In [None]:
# The raw bbox string is no longer needed once x, y, w, h are populated.
# errors='ignore' keeps this cell re-runnable after the column is already gone.
train_df = train_df.drop(columns='bbox', errors='ignore')

In [None]:
train_df.head()

In [None]:
# The np.float alias has been removed from NumPy; the builtin float selects
# the same float64 dtype.
train_df[bbox_cols] = train_df[bbox_cols].astype(float)

In [None]:
image_ids = train_df['image_id'].unique()
image_ids.shape

In [None]:
3373 * .2

In [None]:
# Split image ids into 80% train, 20% validation. Splitting by image id keeps
# all boxes of one image on the same side of the split.
VALID_FRACTION = 0.2
n_valid = int(round(len(image_ids) * VALID_FRACTION))
# Use an explicit split index: image_ids[:-0] would be empty when n_valid == 0
split_idx = len(image_ids) - n_valid
train_ids = image_ids[:split_idx]
valid_ids = image_ids[split_idx:]

In [None]:
# Build both row masks, then carve the validation and training frames out of
# the full frame.
in_valid = train_df['image_id'].isin(valid_ids)
in_train = train_df['image_id'].isin(train_ids)
valid_df = train_df[in_valid]
train_df = train_df[in_train]
# train_df = train_df[train_df['image_id'].isin(image_ids)]

In [None]:
train_df.shape, valid_df.shape
# train_df.shape

In [None]:
"""
Albumentations transforms for creating datasets from raw data.
These functions call A.Compose(), which returns a transform function that performs
image augmentation.
Called by creating a data dictionary and calling transform_fn(**data)
Will be used later in our Dataset object definition
"""

# Bounding-box settings shared by every A.Compose pipeline in this notebook:
# boxes are given in pascal_voc format and 'labels' travels with them.
bound_params = dict(format='pascal_voc', label_fields=['labels'])

def train_transform():
    """
    Build the transform pipeline for training data.

    Steps, in order: flip with probability 0.5, resize to 512x512, convert to
    a torch Tensor with probability 1. Boxes are handled according to
    bound_params.
    """
    steps = [
        A.Flip(p=0.5),       # p is the probability of performing the transform
        A.Resize(512, 512),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, bbox_params=bound_params)

# This actually ended up making it worse
# def train_transform_improved():
#     return A.Compose([
#     A.HorizontalFlip(p=0.5),
#     A.VerticalFlip(p=0.5),
#     A.OneOf([A.RandomContrast(),A.RandomGamma(),A.RandomBrightness()], p=1.0),
#     ToTensorV2(p=1.0)], bbox_params=bound_params)

def valid_transform():
    """
    Build the transform pipeline for validation data.

    Only converts the image to a torch Tensor (p=1); no augmentation and no
    resize. Boxes are handled according to bound_params.
    """
    to_tensor_only = [ToTensorV2(p=1.0)]
    return A.Compose(to_tensor_only, bbox_params=bound_params)

In [None]:
train_df.head()

In [None]:
# Testing dataset workflow for single image before defining Dataset class
t_image_id = 'b6ab77fd7'
t_image = cv2.imread(f'{TRAIN_DIR}/{t_image_id}.jpg', cv2.IMREAD_COLOR)
# cv2 loads BGR; convert to RGB, then scale the pixel values to [0, 1] floats
t_image = cv2.cvtColor(t_image, cv2.COLOR_BGR2RGB)
t_image = t_image.astype(np.float32) / 255.0

In [None]:
# All box rows belonging to the sample image
t_box_data = train_df[train_df['image_id'] == t_image_id]
t_boxes = t_box_data[bbox_cols].values
# [x, y, w, h] -> [xmin, ymin, xmax, ymax]: add the origin to width and height
t_boxes[:, 2:4] = t_boxes[:, 0:2] + t_boxes[:, 2:4]
t_boxes = t_boxes.astype(np.int32)

In [None]:
# Draw every ground-truth box on the sample image and show the result
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
for x_min, y_min, x_max, y_max in t_boxes:
    cv2.rectangle(t_image, (x_min, y_min), (x_max, y_max), (200, 0, 0), 3)
ax.set_axis_off()
ax.imshow(t_image)

In [None]:
# Run the training transform on the single test sample.
# train_transform_improved only exists as commented-out code, so calling it
# raised NameError; use the active train_transform instead.
t_sample = {'image': t_image, 'bboxes': t_boxes, 'labels': torch.ones((t_box_data.shape[0],), dtype=torch.int64)}
t_trans = train_transform()
trans_sample = t_trans(**t_sample)

In [None]:
trans_sample['bboxes'][0]

In [None]:
class WheatDataset(Dataset):
    """
    torch Dataset object for our raw data. Dataset subclasses must overwrite the following:
    __getitem__(): fetches a data sample for a given key
    __len__(): returns the size of the dataset

    Each sample is a tuple (image, target, image_id) where target is a dict
    with 'boxes' (float32 tensor of shape (N, 4) in [xmin, ymin, xmax, ymax]
    order), 'labels' (int64 tensor of N ones) and 'image_id' (tensor holding
    the sample index).
    """

    def __init__(self, df, directory, transforms=None):
        """
        Parameters:
            df: dataframe containing image_id, width, height, source, x, y, w, h
            directory: directory where image corresponding to image_id is stored
            transforms: transform function called as transforms(image=..., bboxes=..., labels=...)
        """
        super().__init__()

        self.image_ids = df['image_id'].unique()
        self.df = df
        self.dir = directory
        self.transforms = transforms

    def __len__(self):
        return int(self.image_ids.shape[0])

    @staticmethod
    def _to_box_tensor(bboxes):
        """Convert a sequence of 4-value boxes into a float32 tensor of shape (N, 4)."""
        # reshape keeps the second dimension at 4 even when there are no boxes
        return torch.as_tensor(np.asarray(bboxes, dtype=np.float32)).reshape(-1, 4)

    def __getitem__(self, index: int):
        image_id = self.image_ids[index]

        image_path = f'{self.dir}/{image_id}.jpg'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # cv2 reads images into BGR format, must convert to RGB for f-RCNN
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        # f-RCNN requires pixel values in [0,1]; the [C,H,W] layout is produced
        # by the ToTensorV2 step of the transforms
        image /= 255.0

        image_bbox_data = self.df[self.df['image_id'] == image_id]
        # np.array copies, so the in-place edits below never touch self.df
        bboxes = np.array(image_bbox_data[bbox_cols].values, dtype=np.float32)
        # f-RCNN requires bboxes in pascal_voc format: [xmin, ymin, xmax, ymax]
        bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
        bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]

        if self.transforms:
            transData = self.transforms(
                image=image,
                bboxes=bboxes,
                # plain Python list: one label per box, travels with the boxes
                labels=[1] * len(bboxes),
            )
            image = transData['image']
            bboxes = transData['bboxes']

        # Always hand back tensors so the training loop can call .to(device)
        boxes = self._to_box_tensor(bboxes)
        # we only have 1 class (wheat head) so label tensor is all ones; built
        # after the transform so its length always matches the surviving boxes
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([index])
        }

        return image, target, image_id
            

In [None]:
"""
Model creation. Using frcnn with resnet50 backbone that has been pretrained on
COCO dataset
"""

# get pretrained data from internet once then save to file for quicker use
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# torch.save(model, 'frcnn_pretrained.pth')
# model = torch.load('frcnn_pretrained.pth')

In [None]:
num_classes = 2 # wheat head + background (background required for fRCNN)
input_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace pretrained head with new, untrained fRCNN predictor
model.roi_heads.box_predictor = FastRCNNPredictor(input_features, num_classes)

In [None]:
model

# Model Training

Training process:
* Create train and valid WheatDatasets
* Create DataLoaders from Datasets
* Get training parameters as params in model.parameters() that require gradient
* Set up SGD optimizer w/ lr=5e-3, momentum=.9, decay=5e-4
* Set num epochs
* Do numEpochs iterations of standard PyTorch training loop
* Infer bboxes on validation data and show sample as before 
* Save model to disk

Standard PyTorch training loop. For image, targets, image_ids in training dataloader:
1. Zero the parameter gradients
2. Call the model to get loss
3. Backprop loss
4. Step the optimizer forward

In [None]:
def collate_fn(batch):
    """
    Transpose a batch of samples into per-field tuples.

    A list of (image, target, image_id) samples becomes
    (images, targets, image_ids), without stacking anything into one tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)

model.to(device)

train_dataset = WheatDataset(train_df, TRAIN_DIR, train_transform())
valid_dataset = WheatDataset(valid_df, TRAIN_DIR, valid_transform())
# train_dataset = WheatDataset(train_df, TRAIN_DIR, train_transform_improved())

# Shuffle the training set so SGD does not see the images in the same
# (file) order every epoch; validation order is left untouched.
train_dl = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=collate_fn)
valid_dl = DataLoader(dataset=valid_dataset, batch_size=8, num_workers=4, collate_fn=collate_fn)

# Only parameters that require gradients are handed to the optimizer
train_params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(train_params, lr=5e-3, momentum=.9, weight_decay=5e-4)

## Vanilla Pytorch Training Loop:

In [None]:
# from IPython.display import display, clear_output # for outputting loss on same line
# model.train()
# epochs = 30
# overall_epoch_losses = []
# for epoch in range(epochs):
#     epoch_losses = []
#     for imgs, targets, img_ids in train_dl:
#         imgs = list(image.to(device) for image in imgs)
#         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
# #         clear_output(wait=True)
#         optimizer.zero_grad()
#         loss_out = model(imgs, targets)
#         loss_total = sum([loss for loss in loss_out.values()])
#         loss_total.backward()
#         epoch_losses.append(loss_total.item())
# #         display(f'Batch Loss: {loss_total.item()}, epoch {epoch + 1} avg loss: {sum(epoch_losses)/len(epoch_losses)}')
#         print(f'Batch Loss: {loss_total.item()}, Epoch {epoch + 1} avg loss: {sum(epoch_losses)/len(epoch_losses)}')
#         optimizer.step()
#     overall_epoch_losses.append(sum(epoch_losses)/len(epoch_losses))
#     print(f'Epoch {epoch + 1} loss: {overall_epoch_losses[-1]}')
# print(f'Total loss: {sum(overall_epoch_losses)/len(overall_epoch_losses)}')

## Ax Training Loop With Hyperparameter Optimization:

In [None]:
def train(
    net: torch.nn.Module,
    train_loader: DataLoader,
    parameters: Dict[str, float],
    dtype: torch.dtype,
    device: torch.device,
) -> nn.Module:
    """
    Train a detection model on the provided data set.

    Args:
        net: initialized network; called as net(images, targets) and expected
            to return a dict of loss tensors
        train_loader: iterable yielding (images, targets, image_ids) batches
        parameters: dictionary containing parameters to be passed to the optimizer.
            - lr: default (0.001)
            - momentum: default (0.0)
            - weight_decay: default (0.0)
            - step_size: StepLR period in epochs, default (30)
            - gamma: StepLR decay factor, default (1.0, i.e. no decay)
            - num_epochs: default (1)
        dtype: torch dtype for the network parameters and the input images
        device: torch device
    Returns:
        nn.Module: the trained network (same object as `net`).
    """
    # Initialize network
    net.to(dtype=dtype, device=device)
    net.train()

    # Define optimizer and learning-rate schedule
    optimizer = torch.optim.SGD(
        [p for p in net.parameters() if p.requires_grad],
        lr=parameters.get("lr", 0.001),
        momentum=parameters.get("momentum", 0.0),
        weight_decay=parameters.get("weight_decay", 0.0),
    )
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=int(parameters.get("step_size", 30)),
        gamma=parameters.get("gamma", 1.0),  # default is no learning rate decay
    )
    # The search space may deliver this as a float; range() needs an int
    num_epochs = int(parameters.get("num_epochs", 1))

    # Train Network
    for _ in range(num_epochs):
        for imgs, targets, _img_ids in train_loader:
            # move data to proper dtype and device (targets keep their own
            # dtypes: labels must stay int64)
            imgs = [image.to(device=device, dtype=dtype) for image in imgs]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            loss_dict = net(imgs, targets)
            loss = sum(loss_dict.values())
            loss.backward()
            optimizer.step()
        # step_size is expressed in epochs, so the schedule advances once per
        # epoch rather than once per batch
        scheduler.step()
    return net


def calc_iou(trueBox, predBox):
    """
    Intersection-over-union of two boxes given as [xmin, ymin, xmax, ymax].

    Returns 0.0 when the boxes do not overlap or when the union has no area.
    """
    t_xmin, t_ymin, t_xmax, t_ymax = trueBox
    p_xmin, p_ymin, p_xmax, p_ymax = predBox

    # The intersection spans from the LARGER of the two minima to the SMALLER
    # of the two maxima (the original took min() of the minima, which inflated
    # the overlap)
    xdiff = min(t_xmax, p_xmax) - max(t_xmin, p_xmin)
    ydiff = min(t_ymax, p_ymax) - max(t_ymin, p_ymin)

    overlap_area = 0.0
    if (ydiff > 0) and (xdiff > 0):
        overlap_area = xdiff * ydiff

    t_area = (t_xmax - t_xmin) * (t_ymax - t_ymin)
    p_area = (p_xmax - p_xmin) * (p_ymax - p_ymin)
    union_area = t_area + p_area - overlap_area

    # Two degenerate (zero-area) boxes would otherwise divide by zero
    if union_area <= 0:
        return 0.0
    return overlap_area / union_area
    
    
def find_best(trues, predicted_box, threshold=0.5):
    """
    Index of the true box that overlaps predicted_box the most.

    Only true boxes whose IoU with predicted_box is strictly above threshold
    are eligible; ties go to the earliest index. Returns -1 when no true box
    is eligible.
    """
    scored = [(calc_iou(true_box, predicted_box), idx) for idx, true_box in enumerate(trues)]
    eligible = [pair for pair in scored if pair[0] > threshold]
    if not eligible:
        return -1
    # max() keeps the first of several equal maxima, i.e. the lowest index
    return max(eligible, key=lambda pair: pair[0])[1]
    
def calc_boxes_precision(sorted_preds, true_targets, threshold=0.5):
    """
    Precision tp / (tp + fp + fn) of one image at a single IoU threshold.

    Predictions are consumed in the given order; each one is matched to the
    best still-unmatched true box (see find_best), which is then removed.

    Args:
        sorted_preds: predicted boxes, best score first
        true_targets: ground-truth boxes
        threshold: IoU a match must exceed
    Returns:
        (precision, unmatched predictions, unmatched true boxes)
    """
    tp = 0  # true positives: predictions matched to a true box
    unmatched_preds = []  # false positives: predictions without a match

    for pred in sorted_preds:
        t_idx_best = find_best(true_targets, pred, threshold=threshold)
        if t_idx_best >= 0:
            tp += 1
            # a true box may only be matched once
            true_targets = np.delete(true_targets, t_idx_best, axis=0)
        else:
            unmatched_preds.append(pred)

    fp = len(unmatched_preds)
    fn = len(true_targets)  # false negatives: true boxes nobody matched
    total = tp + fp + fn
    # Convention chosen here: nothing to find and nothing predicted counts as
    # a perfect score (and avoids dividing by zero)
    prec = tp / total if total else 1.0
    return prec, unmatched_preds, true_targets
    
def calc_total_precision(sorted_preds, true_targets, thresholds=(0.5, 0.55, 0.6, 0.65, 0.7, 0.75)):
    """
    Mean of calc_boxes_precision over several IoU thresholds.

    Args:
        sorted_preds: predicted boxes, best score first
        true_targets: ground-truth boxes
        thresholds: IoU thresholds to average over (default 0.5 to 0.75 in
            steps of 0.05)
    Returns:
        float: average precision over the thresholds; 0.0 if none are given.
    """
    threshCount = len(thresholds)
    if threshCount == 0:
        return 0.0

    total_prec = 0.0
    for thresh in thresholds:
        thresh_prec, _, _ = calc_boxes_precision(sorted_preds, true_targets, threshold=thresh)
        total_prec += thresh_prec / threshCount

    return total_prec

def evaluate(
    net: nn.Module, data_loader: DataLoader, dtype: torch.dtype, device: torch.device
) -> float:
    """
    Compute the mean detection precision of a model on the provided dataset.

    For every image the predicted boxes are ordered by descending score and
    scored against the ground-truth boxes with calc_total_precision; the
    result is the mean over all images.

    Args:
        net: trained model; called as net(images) and expected to return one
            dict with 'scores' and 'boxes' per image
        data_loader: iterable yielding (images, targets, image_ids) batches
        dtype: torch dtype the images are cast to
        device: torch device
    Returns:
        float: mean per-image precision, 0.0 if the loader yields no images
    """
    net.eval()
    image_precisions = []
    with torch.no_grad():
        for imgs, targets, _img_ids in data_loader:
            # move data to proper dtype and device
            imgs = [image.to(device=device, dtype=dtype) for image in imgs]
            model_outputs = net(imgs)

            for output, target in zip(model_outputs, targets):
                scores = output['scores'].detach().cpu().numpy()
                boxes = output['boxes'].detach().cpu().numpy().astype(np.int32)
                boxes_true = torch.as_tensor(target['boxes']).cpu().numpy()

                # highest-scoring predictions get the first pick of true boxes
                sorted_pred_idx = np.argsort(scores)[::-1]
                sorted_boxes = boxes[sorted_pred_idx]

                image_precisions.append(calc_total_precision(sorted_boxes, boxes_true))

    # Averaging per image (not per batch) keeps a smaller last batch from
    # being over-weighted; an empty loader must not divide by zero
    if not image_precisions:
        return 0.0
    return float(np.mean(image_precisions))


In [None]:
def train_evaluate(paramaterization):
    """
    Evaluation function handed to Ax: train a fresh copy of the global `model`
    with the given hyperparameters and return its precision on `valid_dl`.

    Args:
        paramaterization: dict of hyperparameters for one trial (see train())
    Returns:
        float: validation precision as computed by evaluate()
    """
    # No global dtype is defined in the notebook; float32 matches the images
    # produced by WheatDataset
    dtype = torch.float
    # Deep-copy so every trial starts from the same initial weights
    net = copy.deepcopy(model)
    net = train(
        net=net,
        train_loader=train_dl,
        parameters=paramaterization,
        dtype=dtype,
        device=device
    )
    return evaluate(
        net=net,
        data_loader=valid_dl,
        dtype=dtype,
        device=device
    )

In [None]:
# Search space: lr, momentum and weight_decay are tuned, num_epochs is fixed.
# (The original fixed-parameter entry contained ", ," which is a syntax error.)
optimize_parameters = [
    {"name": "lr", "type": "range", "bounds": [1e-6, 0.4], "value_type": "float", "log_scale": True},
    {"name": "momentum", "type": "range", "bounds": [0.0, 1.0], "value_type": "float"},
    {"name": "weight_decay", "type": "range", "bounds": [0.0, 0.4], "value_type": "float"},
    {"name": "num_epochs", "type": "fixed", "value": 30, "value_type": "int"}
]

# NOTE(review): every trial runs a full train_evaluate with 30 epochs, so this
# cell is expensive; confirm the trial budget before running.
best_parameters, values, experiment, opt_model = optimize(
    parameters=optimize_parameters,
    evaluation_function=train_evaluate,
    objective_name='precision'
)

In [None]:
# metric_name must match the objective_name passed to optimize() ('precision')
render(plot_contour(model=opt_model, param_x='lr', param_y='momentum', metric_name='precision'))

In [None]:
# metric_name must match the objective_name passed to optimize() ('precision')
render(plot_contour(model=opt_model, param_x='lr', param_y='weight_decay', metric_name='precision'))

In [None]:
# metric_name must match the objective_name passed to optimize() ('precision')
render(plot_contour(model=opt_model, param_x='momentum', param_y='weight_decay', metric_name='precision'))

In [None]:
# One row holding the objective mean of every trial, in trial order
trial_means = [trial.objective_mean for trial in experiment.trials.values()]
best_objectives = np.array([trial_means])
# Running maximum: the best value seen up to each trial
running_best = np.maximum.accumulate(best_objectives, axis=1)
best_objective_plot = optimization_trace_single_method(
    y=running_best,
    title="Model performance vs. # of iterations",
    ylabel="Overall Model Precision"
)
render(best_objective_plot)

In [None]:
# Look up the arm whose recorded mean is the highest of the experiment
expData = experiment.fetch_data()
expDf = expData.df
is_best = expDf['mean'] == expDf['mean'].max()
best_arm_name = expDf.arm_name[is_best].iloc[0]  # first row wins on ties
best_arm = experiment.arms_by_name[best_arm_name]
best_arm

In [None]:
# Retrain on train + validation data. DataLoader.dataset already is the
# WheatDataset (it has no further .dataset attribute).
combined_dataset = torch.utils.data.ConcatDataset([
    train_dl.dataset,
    valid_dl.dataset
])

# collate_fn is required: every image has a different number of boxes, so the
# default collation cannot stack the targets.
combined_dl = DataLoader(combined_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)

In [None]:
# Train a fresh copy of the model with the best hyperparameters found by Ax.
# No global dtype exists in the notebook, so float32 is passed explicitly.
bayesOpt_model = train(
    net=copy.deepcopy(model),
    train_loader=combined_dl,
    parameters=best_arm.parameters,
    dtype=torch.float,
    device=device
)

In [None]:
# Save the model that was actually trained with the best hyperparameters;
# `model` itself is only ever deep-copied and still holds the untrained head.
torch.save(bayesOpt_model.state_dict(), 'fasterrcnn_resnet50_fpn_BAYESOPT.pth')

In [None]:
# images, targets, image_ids = next(iter(valid_data_loader))
# images = list(img.to(device) for img in images)
# targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
# boxes = targets[1]['boxes'].cpu().numpy().astype(np.int32)
# sample = images[1].permute(1,2,0).cpu().numpy()

# model.eval()
# outputs = model(images)
# outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]

In [None]:
# v_image_id = '33ea56e1c'
# v_image = cv2.imread(f'{TRAIN_DIR}/{v_image_id}.jpg', cv2.IMREAD_COLOR)
# v_image = cv2.cvtColor(v_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
# v_box_data = valid_df[valid_df['image_id']==v_image_id]
# v_boxes = v_box_data[bbox_cols].values
# v_boxes[:,2] = v_boxes[:,0] + v_boxes[:,2]
# v_boxes[:,3] = v_boxes[:,1] + v_boxes[:,3]
# v_boxes = v_boxes.astype(np.int32)

In [None]:
# fig, ax = plt.subplots(1,1,figsize=(16,8))
# for box in v_boxes:
#     cv2.rectangle(v_image, (box[0], box[1]), (box[2], box[3]), (200,0,0), 3)
# ax.set_axis_off()
# ax.imshow(v_image)