In [None]:
# Install every dependency from local wheel files under /kaggle/input rather than from a package index.
# The wheel names pin the versions: torch 1.5.0 / torchvision 0.6.0 (cu101, cp37), yacs 0.1.7,
# fvcore 0.1.1.post200513, pycocotools 2.0 and detectron2 0.1.3 (cu101, cp37).
!pip install '/kaggle/input/torch-15/torch-1.5.0cu101-cp37-cp37m-linux_x86_64.whl'
!pip install '/kaggle/input/torch-15/torchvision-0.6.0cu101-cp37-cp37m-linux_x86_64.whl'
!pip install '/kaggle/input/torch-15/yacs-0.1.7-py3-none-any.whl'
!pip install '/kaggle/input/torch-15/fvcore-0.1.1.post200513-py3-none-any.whl'
!pip install '/kaggle/input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl'
!pip install '/kaggle/input/detectron2/detectron2-0.1.3cu101-cp37-cp37m-linux_x86_64.whl'

In [None]:
import copy
import itertools
import logging
import os
import warnings

import cv2
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from PIL import ImageFile
from torch.utils.data import Dataset

import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
import detectron2.data.transforms as T
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data import DatasetMapper
from detectron2.data import build_detection_test_loader,build_detection_train_loader
from detectron2.data import detection_utils as utils
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import Visualizer,VisImage

# Data loading

In [None]:
class data(Dataset):
    """Image-folder dataset paired with a CSV of bounding-box annotations.

    Args:
        folder: directory containing the image files.
        csv_file: CSV with (at least) an ``image_id`` column holding the file
            name without extension and a ``bbox`` column holding a string of
            the form ``"[x, y, w, h]"``.

    Indexing returns a dict with the keys ``filename``, ``height``, ``width``,
    ``image_id`` (file name *with* extension), ``image`` (RGB array) and
    ``annotations`` (array with one ``[xmin, ymin, xmax, ymax, label]`` row per
    box; empty when the image has no rows in the CSV).
    """

    def __init__(self,folder,csv_file):
        # Keep the folder on the instance so __getitem__ does not depend on a
        # module-level variable that happens to share the name.
        self.folder=folder
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> image mapping (and any split based on it) reproducible.
        self.ids=sorted(os.listdir(folder))
        self.classes=["wheat"]
        self.num_classes=len(self.classes)
        self.num_images=len(self.ids)
        self.df=pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of files found in the image folder."""
        return self.num_images

    @staticmethod
    def _parse_bbox(raw, label):
        """Turn a ``"[x, y, w, h]"`` string into ``[xmin, ymin, xmax, ymax, label]``."""
        x, y, w, h = (float(value) for value in raw.strip("[]").split(","))
        return [x, y, x + w, y + h, label]

    def __getitem__(self,item):
        """Load image number ``item`` and its boxes.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file as an image.
        """
        image_name=self.ids[item]
        image_path=os.path.join(self.folder,image_name)
        image=cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: " + image_path)
        image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
        height,width=image.shape[0],image.shape[1]

        # The CSV stores ids without extension; splitext copes with extensions
        # of any length (".jpg", ".jpeg", ...).
        stem=os.path.splitext(image_name)[0]
        annotation=self.df[self.df["image_id"]==stem]
        label=0  # single-class problem: every box is "wheat"
        objects=[self._parse_bbox(raw,label) for raw in annotation["bbox"]]

        dicts={}
        dicts["filename"]=image_path
        dicts["height"]=height
        dicts["width"]=width
        dicts["image_id"]=image_name
        dicts["image"]=image
        dicts["annotations"]=np.array(objects)
        return dicts
            

In [None]:
# Annotation CSV and image folder of the training set (paths relative to the working directory).
csv_file="../input/global-wheat-detection/train.csv"
folder="../input/global-wheat-detection/train"
# Module-level dataset instance; later cells read `dataset` directly.
dataset=data(folder,csv_file)
# Last expression of the cell: shows how many files were found in `folder`.
len(dataset)


In [None]:
# Sanity check: draw the ground-truth boxes of one randomly chosen image.
item=np.random.choice(len(dataset),1,replace=False)
dicts=dataset[item[0]]
image=dicts["image"]
label=dicts["annotations"]
fig,ax=plt.subplots(figsize=(12,12))
ax.imshow(image.astype('uint8'))
# The loop variable is deliberately NOT called `data`: at module level that
# would rebind the `data` dataset class and break re-running earlier cells.
for box in label:
    xmin,ymin,xmax,ymax=box[0],box[1],box[2],box[3]
    rect = mpatches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                  fill=False, edgecolor="blue", linewidth=2)
    ax.add_patch(rect)

# Detectron2 data format

In [None]:
def get_wheat_dicts(mode):
    """Build the list of per-image records for one split of the module-level ``dataset``.

    ``mode == "train"`` selects every image except the last ten; any other
    value selects the last ten. Each record carries ``file_name``,
    ``image_id``, ``height``, ``width`` and an ``annotations`` list whose
    entries hold an integer XYXY box, a rectangular polygon built from that
    box, the category id and ``iscrowd`` set to 0.
    """
    total = len(dataset)
    split_at = total - 10
    if mode == "train":
        indices = range(0, split_at)
    else:
        indices = range(split_at, total)

    records = []
    for index in indices:
        sample = dataset[index]

        annotations = []
        for row in sample["annotations"]:
            # Coordinates are truncated to integers, the label is column 4.
            x0, y0, x1, y1 = int(row[0]), int(row[1]), int(row[2]), int(row[3])
            category = int(row[4])

            # Rectangle corners flattened to [x, y, x, y, ...].
            corners = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
            flat_polygon = list(itertools.chain.from_iterable(corners))

            annotations.append({
                "bbox": [x0, y0, x1, y1],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [flat_polygon],
                "category_id": category,
                "iscrowd": 0,
            })

        records.append({
            "file_name": sample["filename"],
            "image_id": sample["image_id"],
            "height": sample["height"],
            "width": sample["width"],
            "annotations": annotations,
        })
    return records

In [None]:
# Build the records of the non-"train" split once and keep them in `dataset_dicts`.
dataset_dicts=get_wheat_dicts(mode="val")


In [None]:
# Register both splits with detectron2 under the names "wheat_train" / "wheat_val".
for d in ["train","val"]:
    dataset_name = "wheat_" + d
    # Registering an existing name raises, so guard it to keep this cell
    # re-runnable without restarting the kernel.
    if dataset_name not in DatasetCatalog.list():
        # `d=d` binds the current split at definition time (avoids the
        # late-binding closure pitfall inside a loop).
        DatasetCatalog.register(dataset_name, lambda d=d: get_wheat_dicts(d))
    MetadataCatalog.get(dataset_name).set(thing_classes=dataset.classes)
wheat_metadata = MetadataCatalog.get("wheat_train")

# Mapper

In [None]:
class DatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format
    and maps it into the format used by the model.

    This is a custom version of Detectron2's DatasetMapper. The only difference
    is the transform list: in training mode a fixed set of colour, rotation,
    flip and crop augmentations is used; otherwise the transforms are built
    from the config via ``utils.build_transform_gen`` so that evaluation
    images are not randomly augmented.

    Args:
        cfg: detectron2 config node.
        is_train (bool): whether the mapper feeds a training loader.
    """

    def __init__(self, cfg, is_train=True):
        if cfg.INPUT.CROP.ENABLED and is_train:
            self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
            logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen))
        else:
            self.crop_gen = None

        if is_train:
            # Custom training-time augmentations.
            self.tfm_gens = [T.RandomBrightness(0.8, 1.8),
                             T.RandomContrast(0.6, 1.3),
                             T.RandomSaturation(0.8, 1.4),
                             # both bounds are 90 - presumably a fixed 90-degree rotation; TODO confirm
                             T.RandomRotation(angle=[90, 90]),
                             T.RandomLighting(0.7),
                             T.RandomFlip(prob=0.4, horizontal=False, vertical=True),
                             T.RandomCrop('relative_range', (0.4, 0.6))
                            ]
        else:
            # Random augmentation must not be applied at inference time;
            # fall back to the config-driven transforms.
            self.tfm_gens = utils.build_transform_gen(cfg, is_train)

        # fmt: off
        self.img_format     = cfg.INPUT.FORMAT
        self.mask_on        = cfg.MODEL.MASK_ON
        self.mask_format    = cfg.INPUT.MASK_FORMAT
        self.keypoint_on    = cfg.MODEL.KEYPOINT_ON
        self.load_proposals = cfg.MODEL.LOAD_PROPOSALS
        # fmt: on
        if self.keypoint_on and is_train:
            # Flip only makes sense in training
            self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
        else:
            self.keypoint_hflip_indices = None

        if self.load_proposals:
            self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
            self.proposal_topk = (
                cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
                if is_train
                else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
            )
        self.is_train = is_train

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
            )
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                # Prepend the crop so annotations go through the same sequence as the image.
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict

In [None]:
class WheatTrainer(DefaultTrainer):
    """DefaultTrainer variant whose data loaders use this notebook's ``DatasetMapper``."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Return the training loader, mapping samples in training mode."""
        return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, is_train=True))

    @classmethod
    def build_test_loader(cls, cfg, dataset_name):
        """Return the loader for ``dataset_name``, mapping samples in inference mode."""
        # is_train must be False here: the mapper defaults to training mode,
        # which would run the training branch on evaluation images.
        return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, is_train=False))

# Training

In [None]:
# Architecture to fine-tune; must be one of the keys of _MODEL_ZOO below.
MODEL_USE = 'faster_rcnn'

# architecture -> (model-zoo config file, local path of the initial weights)
_MODEL_ZOO = {
    # Alternative Faster R-CNN backbone that was tried:
    #   'COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml'
    #   '/kaggle/input/detectron2-faster-rcnn-101/model_final_f6e8b1.pkl'
    'faster_rcnn': ('COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml',
                    '/kaggle/input/detectron2modelszoo/faster-rcnn-X101-FPN.pkl'),
    # Previously pretrained on 10000 iterations
    'retinanet': ('COCO-Detection/retinanet_R_101_FPN_3x.yaml',
                  '/kaggle/input/detectron2-models-zoo/retinanet-R101.pkl'),
    'mask_rcnn': ('COCO-InstanceSegmentation/mask_rcnn_X_101_FPN_3x.yaml',
                  '/kaggle/input/detectron2-models-zoo/mask-rcnn-X101-FPN.pkl'),
    'cascade_mask_rcnn': ('Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml',
                          '/kaggle/input/detectron2modelszoo/cascade-rcnn.pkl'),
}

# Fail here with a clear message instead of leaving MODEL_PATH / WEIGHT_PATH
# undefined and hitting a NameError later in config_cfg().
if MODEL_USE not in _MODEL_ZOO:
    raise ValueError(
        "Unknown MODEL_USE %r; expected one of %s" % (MODEL_USE, sorted(_MODEL_ZOO))
    )
MODEL_PATH, WEIGHT_PATH = _MODEL_ZOO[MODEL_USE]

def config_cfg():
    """Build the training configuration.

    Starts from the model-zoo config named by ``MODEL_PATH``, points the
    initial weights at ``WEIGHT_PATH``, sets the class count from
    ``dataset.classes``, selects the ``wheat_train`` dataset and the solver
    settings, and makes sure the output directory exists.

    Returns:
        The populated detectron2 config node.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(MODEL_PATH))
    cfg.MODEL.WEIGHTS = WEIGHT_PATH # model_zoo.get_checkpoint_url(WEIGHT_PATH)
    # Both head types get the class count so the same function serves
    # RetinaNet as well as the R-CNN family.
    cfg.MODEL.RETINANET.NUM_CLASSES = len(dataset.classes)
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(dataset.classes)

    cfg.DATASETS.TRAIN = ("wheat_train",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 4

    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.LR_SCHEDULER_NAME = 'WarmupCosineLR'
    # The key is BASE_LR. The previous spelling "BASE_LS" only created an
    # unused entry, so the learning rate from the YAML file stayed in effect.
    cfg.SOLVER.BASE_LR = 0.0002
    cfg.SOLVER.MAX_ITER = 10000
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    return cfg


In [None]:
# Build the training config and run training.
cfg = config_cfg()
# NOTE(review): the custom WheatTrainer (and therefore the custom DatasetMapper) is
# disabled here; training runs with the stock DefaultTrainer.
#trainer = WheatTrainer(cfg)
trainer=DefaultTrainer(cfg)
# resume=False: presumably starts from cfg.MODEL.WEIGHTS instead of a checkpoint
# in the output directory - confirm against the detectron2 docs.
trainer.resume_or_load(resume=False)
trainer.train()

# Load the fine-tuned model for inference

In [None]:
# Fresh inference config: same architecture as training, but with the weights
# written to cfg.OUTPUT_DIR ("model_final.pth"). Note that this rebinds `cfg`.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MODEL_PATH))
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(dataset.classes)
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# Score thresholds are set for both head types; which one applies depends on MODEL_USE.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.RETINANET.NUM_CLASSES = len(dataset.classes)
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.45
cfg.DATASETS.TEST = ("wheat_val", )
# Module-level predictor used by visualize_predictions() and submit().
predictor = DefaultPredictor(cfg)

In [None]:
# Class names used by visualize_predictions() to label predicted boxes.
classes=dataset.classes
def visualize_predictions(input_folder):
    """Run the module-level ``predictor`` on every image in ``input_folder`` and plot the result.

    Each predicted box is drawn on the image together with a
    ``"<class> : <score>"`` label; one matplotlib figure is created per image.

    Args:
        input_folder: directory whose entries are read with ``cv2.imread``.

    Returns:
        None.
    """
    for name in os.listdir(input_folder):
        image_path=os.path.join(input_folder,name)
        im=cv2.imread(image_path)
        if im is None:
            # cv2.imread returns None for anything it cannot decode; passing
            # that to the predictor would fail with an unrelated error.
            warnings.warn("Skipping unreadable image: " + image_path)
            continue
        outputs = predictor(im)
        instances=outputs["instances"].to("cpu")
        results=instances.get_fields()
        boxes=np.array(results["pred_boxes"].tensor)
        scores=results['scores']
        pred_classes=results["pred_classes"]
        color = (0,0,0)
        for i in range(len(boxes)):
            xmin = int(boxes[i][0])
            ymin = int(boxes[i][1])
            xmax = int(boxes[i][2])
            ymax = int(boxes[i][3])
            caption = classes[pred_classes[i]] + ' : %.2f' % scores[i]
            cv2.rectangle(im, (xmin, ymin), (xmax, ymax), color, 4)
            text_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            # Filled background so the white caption stays readable.
            cv2.rectangle(im, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                          color, -1)
            cv2.putText(
                    im, caption,
                   (xmin, ymin + text_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1,
                   (255, 255, 255), 2)
        plt.figure(figsize = (12, 12))
        # OpenCV images are BGR while matplotlib expects RGB; convert for
        # display only so the colours are not swapped.
        plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    return

In [None]:
# Folder of test images; also read as a module-level name by submit().
input_folder="../input/global-wheat-detection/test"
visualize_predictions(input_folder)

In [None]:
def submit(folder=None):
    """Predict every image in a folder and assemble the submission table.

    Args:
        folder: directory of images to predict. Defaults to the module-level
            ``input_folder`` so existing ``submit()`` calls keep working.

    Returns:
        DataFrame with the columns ``image_id`` (file name without extension)
        and ``PredictionString`` (space-separated
        ``score xmin ymin width height`` groups, one group per predicted box;
        empty string when nothing is detected).
    """
    if folder is None:
        folder = input_folder

    rows=[]
    for name in os.listdir(folder):
        im=cv2.imread(os.path.join(folder,name))
        outputs = predictor(im)
        results=outputs["instances"].to("cpu").get_fields()
        boxes=np.array(results["pred_boxes"].tensor)
        scores=results['scores']

        fields=[]
        for box,score in zip(boxes,scores):
            # Box corners are truncated to integer pixels before the
            # width/height are derived from them.
            xmin,ymin,xmax,ymax = (int(value) for value in box)
            fields.extend([
                str(round(float(score),4)),
                str(xmin),
                str(ymin),
                str(xmax-xmin),
                str(ymax-ymin),
            ])

        rows.append({
            # splitext handles extensions of any length, unlike slicing off 4 characters.
            "image_id":os.path.splitext(name)[0],
            "PredictionString":" ".join(fields),
        })
    return pd.DataFrame(rows, columns=['image_id', 'PredictionString'])

In [None]:
# Run inference on the images in `input_folder` and collect the submission table.
predictions=submit()

In [None]:
# Display the submission table as the cell output.
predictions

In [None]:
# Write the submission file without the DataFrame index column.
predictions.to_csv('submission.csv', index=False)