In [None]:
import gc
import os
from pathlib import Path
import random
import sys
import json
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import scipy as sp


import matplotlib.pyplot as plt
import seaborn as sns

from IPython.core.display import display, HTML

# --- plotly ---
from plotly import tools, subplots
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
import plotly.io as pio
pio.templates.default = "plotly_dark"

# --- models ---
from sklearn import preprocessing
from sklearn.model_selection import KFold
import lightgbm as lgb
import xgboost as xgb
import catboost as cb

# --- setup ---
# Use the fully qualified option name. The bare 'max_columns' pattern is
# ambiguous in newer pandas releases (it also matches
# 'styler.render.max_columns') and raises OptionError there.
pd.set_option('display.max_columns', 50)

In [None]:
# Parse the COCO-Text v2 annotation file into `imgs_anns`.
json_file = os.path.join(
    "/kaggle/input/coco-text-data-train-2014/cocotext.v2",
    "cocotext.v2.json",
)
with open(json_file) as f:
    imgs_anns = json.loads(f.read())

In [None]:
# Environment check: shell out to nvidia-smi to show the GPU/driver state.
!nvidia-smi

In [None]:
# Environment check: print the CUDA compiler version available on this machine.
!nvcc --version

In [None]:
import torch

# Display the installed PyTorch version. The detectron2 wheel index used in the
# install cell is the cu110/torch1.7 one — presumably the versions must match;
# TODO confirm against the output of this cell.
torch.__version__

In [None]:
!pip install detectron2 -f \
  https://dl.fbaipublicfiles.com/detectron2/wheels/cu110/torch1.7/index.html

In [None]:
# You may need to restart your runtime prior to this, to let your installation take effect
# Some basic setup
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import matplotlib.pyplot as plt
import numpy as np
import cv2
from detectron2 import model_zoo

# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [None]:
# Preview a single training image.
plt.figure(figsize=(16,10))
im = cv2.imread("/kaggle/input/coco-text-data-train-2014/train2014/train2014/COCO_train2014_000000483569.jpg")
# cv2.imread signals an unreadable file by returning None instead of raising.
if im is None:
    raise FileNotFoundError("Could not read the preview image; check the dataset path")
# OpenCV decodes to BGR while matplotlib expects RGB, so reverse the channel
# axis for display (`im` itself is left in BGR).
plt.imshow(im[:, :, ::-1])

In [None]:
# Map image id -> file name for every image whose 'set' is 'train'.
image_info = {
    img_id: meta['file_name']
    for img_id, meta in imgs_anns['imgs'].items()
    if meta['set'] == 'train'
}

In [None]:
# Group annotations by training image. Each value is a list whose first
# element is {'file_name': ...}, followed by that image's annotation dicts.
image_info2 = {}
for ann in imgs_anns['anns'].values():
    k = str(ann['image_id'])
    if k not in image_info:
        # Annotation belongs to an image outside the training split.
        continue
    image_info2.setdefault(k, [{'file_name': image_info[k]}]).append(ann)

In [None]:
from detectron2.structures import BoxMode

def get_cvpr_dicts(img_dir,imgs_anns,type1):
    """Build a list of per-image dataset records from grouped annotations.

    Args:
        img_dir: Directory that contains the image files.
        imgs_anns: Mapping of image id -> list whose first element is a dict
            with a "file_name" key and whose remaining elements are annotation
            dicts providing "bbox" (read as [x, y, width, height]) and "mask".
        type1: Unused by this function; kept so existing callers keep working.

    Returns:
        A list with one dict per image containing "file_name", "image_id",
        "height", "width" and "annotations". Every annotation carries an
        XYXY_ABS box, the mask wrapped in a one-element list as
        "segmentation", and category id 0.

    Raises:
        FileNotFoundError: If an image file is missing or cannot be decoded.
    """
    dataset_dicts = []
    for idx in imgs_anns:
        entries = imgs_anns[idx]
        filename = os.path.join(img_dir, entries[0]["file_name"])

        # cv2.imread returns None (rather than raising) for unreadable files;
        # fail with the offending path instead of an AttributeError on None.
        image = cv2.imread(filename)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {filename}")
        height, width = image.shape[:2]

        objs = []
        for anno in entries[1:]:
            bbox = anno['bbox']
            # Convert [x, y, w, h] into absolute corner coordinates.
            x1, y1 = bbox[0], bbox[1]
            x2, y2 = bbox[0] + bbox[2], bbox[1] + bbox[3]
            objs.append({
                "bbox": [x1, y1, x2, y2],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [anno['mask']],
                "category_id": 0,
            })

        dataset_dicts.append({
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": objs,
        })
    return dataset_dicts

In [None]:
for d in ["train"]:
    # Registering a name twice raises, which breaks re-running this cell in a
    # live kernel, so only register when the name is not in the catalog yet.
    # The lambda looks up `image_info2` at call time, so an existing
    # registration still sees the current data.
    if "cvpr_"+d not in DatasetCatalog.list():
        DatasetCatalog.register("cvpr_"+d, lambda d=d: get_cvpr_dicts("/kaggle/input/coco-text-data-train-2014/train2014/train2014/",image_info2,d))
    MetadataCatalog.get("cvpr_"+d).set(thing_classes=["text"])
cvpr_metadata = MetadataCatalog.get("cvpr_train")

In [None]:
# Positional indexes 0..N-1, one per grouped training image.
training_indexes = [*range(len(image_info2))]

In [None]:
import random
# Draw the ground-truth annotations of the first three training records.
dataset_dicts = get_cvpr_dicts("/kaggle/input/coco-text-data-train-2014/train2014/train2014/",image_info2,"train")
fig, axes = plt.subplots(3, 1, figsize=(30, 30))
axes = axes.flatten()
for index, anom_ind in enumerate(training_indexes[:3]):
    ax = axes[index]
    d = dataset_dicts[anom_ind]
    img = cv2.imread(d["file_name"])
    # Visualizer expects RGB input, hence the channel reversal of the BGR image.
    visualizer = Visualizer(img[:, :, ::-1], metadata=cvpr_metadata, scale=1.2)
    out = visualizer.draw_dataset_dict(d)
    # get_image() already returns RGB, which is what matplotlib expects;
    # reversing the channels again would display swapped colours.
    ax.imshow(out.get_image())

In [None]:
from detectron2.engine import DefaultTrainer

# Create the config object that the following cells customise for training
# and inference.
cfg = get_cfg()

In [None]:
# Configure and run fine-tuning of the model-zoo Faster R-CNN (R50-FPN, 3x)
# on the registered "cvpr_train" dataset.
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("cvpr_train",)
cfg.DATASETS.TEST = ()  # no evaluation dataset is configured
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 2000   # number of training iterations; NOTE(review): confirm this is enough for this dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # smaller than the default of 512, for faster training
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (text). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config is the number of classes; a few popular unofficial tutorials incorrectly use num_classes+1 here.

# Make sure the output directory exists before the trainer writes to it.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)  # resume=False: do not resume from a previous run
trainer.train()

In [None]:
# Reuse the training config for inference: point it at the weights file in the
# output directory and build a predictor from it.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [None]:
import pandas as pd
# Read the challenge annotations and collect each distinct image name once,
# in order of first appearance.
data_annotations = pd.read_csv("/kaggle/input/cvpr-pricing-challenge/annotations.csv")
unique_images = [name for name in data_annotations['img_name'].unique()]

In [None]:
# Run the trained predictor on three randomly chosen challenge images and
# draw the predicted instances.
testing_indexes=list(range(len(unique_images)))
from detectron2.utils.visualizer import ColorMode
fig, axes = plt.subplots(3, 1, figsize=(60, 60))
axes = axes.flatten()
for index, anom_ind in enumerate(random.sample(testing_indexes, 3)):
    ax = axes[index]
    d = unique_images[anom_ind]
    img = cv2.imread("/kaggle/input/cvpr-pricing-challenge/images/"+d)
    outputs = predictor(img)
    # Visualizer expects RGB input, hence the channel reversal of the BGR image.
    # NOTE(review): IMAGE_BW is described as a segmentation-only option, but
    # the config used here is a box detector — confirm the mode is intended.
    v = Visualizer(img[:, :, ::-1],
                   MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), 
                   scale=1.2, 
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # get_image() already returns RGB, which is what matplotlib expects;
    # reversing the channels again would display swapped colours.
    ax.imshow(out.get_image())