In [None]:
# Copy the yolox-cots-models bundle from the input dataset into /kaggle/working,
# then change into its yolox-dep folder (the wheel installs below use ./ as
# their find-links directory).
%cp -r /kaggle/input/yolox-cots-models /kaggle/working/
%cd /kaggle/working/yolox-cots-models/yolox-dep

In [None]:
# Install the pinned dependency wheels from the current directory.
# `--no-index` stops pip from contacting a package index and `-f ./` makes it
# resolve requirements from the wheel files found here.
# NOTE(review): the cp37 wheels assume a Python 3.7 kernel -- confirm.
!pip install pip-21.3.1-py3-none-any.whl -f ./ --no-index
!pip install loguru-0.5.3-py3-none-any.whl -f ./ --no-index
!pip install ninja-1.10.2.3-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl -f ./ --no-index
!pip install onnx-1.8.1-cp37-cp37m-manylinux2010_x86_64.whl -f ./ --no-index
!pip install onnxruntime-1.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl -f ./ --no-index
!pip install onnxoptimizer-0.2.6-cp37-cp37m-manylinux2014_x86_64.whl -f ./ --no-index
!pip install thop-0.0.31.post2005241907-py3-none-any.whl -f ./ --no-index
!pip install tabulate-0.8.9-py3-none-any.whl -f ./ --no-index

In [None]:
# Install YOLOX
# Installs the requirements listed by the bundled YOLOX checkout, then installs
# the checkout itself in editable mode (`-e .`) with verbose output (`-v`).
# NOTE(review): unlike the wheel installs above, the requirements install has
# no `--no-index`, so it presumably needs either network access or already
# satisfied requirements -- confirm for offline runs.
%cd /kaggle/working/yolox-cots-models/YOLOX
!pip install -r requirements.txt
!pip install -v -e . 

In [None]:
import yolox

In [None]:
# Build and install the bundled cocoapi Python API, then import pycocotools so
# the cell fails immediately if the install did not produce an importable package.
%cd /kaggle/working/yolox-cots-models/yolox-dep/cocoapi/PythonAPI

!make
!make install
!python setup.py install
import pycocotools

In [None]:
from yolox.exp import get_exp
from yolox.utils import fuse_model, get_model_info, postprocess, vis

In [None]:
import argparse
import os,importlib
import time,glob,sys
import numpy as np
from loguru import logger
from tqdm import tqdm
import cv2

import torch

from yolox.data.data_augment import preproc

def make_parser():
    """Build the command-line parser used by the inference script.

    Returns:
        argparse.ArgumentParser: parser exposing the experiment/model names,
        input glob and weight directory, experiment file, output directory,
        checkpoint, device, test thresholds/size and the fp16/fuse/trt switches.
    """
    cli = argparse.ArgumentParser("YOLOX Demo!")
    register = cli.add_argument

    # The positional 'demo' argument (image / video / webcam) is disabled.
    register("-expn", "--experiment-name", type=str, default=None)
    register("-n", "--name", type=str, default=None, help="model name")

    # Input images and weight location.
    register(
        '--path',
        default='/kaggle/input/vinbigdata-512-image-dataset/vinbigdata/test/*png',
        help='path to images or video',
    )
    register('--wei_dir', default='YOLOX_outputs/yolox_weights/', help='weight location')
    register(
        '--save_result',
        action='store_true',
        help='whether to save the inference result of image/video',
    )

    # Experiment description file.
    register(
        "-f",
        "--exp_file",
        default="../input/yoloxvinbigdebug/yolox_s.py",
        type=str,
        help="",
    )
    # The help text below is Japanese, roughly "where the txt file is placed".
    register("-out", "--outdir", default=None, type=str, help="txtをおく")
    register("-c", "--ckpt", default=None, type=str, help="ckpt for eval")
    register(
        "--device",
        default="cpu",
        type=str,
        help="device to run our model, can either be cpu or gpu",
    )

    # Test-time thresholds and input resolution: (flag, default, type, help).
    numeric_options = (
        ("--conf", 0.001, float, "test conf"),
        ("--nms", 0.4, float, "test nms threshold"),
        ("--tsize", 640, int, "test img size"),
    )
    for flag, fallback, caster, text in numeric_options:
        register(flag, default=fallback, type=caster, help=text)

    # Boolean switches; each one is stored under the flag name without dashes.
    switches = (
        ("fp16", "Adopting mix precision evaluating."),
        ("fuse", "Fuse conv and bn for testing."),
        ("trt", "Using TensorRT model for testing."),
    )
    for key, text in switches:
        register(f"--{key}", dest=key, default=False, action="store_true", help=text)

    return cli


from torch.utils.data import  DataLoader, Dataset
class TestDataset(Dataset):
    """Dataset that reads image files from disk and runs YOLOX ``preproc`` on them.

    Args:
        image_paths: indexable collection of image file paths.
        imgsz: side length of the square target size handed to ``preproc``.
    """

    def __init__(self, image_paths, imgsz=384):
        self.image_paths = image_paths
        self.test_size = (imgsz, imgsz)

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load and preprocess the image at ``index``.

        Returns:
            tuple: ``(img, image_path, ratio)`` where ``img`` and ``ratio`` are
            the two values returned by ``preproc(image, self.test_size)``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        image_path = self.image_paths[index]
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside preproc.
        if image is None:
            raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
        img, ratio = preproc(image, self.test_size)

        return img, image_path, ratio


def _apply_test_overrides(exp, args):
    """Copy the CLI test-time overrides (conf / nms / tsize) onto ``exp``."""
    if args.conf is not None:
        exp.test_conf = args.conf
    if args.nms is not None:
        exp.nmsthre = args.nms
    if args.tsize is not None:
        exp.test_size = (args.tsize, args.tsize)


def main(exp, args):
    """Run batched inference over ``args.path`` and write detections to a text file.

    For every directory in the hard-coded ``exp_files`` list the experiment
    file ``yolox_s.py`` and the checkpoint ``best_ckpt.pth`` are loaded. Each
    detection is appended to ``/kaggle/working/inf.txt`` as one line:
    ``<image id> <class> <x1> <y1> <x2> <y2> <score>``.

    Args:
        exp: experiment object supplied by the caller. It only receives the
            CLI overrides; the models are built from experiments loaded from
            ``exp_files``.
        args: parsed namespace from ``make_parser``. The fields read here are
            ``conf``, ``nms``, ``tsize``, ``name``, ``fuse`` and ``path``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Keep the historical side effect on the caller's experiment object.
    _apply_test_overrides(exp, args)

    models = []
    out_files = []
    class_counts = []

    exp_files = ["/kaggle/input/yoloxvinbigdebug/"]

    for exp_file in exp_files:
        model_exp = get_exp(exp_file+"yolox_s.py", args.name)
        # Previously the overrides were applied only to the passed-in exp,
        # which was then replaced by this freshly loaded one.
        _apply_test_overrides(model_exp, args)

        model = model_exp.get_model()
        model.to(device)
        model.eval()
        ckpt_file = f"{exp_file}/best_ckpt.pth"
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        ckpt = torch.load(ckpt_file, map_location=device)
        model.load_state_dict(ckpt["model"])

        if args.fuse:
            logger.info("\tFusing model...")
            model = fuse_model(model)

        out_path = f'/kaggle/working/inf.txt'

        # Truncate / create the output file before results are appended.
        with open(out_path, 'w') as f:
            pass
        out_files.append(out_path)
        models.append(model)
        # postprocess needs the class count the model was built with; fall
        # back to the previously hard-coded 14 if the exp does not expose it.
        class_counts.append(getattr(model_exp, "num_classes", 14))

    test_dataset = TestDataset(image_paths = glob.glob(args.path), imgsz = args.tsize)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False,  num_workers=2, pin_memory=True)

    bar = tqdm(test_loader)
    with torch.no_grad():
        for images, paths, ratios in bar:
            images = images.to(device)

            # Only the un-flipped pass is enabled.
            for is_hflip in [0]:
                batch = images.flip(-1) if is_hflip else images

                for model, out_path, num_classes in zip(models, out_files, class_counts):
                    outputs = postprocess(model(batch), num_classes, args.conf, args.nms)

                    # One append-mode open per batch instead of one per image.
                    with open(out_path, 'a') as f:
                        for img_path, output, ratio in zip(paths, outputs, ratios):
                            # An image without detections is skipped.
                            if output is None:
                                continue

                            img_id = img_path.split('/')[-1].split(".")[0]

                            output = output.to("cpu").detach()
                            bboxes = output[:, 0:4]
                            # Undo the preprocessing resize so boxes are in
                            # source-image pixels.
                            bboxes /= ratio
                            cls = output[:, 6]
                            scores = output[:, 4] * output[:, 5]

                            for box, clas, score in zip(bboxes.numpy(), cls.numpy(), scores.numpy()):
                                x1, y1, x2, y2 = box
                                if is_hflip:
                                    # NOTE(review): un-flipping assumes a
                                    # 512 px wide source image -- confirm.
                                    f.write(f'{img_id} {int(clas)} {512-x2} {y1} {512-x1} {y2} {score}\n')
                                else:
                                    f.write(f'{img_id} {int(clas)} {x1} {y1} {x2} {y2} {score}\n')

#python /home/u094724e/vinbig/src/YOLOX/yolox_inf.py -f /home/u094724e/vinbig/YOLOX_outputs/exp001/yolox_s.py
if __name__ == "__main__":
    # parse_known_args() ignores arguments the parser does not define, so
    # extra kernel/launcher arguments in sys.argv do not abort the cell with
    # SystemExit the way parse_args() would.
    # NOTE(review): inside a Jupyter kernel sys.argv typically carries
    # `-f <connection file>`, which this parser reads as --exp_file; the
    # experiment file is therefore hard-coded below instead of using
    # args.exp_file -- confirm before wiring args.exp_file in.
    args, _unknown_args = make_parser().parse_known_args()
    exp = get_exp("/kaggle/input/yoloxvinbigdebug/yolox_s.py", args.name)

    main(exp, args)

In [None]:
import pandas as pd
# Test-set metadata; its `image_id`, `width` and `height` columns are used
# below to rescale the predicted boxes and to build the submission.
test_df = pd.read_csv("/kaggle/input/vinbigdata-512-image-dataset/vinbigdata/test.csv")
def post_process_bbox(cls_conf_bbox, width, height, base_size=512):
    """Rescale one detection to the target image size and convert it to strings.

    Args:
        cls_conf_bbox: sequence ``[class, confidence, x1, y1, x2, y2]`` whose
            box is expressed in a ``base_size`` x ``base_size`` image.
        width: width of the target image, used to scale x1 and x2.
        height: height of the target image, used to scale y1 and y2.
        base_size: side length of the square image the box refers to.
            Defaults to 512, the value that used to be hard-coded.

    Returns:
        list[str]: six strings. The class and the four scaled coordinates are
        truncated to integers; the confidence keeps its float formatting.
    """
    cls_conf = cls_conf_bbox[:2]
    box = cls_conf_bbox[2:]

    # x coordinates scale with the width, y coordinates with the height.
    scaled_box = [
        box[0] * width / base_size,
        box[1] * height / base_size,
        box[2] * width / base_size,
        box[3] * height / base_size,
    ]
    merged = list(np.concatenate([cls_conf, scaled_box]))

    # Index 1 is the confidence; every other entry is written as an integer.
    return [
        str(value) if idx == 1 else str(int(float(value)))
        for idx, value in enumerate(merged)
    ]


# Build the submission: one PredictionString per image, made of space-separated
# "class conf x1 y1 x2 y2" groups rescaled to the original image size.

# (width, height) per image id; the first row wins if an id is repeated,
# matching the previous `.values[0]` lookup.
size_by_id = {}
for _image_id, _width, _height in zip(test_df["image_id"], test_df["width"], test_df["height"]):
    size_by_id.setdefault(_image_id, (_width, _height))

# Detection tokens grouped by image id, in first-appearance order. Grouping by
# key also merges an id whose lines are not contiguous in the file, which
# previously produced duplicate rows after the merge below.
tokens_by_id = {}

with open("/kaggle/working/inf.txt") as f:
    for s_line in f:
        fields = s_line.strip().split(" ")
        if fields == [""]:
            continue  # skip blank lines

        img_id = fields[0]
        # Each line holds: class x1 y1 x2 y2 score.
        # Reorder it to: class score x1 y1 x2 y2.
        cls_bbox_conf = np.array(fields[1:]).astype(np.float64)
        cls_conf_bbox = cls_bbox_conf[[0, 5, 1, 2, 3, 4]]

        width, height = size_by_id[img_id]
        tokens_by_id.setdefault(img_id, []).extend(
            post_process_bbox(cls_conf_bbox, width, height)
        )

image_ids = list(tokens_by_id)
PredictionStrings = [" ".join(tokens) for tokens in tokens_by_id.values()]

pred_df = pd.DataFrame({'image_id': image_ids,
                        'PredictionString': PredictionStrings})
sub_df = pd.merge(test_df, pred_df, on='image_id', how='left')
# Images with no line in inf.txt get a placeholder prediction (presumably the
# "no finding" class -- confirm). Only the prediction column is filled, so
# missing values in other test_df columns are left untouched.
sub_df['PredictionString'] = sub_df['PredictionString'].fillna("14 1 0 0 1 1")

sub_df[['image_id', 'PredictionString']].to_csv('submission.csv', index=False)

In [None]:
# Preview the first rows of the merged submission frame.
sub_df.head()