In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# local project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import gc
from collections import defaultdict

import cv2
import numpy as np
import pandas as pd
import tqdm
import torch
from matplotlib import pyplot as plt
from pathlib import Path
from torch.utils import data
from torchvision import transforms
import torch.nn.functional as fnn
import torchvision.models as models

from detection import create_detection_model, DetectionDataset, Flip, PerspectiveTransform
from recognition import CRNN, RecognitionDataset, beam_search, LanguageModel
from detection_utils import PlateImageAdjuster, PlateImageExtractor
from recognition_utils import Resize, collate_fn_recognition_test
from classifier import ClassifierDataset

%matplotlib inline

## Detection

In [3]:
# Each entry is a (transform, key) pair; the key is presumably the sample field
# the transform is applied to -- confirm against DetectionDataset.
transformations = [
    (transform, 'image')
    for transform in (transforms.ToPILImage(), transforms.ToTensor())
]

test_dataset = DetectionDataset('data', transformations, 'test')

# No shuffling and no dropped tail batch: every test image is visited exactly
# once, in dataset order.
test_dataloader = data.DataLoader(
    dataset=test_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=4,
    collate_fn=DetectionDataset.collate_fn,
    pin_memory=True,
    drop_last=False,
)

In [4]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = create_detection_model()

# The checkpoint is deserialized onto the CPU first; the model is moved to
# `device` only after the weights have been loaded.
with Path('SGD_lr_3e-4_plateau_best.pth').open('rb') as fp:
    state_dict = torch.load(fp, map_location='cpu')

model.load_state_dict(state_dict)
model.to(device)
model.eval()  # inference mode
print('Detection model Loaded')

Detection model Loaded


In [5]:
THRESHOLD_MASK = 0.05  # mask values above this become 1 in the binary plate mask
THRESHOLD_BOX = 0.92   # only detections scoring above this are kept
MIN_BOX_AREA = 100     # boxes with a smaller area (in pixels) are considered useless

path_test_ocr = Path('test_ocr_data')
path_test_ocr.mkdir(parents=True, exist_ok=True)

normalizer = PlateImageAdjuster()
extractor = PlateImageExtractor()


def _save_plate_crops(image, mask, box, source_filename, index):
    """Write the two crops of one detected plate and return the plate file name.

    Two files are written into ``path_test_ocr``:

    * ``<stem>_<index><suffix>``      -- ``extractor(image, mask, box)`` passed
      through ``normalizer``;
    * ``<stem>_<index>_bbox<suffix>`` -- the plain bounding-box crop of ``image``
      passed through ``normalizer``.

    Args:
        image: H x W x C numpy array of the full source image.
        mask: binary mask of the detection.
        box: four box coordinates; ``box[0]``/``box[2]`` index columns and
            ``box[1]``/``box[3]`` index rows of ``image``.
        source_filename: file name of the source image; supplies stem and suffix.
        index: running number of the plate within the source image.

    Returns:
        The file name (not the full path) of the extracted plate image.
    """
    path = Path(source_filename)

    plate_image = normalizer(extractor(image, mask, box))
    plate_file_name = '{}_{}{}'.format(path.stem, index, path.suffix)
    cv2.imwrite(str(path_test_ocr / plate_file_name), plate_image)

    # Save the plain bounding-box crop next to the mask-based one.
    bbox_image = image[int(box[1]):int(box[3]), int(box[0]):int(box[2])]
    bbox_image = normalizer(bbox_image)
    bbox_image_file_name = '{}_{}_bbox{}'.format(path.stem, index, path.suffix)
    cv2.imwrite(str(path_test_ocr / bbox_image_file_name), bbox_image)

    return plate_file_name


test_plates_filenames = []

for batch in tqdm.tqdm(test_dataloader):
    images = [image.to(device) for image in batch[0]]
    filenames = [target['filename'] for target in batch[1]]
    with torch.no_grad():
        preds = model(images)

    for pred, image_tensor, filename in zip(preds, images, filenames):
        ps = pred['scores'].detach().cpu().numpy()
        boxes = pred['boxes'].detach().cpu().numpy()
        masks = (pred['masks'].detach().cpu().squeeze(1).numpy() > THRESHOLD_MASK).astype(np.uint8)
        # C x H x W tensor -> H x W x C array scaled by 255.
        image = image_tensor.cpu().permute(1, 2, 0).numpy() * 255

        if len(ps) == 0:
            # Nothing was detected at all, so there is no "best" candidate to
            # fall back to (np.argmax would raise on an empty array).
            tqdm.tqdm.write('No detections for {}; no plate crop saved'.format(filename))
            continue

        # Order detections left to right (by the left edge of the box) so the
        # plate index follows the reading order within the image.
        sorted_triads = sorted(zip(ps, boxes, masks), key=lambda x: x[1][0])
        n = 0
        for p, box, mask in sorted_triads:
            if not p > THRESHOLD_BOX:
                continue
            # Too small images are useless
            if (box[2] - box[0]) * (box[3] - box[1]) < MIN_BOX_AREA:
                continue
            test_plates_filenames.append(_save_plate_crops(image, mask, box, filename, n))
            n += 1

        if n == 0:
            # No detection passed the filters: keep the highest-scoring one so
            # that every image with detections yields at least one plate crop.
            # `boxes[j]` (not the stale inner-loop `box`) is the matching box.
            j = np.argmax(ps)
            test_plates_filenames.append(_save_plate_crops(image, masks[j], boxes[j], filename, n))

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)
100%|██████████| 1594/1594 [06:14<00:00,  4.25it/s]


In [6]:
# Persist the names of the extracted plate crops next to the crops themselves.
plates_index_path = path_test_ocr / 'test_plates_filenames.json'
with plates_index_path.open('w') as f:
    json.dump(test_plates_filenames, f)

In [5]:
# Drop the references to the detection model and its last predictions, then
# run the garbage collector and release cached CUDA memory before the
# recognition stage.
model = preds = None
gc.collect()
torch.cuda.empty_cache()

## Recognition

In [20]:
crnn = CRNN(rnn_bidirectional=True)

batch_size = 64
num_workers = 4

# Per-channel normalisation constants (the standard ImageNet statistics).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

transformations = transforms.Compose([
    Resize(),
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

test_ocr_dataset = RecognitionDataset('test_ocr_data', transformations, crnn.alphabet, 'test')

# Deterministic order and no dropped tail batch, so every plate crop is read
# exactly once.
test_ocr_dataloader = data.DataLoader(
    dataset=test_ocr_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    collate_fn=collate_fn_recognition_test,
    pin_memory=True,
    drop_last=False,
)

In [21]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The checkpoint is deserialized onto the CPU first; the model is moved to
# `device` only after the weights have been loaded.
with Path('Recognition_model_with_generated_test.pth').open('rb') as fp:
    state_dict = torch.load(fp, map_location='cpu')

crnn.load_state_dict(state_dict)
crnn.to(device)
crnn.eval()  # inference mode
print('Recognition model Loaded')

Recognition model Loaded


In [22]:
# These lists are not filled by this cell; they are kept so that any code
# relying on the names still finds them defined.
filenames_list = []
text_pred = []
text_conf = []
confidence = []
max_prob = []
min_prob = []

# Maps 'test/<source image name>' -> {plate index: (decoded text, confidence)}.
submission_preds = {}
lm = LanguageModel()

for batch in tqdm.tqdm(test_ocr_dataloader):
    with torch.no_grad():
        preds = crnn(batch['image'].to(device))
        preds_bbox = crnn(batch['image_bbox'].to(device))
    # Combine the two views of each plate (mask-based crop and plain
    # bounding-box crop) by summing the network outputs before the softmax.
    preds = preds + preds_bbox
    probs = fnn.softmax(preds, dim=2)

    # Swap the first two axes once and reuse the result below.
    # NOTE(review): assumes the CRNN output is (sequence, batch, classes) -- confirm.
    probs_batch_first = probs.permute(1, 0, 2)

    preds_with_confidence = [
        beam_search(pred, crnn.alphabet, beam_width=20, lm=lm, alpha=0.3, beta=4)
        for pred in probs_batch_first.detach().cpu().numpy()
    ]
    texts_pred = [a[0] for a in preds_with_confidence]
    # One score per sample: std over the last axis, averaged over the
    # remaining one.
    batch_confidence = probs_batch_first.std(dim=2).mean(dim=1).tolist()

    filenames = batch['file_name']
    for filename, text, conf_score in zip(filenames, texts_pred, batch_confidence):
        # Crop stems look like '<source stem>_<plate index>'. Split on the LAST
        # underscore only, so source stems that themselves contain underscores
        # do not break the unpacking.
        source_stem, num = filename.stem.rsplit('_', 1)
        test_file_name = ''.join(['test/', source_stem, filename.suffix])
        submission_preds.setdefault(test_file_name, {})[int(num)] = (text, conf_score)

100%|██████████| 55/55 [05:56<00:00,  6.47s/it]


In [23]:
# Plates whose confidence score is not above this value are dropped when an
# image has several candidate plates.
CONF_THRESHOLD = 0.201

# Maps 'test/<image name>' -> space-separated plate texts, ordered by plate
# index. Images without any prediction resolve to '' via the defaultdict.
submission_dict = defaultdict(str)
for key, plates in submission_preds.items():
    sorted_keys = sorted(plates)
    confident_texts = [plates[k][0] for k in sorted_keys if plates[k][1] > CONF_THRESHOLD]
    if not confident_texts:
        # Nothing passed the threshold (this also covers the single-plate case,
        # which is always kept): fall back to the most confident plate instead
        # of emitting an empty prediction for the image.
        best_key = max(sorted_keys, key=lambda k: plates[k][1])
        confident_texts = [plates[best_key][0]]
    submission_dict[key] = ' '.join(confident_texts)

In [24]:
# Start from the submission template and look up the prediction for every
# listed file; files without a prediction get '' from the defaultdict.
submission = pd.read_csv('submission.csv')
submission['plates_string'] = submission['file_name'].apply(
    lambda name: submission_dict[name]
)
submission

Unnamed: 0,file_name,plates_string
0,test/0.jpg,O195KC96
1,test/1.jpg,O001OO24 O005OO29
2,test/2.jpg,H030MB33
3,test/3.jpg,C139AP96
4,test/4.bmp,B955ET35
...,...,...
3183,test/3183.jpg,M143ME27
3184,test/3184.jpg,X411AX01 P481HC93
3185,test/3185.bmp,B692KT35
3186,test/3186.bmp,A184XE38


In [25]:
# Sanity check: rows whose prediction is an empty string.
submission[submission['plates_string'].eq('')]

Unnamed: 0,file_name,plates_string


In [26]:
# Sanity check: rows whose prediction is missing (NaN).
submission[submission['plates_string'].isnull()]

Unnamed: 0,file_name,plates_string


In [31]:
# Write the final predictions; the DataFrame index is not exported.
submission_path = 'bidir_lm_with_conf_normalized.csv'
submission.to_csv(submission_path, index=False)