In [1]:
# Specify device
# These environment variables are set before TensorFlow is imported; keep this
# order (presumably TensorFlow reads them while initialising the GPU runtime —
# TODO confirm against the TensorFlow GPU guide).
import os
# Expose only GPU index 0 to CUDA-based libraries in this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Ask TensorFlow to grow GPU memory on demand rather than reserving it up front.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"
import tensorflow as tf
#tf.compat.v1.disable_eager_execution()

In [2]:
import importlib
import cv2
import json
import os
import sys

# Change this to the root of your local nomeroff-net checkout.
# The directory is appended to sys.path so that the ``NomeroffNet`` package
# (imported dynamically inside ``compare``) can be resolved.
NOMEROFF_NET_DIR = os.path.abspath('/var/www/nomeroff-net')
sys.path.append(NOMEROFF_NET_DIR)

In [3]:
def _load_annotations(ann_dir):
    """Recursively read every annotation file found under ``ann_dir``.

    Returns:
        A pair ``(paths, records)`` of equally long lists: the path of each
        file and the object parsed from it with ``json.load``.
    """
    paths = []
    records = []
    for dir_name, _subdirs, file_names in os.walk(ann_dir):
        for file_name in file_names:
            # Join with the directory currently being walked, not with the
            # root, so that files located in subdirectories resolve correctly.
            path = os.path.join(dir_name, file_name)
            # Files are written back as UTF-8 below, so read them the same way.
            with open(path, encoding='utf8') as json_file:
                records.append(json.load(json_file))
            paths.append(path)
    return paths, records


def compare(model_path="../../../models/anpr_ocr_ua_2020_12_22_tensorflow_v2a_5.h5", 
           text_detector_name = "eu_ua_2004_2015",
           img_format = "png",
           root_dir='/var/www/nomeroff-net/datasets/ocr/ua/train',
           predicted_part_size=10000,
           acc_less_than = 0.7,
           replace_tamplate = {'moderation': {'isModerated': 1, 'moderatedBy': 'ApelSYN'}}):
    """Run an OCR text detector over a dataset and flag suspicious annotations.

    Every annotation JSON under ``<root_dir>/ann`` is loaded, the image
    ``<root_dir>/img/<name>.<img_format>`` is read for it, and the detector's
    prediction is stored in ``moderation.predicted``.  ``moderation.isModerated``
    is set to 1 when the prediction equals ``description`` (and passes the
    accuracy threshold, if enabled) and to 0 otherwise.  Each annotation file
    is rewritten IN PLACE.  A summary is printed; nothing is returned.

    Args:
        model_path: Path handed to the detector's ``load`` method.
        text_detector_name: Name of the module inside
            ``NomeroffNet.TextDetectors``; the class of the same name is
            instantiated from it.
        img_format: File extension (without dot) of the images.
        root_dir: Dataset root containing the ``ann`` and ``img`` directories.
        predicted_part_size: Number of images passed to ``predict`` per call.
        acc_less_than: A matching prediction is accepted only when the value
            returned by the detector's ``get_acc`` is greater than this
            threshold.  A value >= 1 disables the accuracy check altogether.
        replace_tamplate: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``predicted_part_size`` is smaller than 1.
        RuntimeError: If the detector returns a different number of
            predictions than there are annotations.
    """
    if predicted_part_size < 1:
        raise ValueError("predicted_part_size must be a positive integer")

    text_detector_module = importlib.import_module("NomeroffNet.TextDetectors."+text_detector_name)
    text_detector = getattr(text_detector_module, text_detector_name)()
    text_detector.load(model_path)

    jsons_paths, jsons = _load_annotations(os.path.join(root_dir, "ann"))
    print("LOADED {} ANNOTATIONS".format(len(jsons)))

    img_dir = os.path.join(root_dir, "img")
    imgs = []
    for json_data in jsons:
        img_path = os.path.join(img_dir, "{}.{}".format(json_data["name"], img_format))
        img = cv2.imread(img_path)
        if img is None:
            # imread does not raise on a missing/unreadable file; surface it
            # here so a later failure inside predict() can be traced back.
            print("WARNING: could not read image {}".format(img_path))
        imgs.append(img)
    print("LOADED {} IMAGES".format(len(imgs)))

    # A threshold of 1 or more means "do not evaluate accuracy at all".
    check_acc = acc_less_than < 1

    predicted = []
    accs      = []
    # Stepping through the list avoids scheduling an empty trailing batch when
    # the number of images is a multiple of the batch size (or zero).
    for part_idx, start in enumerate(range(0, len(imgs), predicted_part_size)):
        print("Processing part {} ...".format(part_idx))
        part_imgs = imgs[start:start + predicted_part_size]
        predicted_part, net_out_value_part = text_detector.predict(part_imgs, return_acc=True)
        predicted.extend(predicted_part)

        if check_acc:
            for _predicted, _net_out_value in zip(predicted_part, net_out_value_part):
                accs.append(text_detector.get_acc([_net_out_value], [_predicted]))
        else:
            # Accuracy was not computed; None marks that in the report below.
            accs.extend(None for _predicted in predicted_part)

    print("PREDICTED {} IMAGES".format(len(predicted)))

    if len(predicted) != len(jsons) or len(accs) != len(jsons):
        # Fail before touching any file rather than half-way through.
        raise RuntimeError("Detector returned {} predictions for {} annotations".format(
            len(predicted), len(jsons)))

    err_cnt = 0
    for json_path, json_data, predicted_item, acc in zip(jsons_paths, jsons, predicted, accs):
        # Annotations that were never moderated may lack the "moderation" object.
        moderation = json_data.setdefault("moderation", {})
        moderation["predicted"] = predicted_item

        is_match = json_data["description"] == predicted_item
        if is_match and (not check_acc or acc > acc_less_than):
            moderation["isModerated"] = 1
        else:
            print("Predicted '{}' with acc {}, real: '{}' in file {}".format(
                predicted_item,
                acc,
                json_data["description"],
                json_path))
            err_cnt += 1
            moderation["isModerated"] = 0
        with open(json_path, "w", encoding='utf8') as json_out:
            json.dump(json_data, json_out, ensure_ascii=False)

    print("Error detection count: {}".format(err_cnt))
    # Guard against an empty dataset instead of dividing by zero.
    accuracy = 1 - err_cnt / len(predicted) if predicted else float("nan")
    print("Accuracy: {}".format(accuracy))

In [4]:
# Run with the default model and dataset paths.
# NOTE: compare() rewrites every annotation JSON under <root_dir>/ann in place.
compare()

LOADED 104275 ANNOTATIONS
LOADED 104275 IMAGES
Processing part 0 ...
Processing part 1 ...
Processing part 2 ...
Processing part 3 ...
Processing part 4 ...
Processing part 5 ...
Processing part 6 ...
Processing part 7 ...
Processing part 8 ...
Processing part 9 ...
Processing part 10 ...
PREDICTED 104275 IMAGES
Predicted 'CB8980BX' with acc [[0.88469154]], real: 'CB8980BI' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/CB8980BI_3272.json
Predicted 'KA3256AB' with acc [[0.8262077]], real: 'KA3056AB' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/KA3096AB.json
Predicted 'AI2139IK' with acc [[0.6140448]], real: 'AI2139IK' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AI2139IK_35548.json
Predicted 'AA3584X' with acc [[0.96865714]], real: 'AA3584IX' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AA3584IX_69802.json
Predicted 'AH6790IK' with acc [[0.333175]], real: 'AH6790IK' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AH6790IK_6900.json
Pred

Predicted 'AH9989HHB' with acc [[-1.716671]], real: 'AH9989HB' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AH9989HB_4.json
Predicted 'BA6638BT' with acc [[0.5568584]], real: 'BA6638BT' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/BA6638BT_522.json
Predicted 'AA09701MC' with acc [[-14.8634615]], real: 'AA0970MC' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AA0970MC_74751.json
Predicted 'AT3008CH' with acc [[0.583333]], real: 'AT3008CH' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AT3008CH_0.json
Predicted 'KA1625AC' with acc [[0.824788]], real: 'KA1625AO' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/KA1625AO.json
Predicted 'AH7603KP' with acc [[0.855462]], real: 'AH7603KH' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AH7603KH_0.json
Predicted 'A8706TA' with acc [[0.73190963]], real: 'AA8706TA' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AA8706TA_70489.json
Predicted 'AE7429I' with acc [[0.33893824]], real: 'A

Predicted 'BA5262BM' with acc [[0.56356835]], real: 'BA5262BK' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/BA5262BK_65233.json
Predicted 'AC6065BK' with acc [[0.1537736]], real: 'AC6065BA' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AC6065BA_73622.json
Predicted 'AC9902CE' with acc [[0.48212653]], real: 'AC9902CE' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AC9902CE_41157.json
Predicted 'BH0649IAI' with acc [[-2.7807133]], real: 'BH0649AI' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/BH0649AI_53991.json
Predicted 'CE31537AK' with acc [[-14.440582]], real: 'CE3153AK' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/CE3153AK_20035.json
Predicted 'AH1253KT' with acc [[0.6943684]], real: 'AH1253KT' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AH1253KT_37730.json
Predicted 'BH6071CO' with acc [[0.70755684]], real: 'BH6077CO' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/BH6077CO_42043.json
Predicted 'BI1711CT' with a

Predicted 'AB8589COX' with acc [[-12.19175]], real: 'AB8589CX' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AB8589CX_47802.json
Predicted 'AT8835AB' with acc [[0.64584434]], real: 'AT8835AB' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AT8835AB_37300.json
Predicted 'ACA5509CH' with acc [[-17.984373]], real: 'CA5509CH' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/CA5509CH_17472.json
Predicted 'BC4740CI' with acc [[0.9832799]], real: 'BK4740CI' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/BK4740CI_18158.json
Predicted 'KA8000O' with acc [[0.55225015]], real: 'KA8000OO' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/KA8000CO.json
Predicted 'CE3004AM' with acc [[0.9931608]], real: 'CE3004AK' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/CE3004AK_32152.json
Predicted 'AX3333AA' with acc [[0.9076431]], real: 'AA3333AA' in file /var/www/nomeroff-net/datasets/ocr/ua/train/ann/AA3333AA_57623.json
Predicted 'AH3170KI' with acc [[0.87