In [1]:
# Specify device: expose only GPU 0 to CUDA and request on-demand GPU memory
# growth from TensorFlow. Presumably this has to run before any GPU library
# is imported, which is why it lives in the first cell.
import os

os.environ.update({
    "CUDA_VISIBLE_DEVICES": "0",
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
})

In [2]:
import importlib
import cv2
import json
import os
import sys

# Location of the nomeroff-net checkout -- change this to match your machine.
NOMEROFF_NET_DIR = os.path.abspath('/var/www/nomeroff-net')
# Guard the append so that re-running this cell does not keep adding the same
# directory to sys.path over and over.
if NOMEROFF_NET_DIR not in sys.path:
    sys.path.append(NOMEROFF_NET_DIR)

In [14]:
def _load_annotations(ann_dir):
    """Read every annotation file found under ``ann_dir`` (recursively).

    Returns:
        A pair ``(paths, annotations)`` of equal length where
        ``annotations[i]`` is the parsed JSON content of ``paths[i]``.
    """
    paths = []
    annotations = []
    for dir_name, _subdirs, file_names in os.walk(ann_dir):
        for file_name in file_names:
            # Join with the directory currently being walked (not the root),
            # so files inside nested folders resolve to their real location.
            path = os.path.join(dir_name, file_name)
            # Read as UTF-8 to match the encoding used when writing back.
            with open(path, encoding="utf8") as json_file:
                annotations.append(json.load(json_file))
            paths.append(path)
    return paths, annotations


def _load_images(img_dir, annotations, img_format):
    """Load the image ``<img_dir>/<annotation["name"]>.<img_format>`` for each annotation."""
    images = []
    for annotation in annotations:
        img_path = os.path.join(img_dir, "{}.{}".format(annotation["name"], img_format))
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside the detector.
            raise FileNotFoundError("Cannot read image {}".format(img_path))
        images.append(img)
    return images


def compare(model_path="../../../models/anpr_ocr_su_2020_11_25_tensorflow_v2.3.h5", 
           text_detector_name = "su",
           img_format = "png",
           root_dir='/home/www/nomeroff-net/datasets/ocr/su/train',
           predicted_part_size=300,
           acc_less_than = 0.7,
           replace_tamplate = {'moderation': {'isModerated': 1, 'moderatedBy': 'ApelSYN'}}):
    """Run an OCR text detector over a dataset and flag suspicious annotations.

    Every annotation JSON under ``<root_dir>/ann`` is paired with the image
    ``<root_dir>/img/<name>.<img_format>``. The detector's prediction is stored
    in ``moderation.predicted`` and ``moderation.isModerated`` is set to 1 when
    the prediction equals ``description`` and its accuracy score is above
    ``acc_less_than``, otherwise to 0 (and the mismatch is printed).
    Each annotation file is rewritten in place.

    Args:
        model_path: Path handed to the detector's ``load`` method.
        text_detector_name: Name of the module under
            ``NomeroffNet.TextDetectors`` and of the class inside it.
        img_format: File extension of the dataset images (without the dot).
        root_dir: Dataset directory containing ``ann`` and ``img`` folders.
        predicted_part_size: Number of images passed to ``predict`` per call.
        acc_less_than: Score threshold. When it is ``>= 1`` the score is not
            computed at all and no annotation can pass the check.
        replace_tamplate: Unused; kept (including its spelling) so existing
            callers keep working.

    Raises:
        ValueError: If ``predicted_part_size`` is smaller than 1.
        FileNotFoundError: If an image referenced by an annotation cannot be read.
    """
    if predicted_part_size < 1:
        raise ValueError("predicted_part_size must be a positive integer")

    text_detector_module = importlib.import_module("NomeroffNet.TextDetectors."+text_detector_name)
    text_detector = getattr(text_detector_module, text_detector_name)()
    text_detector.load(model_path)

    jsons_paths, jsons = _load_annotations(os.path.join(root_dir, "ann"))
    print("LOADED {} ANNOTATIONS".format(len(jsons)))

    imgs = _load_images(os.path.join(root_dir, "img"), jsons, img_format)
    print("LOADED {} IMAGES".format(len(imgs)))

    predicted = []
    accs = []
    # Step through the images in batches; range() with a step never yields an
    # empty trailing batch, unlike int(len / size) + 1 iterations.
    for part in range(0, len(imgs), predicted_part_size):
        part_imgs = imgs[part:part + predicted_part_size]
        predicted_part, net_out_value_part = text_detector.predict(part_imgs, return_acc=True)
        predicted += predicted_part

        if acc_less_than >= 1:
            # Threshold cannot be exceeded anyway: skip the score computation.
            accs += [1] * len(predicted_part)
            continue
        accs += [
            text_detector.get_acc([net_out_value], [predicted_text])
            for predicted_text, net_out_value in zip(predicted_part, net_out_value_part)
        ]
    print("PREDICTED {} IMAGES".format(len(predicted)))

    err_cnt = 0
    for i, json_path in enumerate(jsons_paths):
        jsonData = jsons[i]
        acc = accs[i]
        # Tolerate annotations that have no "moderation" section yet.
        moderation = jsonData.setdefault("moderation", {})
        moderation["predicted"] = predicted[i]

        if jsonData["description"] == moderation["predicted"] and acc > acc_less_than:
            moderation["isModerated"] = 1
        else:
            print("Predicted '{}' with acc {}, real: '{}' in file {}".format(
                moderation["predicted"],
                acc,
                jsonData["description"],
                json_path))
            err_cnt += 1
            moderation["isModerated"] = 0
        with open(json_path, "w", encoding='utf8') as jsonWF:
            json.dump(jsonData, jsonWF,  ensure_ascii=False)

    print("Error detection count: {}".format(err_cnt))
    # An empty dataset has no meaningful accuracy; avoid dividing by zero.
    accuracy = 1 - err_cnt / len(predicted) if predicted else float("nan")
    print("Accuracy: {}".format(accuracy))

In [15]:
compare()

LOADED 33505 ANNOTATIONS
LOADED 33505 IMAGES
PREDICTED 33505 IMAGES
Predicted 'Ш2794ХА' with acc [[0.4629029]], real: 'Ц2794ХА' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/121777_Ц2794ХА_1_1.json
Predicted 'Р7692ЛС' with acc [[0.6622969]], real: 'Р7692ЛС' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/58543_Р7692ОС_0_1.json
Predicted 'О221ЯТ' with acc [[0.5748838]], real: 'І0221ЯТ' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/140108_I0221ЯТ_1_1.json
Predicted '5555ІОВ' with acc [[-0.06373346]], real: '5555ІОВ' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/90816_5555ІОВ_1_1.json
Predicted 'Е1073Л' with acc [[0.9729601]], real: 'Е1073ТЛ' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/123066_Е1073ТЛ_0_1.json
Predicted 'О12358АП' with acc [[-12.225859]], real: 'О2358АП' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/22529_О2358АИ_1_1.json
Predicted 'Б8175ЧЧД' with acc [[-14.705119]], real: 'Б8175ЧД' in file /home/www/nomerof

Predicted 'Л5344ДО' with acc [[0.9909779]], real: 'Л5344ДЦ' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/108546_Л5344ДЦ_0_1.json
Predicted '64100СЖЕ' with acc [[-14.320002]], real: '6410СЖЕ' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/26487_6417СЖЕ_1_1.json
Predicted 'Е0498МИ' with acc [[0.72410244]], real: 'Б0498МИ' in file /home/www/nomeroff-net/datasets/ocr/su/train/ann/100753_Б0498МИ_0_1.json
Error detection count: 68
Accuracy: 0.9979704521713177
