In [1]:
# Device selection: expose only GPU 0 to CUDA and ask TensorFlow to grow its
# GPU memory allocation on demand. Must run before TensorFlow is imported.
import os
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "0",
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
})

In [2]:
import importlib
import cv2
import json
import os
import sys

# change this property
NOMEROFF_NET_DIR = os.path.abspath('/var/www/nomeroff-net23')
# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if NOMEROFF_NET_DIR not in sys.path:
    sys.path.append(NOMEROFF_NET_DIR)

In [3]:
def compare(model_path="/var/www/nomeroff-net23/models/anpr_ocr_by_2021_08_28_tensorflow_v2.3_12.h5", 
           text_detector_name = "by",
           img_format = "png",
           root_dir='/var/www/nomeroff-net/datasets/ocr/by/train',
           predicted_part_size=10000,
           acc_less_than = 0.7,
           replace_tamplate = {'moderation': {'isModerated': 1, 'moderatedBy': 'ApelSYN'}}):
    """Re-check OCR annotations against a text detector and rewrite them in place.

    Steps:
      1. Import ``NomeroffNet.TextDetectors.<text_detector_name>``, take the
         attribute of the same name from it and call ``load(model_path)``.
      2. Read every file found under ``<root_dir>/ann`` (recursively) as JSON.
      3. Read ``<root_dir>/img/<name>.<img_format>`` for each annotation; images
         whose first dimension exceeds 64 are resized to ``(128, 64)``.
         Annotations whose image cannot be read are reported and skipped.
      4. Run ``predict`` in batches of ``predicted_part_size`` images.
      5. Store the prediction in ``moderation.predicted``. When it equals
         ``description`` and its confidence is greater than ``acc_less_than``,
         set ``moderation.isModerated`` to 1. Otherwise print the mismatch and,
         unless ``moderation.moderatedBy`` is "Oleg Cherniy" or "ApelSYNML",
         count an error and set ``moderation.isModerated`` to 0.
      6. Write every processed annotation back to its file (UTF-8) and print
         the error count and ``1 - errors / predictions``.

    Parameters:
        model_path: path passed to the detector's ``load``.
        text_detector_name: module name inside ``NomeroffNet.TextDetectors``
            and name of the detector attribute in that module.
        img_format: file extension (without dot) of the images.
        root_dir: dataset directory containing ``ann`` and ``img``.
        predicted_part_size: number of images per ``predict`` call (>= 1).
        acc_less_than: confidence threshold. When it is >= 1 the confidence is
            not computed and a placeholder of 1 is stored; that placeholder
            never exceeds the threshold, so every item takes the mismatch branch.
        replace_tamplate: currently unused; kept for interface compatibility.

    Returns:
        None. Results are printed and annotation files are rewritten.

    Raises:
        ValueError: if ``predicted_part_size`` is smaller than 1.
    """
    if predicted_part_size < 1:
        raise ValueError("predicted_part_size must be >= 1")

    text_detector_module = importlib.import_module("NomeroffNet.TextDetectors."+text_detector_name)
    text_detector = getattr(text_detector_module, text_detector_name)
    text_detector.load(model_path)

    # --- load annotations -------------------------------------------------
    ann_dir = os.path.join(root_dir, "ann")
    annotations = []  # (json_path, jsonData) pairs
    for dirName, _subdirList, fileList in os.walk(ann_dir):
        for fname in fileList:
            # Join with the directory being visited, not with ann_dir, so that
            # files located in subdirectories resolve to their real path.
            json_path = os.path.join(dirName, fname)
            # Files are written back as UTF-8 below, so read them the same way.
            with open(json_path, encoding='utf8') as jsonF:
                jsonData = json.load(jsonF)
            jsonData.setdefault("moderation", {}).setdefault("predicted", "")
            annotations.append((json_path, jsonData))
    print("LOADED {} ANNOTATIONS".format(len(annotations)))

    # --- load images ------------------------------------------------------
    # jsons_paths, jsons and imgs are filled together so index i always refers
    # to the same sample, even when some annotations are skipped.
    img_dir = os.path.join(root_dir, "img")
    dim = (128, 64)
    jsons_paths = []
    jsons = []
    imgs = []
    for json_path, jsonData in annotations:
        img_path = os.path.join(img_dir, "{}.{}".format(jsonData["name"], img_format))
        img = cv2.imread(img_path)
        if img is None:
            # imread signals failure by returning None; skip the sample
            # instead of crashing on img.shape.
            print("Could not read image {}, skipping {}".format(img_path, json_path))
            continue
        if img.shape[0] > 64:
            img_crop = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
        else:
            img_crop = img
        if len(imgs) % 1000 == 0:
            print("Loaded {} photos".format(len(imgs)))
        jsons_paths.append(json_path)
        jsons.append(jsonData)
        imgs.append(img_crop)
    print("LOADED {} IMAGES".format(len(imgs)))

    # --- predict in batches -----------------------------------------------
    predicted = []
    accs      = []
    # Stepping by the batch size never yields an empty trailing batch, unlike
    # int(len / size) + 1 when the count is an exact multiple of the size.
    for part in range(0, len(imgs), predicted_part_size):
        part_imgs = imgs[part:part+predicted_part_size]
        predicted_part, net_out_value_part = text_detector.predict(part_imgs, return_acc=True)
        predicted += predicted_part

        if acc_less_than >= 1:
            # Confidence is not processed: store a placeholder per prediction.
            accs += [1 for _predicted in predicted_part]
            continue
        accs += [text_detector.get_acc([_net_out_value], [_predicted])
                 for _predicted, _net_out_value in zip(predicted_part, net_out_value_part)]

    print("PREDICTED {} IMAGES".format(len(predicted)))

    # --- compare and write back ---------------------------------------------
    err_cnt = 0
    for json_path, jsonData, predicted_item, acc in zip(jsons_paths, jsons, predicted, accs):
        moderation = jsonData["moderation"]
        moderation["predicted"] = predicted_item

        if jsonData["description"] == predicted_item and acc > acc_less_than:
            moderation["isModerated"] = 1
        else:
            print("Predicted '{}' with acc {}, real: '{}' in file {}".format(
                predicted_item,
                acc,
                jsonData["description"],
                json_path))
            # Samples moderated by these two accounts keep their flag and are
            # not counted as errors.
            if moderation.get("moderatedBy") not in ("Oleg Cherniy", "ApelSYNML"):
                err_cnt = err_cnt+1
                moderation["isModerated"] = 0
        with open(json_path, "w", encoding='utf8') as jsonWF:
            json.dump(jsonData, jsonWF,  ensure_ascii=False)

    print("Error detection count: {}".format(err_cnt))
    # Avoid ZeroDivisionError when nothing was predicted.
    accuracy = 1-err_cnt/len(predicted) if len(predicted) else float("nan")
    print("Accuracy: {}".format(accuracy))

In [4]:
# Run the check with the default model and dataset paths.
# Note: compare() rewrites the annotation JSON files under root_dir/ann in place.
compare()

LOADED 161177 ANNOTATIONS
Loaded 0 photos
Loaded 1000 photos
Loaded 2000 photos
Loaded 3000 photos
Loaded 4000 photos
Loaded 5000 photos
Loaded 6000 photos
Loaded 7000 photos
Loaded 8000 photos
Loaded 9000 photos
Loaded 10000 photos
Loaded 11000 photos
Loaded 12000 photos
Loaded 13000 photos
Loaded 14000 photos
Loaded 15000 photos
Loaded 16000 photos
Loaded 17000 photos
Loaded 18000 photos
Loaded 19000 photos
Loaded 20000 photos
Loaded 21000 photos
Loaded 22000 photos
Loaded 23000 photos
Loaded 24000 photos
Loaded 25000 photos
Loaded 26000 photos
Loaded 27000 photos
Loaded 28000 photos
Loaded 29000 photos
Loaded 30000 photos
Loaded 31000 photos
Loaded 32000 photos
Loaded 33000 photos
Loaded 34000 photos
Loaded 35000 photos
Loaded 36000 photos
Loaded 37000 photos
Loaded 38000 photos
Loaded 39000 photos
Loaded 40000 photos
Loaded 41000 photos
Loaded 42000 photos
Loaded 43000 photos
Loaded 44000 photos
Loaded 45000 photos
Loaded 46000 photos
Loaded 47000 photos
Loaded 48000 photos
Loaded 

Predicted '7199EA5' with acc [[    0.99342]], real: '2199EA5' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/12462_2089EAB_0.json
Predicted '2776IM1' with acc [[    0.55086]], real: '2776IM1' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/2776IM1.json
Predicted '9100BX7' with acc [[      0.351]], real: '9100BX1' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/9100BX1.json
Predicted '7173IK3' with acc [[    0.30118]], real: '7173IK3' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/7173IK3.json
Predicted '7264XK5' with acc [[    0.43912]], real: '7264KK5' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/7527_72666K5_0.json
Predicted '2222IXO7' with acc [[    0.55731]], real: '2222XO7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/2222XO7_2222IO7.json
Predicted '3073HT7' with acc [[    0.15104]], real: '3073HT7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/19245_30739TT_0.json
Predicted '3276EH4' with acc [[     0.3846]], real:

Predicted '7732AT6' with acc [[    0.76252]], real: '7733AT6' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/7733AT6_2733AI6.json
Predicted '3694BE3' with acc [[    0.45968]], real: '3594BE3' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/3594BE3.json
Predicted '0007BC2' with acc [[    0.95058]], real: '0007BO2' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/0007BO2_0007BC2.json
Predicted '1138BX6' with acc [[    0.51848]], real: '1138BX6' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/1138BX6.json
Predicted '72003BK3' with acc [[    0.58303]], real: '2003BK3' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/2003BK3.json
Predicted '4339AA4' with acc [[    0.99987]], real: '4339AA1' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/4339AA1_.json
Predicted '5391IX3' with acc [[    0.43991]], real: '5391IK3' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/5391IK3_.json
Predicted '0983AH3' with acc [[    0.10237]], real: '0983AH3' in

Predicted '55471EX3' with acc [[    0.63052]], real: '5541EX3' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/5541EX3.json
Predicted '27256OI7' with acc [[   0.051563]], real: '2256OI7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/2256OI7.json
Predicted '694MK4' with acc [[    0.91288]], real: '1694MK4' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/1694MK4.json
Predicted '7789CP7' with acc [[    0.59804]], real: '7788CP7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/2385_2788CPU_0.json
Predicted '8828BX5' with acc [[    0.64295]], real: '8828BX5' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/8828BX7_.json
Predicted '0613AE1' with acc [[     0.6285]], real: '6613AE1' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/6613AE1_.json
Predicted '5096EH5' with acc [[    0.80803]], real: '5096EH6' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/5096EH6_5090EH6.json
Predicted '3389HC7' with acc [[    0.82165]], real: '3889HC7' in 

Predicted '127HA7' with acc [[     0.4617]], real: '1271HA7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/1050_1271HAT_0.json
Predicted 'AM96494' with acc [[   0.065146]], real: 'AM96495' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/4430_AM35495_0.json
Predicted '4007TP7' with acc [[    0.96163]], real: '4007IP7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/4007IP7.json
Predicted '0857HM7' with acc [[    0.35558]], real: '0857HM7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/15620_035707_0.json
Predicted '9849EX7' with acc [[    0.99988]], real: '9849EX5' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/9849EX5_9849EX7.json
Predicted '7037IE7' with acc [[    0.97119]], real: '7007IE7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/7007IE7.json
Predicted '8192MT7' with acc [[    0.32321]], real: '8192IT7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/2559_8192ITX_0.json
Predicted '50560BE1' with acc [[    0.78353]], r

Predicted '93305MC7' with acc [[    0.66763]], real: '9305MC7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/4323_AP9858CT_0.json
Predicted '4948OC7' with acc [[    0.99952]], real: '4948CC7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/4948CC7.json
Predicted '8028ITB7' with acc [[   0.019365]], real: '8028IB7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/4957_B029B2_0.json
Predicted 'AP6317' with acc [[    0.32035]], real: 'AP63117' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/5327_AP61PX_0.json
Predicted '5410AA2' with acc [[     0.5097]], real: '5410AA2' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/5410AA2.json
Predicted '5419BO5' with acc [[    0.66141]], real: '5419BO7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/5419BO7.json
Predicted '0655PB7' with acc [[    0.71525]], real: '9655PB7' in file /var/www/nomeroff-net/datasets/ocr/by/train/ann/5479_8655PB7_0.json
Predicted '5693MA7' with acc [[    0.77599]], real: '16