In [1]:
# Specify device: an empty CUDA_VISIBLE_DEVICES exposes no GPU to CUDA-based
# libraries, so the rest of the notebook runs on the CPU. It has to be set
# before any such library initializes CUDA, hence this is the first cell.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [30]:
from collections import Counter
import cv2
import json
import os
import re
import sys
from matplotlib import pyplot as plt
import multiprocessing

# Directory that contains the NomeroffNet package; change it if this notebook is moved
NOMEROFF_NET_DIR = os.path.abspath('../../')
sys.path.append(NOMEROFF_NET_DIR)

from NomeroffNet.tools import modelhub

In [44]:
# Default matplotlib figure size as (width, height) in inches.
plt.rcParams["figure.figsize"] = (10, 10)

In [45]:
%matplotlib inline 

In [3]:
def get_datasets(names = None, states = None):
    """
    Download the requested datasets and map each (name, state) pair to a folder.

    names  -- dataset identifiers handed to modelhub.download_dataset_for_model;
              None selects the built-in list below.
    states -- sub-folder names registered for every dataset; None selects
              "train", "test" and "val".

    Returns a dict keyed by (name, state) whose value is the dataset path
    reported by modelhub joined with the state name. The path of every
    dataset is printed as soon as it is known.
    """
    default_names = (
        "EuUaFrom2004",
        "EuUa1995",
        "Eu",
        "Ru",
        "Kz",
        "Ge",
        "By",
        "Su",
        "Kg",
        "Am",
    )
    default_states = ("train", "test", "val")

    selected_names = default_names if names is None else names
    selected_states = default_states if states is None else states

    split_dirs = {}
    for dataset_name in selected_names:
        dataset_root = modelhub.download_dataset_for_model(dataset_name)["dataset_path"]
        print(dataset_name, dataset_root)
        for split in selected_states:
            split_dirs[(dataset_name, split)] = os.path.join(dataset_root, split)
    return split_dirs

In [4]:
def read_json(fname):
    """
    Load one JSON file and return it together with its path.

    The file is decoded as UTF-8 explicitly, so non-ASCII text (for example
    Cyrillic plate descriptions) is read the same way regardless of the
    locale's default encoding.

    fname -- path of the JSON file.

    Returns a (fname, parsed_json) tuple; keeping the path in the result lets
    callers that map this function over many files pair each document with
    its source.
    """
    with open(fname, encoding="utf-8") as json_file:
        json_data = json.load(json_file)
    return fname, json_data

In [5]:
def read_annotations(root_dir, processes=10):
    """
    Read every annotation file found below <root_dir>/ann.

    root_dir  -- dataset split folder that contains the "ann" directory.
    processes -- number of worker processes used to parse the files.

    Returns a dict mapping each file path to its parsed JSON content, as
    produced by read_json. An absent or empty "ann" directory yields {}.
    """
    ann_dir = os.path.join(root_dir, "ann")
    # Join with the directory os.walk is currently visiting, not with ann_dir:
    # otherwise files located in sub-folders would get paths that do not exist.
    jsons_paths = [
        os.path.join(dir_name, fname)
        for dir_name, _subdirs, file_list in os.walk(ann_dir)
        for fname in file_list
    ]
    if not jsons_paths:
        # Nothing to parse, so do not spawn a worker pool at all.
        return {}
    with multiprocessing.Pool(processes=processes) as pool:
        results = pool.map(read_json, jsons_paths)
    # read_json returns (path, data) pairs, which dict() consumes directly.
    return dict(results)

In [13]:
def find_all_datset_format(annotations):
    """
    Count how often each plate "format mask" occurs in the annotations.

    A mask is the lower-cased "description" of an annotation in which every
    character matched by one of the patterns below is replaced: "#" for a
    number, "@" for a letter. All other characters are kept as they are.

    annotations -- mapping whose values are dicts with a "description" string.

    Returns a list of (mask, count) pairs, most frequent first.
    """
    mask_rules = (
        (r"[0-9]", "#"),  # number
        (r"[a-z]", "@"),  # letter
        (r"[а-я]", "@"),  # letter
        (r"[їіёъ]", "@"),  # letter
    )

    def to_mask(description):
        mask = description.lower()
        for pattern, symbol in mask_rules:
            mask = re.sub(pattern, symbol, mask)
        return mask

    tally = Counter(to_mask(record["description"]) for record in annotations.values())
    return tally.most_common()

In [42]:
def print_datset_format(annotations, ann_format):
    """
    Print and display every annotation whose format mask equals ann_format.

    The mask is the lower-cased "description" with numbers replaced by "#"
    and letters replaced by "@" (see the patterns below).

    annotations -- mapping of annotation file path -> parsed JSON dict with
                   at least the keys "description" and "name".
    ann_format  -- mask to look for, e.g. "@###@@".

    For each match the path and JSON are printed and the image
    "<name>.png" is shown. The image folder is derived from the annotation
    folder by renaming its last path component "ann" to "img". Images that
    cannot be read are reported and skipped.
    """
    for fname, json_data in annotations.items():
        numberplate_format = json_data["description"].lower()
        numberplate_format = re.sub(r"[0-9]", "#", numberplate_format)  # number
        numberplate_format = re.sub(r"[a-z]", "@", numberplate_format)  # letter
        numberplate_format = re.sub(r"[а-я]", "@", numberplate_format)  # letter
        numberplate_format = re.sub(r"[їіёъ]", "@", numberplate_format)  # letter
        if ann_format != numberplate_format:
            continue

        print("\t\t\t", fname, json_data)

        # Rename only the directory component that is exactly "ann". A plain
        # str.replace would also rewrite "ann" inside other folder names.
        dir_parts = os.path.dirname(fname).split(os.sep)
        for idx in range(len(dir_parts) - 1, -1, -1):
            if dir_parts[idx] == "ann":
                dir_parts[idx] = "img"
                break
        img_path = os.path.join(os.sep.join(dir_parts), f"{json_data['name']}.png")

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print("\t\t\t", "[WARN] cannot read image:", img_path)
            continue

        # OpenCV loads images as BGR while matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        # Render now so that every match gets its own figure instead of all
        # matches being drawn over each other on the same axes.
        plt.show()

In [15]:
# Download every dataset and register its train/test/val folders.
# "Su" is requested first; the remaining names keep their usual order.
datasets = get_datasets(
    names=["Su", "EuUaFrom2004", "EuUa1995", "Eu", "Ru", "Kz", "Ge", "By", "Kg", "Am"],
)

Su /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/Su/autoriaNumberplateOcrSu-2021-08-27
EuUaFrom2004 /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/EuUaFrom2004/autoriaNumberplateOcrUa-2021-08-25
EuUa1995 /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/EuUa1995/autoriaNumberplateOcrUa-1995-2021-08-25
Eu /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/Eu/autoriaNumberplateOcrEu-2020-10-09
Ru /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/Ru/autoriaNumberplateOcrRu-2021-09-01
Kz /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/Kz/autoriaNumberplateOcrKz-2019-04-26
Ge /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/Ge/autoriaNumberplateOcrGe-2019-07-06
By /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/t

In [84]:
# Take the Su/train folder from `datasets` (filled by get_datasets) instead of
# hard-coding an absolute path that only exists on one machine.
dataset = datasets[("Su", "train")]
annotations = read_annotations(dataset, processes=100)


In [86]:
# Print and show the entries of `annotations` whose mask is "@###@@"
# (one letter, three digits, two letters).
print_datset_format(annotations, "@###@@")

In [16]:
# Survey every registered dataset folder: read its annotations and list the
# plate format masks it contains, most frequent first.
for key, dataset in datasets.items():
    print("\n\n[DATSET]", dataset, ":")
    annotations = read_annotations(dataset)
    formats = find_all_datset_format(annotations)
    # Total number of annotations in this folder.
    all_count = sum(count for _mask, count in formats)
    for ann_format, count in formats:
        print("\t", ann_format, "\t\t", count)



[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/Su/autoriaNumberplateOcrSu-2021-08-27/train :
	 @####@@ 		 32739
	 ####@@@ 		 1543
	 @@@#### 		 83
	 @###@@ 		 1


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/Su/autoriaNumberplateOcrSu-2021-08-27/test :
	 @####@@ 		 1032
	 ####@@@ 		 67


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/Su/autoriaNumberplateOcrSu-2021-08-27/val :
	 @####@@ 		 1756
	 ####@@@ 		 118


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/EuUaFrom2004/autoriaNumberplateOcrUa-2021-08-25/train :
	 @@####@@ 		 104475


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/EuUaFrom2004/autoriaNumberplateOcrUa-2021-08-25/test :
	 @@####@@ 		 844


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./

	 @@###@@ 		 427
	 @@#### 		 208
	 @@@### 		 29
	 @@###@# 		 15
	 @####@@ 		 14
	 @@##### 		 9
	 #####@@ 		 7
	 #@####@ 		 4
	 @@####@ 		 4
	 #@###@# 		 4
	 #@###@@ 		 4
	 @#####@ 		 3
	 ######@ 		 3
	 #@##### 		 3
	 @###### 		 2
	 @####@# 		 2
	 ####### 		 1


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/By/autoriaNumberplateOcrBy-2021-08-27/train :
	 ####@@# 		 160640
	 @@##### 		 478
	 #@@#### 		 89
	 #####@# 		 10
	 ####@@@ 		 3
	 ####@## 		 2
	 @####@# 		 1
	 ####@@ 		 1
	 @@#### 		 1


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/By/autoriaNumberplateOcrBy-2021-08-27/test :
	 ####@@# 		 1163
	 @@##### 		 42


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetector/By/autoriaNumberplateOcrBy-2021-08-27/val :
	 ####@@# 		 812
	 @@##### 		 33


[DATSET] /mnt/store/nomeroff-net/nomeroff-net/NomeroffNet/tools/../../data/./dataset/TextDetec