In [1]:
import os
import pickle
import pandas as pd
from tqdm import tqdm

from doctr.models import ocr_predictor
from doctr.io import DocumentFile

In [3]:
# Notebook configuration: where the input page images live and where the
# per-image OCR text files are written.
OCR_DATA_PATH = './../../data/ocr/docbank/images/'
TXT_DATA_PATH = './../../results/ocr/mobilenet/'

# Names of every entry in the image folder; the OCR cell iterates over this.
image_data_dir = os.listdir(OCR_DATA_PATH)

# docTR predictor combining a text-detection and a text-recognition network,
# both loaded with pretrained weights.
model = ocr_predictor(
    det_arch='db_mobilenet_v3_large',
    reco_arch='crnn_mobilenet_v3_large',
    pretrained=True,
)

# Available pretrained architectures (as noted by the original author).
#
# Detection (det_arch):
#   1. db_resnet50
#   2. db_mobilenet_v3_large
#
# Recognition (reco_arch):
#   1. crnn_vgg16_bn
#   2. crnn_mobilenet_v3_large

In [None]:
# Run the OCR predictor over every entry of image_data_dir and keep the raw
# predictor output for each one, keyed by file name.
results = {}

for image_file in tqdm(image_data_dir):
    # Join the path components instead of concatenating strings so the path
    # stays valid even if OCR_DATA_PATH is edited to drop its trailing slash.
    image_path = os.path.join(OCR_DATA_PATH, image_file)
    doc = DocumentFile.from_images(image_path)
    result = model(doc)
    results[image_file] = result

# NOTE(review): `results` only lives in kernel memory because the dump below
# is disabled. Its target (TXT_DATA_PATH + 'results.pkl') also differs from
# the '../results_mobilenet.pkl' file that the post-processing cell loads;
# confirm which location is intended before re-enabling it.
# with open(TXT_DATA_PATH + 'results.pkl', 'wb') as outp:  # Overwrites any existing file.
#     pickle.dump(results, outp, pickle.HIGHEST_PROTOCOL)

 49%|███████████████████████████████████████████████████████████████████▌                                                                      | 49/100 [10:39<13:43, 16.14s/it]

In [None]:
# Reload a previously pickled {file name: OCR result} mapping into `results`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by a trusted run of this notebook.
# NOTE(review): this path is not derived from TXT_DATA_PATH and differs from
# the (commented-out) dump target in the OCR cell - confirm it is the intended file.
with open('../results_mobilenet.pkl', mode='rb') as pickle_file:
    results = pickle.load(pickle_file)

# Flatten every OCR result into a list of per-word rows, keyed by image name.
# Row layout: [block index, line index (restarts in each block), confidence,
#              x1, y1, x2, y2, recognised text]
total = {}
for image, result in tqdm(results.items()):
    # Only the first page of each result is read.
    page = result.pages[0]
    # Page dimensions in reversed order, used to scale the relative word
    # geometry to integer pixel values.
    # NOTE(review): presumably docTR stores (height, width), making this
    # (width, height) to match (x, y) points - confirm against the docTR docs.
    page_size = tuple(reversed(page.dimensions))

    word_rows = []
    for block_idx, block in enumerate(page.blocks):
        for line_idx, line in enumerate(block.lines):
            for word in line.words:
                corners = word.geometry
                # Scale both corner points; int() truncates toward zero.
                first_pt = [int(coord * size) for coord, size in zip(corners[0], page_size)]
                second_pt = [int(coord * size) for coord, size in zip(corners[1], page_size)]
                word_rows.append([
                    block_idx,
                    line_idx,
                    word.confidence,
                    first_pt[0],
                    first_pt[1],
                    second_pt[0],
                    second_pt[1],
                    word.value,
                ])
    total[image] = word_rows
    
    
# Write one space-separated text file per image, one row per recognised word.
output_dir = os.path.join(TXT_DATA_PATH, 'txt')
# Create the target folder up front so to_csv does not fail when it is missing.
os.makedirs(output_dir, exist_ok=True)

for image, result in tqdm(total.items()):
    # splitext removes the extension whatever its length; slicing off a fixed
    # four characters turned e.g. 'page.jpeg' into 'page.' (-> 'page..txt').
    name = os.path.splitext(image)[0]
    df = pd.DataFrame(
        result,
        columns=['block', 'line', 'confidence', 'X1', 'Y1', 'X2', 'Y2', 'token'],
    )
    df.to_csv(os.path.join(output_dir, name + '.txt'), sep=' ', index=False)