## docTR by Mindee

[GitHub](https://github.com/mindee/doctr) ·
[Documentation](https://mindee.github.io/doctr/modules/io.html#doctr.io.DocumentFile.from_pdf)

In [None]:
!pip install python-doctr[tf]

In [None]:
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

In [None]:
# Build the OCR predictor with pretrained weights
model = ocr_predictor(pretrained=True)

# Read the PDF and keep only the second page (index 1),
# which contains the OS and OD data
pdf_pages = DocumentFile.from_pdf("BP66.pdf")
doc1 = [pdf_pages[1]]

# Run OCR on the selected page
result = model(doc1)

Checking patient BP66

In [None]:
# Export the OCR result and keep the first exported page
# (only one page was passed to the model)
exported_pages = result.export()["pages"]
json_export = exported_pages[0]
print(json_export)

In [None]:
def find_offset_value(json_data, target_word, offset, eye="OD"):
    """
    Retrieve the word located `offset` positions after `target_word` on the same line.

    The page is scanned block by block, line by line, counting every word whose
    value equals `target_word`. The first occurrence is used for eye "OD" and
    the second occurrence for eye "OS".

    Args:
        json_data: Page dictionary with a "blocks" list; each block may hold a
            "lines" list, and each line holds a "words" list of dicts with a
            "value" key.
        target_word: Exact word value to search for (e.g. "AL:").
        offset: Number of positions after the target word, within the same line.
        eye: "OD" (first occurrence) or "OS" (second occurrence).

    Returns:
        The value of the word at the requested position, or None when the
        requested occurrence does not exist, the position falls outside the
        line, or `eye` is neither "OD" nor "OS".
    """
    # Which occurrence of the target word belongs to which eye.
    wanted_occurrence = {"OD": 1, "OS": 2}.get(eye)
    if wanted_occurrence is None:
        return None

    occurrence_count = 0
    for block in json_data["blocks"]:
        for line in block.get("lines", []):
            words = line["words"]
            for i, word in enumerate(words):
                if word["value"] != target_word:
                    continue
                occurrence_count += 1
                if occurrence_count == wanted_occurrence:
                    position = i + offset
                    # Lower bound prevents a negative index from silently
                    # wrapping around to the end of the line.
                    if 0 <= position < len(words):
                        return words[position]["value"]
                    # The requested occurrence was found but has no word at
                    # that position; later occurrences can never match.
                    return None
    # Only give up after every block has been scanned (previously this return
    # sat inside the block loop, so only the first block was ever searched).
    return None

Checking for OD:

AL: 24.21 mm (SD = 0.02 mm, SNR = 220.5)

Checking for OS:

AL: 24.66 mm (SD = 0.05 mm, SNR = 148.6)

In [None]:
# Axial length: the value is the word right after the "AL:" label
target_word = "AL:"

# First occurrence -> OD, second occurrence -> OS
value_al_od = find_offset_value(json_export, target_word, offset=1, eye="OD")
value_al_os = find_offset_value(json_export, target_word, offset=1, eye="OS")

print(value_al_od)
print(value_al_os)

R1 and R2

In [None]:
# R1: the value is the word right after the "R1:" label
target_word = "R1:"
value_r1_od = find_offset_value(json_export, target_word, offset=1, eye="OD")
value_r1_os = find_offset_value(json_export, target_word, offset=1, eye="OS")
print(value_r1_od)
print(value_r1_os)

# R2: same lookup with the "R2:" label
target_word = "R2:"
value_r2_od = find_offset_value(json_export, target_word, offset=1, eye="OD")
value_r2_os = find_offset_value(json_export, target_word, offset=1, eye="OS")
print(value_r2_od)
print(value_r2_os)

For the cylinder value, -0.44 is captured correctly, but 54° is misread as 540 (the degree sign is recognized as a zero).

In [None]:
target_word = "Cyl.:"

# Word right after the "Cyl.:" label (OD)
value_cyl_od = find_offset_value(json_export, target_word, offset=1, eye="OD")
# Word four positions after the label (OD)
# NOTE(review): presumably the angle token that OCR reads as "540" - confirm
value_ = find_offset_value(json_export, target_word, offset=4, eye="OD")

print(value_cyl_od)
print(value_)

In [None]:
# R: the value is the word right after the "R:" label
target_word = "R:"

# First occurrence -> OD, second occurrence -> OS
value_r_od = find_offset_value(json_export, target_word, offset=1, eye="OD")
value_r_os = find_offset_value(json_export, target_word, offset=1, eye="OS")

print(value_r_od)
print(value_r_os)

In [None]:
# Collect the values extracted for the OD eye into a single mapping
results_OD = dict(
    AL=value_al_od,
    R1=value_r1_od,
    R2=value_r2_od,
    Cyl=value_cyl_od,
    R=value_r_od,
)

results_OD