In [2]:
import easyocr
import psutil
import time
import pytesseract
import GPUtil
from PIL import Image
import pandas as pd

In [26]:
# Tesseract OCR
def run_tesseract(image_path):
    img = Image.open(image_path)
    return pytesseract.image_to_string(img)

In [27]:
def run_easyocr_gpu(image_path):
    reader = easyocr.Reader(['en'])
    return reader.readtext(image_path)

def run_easyocr_cpu(image_path):
    reader = easyocr.Reader(['en'], gpu=False)
    return reader.readtext(image_path)

In [None]:
def monitor_gpu_usage():
    gpus = GPUtil.getGPUs()
    for gpu in gpus:
        return gpu.id, gpu.load* 100, gpu.memoryUsed

In [42]:
def track_resources(ocr_function, **args):
    process = psutil.Process()
    start_time = time.time()
    ocr_function(**args)
    end_time = time.time()
    memory_usage = process.memory_info().rss / 1024 / 1024  # in MB
    cpu_percent = process.cpu_percent(interval=1)
    return end_time - start_time, memory_usage, cpu_percent

In [43]:
image_paths = ["test_images/testocr.png",
               "test_images/distorted_text.jpg",
               "test_images/largepreview.png",
               "test_images/sample1.a36a230755dc.jpg",
               "test_images/Handwriting-test-dataset-for-OCR-operation.png",
               "test_images/mixed_fonts.jpg",
               "test_images/low_contrast.jpg",
               "test_images/computer-vision.jpg"]

## Tesseract

In [36]:
all_time = 0
all_memory = 0
all_cpu = 0

for image in image_paths:
    # Tesseract results
    time_tess, memory_tess, cpu_tess = track_resources(ocr_function=run_tesseract, image_path=image)
    print(f"Tesseract - Time: {time_tess}, Memory: {memory_tess}, CPU: {cpu_tess}")

    all_time += time_tess
    all_memory += memory_tess
    all_cpu += cpu_tess


Tesseract - Time: 0.4038863182067871, Memory: 1008.734375, CPU: 0.0
Tesseract - Time: 0.32141709327697754, Memory: 1009.109375, CPU: 0.0
Tesseract - Time: 1.4074363708496094, Memory: 1009.109375, CPU: 0.0
Tesseract - Time: 1.130180835723877, Memory: 1009.109375, CPU: 0.0
Tesseract - Time: 0.43622303009033203, Memory: 1009.109375, CPU: 0.0
Tesseract - Time: 0.459735631942749, Memory: 1009.109375, CPU: 0.0
Tesseract - Time: 0.28943586349487305, Memory: 1009.109375, CPU: 0.0
Tesseract - Time: 0.7870125770568848, Memory: 1009.109375, CPU: 0.0


In [38]:
print(f"average_time: {all_time/8}, average_memory: {all_memory/8}, average_cpu: {all_cpu/8}")

average_time: 0.6544159650802612, average_memory: 1009.0625, average_cpu: 0.0


## Easyocr with CPU

In [39]:
all_time = 0
all_memory = 0
all_cpu = 0

for image in image_paths:
    # easyocr (cpu) results
    time_tess, memory_tess, cpu_tess = track_resources(ocr_function=run_easyocr_cpu, image_path=image)
    print(f"EasyOCR (CPU) - Time: {time_tess}, Memory: {memory_tess}, CPU: {cpu_tess}")

    all_time += time_tess
    all_memory += memory_tess
    all_cpu += cpu_tess

print(f"average_time: {all_time/8}, average_memory: {all_memory/8}, average_cpu: {all_cpu/8}")

Using CPU. Note: This module is much faster with a GPU.
Using CPU. Note: This module is much faster with a GPU.


EasyOCR (CPU) - Time: 5.899929523468018, Memory: 1176.67578125, CPU: 4.0


Using CPU. Note: This module is much faster with a GPU.


EasyOCR (CPU) - Time: 5.177687406539917, Memory: 1157.546875, CPU: 3.0


Using CPU. Note: This module is much faster with a GPU.


EasyOCR (CPU) - Time: 18.357693672180176, Memory: 1195.875, CPU: 8.0


Using CPU. Note: This module is much faster with a GPU.


EasyOCR (CPU) - Time: 6.388930320739746, Memory: 1241.10546875, CPU: 4.0


Using CPU. Note: This module is much faster with a GPU.


EasyOCR (CPU) - Time: 6.693471431732178, Memory: 1439.6171875, CPU: 7.0


Using CPU. Note: This module is much faster with a GPU.


EasyOCR (CPU) - Time: 6.663943290710449, Memory: 1486.33203125, CPU: 7.0


Using CPU. Note: This module is much faster with a GPU.


EasyOCR (CPU) - Time: 5.065858840942383, Memory: 1487.31640625, CPU: 7.0
EasyOCR (CPU) - Time: 13.13253378868103, Memory: 1529.15234375, CPU: 4.0
average_time: 8.422506034374237, average_memory: 1339.20263671875, average_cpu: 5.5


## EasyOCR (GPU)

In [45]:
all_time = 0
all_memory = 0
all_cpu = 0
all_gpu_memory = 0
all_gpu_load = 0

for image in image_paths:
    # easyocr (cpu) results
    time_tess, memory_tess, cpu_tess = track_resources(ocr_function=run_easyocr_gpu, image_path=image)
    gpu_id, gpu_load, gpu_memory = monitor_gpu_usage()
    print(f"EasyOCR (GPU) - Time: {time_tess}, Memory: {memory_tess}, CPU: {cpu_tess}")
    print(f"EasyOCR (GPU) - : pu_id: {gpu_id}, gpu_memory: {gpu_memory}, gpu_load: {gpu_load}")

    all_time += time_tess
    all_memory += memory_tess
    all_cpu += cpu_tess
    all_gpu_memory += gpu_memory
    all_gpu_load += gpu_load

print(f"average_time: {all_time/8}, average_memory: {all_memory/8}, average_cpu: {all_cpu/8}")

(0, 0.0, 927.0)
EasyOCR (GPU) - Time: 3.202956199645996, Memory: 1614.09765625, CPU: 1.0
EasyOCR (GPU) - : pu_id: 0, gpu_memory: 927.0, gpu_load: 0.0
(0, 0.0, 927.0)
EasyOCR (GPU) - Time: 2.940523386001587, Memory: 1614.21875, CPU: 0.0
EasyOCR (GPU) - : pu_id: 0, gpu_memory: 927.0, gpu_load: 0.0
(0, 0.0, 927.0)
EasyOCR (GPU) - Time: 10.073514699935913, Memory: 1614.4296875, CPU: 0.0
EasyOCR (GPU) - : pu_id: 0, gpu_memory: 927.0, gpu_load: 0.0
(0, 0.0, 927.0)
EasyOCR (GPU) - Time: 5.047790050506592, Memory: 1614.453125, CPU: 0.0
EasyOCR (GPU) - : pu_id: 0, gpu_memory: 927.0, gpu_load: 0.0
(0, 0.0, 927.0)
EasyOCR (GPU) - Time: 4.961219072341919, Memory: 1614.47265625, CPU: 0.0
EasyOCR (GPU) - : pu_id: 0, gpu_memory: 927.0, gpu_load: 0.0
(0, 0.0, 927.0)
EasyOCR (GPU) - Time: 4.182440519332886, Memory: 1614.5078125, CPU: 0.0
EasyOCR (GPU) - : pu_id: 0, gpu_memory: 927.0, gpu_load: 0.0
(0, 0.0, 927.0)
EasyOCR (GPU) - Time: 4.054136037826538, Memory: 1614.4609375, CPU: 1.0
EasyOCR (GPU) - : 

In [46]:
print(f"average_gpu_memory: {all_gpu_memory/8}, average_gpu: {all_gpu_load/8}")

average_gpu_memory: 927.0, average_gpu: 0.0


-----------------

## Conclusion

## For Installation 
#### - EasyOCR is easier than tesseract in instalation as it require install model independently and then install related dependencies in python.

## For Resoures comsuption
#### I calculate average resources consuption for different 8 image for each model. 
#### Following table describe that

In [3]:
df = pd.DataFrame(columns=["model", "Num of images" ,"CPU usage", "GPU Usage","gpu memory usage", "Memory Usage", "Speed"],
                  data=[["easyocr (cpu) (average)", 8, "5.5%","0.0","0.0","1339.20263671875 MB","8.422506034374237 s"],
                        ["easyocr (gpu) (average)", 8,"0.25%","0.0","927.0 MB","91614.3876953125 MB","5.035106986761093 s"],
                        ["tesseract (average)", 8, "0.0%","0.0","0.0", "1009.0625 MB", "0.6544159650802612 s"]])

df.head(10)

Unnamed: 0,model,Num of images,CPU usage,GPU Usage,gpu memory usage,Memory Usage,Speed
0,easyocr (cpu) (average),8,5.5%,0.0,0.0,1339.20263671875 MB,8.422506034374237 s
1,easyocr (gpu) (average),8,0.25%,0.0,927.0 MB,91614.3876953125 MB,5.035106986761093 s
2,tesseract (average),8,0.0%,0.0,0.0,1009.0625 MB,0.6544159650802612 s
