In [1]:
#!apt-get update
#!apt-get install -y tesseract-ocr libtesseract-dev


In [2]:
#!pip install -q pytesseract Pillow easyocr boto3

In [3]:
#!pip install --upgrade Pillow

### Tesseract 

In [4]:
import pytesseract
from PIL import Image

# Single sample plate used to sanity-check the Tesseract configuration.
image_path = '/kaggle/input/license-plate-characters-detection-ocr/LP-characters/images/0006.png'

# --oem 3  : let Tesseract pick the default OCR engine that is available.
# --psm 13 : "raw line" page segmentation (image treated as one text line).
# The whitelist restricts recognition to upper-case letters and digits.
custom_config = r'--oem 3 --psm 13 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'

# Open the image in a context manager so the underlying file handle is
# released as soon as OCR has finished instead of waiting for garbage collection.
with Image.open(image_path) as plate_image:
    text = pytesseract.image_to_string(plate_image, lang='eng', config=custom_config)

print("License Plate:", text.strip())

License Plate: HR696969


### EasyOCR

In [5]:
from easyocr import Reader 
from PIL import Image

# Same sample plate as the Tesseract cell, so the two engines can be compared.
image_path = '/kaggle/input/license-plate-characters-detection-ocr/LP-characters/images/0006.png'

# English-only EasyOCR reader; readtext() returns one entry per detected
# text region, each shaped like (bounding box, text, confidence).
reader = Reader(['en'])
results = reader.readtext(image_path)

# Stitch the recognised fragments together, separated by single spaces.
text = ' '.join(detection[1] for detection in results)

print("License Plate:",text)
print("\nDetailed results:")
print(results)

License Plate: HR 69 6969

Detailed results:
[([[25, 19], [185, 19], [185, 55], [25, 55]], 'HR 69 6969', 0.8323629209789235)]


In [6]:
import pytesseract
from PIL import Image
from easyocr import Reader 

# Module-level English EasyOCR reader. read_text_easyocr() below looks this
# name up as a global, so it is built once here rather than on every call.
reader = Reader(['en'])

def read_text_tesseract(image_path_full, whitelist='0123456789'):
    """Run Tesseract on one image and return the recognised text.

    Args:
        image_path_full: Path of the image file to read.
        whitelist: Characters Tesseract is allowed to emit. Defaults to
            digits only, which is what this function has always used.
            NOTE(review): the single-image demo earlier in the notebook
            whitelists A-Z as well and reads "HR696969"; with the default
            here letters can never be produced -- confirm this is intended
            for the benchmark, or pass a wider whitelist.

    Returns:
        The OCR output with leading/trailing whitespace removed.
    """
    # --oem 3: default available engine; --psm 7: treat image as a single text line.
    custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=' + whitelist
    # Context manager closes the file handle promptly; this function is
    # called once per image in the evaluation loop.
    with Image.open(image_path_full) as plate_image:
        text = pytesseract.image_to_string(plate_image, config=custom_config)
    return text.strip()


def read_text_easyocr(image_path_full):
    """Run the shared EasyOCR ``reader`` on one image and return its text.

    The text field (index 1) of every detection is collected, the pieces
    are joined with single spaces, and surrounding whitespace is trimmed.

    Args:
        image_path_full: Path of the image file to read.

    Returns:
        The space-joined recognised text, stripped of outer whitespace.
    """
    detections = reader.readtext(image_path_full)
    fragments = []
    for detection in detections:
        fragments.append(detection[1])
    joined = ' '.join(fragments)
    return joined.strip()
    



In [7]:
import os 

def jaccard_similarity_chars(s1, s2):
    """Jaccard similarity between the sets of characters of two strings.

    Only which characters occur matters; order and repetition are ignored.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        |chars(s1) & chars(s2)| / |chars(s1) | chars(s2)| as a float,
        or 0.0 when both strings are empty.
    """
    chars_a = set(s1)
    chars_b = set(s2)
    combined = chars_a.union(chars_b)
    # No characters at all: avoid dividing by zero.
    if not combined:
        return 0.0
    shared = chars_a.intersection(chars_b)
    return len(shared) / len(combined)

image_dir = '/kaggle/input/license-plate-characters-detection-ocr/LP-characters/images'

# Running sums of per-image Jaccard scores, plus the number of images seen.
score_tesseract = 0
score_easyocr = 0
num_images = 0

# os.listdir() returns entries in arbitrary order; sorting makes the run
# (and the floating-point summation order) reproducible.
for image_name in sorted(os.listdir(image_dir)):
    image_path_full = os.path.join(image_dir, image_name)

    # Ground truth is derived from the file name: extension dropped,
    # underscores turned into spaces, lower-cased.
    # NOTE(review): the sample file used earlier is named 0006.png while both
    # engines read its plate as HR696969 -- confirm that file names in this
    # dataset really encode the plate text.
    gt = os.path.splitext(image_name)[0].replace('_', ' ').lower()

    text_tess = read_text_tesseract(image_path_full).lower().replace('\n', '')
    text_easy = read_text_easyocr(image_path_full).lower().replace('\n', '')

    score_tesseract += jaccard_similarity_chars(gt, text_tess)
    score_easyocr += jaccard_similarity_chars(gt, text_easy)

    num_images += 1

# Raw totals, as before.
print('Score tesseract:', score_tesseract)
print('Score easyocr:', score_easyocr)

# The totals are only meaningful relative to the number of images, so also
# report the per-image mean (skipped when the directory is empty).
if num_images:
    print('Mean score tesseract:', score_tesseract / num_images)
    print('Mean score easyocr:', score_easyocr / num_images)

Score tesseract: 29.11230158730157
Score easyocr: 24.181995061630616
