In [1]:
!pip install easyocr

Collecting easyocr
  Downloading easyocr-1.7.1-py3-none-any.whl (2.9 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/2.9 MB[0m [31m2.7 MB/s[0m eta [36m0:00:02[0m[2K     [91m━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.9/2.9 MB[0m [31m13.3 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.9/2.9 MB[0m [31m31.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.4.2-py2.py3-none-any.whl (30 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (908 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import easyocr
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
import difflib

In [5]:
# Initialize the EasyOCR reader (English only)
reader = easyocr.Reader(['en'])

# Load the Excel file with image URLs
excel_file_path = "/content/drive/MyDrive/image dataset/computer_vision_dataframe.xlsx"
df = pd.read_excel(excel_file_path)


def to_direct_download_url(url):
    """Turn a Google Drive share link into a direct-download link.

    Share links of the form ``https://drive.google.com/file/d/<id>/view?...``
    point at the Drive viewer page rather than at the file itself, so they
    never pass the image content-type check below. Any other URL is returned
    unchanged.

    NOTE(review): the direct link only works for files shared as
    "anyone with the link" - confirm the sharing settings of the dataset.
    """
    marker = '/file/d/'
    if 'drive.google.com' in url and marker in url:
        file_id = url.split(marker, 1)[1].split('/', 1)[0].split('?', 1)[0]
        return f"https://drive.google.com/uc?export=download&id={file_id}"
    return url


# Extract and read text from each image URL
recognized_text_list = []

for image_url in df['MILEAGE PHOTOGRAPHY BEFORE']:
    # Placeholder kept for every row whose image cannot be fetched or decoded
    recognized_text = "Invalid Image"

    # Empty spreadsheet cells are read as NaN (a float), not as a string
    if isinstance(image_url, str) and image_url.strip():
        try:
            response = requests.get(to_direct_download_url(image_url.strip()), timeout=30)
        except requests.RequestException as error:
            print(f"Failed to download image from URL: {image_url} ({error})")
        else:
            content_type = response.headers.get('content-type', '')
            # Only successful responses that actually carry an image are OCR'd
            if response.ok and content_type.startswith('image'):
                # EasyOCR accepts the raw image bytes directly
                text = reader.readtext(response.content)

                # Each detection's recognized string is at index 1
                recognized_text = ' '.join([item[1] for item in text])

    recognized_text_list.append(recognized_text)

# Add the recognized text to the DataFrame
df['Recognized Text'] = recognized_text_list

# Strip the unit before comparing with the 'Initial mileage' column.
# Case-insensitive because the OCR emits 'km', 'Km' and 'KM'; the look-behind
# keeps words that merely end in "km" intact while still handling "5309km".
df['Model output'] = (
    df['Recognized Text']
    .str.replace(r'(?i)\s*(?<![a-z])km\b', '', regex=True)
    .str.strip()
)

# Calculate the similarity between 'Model output' and 'Initial mileage'
# NOTE(review): if 'Initial mileage' is stored as float, str() yields e.g.
# "167631.0", which lowers the ratio - confirm the column dtype.
df['Similarity'] = df.apply(lambda row: difflib.SequenceMatcher(None, str(row['Initial mileage']), row['Model output']).ratio(), axis=1)

# Calculate accuracy as a percentage
df['Accuracy'] = df['Similarity'] * 100

# Print the updated DataFrame with 'Model output' and 'Accuracy' columns
print(df[['Model output', 'Accuracy']])



    Model output  Accuracy
0  Invalid Image       0.0
1  Invalid Image       0.0
2  Invalid Image       0.0
3  Invalid Image       0.0
4  Invalid Image       0.0


In [20]:
# Load the Excel file with image URLs
excel_file_path = "/content/drive/MyDrive/image dataset/computer_vision_dataframe.xlsx"
df = pd.read_excel(excel_file_path)

# Loop through each image URL and display the image
for image_url in df['MILEAGE PHOTOGRAPHY BEFORE']:
    # Empty spreadsheet cells are read as NaN (a float), not as a string
    if not isinstance(image_url, str):
        print(f"Failed to download image from URL: {image_url}")
        continue

    # Google Drive share links (".../file/d/<id>/view") point at the viewer
    # page, not the file; rewrite them to the direct-download form.
    # NOTE(review): this only works for files shared as "anyone with the link".
    download_url = image_url
    if 'drive.google.com' in image_url and '/file/d/' in image_url:
        file_id = image_url.split('/file/d/', 1)[1].split('/', 1)[0].split('?', 1)[0]
        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"

    # Download the image; a timeout keeps one dead link from hanging the cell
    try:
        response = requests.get(download_url, timeout=30)
    except requests.RequestException:
        print(f"Failed to download image from URL: {image_url}")
        continue

    # A 200 response can still be an HTML page, which Image.open cannot decode
    is_image = response.headers.get('content-type', '').startswith('image')
    if response.status_code == 200 and is_image:
        # Create a Pillow Image from the downloaded content
        image = Image.open(BytesIO(response.content))

        # Render inline; Image.show() opens an external viewer, which shows
        # nothing in a hosted notebook. `display` is provided by the kernel.
        display(image)
    else:
        print(f"Failed to download image from URL: {image_url}")


Failed to download image from URL: https://drive.google.com/file/d/1CJjzOtceBOZYkgi7_sLpvke3881-f9fZ/view?usp=drive_link
Failed to download image from URL: https://drive.google.com/file/d/1rYQ-5GcqE4TAx8finYUbnn5lXSEUluLp/view?usp=drive_link
Failed to download image from URL: https://drive.google.com/file/d/14v7NQSz2lccgtPDT9NxH666693egbjIr/view?usp=drive_link
Failed to download image from URL: https://drive.google.com/file/d/1J2jfCFfaAxm6n21C0HrNlkEbiA7s-vza/view?usp=drive_link
Failed to download image from URL: https://drive.google.com/file/d/1eD1P0dYqAy7DPmyG0ecuX1Yw-ftpQPZM/view?usp=drive_link


In [7]:
import cv2
import easyocr

# Load the image using OpenCV (cv2)
image_path = "/content/drive/MyDrive/image dataset/Image_1.png"
image = cv2.imread(image_path)

# cv2.imread reports an unreadable or missing file by returning None instead
# of raising, so fail here with a clear message rather than inside the OCR call
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Initialize the EasyOCR reader
reader = easyocr.Reader(['en'])

# Read text from the image
text = reader.readtext(image)

# Extract the recognized text (index 1 of each detection) from the output
recognized_text = [item[1] for item in text]

# Define a function to extract the largest number from a string
def extract_largest_number(text):
    """Return the largest whitespace-separated integer token in ``text``.

    Only tokens made up entirely of decimal digits are considered, so mixed
    tokens such as ``'226k'`` or ``'6*40'`` are ignored.

    Args:
        text: The string to scan.

    Returns:
        The largest integer found, or ``None`` when no token is a number.
    """
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits ('²') that int() cannot parse.
    numbers = [int(word) for word in text.split() if word.isdecimal()]
    return max(numbers, default=None)

# Extract the largest number across all recognized fragments.
# The loop variable is deliberately not called `text`, so the raw OCR result
# stored under that name is not overwritten.
per_fragment = (extract_largest_number(fragment) for fragment in recognized_text)
found_numbers = [number for number in per_fragment if number is not None]
largest_number = max(found_numbers, default=None)

# Print the largest number
if largest_number is not None:
    print(f"Largest number: {largest_number}")
else:
    print("No numbers found in the recognized text.")




Largest number: 120


### easyocr

In [6]:
import easyocr

# Build the English EasyOCR reader
reader = easyocr.Reader(['en'])

# Run OCR directly on the cropped image file
text = reader.readtext("/content/drive/MyDrive/image dataset/Image_5_crop.png")

# Keep only the text field (index 1) of every detection
recognized_text = [detection[1] for detection in text]

# Show what was recognized
print(recognized_text)



['2233', '43438', '638']


- image 1 - ['I', '226k', '167631', '6*40']
- image 2 - ['46', '~iooim', 'ODO', '5309', 'Km']
- image 3 - ['ge', 'Cre', '16262', 'KM'] correct
- image 4 - ['38620', 'TRIP', '89']
- image 5 - ['2233', '43438', '638']
after cropping the images


### keras_ocr

In [14]:
!pip install keras-ocr

Collecting keras-ocr
  Downloading keras_ocr-0.8.9-py3-none-any.whl (41 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/41.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting efficientnet==1.0.0 (from keras-ocr)
  Downloading efficientnet-1.0.0-py3-none-any.whl (17 kB)
Collecting essential_generators (from keras-ocr)
  Downloading essential_generators-1.0-py3-none-any.whl (9.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m67.2 MB/s[0m eta [36m0:00:00[0m
Collecting validators (from keras-ocr)
  Downloading validators-0.22.0-py3-none-any.whl (26 kB)
Collecting keras-applications<=1.0.8,>=1.0.7 (from efficientnet==1.0.0->keras-ocr)
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m5.8 MB/s[0m eta [36m

In [17]:


import keras_ocr

# Build the default keras-ocr pipeline (it looks up its detector and
# recognizer weight files when constructed)
pipeline = keras_ocr.pipeline.Pipeline()

# Read the image file and run the pipeline on a one-image batch
image_path = "/content/drive/MyDrive/image dataset/Image_1.png"
images = [keras_ocr.tools.read(image_path)]
predictions = pipeline.recognize(images)

# predictions[0] holds the results for the only image; each entry is unpacked
# as a pair and only its first element (the text) is kept.
# NOTE(review): the second element was previously named `confidence`;
# keras-ocr appears to return the word's bounding box there - confirm.
recognized_text = [word for (word, _) in predictions[0]]

# Show the extracted words
print(recognized_text)


Looking for /root/.keras-ocr/craft_mlt_25k.h5
Looking for /root/.keras-ocr/crnn_kurapan.h5
['8', 'avg', '100', '80', '120', 'e', 'ccsam', '60', '140', 'xiooorpm', '18', 'ises', 'kmih', 'f', '8', '8x0', 'un', 's', '0', '8', 'o', 'trip', 'o', 'ol', 'brake']


Not a good model for reading a car's odometer.

### pyocr