Model 1: microsoft/trocr-base-printed

In [None]:
import os
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import pandas as pd

# Initialize empty lists to store images and predicted labels
images = []
pred_labels = []

# Path to the data directory
data_path = r'data'

# Load the TrOCR processor and VisionEncoderDecoderModel from pretrained models
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")

# Loop through each image in the data directory
for img in os.listdir(data_path):
    
    # Append the image filename to the images list
    images.append(img)

    # Open the image file, convert it to RGB format
    image = Image.open(os.path.join(data_path,img)).convert("RGB")

    # Get the pixel values of the image using the TrOCR processor
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Generate text from the pixel values using the VisionEncoderDecoderModel
    generated_ids = model.generate(pixel_values)

    # Decode the generated IDs into text, skipping special tokens
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Append the generated text to the predicted labels list
    pred_labels.append(generated_text)

    # Print a dash to indicate progress (optional)
    print('-', end='')

# Create a DataFrame to store the images and predicted labels
df = pd.DataFrame({'images': images, 'predicted label': pred_labels})

# Display the first few rows of the DataFrame
df.head()


In [None]:
df.to_csv('predictionmp.csv', index=False)

In [32]:
def get_ready_data(real, pred):
    # Convert the 'real' and 'pred' arrays to string type and lowercase all the elements
    real = real.astype(str).str.lower()
    pred = pred.astype(str).str.lower()

    # Create empty lists to store the output data
    out_real = []
    out_pred = []

    # Iterate over pairs of elements from 'real' and 'pred' arrays
    for rl, prd in zip(real, pred):
        # Find the minimum length between the current 'rl' and 'prd' elements
        limit = min([len(rl), len(prd)])

        # Iterate from 0 to the minimum length
        for i in range(limit):
            # Append the i-th character from 'rl' and 'prd' to the respective output lists
            out_real.append(rl[i])
            out_pred.append(prd[i])

    # Return the output lists containing the processed data
    return out_real, out_pred


In [33]:
datamp = pd.read_excel('predictionmp.xlsx')
datamp.head()

Unnamed: 0,images,real label,predicted label
0,20230403_094217-6172f02f-75df-4e7c-b1dd-90271d...,QT869,QTYES
1,20230403_094217-96b2f11d-9f00-434c-a838-1fda41...,QT869,QTYES
2,20230403_094217-a33648aa-fda3-40e9-bdf1-f0015b...,QT869,QTYES
3,20230403_094217-ae807121-7d6a-43b2-8193-7e827b...,QT869,QTYED
4,20230403_094217-bbb7f34d-860e-4d1a-aa1d-ac1e06...,QT869,QT@6%


In [34]:
rlmp, prdmp = get_ready_data(datamp['real label'],datamp['predicted label'])

In [35]:
from sklearn.metrics import accuracy_score, recall_score, precision_score

accuracy_score(rlmp, prdmp)

0.6751237623762376

In [36]:
recall_score(rlmp, prdmp, average='macro')

  _warn_prf(average, modifier, msg_start, len(result))


0.6165237739743213

In [37]:
precision_score(rlmp, prdmp, average='macro')

0.5741521685721933

model 2: microsoft/trocr-base-handwritten

In [1]:
import os
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import pandas as pd

images = []  # List to store image filenames
pred_labels = []  # List to store predicted labels
data_path = r'data'  # Path to the directory containing images

# Initializing the TrOCRProcessor with the pretrained model
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")

# Initializing the VisionEncoderDecoderModel with the pretrained model
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Iterate through each image file in the data directory
for img in os.listdir(data_path):
    images.append(img)  # Store the filename in the images list

    # Open the image, convert it to RGB format
    image = Image.open(os.path.join(data_path,img)).convert("RGB")

    # Extract pixel values from the image using the TrOCRProcessor
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Generate text predictions using the VisionEncoderDecoderModel
    generated_ids = model.generate(pixel_values)

    # Decode the generated text predictions and remove special tokens
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    pred_labels.append(generated_text)  # Store the predicted label in the pred_labels list

    print('-',end='')  # Print a '-' to indicate progress

# Create a DataFrame to store the images and their predicted labels
df = pd.DataFrame({'images': images, 'predicted label': pred_labels})
df.head()  # Display the first few rows of the DataFrame


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Unnamed: 0,images,predicted label
0,data - 20230403_094217-6172f02f-75df-4e7c-b1dd...,turners.
1,data - 20230403_094217-96b2f11d-9f00-434c-a838...,votes.
2,data - 20230403_094217-a33648aa-fda3-40e9-bdf1...,tombers.
3,data - 20230403_094217-ae807121-7d6a-43b2-8193...,to raise
4,data - 20230403_094217-bbb7f34d-860e-4d1a-aa1d...,outcome


In [2]:
df.to_csv('predictionmh.csv', index=False)

In [40]:
# Read the CSV file 'predictionmh.csv' and store it in the 'datamh' DataFrame
datamh = pd.read_csv('predictionmh.csv')

# Remove the string 'data - ' from the 'images' column in the 'datamh' DataFrame
datamh.images = datamh.images.str.replace('data - ','')

# Filter the 'datamh' DataFrame to keep only the rows where the 'images' column values are present in the 'datamp.images' column
datamh = datamh[datamh.images.isin(datamp.images)].reset_index().iloc[:,1:]

# Add a new column named 'real label' to the 'datamh' DataFrame and assign the values from the 'real label' column of 'datamp'
datamh['real label'] = datamp['real label']

In [41]:
datamh

Unnamed: 0,images,predicted label,real label
0,20230403_094217-6172f02f-75df-4e7c-b1dd-90271d...,turners.,QT869
1,20230403_094217-96b2f11d-9f00-434c-a838-1fda41...,votes.,QT869
2,20230403_094217-a33648aa-fda3-40e9-bdf1-f0015b...,tombers.,QT869
3,20230403_094217-ae807121-7d6a-43b2-8193-7e827b...,to raise,QT869
4,20230403_094217-bbb7f34d-860e-4d1a-aa1d-ac1e06...,outcome,QT869
...,...,...,...
346,PTZ_motion_20230324_105255-VW873-e97d6fab-59ca...,vways,VW873
347,PTZ_motion_20230325_161738-33174aff-2fe4-4677-...,treaties.,VW873
348,PTZ_motion_20230325_161738-bd7c2bc4-29ea-421b-...,terrettation,XR417
349,PTZ_motion_20230325_161738-f139c26c-9e09-4b98-...,extempt,XR417


In [42]:
rlmh, prdmh = get_ready_data(datamh['real label'],datamh['predicted label'])

In [43]:
from sklearn.metrics import accuracy_score, recall_score, precision_score

accuracy_score(rlmh, prdmh)

0.12086513994910941

In [44]:
recall_score(rlmh, prdmh, average='macro')

  _warn_prf(average, modifier, msg_start, len(result))


0.13884082844883108

In [45]:
precision_score(rlmh, prdmh, average='macro')

0.18308674069258019

In [53]:
datamh.to_csv('predictionmh.csv', index=False)

model 3: pytesseract

In [5]:
import cv2
import pytesseract
import os
import pandas as pd

# Create empty lists to store image names and predicted labels
images = []
pred_labels = []

# Path to the data directory
data_path = r'dataS'

# Set the Tesseract OCR executable path
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Iterate through the files in the data directory
for img in os.listdir(data_path):
    # Add the image name to the list
    images.append(img)

    # Read the image using OpenCV
    img = cv2.imread(os.path.join(data_path, img))

    # Configure Tesseract OCR with custom options
    custom_config = r'--oem 3 --psm 6'

    # Perform OCR on the image and extract the predicted text
    pred_labels.append(pytesseract.image_to_string(img, config=custom_config).replace('\n', ''))

    # Print a '-' to indicate progress
    print('-', end='')

# Create a DataFrame to store the image names and predicted labels
df = pd.DataFrame({'images': images, 'predicted label': pred_labels})

# Display the first few rows of the DataFrame
df.head()

# Save the DataFrame to a CSV file named 'predictionSS.csv' without the index column
df.to_csv('predictionSS.csv', index=False)


---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [6]:
df.to_csv('predictionpt.csv', index=False)

In [46]:
datapt = pd.read_csv('predictionpt.csv')
datapt.images = datapt.images.str.replace('data - ','')
datapt = datapt[datapt.images.isin(datamp.images)].reset_index().iloc[:,1:]
datapt['real label'] = datamp['real label']

In [47]:
datapt

Unnamed: 0,images,predicted label,real label
0,20230403_094217-6172f02f-75df-4e7c-b1dd-90271d...,GTe6s-,QT869
1,20230403_094217-96b2f11d-9f00-434c-a838-1fda41...,WW 8o5,QT869
2,20230403_094217-a33648aa-fda3-40e9-bdf1-f0015b...,GTéEZ,QT869
3,20230403_094217-ae807121-7d6a-43b2-8193-7e827b...,Gia63,QT869
4,20230403_094217-bbb7f34d-860e-4d1a-aa1d-ac1e06...,Qi a6ez_,QT869
...,...,...,...
205,20230513_145820-ef4b4b7e-6c42-4cf1-8253-0a1fe5...,,KX608
206,PTZ_motion_20230325_161738-33174aff-2fe4-4677-...,,KX608
207,PTZ_motion_20230325_161738-bd7c2bc4-29ea-421b-...,ARGAT,UD768
208,PTZ_motion_20230325_161738-f139c26c-9e09-4b98-...,,UD768


In [48]:
rlpt, prdpt = get_ready_data(datapt['real label'],datapt['predicted label'])

In [49]:
from sklearn.metrics import accuracy_score, recall_score, precision_score

accuracy_score(rlpt, prdpt)

0.15022935779816513

In [50]:
recall_score(rlpt, prdpt, average='macro')

  _warn_prf(average, modifier, msg_start, len(result))


0.10481263360115774

In [51]:
precision_score(rlpt, prdpt, average='macro')

  _warn_prf(average, modifier, msg_start, len(result))


0.17696128349780343

In [54]:
datapt.to_csv('predictionpt.csv', index=False)