-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
135 lines (104 loc) · 4.86 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/bin/python3
# -*- Author: real0x0a1 (Ali) -*-
# -*- File: main.py -*-
# import libraries
import cv2
import pytesseract
from pytesseract import Output
from rich.progress import Progress
from rich.prompt import Prompt
from rich import print
def perform_ocr(image_path):
    """Run Tesseract OCR on the image at *image_path*.

    Returns:
        tuple: ``(recognized_text, annotated_image)`` where ``recognized_text``
        is the space-joined sequence of words Tesseract detected and
        ``annotated_image`` is the loaded image with a green bounding box and
        a blue label drawn around each detected word.

    Raises:
        FileNotFoundError: if the image cannot be read from *image_path*.
    """
    # cv2.imread returns None (instead of raising) on a missing/unreadable
    # path -- fail loudly here rather than crashing later inside cvtColor.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Preprocess: grayscale + inverse-binary Otsu threshold to sharpen text.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # OCR: English, default LSTM engine (--oem 3), uniform text block (--psm 6).
    custom_config = r'-l eng --oem 3 --psm 6'
    d = pytesseract.image_to_data(thresh, output_type=Output.DICT, config=custom_config)

    # Single pass: collect words and annotate their boxes.  Tesseract emits
    # empty-text entries for structural (non-word) boxes; skip them so the
    # returned text has no runs of spaces and no boxes are drawn over them.
    words = []
    for i in range(len(d['text'])):
        text = d['text'][i]
        if not text.strip():
            continue
        words.append(text)
        x, y, w, h = d['left'][i], d['top'][i], d['width'][i], d['height'][i]
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    return ' '.join(words), img
def save_output_text(text, output_file_name):
    """Write *text* to ``<output_file_name>.txt`` and confirm on stdout."""
    # The caller supplies the name without an extension; append it here.
    target = f"{output_file_name}.txt"
    with open(target, 'w') as out:
        out.write(text)
    print(f"[green]Output text saved to {output_file_name}.txt[/]")
def save_output_image(image, output_image_path):
    """Write the annotated OCR *image* out to *output_image_path*."""
    # OpenCV picks the encoder from the path's file extension.
    cv2.imwrite(output_image_path, image)
    confirmation = f"[green]Output image saved to {output_image_path}[/]"
    print(confirmation)
def main():
    """Interactive CLI entry point.

    Option 1 runs OCR on a single image file (with optional display and save
    of the annotated output plus the recognized text); option 2 runs a live
    OCR feed from the default camera until 'q' is pressed.
    """
    # Ask for OCR option
    ocr_option = Prompt.ask("[bold cyan]Choose OCR option (1 for image, 2 for live camera): [/]", choices=["1", "2"], default="1")

    if ocr_option == '1':
        # Ask for the image path and extension separately, then assemble.
        image_path = Prompt.ask("[bold cyan]Enter the path to the image file: [/]")
        file_extension = Prompt.ask("[bold cyan]Enter the file extension (e.g., png, jpg, etc.): [/]", default="png")
        full_image_path = f"{image_path}.{file_extension}"

        # Show loading progress while Tesseract runs.
        with Progress() as progress:
            task = progress.add_task("[cyan]Performing OCR...", total=1)
            recognized_text, img = perform_ocr(full_image_path)
            progress.update(task, advance=1)

        # Display the recognized text.
        print("[bold cyan]OCR Output:[/]\n")
        # NOTE(review): "[center]" is not a standard rich style tag -- confirm
        # it renders as intended rather than raising a markup error.
        print("[center]" + recognized_text.strip() + "[/center]")

        # Optionally display the annotated image in a window.
        show_image = Prompt.ask("[cyan]Do you want to display the output image?[/]", choices=["y", "n"], default="n").lower()
        if show_image == 'y':
            cv2.imshow('Output', img)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

        # Optionally save the annotated image.
        save_image = Prompt.ask("[cyan]Do you want to save the output image?[/]", choices=["y", "n"], default="n").lower()
        if save_image == 'y':
            output_image_path = Prompt.ask("[cyan]Enter the path to save the output image:[/]", default="output.png")
            save_output_image(img, output_image_path)

        # Always save the recognized text to a file.
        output_file_name = Prompt.ask("[cyan]Enter a name for the output file (without extension): [/]", default="output")
        save_output_text(recognized_text, output_file_name)

    elif ocr_option == '2':
        # Initialize video capture on the default camera.
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            # Fail gracefully instead of crashing on the first frame read.
            print("[red]Could not open camera.[/]")
            return

        # Hoisted out of the loop: the config never changes between frames.
        custom_config = r'-l eng --oem 3 --psm 6'

        # Live OCR loop: grab a frame, OCR it, annotate it, display it.
        while True:
            ret, frame = cap.read()
            # cap.read() returns ret=False when the camera disconnects or
            # yields no frame; stop instead of crashing inside cvtColor.
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            d = pytesseract.image_to_data(gray, output_type=Output.DICT, config=custom_config)

            # Draw bounding boxes and labels for every detected word.
            # (The original also accumulated the text here, but never used it.)
            for i in range(len(d['text'])):
                x, y, w, h = d['left'][i], d['top'][i], d['width'][i], d['height'][i]
                text = d['text'][i]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            # Display the resulting frame.
            cv2.imshow('Live OCR Feed', frame)

            # Break the loop if 'q' is pressed.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        # Release the capture and tear down all windows.
        cap.release()
        cv2.destroyAllWindows()
# Run the interactive CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()