-
Notifications
You must be signed in to change notification settings - Fork 0
/
tess.py
62 lines (44 loc) · 1.93 KB
/
tess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# coding: utf-8
# In[10]:
import pyocr
from pyocr import builders
from PIL import Image
import os
import pandas as pd
# In[11]:
# Module-level OCR backend used by RectdataExtract: the first entry that
# pyocr reports as available. The [0] index runs at import time, so an empty
# result from get_available_tools() fails here rather than at first use.
tools = pyocr.get_available_tools()[0]
def RectdataExtract(imagepath, x1, y1, x2, y2):
    """Return the OCR'd words whose boxes lie inside a rectangle of an image.

    The image at *imagepath* is run through the module-level OCR tool
    (``tools``) with a ``WordBoxBuilder`` using the ``"eng"`` language. Every
    detected word is printed as ``"<text>"~<p00>~<p01>~<p10>~<p11>`` (double
    quotes stripped from the text), where ``pXY`` is
    ``word_box.position[X][Y]``. A word is kept when its text is non-empty and
    ``p00 >= x1``, ``p01 >= y1``, ``p10 <= x2`` and ``p11 <= y2``.

    Args:
        imagepath: Path of the image file to open with PIL.
        x1: Lower bound for ``position[0][0]``.
        y1: Lower bound for ``position[0][1]``.
        x2: Upper bound for ``position[1][0]``.
        y2: Upper bound for ``position[1][1]``.

    Returns:
        A pandas DataFrame with one row per kept word and five string
        columns: the word text followed by the four position values. The
        DataFrame is empty when no word qualifies.

    Raises:
        ValueError, TypeError: If a bound cannot be converted to ``float``.
        Errors raised while opening the image or running OCR propagate to
        the caller; the image is closed in either case.
    """
    language = "eng"

    # Convert the bounds once, up front: a bad bound is a caller error and
    # should surface immediately instead of silently yielding no rows.
    min_x, min_y, max_x, max_y = float(x1), float(y1), float(x2), float(y2)

    image = Image.open(imagepath)
    try:
        word_boxes = tools.image_to_string(
            image, lang=language, builder=pyocr.builders.WordBoxBuilder()
        )
    finally:
        # Release the file handle even when OCR fails.
        image.close()

    word_lst = []
    for word_box in sorted(word_boxes):
        try:
            text = str(word_box.content)
            start, end = word_box.position[0], word_box.position[1]
            coords = [start[0], start[1], end[0], end[1]]
            coord_strs = [str(value) for value in coords]
            print("~".join(['"' + text.replace('"', "") + '"'] + coord_strs))
            if not text:
                continue
            inside = (
                float(coords[0]) >= min_x
                and float(coords[1]) >= min_y
                and float(coords[2]) <= max_x
                and float(coords[3]) <= max_y
            )
        except (AttributeError, IndexError, TypeError, ValueError):
            # Best effort: skip a malformed or unconvertible word box rather
            # than abort the whole page.
            continue
        if inside:
            word_lst.append([text] + coord_strs)

    return pd.DataFrame(word_lst)
# In[6]:
# Notebook cell that displays the selected OCR tool; as a plain script this
# bare expression only evaluates the name.
tools
# In[12]:
# Sample run: extract the words inside one fixed rectangle of a sample page.
imagepath = "C:/Users/962884/Desktop/DB-Dennis/Sample UI/W-8IMY-page-002.jpg"
x1, y1 = 285, 216
x2, y2 = 954, 324
df_test = RectdataExtract(imagepath, x1, y1, x2, y2)
# Notebook display of the resulting DataFrame.
df_test